162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright(c) 2020 Cornelis Networks, Inc. 462306a36Sopenharmony_ci * Copyright(c) 2015-2018 Intel Corporation. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#include <asm/page.h> 762306a36Sopenharmony_ci#include <linux/string.h> 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#include "mmu_rb.h" 1062306a36Sopenharmony_ci#include "user_exp_rcv.h" 1162306a36Sopenharmony_ci#include "trace.h" 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_cistatic void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, 1462306a36Sopenharmony_ci struct exp_tid_set *set, 1562306a36Sopenharmony_ci struct hfi1_filedata *fd); 1662306a36Sopenharmony_cistatic u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages); 1762306a36Sopenharmony_cistatic int set_rcvarray_entry(struct hfi1_filedata *fd, 1862306a36Sopenharmony_ci struct tid_user_buf *tbuf, 1962306a36Sopenharmony_ci u32 rcventry, struct tid_group *grp, 2062306a36Sopenharmony_ci u16 pageidx, unsigned int npages); 2162306a36Sopenharmony_cistatic void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, 2262306a36Sopenharmony_ci struct tid_rb_node *tnode); 2362306a36Sopenharmony_cistatic bool tid_rb_invalidate(struct mmu_interval_notifier *mni, 2462306a36Sopenharmony_ci const struct mmu_notifier_range *range, 2562306a36Sopenharmony_ci unsigned long cur_seq); 2662306a36Sopenharmony_cistatic bool tid_cover_invalidate(struct mmu_interval_notifier *mni, 2762306a36Sopenharmony_ci const struct mmu_notifier_range *range, 2862306a36Sopenharmony_ci unsigned long cur_seq); 2962306a36Sopenharmony_cistatic int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, 3062306a36Sopenharmony_ci struct tid_group *grp, u16 count, 3162306a36Sopenharmony_ci u32 *tidlist, unsigned int *tididx, 3262306a36Sopenharmony_ci unsigned int *pmapped); 3362306a36Sopenharmony_cistatic int unprogram_rcvarray(struct 
hfi1_filedata *fd, u32 tidinfo); 3462306a36Sopenharmony_cistatic void __clear_tid_node(struct hfi1_filedata *fd, 3562306a36Sopenharmony_ci struct tid_rb_node *node); 3662306a36Sopenharmony_cistatic void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_cistatic const struct mmu_interval_notifier_ops tid_mn_ops = { 3962306a36Sopenharmony_ci .invalidate = tid_rb_invalidate, 4062306a36Sopenharmony_ci}; 4162306a36Sopenharmony_cistatic const struct mmu_interval_notifier_ops tid_cover_ops = { 4262306a36Sopenharmony_ci .invalidate = tid_cover_invalidate, 4362306a36Sopenharmony_ci}; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci/* 4662306a36Sopenharmony_ci * Initialize context and file private data needed for Expected 4762306a36Sopenharmony_ci * receive caching. This needs to be done after the context has 4862306a36Sopenharmony_ci * been configured with the eager/expected RcvEntry counts. 4962306a36Sopenharmony_ci */ 5062306a36Sopenharmony_ciint hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, 5162306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt) 5262306a36Sopenharmony_ci{ 5362306a36Sopenharmony_ci int ret = 0; 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci fd->entry_to_rb = kcalloc(uctxt->expected_count, 5662306a36Sopenharmony_ci sizeof(struct rb_node *), 5762306a36Sopenharmony_ci GFP_KERNEL); 5862306a36Sopenharmony_ci if (!fd->entry_to_rb) 5962306a36Sopenharmony_ci return -ENOMEM; 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) { 6262306a36Sopenharmony_ci fd->invalid_tid_idx = 0; 6362306a36Sopenharmony_ci fd->invalid_tids = kcalloc(uctxt->expected_count, 6462306a36Sopenharmony_ci sizeof(*fd->invalid_tids), 6562306a36Sopenharmony_ci GFP_KERNEL); 6662306a36Sopenharmony_ci if (!fd->invalid_tids) { 6762306a36Sopenharmony_ci kfree(fd->entry_to_rb); 6862306a36Sopenharmony_ci fd->entry_to_rb = NULL; 6962306a36Sopenharmony_ci return -ENOMEM; 
7062306a36Sopenharmony_ci } 7162306a36Sopenharmony_ci fd->use_mn = true; 7262306a36Sopenharmony_ci } 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci /* 7562306a36Sopenharmony_ci * PSM does not have a good way to separate, count, and 7662306a36Sopenharmony_ci * effectively enforce a limit on RcvArray entries used by 7762306a36Sopenharmony_ci * subctxts (when context sharing is used) when TID caching 7862306a36Sopenharmony_ci * is enabled. To help with that, we calculate a per-process 7962306a36Sopenharmony_ci * RcvArray entry share and enforce that. 8062306a36Sopenharmony_ci * If TID caching is not in use, PSM deals with usage on its 8162306a36Sopenharmony_ci * own. In that case, we allow any subctxt to take all of the 8262306a36Sopenharmony_ci * entries. 8362306a36Sopenharmony_ci * 8462306a36Sopenharmony_ci * Make sure that we set the tid counts only after successful 8562306a36Sopenharmony_ci * init. 8662306a36Sopenharmony_ci */ 8762306a36Sopenharmony_ci spin_lock(&fd->tid_lock); 8862306a36Sopenharmony_ci if (uctxt->subctxt_cnt && fd->use_mn) { 8962306a36Sopenharmony_ci u16 remainder; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; 9262306a36Sopenharmony_ci remainder = uctxt->expected_count % uctxt->subctxt_cnt; 9362306a36Sopenharmony_ci if (remainder && fd->subctxt < remainder) 9462306a36Sopenharmony_ci fd->tid_limit++; 9562306a36Sopenharmony_ci } else { 9662306a36Sopenharmony_ci fd->tid_limit = uctxt->expected_count; 9762306a36Sopenharmony_ci } 9862306a36Sopenharmony_ci spin_unlock(&fd->tid_lock); 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci return ret; 10162306a36Sopenharmony_ci} 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_civoid hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) 10462306a36Sopenharmony_ci{ 10562306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci mutex_lock(&uctxt->exp_mutex); 
10862306a36Sopenharmony_ci if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) 10962306a36Sopenharmony_ci unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); 11062306a36Sopenharmony_ci if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) 11162306a36Sopenharmony_ci unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); 11262306a36Sopenharmony_ci mutex_unlock(&uctxt->exp_mutex); 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci kfree(fd->invalid_tids); 11562306a36Sopenharmony_ci fd->invalid_tids = NULL; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci kfree(fd->entry_to_rb); 11862306a36Sopenharmony_ci fd->entry_to_rb = NULL; 11962306a36Sopenharmony_ci} 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci/* 12262306a36Sopenharmony_ci * Release pinned receive buffer pages. 12362306a36Sopenharmony_ci * 12462306a36Sopenharmony_ci * @mapped: true if the pages have been DMA mapped. false otherwise. 12562306a36Sopenharmony_ci * @idx: Index of the first page to unpin. 12662306a36Sopenharmony_ci * @npages: No of pages to unpin. 12762306a36Sopenharmony_ci * 12862306a36Sopenharmony_ci * If the pages have been DMA mapped (indicated by mapped parameter), their 12962306a36Sopenharmony_ci * info will be passed via a struct tid_rb_node. If they haven't been mapped, 13062306a36Sopenharmony_ci * their info will be passed via a struct tid_user_buf. 
13162306a36Sopenharmony_ci */ 13262306a36Sopenharmony_cistatic void unpin_rcv_pages(struct hfi1_filedata *fd, 13362306a36Sopenharmony_ci struct tid_user_buf *tidbuf, 13462306a36Sopenharmony_ci struct tid_rb_node *node, 13562306a36Sopenharmony_ci unsigned int idx, 13662306a36Sopenharmony_ci unsigned int npages, 13762306a36Sopenharmony_ci bool mapped) 13862306a36Sopenharmony_ci{ 13962306a36Sopenharmony_ci struct page **pages; 14062306a36Sopenharmony_ci struct hfi1_devdata *dd = fd->uctxt->dd; 14162306a36Sopenharmony_ci struct mm_struct *mm; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci if (mapped) { 14462306a36Sopenharmony_ci dma_unmap_single(&dd->pcidev->dev, node->dma_addr, 14562306a36Sopenharmony_ci node->npages * PAGE_SIZE, DMA_FROM_DEVICE); 14662306a36Sopenharmony_ci pages = &node->pages[idx]; 14762306a36Sopenharmony_ci mm = mm_from_tid_node(node); 14862306a36Sopenharmony_ci } else { 14962306a36Sopenharmony_ci pages = &tidbuf->pages[idx]; 15062306a36Sopenharmony_ci mm = current->mm; 15162306a36Sopenharmony_ci } 15262306a36Sopenharmony_ci hfi1_release_user_pages(mm, pages, npages, mapped); 15362306a36Sopenharmony_ci fd->tid_n_pinned -= npages; 15462306a36Sopenharmony_ci} 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci/* 15762306a36Sopenharmony_ci * Pin receive buffer pages. 
15862306a36Sopenharmony_ci */ 15962306a36Sopenharmony_cistatic int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) 16062306a36Sopenharmony_ci{ 16162306a36Sopenharmony_ci int pinned; 16262306a36Sopenharmony_ci unsigned int npages = tidbuf->npages; 16362306a36Sopenharmony_ci unsigned long vaddr = tidbuf->vaddr; 16462306a36Sopenharmony_ci struct page **pages = NULL; 16562306a36Sopenharmony_ci struct hfi1_devdata *dd = fd->uctxt->dd; 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci if (npages > fd->uctxt->expected_count) { 16862306a36Sopenharmony_ci dd_dev_err(dd, "Expected buffer too big\n"); 16962306a36Sopenharmony_ci return -EINVAL; 17062306a36Sopenharmony_ci } 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci /* Allocate the array of struct page pointers needed for pinning */ 17362306a36Sopenharmony_ci pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); 17462306a36Sopenharmony_ci if (!pages) 17562306a36Sopenharmony_ci return -ENOMEM; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci /* 17862306a36Sopenharmony_ci * Pin all the pages of the user buffer. If we can't pin all the 17962306a36Sopenharmony_ci * pages, accept the amount pinned so far and program only that. 18062306a36Sopenharmony_ci * User space knows how to deal with partially programmed buffers. 
18162306a36Sopenharmony_ci */ 18262306a36Sopenharmony_ci if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) { 18362306a36Sopenharmony_ci kfree(pages); 18462306a36Sopenharmony_ci return -ENOMEM; 18562306a36Sopenharmony_ci } 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages); 18862306a36Sopenharmony_ci if (pinned <= 0) { 18962306a36Sopenharmony_ci kfree(pages); 19062306a36Sopenharmony_ci return pinned; 19162306a36Sopenharmony_ci } 19262306a36Sopenharmony_ci tidbuf->pages = pages; 19362306a36Sopenharmony_ci fd->tid_n_pinned += pinned; 19462306a36Sopenharmony_ci return pinned; 19562306a36Sopenharmony_ci} 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci/* 19862306a36Sopenharmony_ci * RcvArray entry allocation for Expected Receives is done by the 19962306a36Sopenharmony_ci * following algorithm: 20062306a36Sopenharmony_ci * 20162306a36Sopenharmony_ci * The context keeps 3 lists of groups of RcvArray entries: 20262306a36Sopenharmony_ci * 1. List of empty groups - tid_group_list 20362306a36Sopenharmony_ci * This list is created during user context creation and 20462306a36Sopenharmony_ci * contains elements which describe sets (of 8) of empty 20562306a36Sopenharmony_ci * RcvArray entries. 20662306a36Sopenharmony_ci * 2. List of partially used groups - tid_used_list 20762306a36Sopenharmony_ci * This list contains sets of RcvArray entries which are 20862306a36Sopenharmony_ci * not completely used up. Another mapping request could 20962306a36Sopenharmony_ci * use some of all of the remaining entries. 21062306a36Sopenharmony_ci * 3. List of full groups - tid_full_list 21162306a36Sopenharmony_ci * This is the list where sets that are completely used 21262306a36Sopenharmony_ci * up go. 
 *
 * An attempt to optimize the usage of RcvArray entries is
 * made by finding all sets of physically contiguous pages in a
 * user's buffer.
 * These physically contiguous sets are further split into
 * sizes supported by the receive engine of the HFI. The
 * resulting sets of pages are stored in struct tid_pageset,
 * which describes the sets as:
 *    * .count - number of pages in this set
 *    * .idx   - starting index into struct page ** array
 *               of this set
 *
 * From this point on, the algorithm deals with the page sets
 * described above. The number of pagesets is divided by the
 * RcvArray group size to produce the number of full groups
 * needed.
 *
 * Groups from the 3 lists are manipulated using the following
 * rules:
 *   1. For each set of 8 pagesets, a complete group from
 *      tid_group_list is taken, programmed, and moved to
 *      the tid_full_list list.
 *   2. For all remaining pagesets:
 *      2.1 If the tid_used_list is empty and the tid_group_list
 *          is empty, stop processing pageset and return only
 *          what has been programmed up to this point.
 *      2.2 If the tid_used_list is empty and the tid_group_list
 *          is not empty, move a group from tid_group_list to
 *          tid_used_list.
24262306a36Sopenharmony_ci * 2.3 For each group is tid_used_group, program as much as 24362306a36Sopenharmony_ci * can fit into the group. If the group becomes fully 24462306a36Sopenharmony_ci * used, move it to tid_full_list. 24562306a36Sopenharmony_ci */ 24662306a36Sopenharmony_ciint hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, 24762306a36Sopenharmony_ci struct hfi1_tid_info *tinfo) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci int ret = 0, need_group = 0, pinned; 25062306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 25162306a36Sopenharmony_ci struct hfi1_devdata *dd = uctxt->dd; 25262306a36Sopenharmony_ci unsigned int ngroups, pageset_count, 25362306a36Sopenharmony_ci tididx = 0, mapped, mapped_pages = 0; 25462306a36Sopenharmony_ci u32 *tidlist = NULL; 25562306a36Sopenharmony_ci struct tid_user_buf *tidbuf; 25662306a36Sopenharmony_ci unsigned long mmu_seq = 0; 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci if (!PAGE_ALIGNED(tinfo->vaddr)) 25962306a36Sopenharmony_ci return -EINVAL; 26062306a36Sopenharmony_ci if (tinfo->length == 0) 26162306a36Sopenharmony_ci return -EINVAL; 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); 26462306a36Sopenharmony_ci if (!tidbuf) 26562306a36Sopenharmony_ci return -ENOMEM; 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci mutex_init(&tidbuf->cover_mutex); 26862306a36Sopenharmony_ci tidbuf->vaddr = tinfo->vaddr; 26962306a36Sopenharmony_ci tidbuf->length = tinfo->length; 27062306a36Sopenharmony_ci tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length); 27162306a36Sopenharmony_ci tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), 27262306a36Sopenharmony_ci GFP_KERNEL); 27362306a36Sopenharmony_ci if (!tidbuf->psets) { 27462306a36Sopenharmony_ci ret = -ENOMEM; 27562306a36Sopenharmony_ci goto fail_release_mem; 27662306a36Sopenharmony_ci } 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci if (fd->use_mn) { 
27962306a36Sopenharmony_ci ret = mmu_interval_notifier_insert( 28062306a36Sopenharmony_ci &tidbuf->notifier, current->mm, 28162306a36Sopenharmony_ci tidbuf->vaddr, tidbuf->npages * PAGE_SIZE, 28262306a36Sopenharmony_ci &tid_cover_ops); 28362306a36Sopenharmony_ci if (ret) 28462306a36Sopenharmony_ci goto fail_release_mem; 28562306a36Sopenharmony_ci mmu_seq = mmu_interval_read_begin(&tidbuf->notifier); 28662306a36Sopenharmony_ci } 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci pinned = pin_rcv_pages(fd, tidbuf); 28962306a36Sopenharmony_ci if (pinned <= 0) { 29062306a36Sopenharmony_ci ret = (pinned < 0) ? pinned : -ENOSPC; 29162306a36Sopenharmony_ci goto fail_unpin; 29262306a36Sopenharmony_ci } 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci /* Find sets of physically contiguous pages */ 29562306a36Sopenharmony_ci tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci /* Reserve the number of expected tids to be used. */ 29862306a36Sopenharmony_ci spin_lock(&fd->tid_lock); 29962306a36Sopenharmony_ci if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) 30062306a36Sopenharmony_ci pageset_count = fd->tid_limit - fd->tid_used; 30162306a36Sopenharmony_ci else 30262306a36Sopenharmony_ci pageset_count = tidbuf->n_psets; 30362306a36Sopenharmony_ci fd->tid_used += pageset_count; 30462306a36Sopenharmony_ci spin_unlock(&fd->tid_lock); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci if (!pageset_count) { 30762306a36Sopenharmony_ci ret = -ENOSPC; 30862306a36Sopenharmony_ci goto fail_unreserve; 30962306a36Sopenharmony_ci } 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci ngroups = pageset_count / dd->rcv_entries.group_size; 31262306a36Sopenharmony_ci tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); 31362306a36Sopenharmony_ci if (!tidlist) { 31462306a36Sopenharmony_ci ret = -ENOMEM; 31562306a36Sopenharmony_ci goto fail_unreserve; 31662306a36Sopenharmony_ci } 31762306a36Sopenharmony_ci 
31862306a36Sopenharmony_ci tididx = 0; 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci /* 32162306a36Sopenharmony_ci * From this point on, we are going to be using shared (between master 32262306a36Sopenharmony_ci * and subcontexts) context resources. We need to take the lock. 32362306a36Sopenharmony_ci */ 32462306a36Sopenharmony_ci mutex_lock(&uctxt->exp_mutex); 32562306a36Sopenharmony_ci /* 32662306a36Sopenharmony_ci * The first step is to program the RcvArray entries which are complete 32762306a36Sopenharmony_ci * groups. 32862306a36Sopenharmony_ci */ 32962306a36Sopenharmony_ci while (ngroups && uctxt->tid_group_list.count) { 33062306a36Sopenharmony_ci struct tid_group *grp = 33162306a36Sopenharmony_ci tid_group_pop(&uctxt->tid_group_list); 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci ret = program_rcvarray(fd, tidbuf, grp, 33462306a36Sopenharmony_ci dd->rcv_entries.group_size, 33562306a36Sopenharmony_ci tidlist, &tididx, &mapped); 33662306a36Sopenharmony_ci /* 33762306a36Sopenharmony_ci * If there was a failure to program the RcvArray 33862306a36Sopenharmony_ci * entries for the entire group, reset the grp fields 33962306a36Sopenharmony_ci * and add the grp back to the free group list. 
34062306a36Sopenharmony_ci */ 34162306a36Sopenharmony_ci if (ret <= 0) { 34262306a36Sopenharmony_ci tid_group_add_tail(grp, &uctxt->tid_group_list); 34362306a36Sopenharmony_ci hfi1_cdbg(TID, 34462306a36Sopenharmony_ci "Failed to program RcvArray group %d", ret); 34562306a36Sopenharmony_ci goto unlock; 34662306a36Sopenharmony_ci } 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci tid_group_add_tail(grp, &uctxt->tid_full_list); 34962306a36Sopenharmony_ci ngroups--; 35062306a36Sopenharmony_ci mapped_pages += mapped; 35162306a36Sopenharmony_ci } 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci while (tididx < pageset_count) { 35462306a36Sopenharmony_ci struct tid_group *grp, *ptr; 35562306a36Sopenharmony_ci /* 35662306a36Sopenharmony_ci * If we don't have any partially used tid groups, check 35762306a36Sopenharmony_ci * if we have empty groups. If so, take one from there and 35862306a36Sopenharmony_ci * put in the partially used list. 35962306a36Sopenharmony_ci */ 36062306a36Sopenharmony_ci if (!uctxt->tid_used_list.count || need_group) { 36162306a36Sopenharmony_ci if (!uctxt->tid_group_list.count) 36262306a36Sopenharmony_ci goto unlock; 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci grp = tid_group_pop(&uctxt->tid_group_list); 36562306a36Sopenharmony_ci tid_group_add_tail(grp, &uctxt->tid_used_list); 36662306a36Sopenharmony_ci need_group = 0; 36762306a36Sopenharmony_ci } 36862306a36Sopenharmony_ci /* 36962306a36Sopenharmony_ci * There is an optimization opportunity here - instead of 37062306a36Sopenharmony_ci * fitting as many page sets as we can, check for a group 37162306a36Sopenharmony_ci * later on in the list that could fit all of them. 
37262306a36Sopenharmony_ci */ 37362306a36Sopenharmony_ci list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list, 37462306a36Sopenharmony_ci list) { 37562306a36Sopenharmony_ci unsigned use = min_t(unsigned, pageset_count - tididx, 37662306a36Sopenharmony_ci grp->size - grp->used); 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci ret = program_rcvarray(fd, tidbuf, grp, 37962306a36Sopenharmony_ci use, tidlist, 38062306a36Sopenharmony_ci &tididx, &mapped); 38162306a36Sopenharmony_ci if (ret < 0) { 38262306a36Sopenharmony_ci hfi1_cdbg(TID, 38362306a36Sopenharmony_ci "Failed to program RcvArray entries %d", 38462306a36Sopenharmony_ci ret); 38562306a36Sopenharmony_ci goto unlock; 38662306a36Sopenharmony_ci } else if (ret > 0) { 38762306a36Sopenharmony_ci if (grp->used == grp->size) 38862306a36Sopenharmony_ci tid_group_move(grp, 38962306a36Sopenharmony_ci &uctxt->tid_used_list, 39062306a36Sopenharmony_ci &uctxt->tid_full_list); 39162306a36Sopenharmony_ci mapped_pages += mapped; 39262306a36Sopenharmony_ci need_group = 0; 39362306a36Sopenharmony_ci /* Check if we are done so we break out early */ 39462306a36Sopenharmony_ci if (tididx >= pageset_count) 39562306a36Sopenharmony_ci break; 39662306a36Sopenharmony_ci } else if (WARN_ON(ret == 0)) { 39762306a36Sopenharmony_ci /* 39862306a36Sopenharmony_ci * If ret is 0, we did not program any entries 39962306a36Sopenharmony_ci * into this group, which can only happen if 40062306a36Sopenharmony_ci * we've screwed up the accounting somewhere. 40162306a36Sopenharmony_ci * Warn and try to continue. 
40262306a36Sopenharmony_ci */ 40362306a36Sopenharmony_ci need_group = 1; 40462306a36Sopenharmony_ci } 40562306a36Sopenharmony_ci } 40662306a36Sopenharmony_ci } 40762306a36Sopenharmony_ciunlock: 40862306a36Sopenharmony_ci mutex_unlock(&uctxt->exp_mutex); 40962306a36Sopenharmony_ci hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, 41062306a36Sopenharmony_ci mapped_pages, ret); 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci /* fail if nothing was programmed, set error if none provided */ 41362306a36Sopenharmony_ci if (tididx == 0) { 41462306a36Sopenharmony_ci if (ret >= 0) 41562306a36Sopenharmony_ci ret = -ENOSPC; 41662306a36Sopenharmony_ci goto fail_unreserve; 41762306a36Sopenharmony_ci } 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci /* adjust reserved tid_used to actual count */ 42062306a36Sopenharmony_ci spin_lock(&fd->tid_lock); 42162306a36Sopenharmony_ci fd->tid_used -= pageset_count - tididx; 42262306a36Sopenharmony_ci spin_unlock(&fd->tid_lock); 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci /* unpin all pages not covered by a TID */ 42562306a36Sopenharmony_ci unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, 42662306a36Sopenharmony_ci false); 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci if (fd->use_mn) { 42962306a36Sopenharmony_ci /* check for an invalidate during setup */ 43062306a36Sopenharmony_ci bool fail = false; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci mutex_lock(&tidbuf->cover_mutex); 43362306a36Sopenharmony_ci fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq); 43462306a36Sopenharmony_ci mutex_unlock(&tidbuf->cover_mutex); 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci if (fail) { 43762306a36Sopenharmony_ci ret = -EBUSY; 43862306a36Sopenharmony_ci goto fail_unprogram; 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci } 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci tinfo->tidcnt = tididx; 44362306a36Sopenharmony_ci tinfo->length = mapped_pages * 
PAGE_SIZE; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), 44662306a36Sopenharmony_ci tidlist, sizeof(tidlist[0]) * tididx)) { 44762306a36Sopenharmony_ci ret = -EFAULT; 44862306a36Sopenharmony_ci goto fail_unprogram; 44962306a36Sopenharmony_ci } 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci if (fd->use_mn) 45262306a36Sopenharmony_ci mmu_interval_notifier_remove(&tidbuf->notifier); 45362306a36Sopenharmony_ci kfree(tidbuf->pages); 45462306a36Sopenharmony_ci kfree(tidbuf->psets); 45562306a36Sopenharmony_ci kfree(tidbuf); 45662306a36Sopenharmony_ci kfree(tidlist); 45762306a36Sopenharmony_ci return 0; 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_cifail_unprogram: 46062306a36Sopenharmony_ci /* unprogram, unmap, and unpin all allocated TIDs */ 46162306a36Sopenharmony_ci tinfo->tidlist = (unsigned long)tidlist; 46262306a36Sopenharmony_ci hfi1_user_exp_rcv_clear(fd, tinfo); 46362306a36Sopenharmony_ci tinfo->tidlist = 0; 46462306a36Sopenharmony_ci pinned = 0; /* nothing left to unpin */ 46562306a36Sopenharmony_ci pageset_count = 0; /* nothing left reserved */ 46662306a36Sopenharmony_cifail_unreserve: 46762306a36Sopenharmony_ci spin_lock(&fd->tid_lock); 46862306a36Sopenharmony_ci fd->tid_used -= pageset_count; 46962306a36Sopenharmony_ci spin_unlock(&fd->tid_lock); 47062306a36Sopenharmony_cifail_unpin: 47162306a36Sopenharmony_ci if (fd->use_mn) 47262306a36Sopenharmony_ci mmu_interval_notifier_remove(&tidbuf->notifier); 47362306a36Sopenharmony_ci if (pinned > 0) 47462306a36Sopenharmony_ci unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); 47562306a36Sopenharmony_cifail_release_mem: 47662306a36Sopenharmony_ci kfree(tidbuf->pages); 47762306a36Sopenharmony_ci kfree(tidbuf->psets); 47862306a36Sopenharmony_ci kfree(tidbuf); 47962306a36Sopenharmony_ci kfree(tidlist); 48062306a36Sopenharmony_ci return ret; 48162306a36Sopenharmony_ci} 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ciint 
hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, 48462306a36Sopenharmony_ci struct hfi1_tid_info *tinfo) 48562306a36Sopenharmony_ci{ 48662306a36Sopenharmony_ci int ret = 0; 48762306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 48862306a36Sopenharmony_ci u32 *tidinfo; 48962306a36Sopenharmony_ci unsigned tididx; 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci if (unlikely(tinfo->tidcnt > fd->tid_used)) 49262306a36Sopenharmony_ci return -EINVAL; 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist), 49562306a36Sopenharmony_ci sizeof(tidinfo[0]) * tinfo->tidcnt); 49662306a36Sopenharmony_ci if (IS_ERR(tidinfo)) 49762306a36Sopenharmony_ci return PTR_ERR(tidinfo); 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci mutex_lock(&uctxt->exp_mutex); 50062306a36Sopenharmony_ci for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { 50162306a36Sopenharmony_ci ret = unprogram_rcvarray(fd, tidinfo[tididx]); 50262306a36Sopenharmony_ci if (ret) { 50362306a36Sopenharmony_ci hfi1_cdbg(TID, "Failed to unprogram rcv array %d", 50462306a36Sopenharmony_ci ret); 50562306a36Sopenharmony_ci break; 50662306a36Sopenharmony_ci } 50762306a36Sopenharmony_ci } 50862306a36Sopenharmony_ci spin_lock(&fd->tid_lock); 50962306a36Sopenharmony_ci fd->tid_used -= tididx; 51062306a36Sopenharmony_ci spin_unlock(&fd->tid_lock); 51162306a36Sopenharmony_ci tinfo->tidcnt = tididx; 51262306a36Sopenharmony_ci mutex_unlock(&uctxt->exp_mutex); 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci kfree(tidinfo); 51562306a36Sopenharmony_ci return ret; 51662306a36Sopenharmony_ci} 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ciint hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, 51962306a36Sopenharmony_ci struct hfi1_tid_info *tinfo) 52062306a36Sopenharmony_ci{ 52162306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 52262306a36Sopenharmony_ci unsigned long *ev = uctxt->dd->events + 52362306a36Sopenharmony_ci 
(uctxt_offset(uctxt) + fd->subctxt); 52462306a36Sopenharmony_ci u32 *array; 52562306a36Sopenharmony_ci int ret = 0; 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci /* 52862306a36Sopenharmony_ci * copy_to_user() can sleep, which will leave the invalid_lock 52962306a36Sopenharmony_ci * locked and cause the MMU notifier to be blocked on the lock 53062306a36Sopenharmony_ci * for a long time. 53162306a36Sopenharmony_ci * Copy the data to a local buffer so we can release the lock. 53262306a36Sopenharmony_ci */ 53362306a36Sopenharmony_ci array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL); 53462306a36Sopenharmony_ci if (!array) 53562306a36Sopenharmony_ci return -EFAULT; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci spin_lock(&fd->invalid_lock); 53862306a36Sopenharmony_ci if (fd->invalid_tid_idx) { 53962306a36Sopenharmony_ci memcpy(array, fd->invalid_tids, sizeof(*array) * 54062306a36Sopenharmony_ci fd->invalid_tid_idx); 54162306a36Sopenharmony_ci memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) * 54262306a36Sopenharmony_ci fd->invalid_tid_idx); 54362306a36Sopenharmony_ci tinfo->tidcnt = fd->invalid_tid_idx; 54462306a36Sopenharmony_ci fd->invalid_tid_idx = 0; 54562306a36Sopenharmony_ci /* 54662306a36Sopenharmony_ci * Reset the user flag while still holding the lock. 54762306a36Sopenharmony_ci * Otherwise, PSM can miss events. 
54862306a36Sopenharmony_ci */ 54962306a36Sopenharmony_ci clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); 55062306a36Sopenharmony_ci } else { 55162306a36Sopenharmony_ci tinfo->tidcnt = 0; 55262306a36Sopenharmony_ci } 55362306a36Sopenharmony_ci spin_unlock(&fd->invalid_lock); 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci if (tinfo->tidcnt) { 55662306a36Sopenharmony_ci if (copy_to_user((void __user *)tinfo->tidlist, 55762306a36Sopenharmony_ci array, sizeof(*array) * tinfo->tidcnt)) 55862306a36Sopenharmony_ci ret = -EFAULT; 55962306a36Sopenharmony_ci } 56062306a36Sopenharmony_ci kfree(array); 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci return ret; 56362306a36Sopenharmony_ci} 56462306a36Sopenharmony_ci

/**
 * find_phys_blocks() - describe a user buffer as a list of page sets
 * @tidbuf: user buffer descriptor; @tidbuf->pages is read and
 *	    @tidbuf->psets is filled in
 * @npages: number of entries of @tidbuf->pages to examine
 *
 * Scans the page array for runs of pages whose page frame numbers are
 * consecutive and stores every run in @tidbuf->psets as one or more
 * (idx, count) pairs.
 *
 * Return: number of page sets written to @tidbuf->psets; 0 when @npages is 0.
 */
static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages)
{
	struct tid_pageset *psets = tidbuf->psets;
	struct page **pages = tidbuf->pages;
	unsigned long expect_pfn, cur_pfn;
	unsigned int run_start = 0, run_len = 1, nsets = 0, i;

	if (!npages)
		return 0;

	/*
	 * Look for sets of physically contiguous pages in the user buffer.
	 * This lets Expected RcvArray entries be used more efficiently by
	 * programming the bigger supported sizes.
	 */
	expect_pfn = page_to_pfn(pages[0]);
	for (i = 1; i <= npages; i++) {
		/*
		 * The loop runs one step past the last page; that extra step
		 * compares against 0 instead of a real pfn, which is how the
		 * final run gets handed to the splitting code below.
		 */
		if (i < npages)
			cur_pfn = page_to_pfn(pages[i]);
		else
			cur_pfn = 0;

		/* Sequential pfn: the page extends the current run. */
		expect_pfn++;
		if (cur_pfn == expect_pfn) {
			run_len++;
			continue;
		}

		/*
		 * The run of physically contiguous pages has ended. Break it
		 * down into sizes supported by the HW. There are two main
		 * constraints:
		 *     1. The max buffer size is MAX_EXPECTED_BUFFER.
		 *        If the remaining run is bigger than that, emit
		 *        only a MAX_EXPECTED_BUFFER chunk.
		 *     2. The buffer size has to be a power of two. If
		 *        it is not, round down to the closest power of
		 *        2 and emit that size.
		 */
		while (run_len) {
			u32 bytes = run_len * PAGE_SIZE;
			int chunk;

			if (bytes > MAX_EXPECTED_BUFFER)
				chunk = MAX_EXPECTED_BUFFER >> PAGE_SHIFT;
			else if (!is_power_of_2(bytes))
				chunk = rounddown_pow_of_two(bytes) >>
					PAGE_SHIFT;
			else
				chunk = run_len;

			psets[nsets].idx = run_start;
			psets[nsets].count = chunk;
			nsets++;

			run_start += chunk;
			run_len -= chunk;
		}

		/* Start a new run at the page that broke the sequence. */
		run_start = i;
		run_len = 1;
		expect_pfn = cur_pfn;
	}

	return nsets;
}

/**
 * program_rcvarray() - program an RcvArray group with receive buffers
 * @fd: filedata pointer
 * @tbuf: pointer to struct tid_user_buf that has the user buffer starting
 *	  virtual address, buffer length, page pointers, pagesets (array of
 *	  struct tid_pageset holding information on physically contiguous
 *	  chunks from the user buffer), and other fields.
 * @grp: RcvArray group
 * @count: number of struct tid_pageset's to program
 * @tidlist: the array of u32 elements where the information about the
 *	     programmed RcvArray entries is to be encoded.
 * @tididx: starting offset into tidlist
 * @pmapped: (output parameter) number of pages programmed into the RcvArray
 *	     entries.
 *
 * This function will program up to 'count' number of RcvArray entries from the
 * group 'grp'. To make best use of write-combining writes, the function will
 * perform writes to the unused RcvArray entries which will be ignored by the
 * HW. Each RcvArray entry will be programmed with a physically contiguous
 * buffer chunk from the user's virtual buffer.
 *
 * Return:
 * -EINVAL if the requested count is larger than the size of the group,
 * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
 * number of RcvArray entries programmed.
65562306a36Sopenharmony_ci */ 65662306a36Sopenharmony_cistatic int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf, 65762306a36Sopenharmony_ci struct tid_group *grp, u16 count, 65862306a36Sopenharmony_ci u32 *tidlist, unsigned int *tididx, 65962306a36Sopenharmony_ci unsigned int *pmapped) 66062306a36Sopenharmony_ci{ 66162306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 66262306a36Sopenharmony_ci struct hfi1_devdata *dd = uctxt->dd; 66362306a36Sopenharmony_ci u16 idx; 66462306a36Sopenharmony_ci unsigned int start = *tididx; 66562306a36Sopenharmony_ci u32 tidinfo = 0, rcventry, useidx = 0; 66662306a36Sopenharmony_ci int mapped = 0; 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci /* Count should never be larger than the group size */ 66962306a36Sopenharmony_ci if (count > grp->size) 67062306a36Sopenharmony_ci return -EINVAL; 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci /* Find the first unused entry in the group */ 67362306a36Sopenharmony_ci for (idx = 0; idx < grp->size; idx++) { 67462306a36Sopenharmony_ci if (!(grp->map & (1 << idx))) { 67562306a36Sopenharmony_ci useidx = idx; 67662306a36Sopenharmony_ci break; 67762306a36Sopenharmony_ci } 67862306a36Sopenharmony_ci rcv_array_wc_fill(dd, grp->base + idx); 67962306a36Sopenharmony_ci } 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci idx = 0; 68262306a36Sopenharmony_ci while (idx < count) { 68362306a36Sopenharmony_ci u16 npages, pageidx, setidx = start + idx; 68462306a36Sopenharmony_ci int ret = 0; 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ci /* 68762306a36Sopenharmony_ci * If this entry in the group is used, move to the next one. 68862306a36Sopenharmony_ci * If we go past the end of the group, exit the loop. 
68962306a36Sopenharmony_ci */ 69062306a36Sopenharmony_ci if (useidx >= grp->size) { 69162306a36Sopenharmony_ci break; 69262306a36Sopenharmony_ci } else if (grp->map & (1 << useidx)) { 69362306a36Sopenharmony_ci rcv_array_wc_fill(dd, grp->base + useidx); 69462306a36Sopenharmony_ci useidx++; 69562306a36Sopenharmony_ci continue; 69662306a36Sopenharmony_ci } 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci rcventry = grp->base + useidx; 69962306a36Sopenharmony_ci npages = tbuf->psets[setidx].count; 70062306a36Sopenharmony_ci pageidx = tbuf->psets[setidx].idx; 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci ret = set_rcvarray_entry(fd, tbuf, 70362306a36Sopenharmony_ci rcventry, grp, pageidx, 70462306a36Sopenharmony_ci npages); 70562306a36Sopenharmony_ci if (ret) 70662306a36Sopenharmony_ci return ret; 70762306a36Sopenharmony_ci mapped += npages; 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ci tidinfo = create_tid(rcventry - uctxt->expected_base, npages); 71062306a36Sopenharmony_ci tidlist[(*tididx)++] = tidinfo; 71162306a36Sopenharmony_ci grp->used++; 71262306a36Sopenharmony_ci grp->map |= 1 << useidx++; 71362306a36Sopenharmony_ci idx++; 71462306a36Sopenharmony_ci } 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci /* Fill the rest of the group with "blank" writes */ 71762306a36Sopenharmony_ci for (; useidx < grp->size; useidx++) 71862306a36Sopenharmony_ci rcv_array_wc_fill(dd, grp->base + useidx); 71962306a36Sopenharmony_ci *pmapped = mapped; 72062306a36Sopenharmony_ci return idx; 72162306a36Sopenharmony_ci} 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_cistatic int set_rcvarray_entry(struct hfi1_filedata *fd, 72462306a36Sopenharmony_ci struct tid_user_buf *tbuf, 72562306a36Sopenharmony_ci u32 rcventry, struct tid_group *grp, 72662306a36Sopenharmony_ci u16 pageidx, unsigned int npages) 72762306a36Sopenharmony_ci{ 72862306a36Sopenharmony_ci int ret; 72962306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 73062306a36Sopenharmony_ci 
struct tid_rb_node *node; 73162306a36Sopenharmony_ci struct hfi1_devdata *dd = uctxt->dd; 73262306a36Sopenharmony_ci dma_addr_t phys; 73362306a36Sopenharmony_ci struct page **pages = tbuf->pages + pageidx; 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci /* 73662306a36Sopenharmony_ci * Allocate the node first so we can handle a potential 73762306a36Sopenharmony_ci * failure before we've programmed anything. 73862306a36Sopenharmony_ci */ 73962306a36Sopenharmony_ci node = kzalloc(struct_size(node, pages, npages), GFP_KERNEL); 74062306a36Sopenharmony_ci if (!node) 74162306a36Sopenharmony_ci return -ENOMEM; 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci phys = dma_map_single(&dd->pcidev->dev, __va(page_to_phys(pages[0])), 74462306a36Sopenharmony_ci npages * PAGE_SIZE, DMA_FROM_DEVICE); 74562306a36Sopenharmony_ci if (dma_mapping_error(&dd->pcidev->dev, phys)) { 74662306a36Sopenharmony_ci dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n", 74762306a36Sopenharmony_ci phys); 74862306a36Sopenharmony_ci kfree(node); 74962306a36Sopenharmony_ci return -EFAULT; 75062306a36Sopenharmony_ci } 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci node->fdata = fd; 75362306a36Sopenharmony_ci mutex_init(&node->invalidate_mutex); 75462306a36Sopenharmony_ci node->phys = page_to_phys(pages[0]); 75562306a36Sopenharmony_ci node->npages = npages; 75662306a36Sopenharmony_ci node->rcventry = rcventry; 75762306a36Sopenharmony_ci node->dma_addr = phys; 75862306a36Sopenharmony_ci node->grp = grp; 75962306a36Sopenharmony_ci node->freed = false; 76062306a36Sopenharmony_ci memcpy(node->pages, pages, flex_array_size(node, pages, npages)); 76162306a36Sopenharmony_ci 76262306a36Sopenharmony_ci if (fd->use_mn) { 76362306a36Sopenharmony_ci ret = mmu_interval_notifier_insert( 76462306a36Sopenharmony_ci &node->notifier, current->mm, 76562306a36Sopenharmony_ci tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE, 76662306a36Sopenharmony_ci &tid_mn_ops); 76762306a36Sopenharmony_ci if 
(ret) 76862306a36Sopenharmony_ci goto out_unmap; 76962306a36Sopenharmony_ci } 77062306a36Sopenharmony_ci fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; 77162306a36Sopenharmony_ci 77262306a36Sopenharmony_ci hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); 77362306a36Sopenharmony_ci trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages, 77462306a36Sopenharmony_ci node->notifier.interval_tree.start, node->phys, 77562306a36Sopenharmony_ci phys); 77662306a36Sopenharmony_ci return 0; 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ciout_unmap: 77962306a36Sopenharmony_ci hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", 78062306a36Sopenharmony_ci node->rcventry, node->notifier.interval_tree.start, 78162306a36Sopenharmony_ci node->phys, ret); 78262306a36Sopenharmony_ci dma_unmap_single(&dd->pcidev->dev, phys, npages * PAGE_SIZE, 78362306a36Sopenharmony_ci DMA_FROM_DEVICE); 78462306a36Sopenharmony_ci kfree(node); 78562306a36Sopenharmony_ci return -EFAULT; 78662306a36Sopenharmony_ci} 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_cistatic int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo) 78962306a36Sopenharmony_ci{ 79062306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 79162306a36Sopenharmony_ci struct hfi1_devdata *dd = uctxt->dd; 79262306a36Sopenharmony_ci struct tid_rb_node *node; 79362306a36Sopenharmony_ci u32 tidctrl = EXP_TID_GET(tidinfo, CTRL); 79462306a36Sopenharmony_ci u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry; 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci if (tidctrl == 0x3 || tidctrl == 0x0) 79762306a36Sopenharmony_ci return -EINVAL; 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci rcventry = tididx + (tidctrl - 1); 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci if (rcventry >= uctxt->expected_count) { 80262306a36Sopenharmony_ci dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n", 80362306a36Sopenharmony_ci rcventry, uctxt->ctxt); 
80462306a36Sopenharmony_ci return -EINVAL; 80562306a36Sopenharmony_ci } 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci node = fd->entry_to_rb[rcventry]; 80862306a36Sopenharmony_ci if (!node || node->rcventry != (uctxt->expected_base + rcventry)) 80962306a36Sopenharmony_ci return -EBADF; 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci if (fd->use_mn) 81262306a36Sopenharmony_ci mmu_interval_notifier_remove(&node->notifier); 81362306a36Sopenharmony_ci cacheless_tid_rb_remove(fd, node); 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci return 0; 81662306a36Sopenharmony_ci} 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_cistatic void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) 81962306a36Sopenharmony_ci{ 82062306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 82162306a36Sopenharmony_ci struct hfi1_devdata *dd = uctxt->dd; 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci mutex_lock(&node->invalidate_mutex); 82462306a36Sopenharmony_ci if (node->freed) 82562306a36Sopenharmony_ci goto done; 82662306a36Sopenharmony_ci node->freed = true; 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, 82962306a36Sopenharmony_ci node->npages, 83062306a36Sopenharmony_ci node->notifier.interval_tree.start, node->phys, 83162306a36Sopenharmony_ci node->dma_addr); 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci /* Make sure device has seen the write before pages are unpinned */ 83462306a36Sopenharmony_ci hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); 83762306a36Sopenharmony_cidone: 83862306a36Sopenharmony_ci mutex_unlock(&node->invalidate_mutex); 83962306a36Sopenharmony_ci} 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_cistatic void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) 84262306a36Sopenharmony_ci{ 
84362306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 84462306a36Sopenharmony_ci 84562306a36Sopenharmony_ci __clear_tid_node(fd, node); 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_ci node->grp->used--; 84862306a36Sopenharmony_ci node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci if (node->grp->used == node->grp->size - 1) 85162306a36Sopenharmony_ci tid_group_move(node->grp, &uctxt->tid_full_list, 85262306a36Sopenharmony_ci &uctxt->tid_used_list); 85362306a36Sopenharmony_ci else if (!node->grp->used) 85462306a36Sopenharmony_ci tid_group_move(node->grp, &uctxt->tid_used_list, 85562306a36Sopenharmony_ci &uctxt->tid_group_list); 85662306a36Sopenharmony_ci kfree(node); 85762306a36Sopenharmony_ci} 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci/* 86062306a36Sopenharmony_ci * As a simple helper for hfi1_user_exp_rcv_free, this function deals with 86162306a36Sopenharmony_ci * clearing nodes in the non-cached case. 
86262306a36Sopenharmony_ci */ 86362306a36Sopenharmony_cistatic void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, 86462306a36Sopenharmony_ci struct exp_tid_set *set, 86562306a36Sopenharmony_ci struct hfi1_filedata *fd) 86662306a36Sopenharmony_ci{ 86762306a36Sopenharmony_ci struct tid_group *grp, *ptr; 86862306a36Sopenharmony_ci int i; 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci list_for_each_entry_safe(grp, ptr, &set->list, list) { 87162306a36Sopenharmony_ci list_del_init(&grp->list); 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci for (i = 0; i < grp->size; i++) { 87462306a36Sopenharmony_ci if (grp->map & (1 << i)) { 87562306a36Sopenharmony_ci u16 rcventry = grp->base + i; 87662306a36Sopenharmony_ci struct tid_rb_node *node; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci node = fd->entry_to_rb[rcventry - 87962306a36Sopenharmony_ci uctxt->expected_base]; 88062306a36Sopenharmony_ci if (!node || node->rcventry != rcventry) 88162306a36Sopenharmony_ci continue; 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci if (fd->use_mn) 88462306a36Sopenharmony_ci mmu_interval_notifier_remove( 88562306a36Sopenharmony_ci &node->notifier); 88662306a36Sopenharmony_ci cacheless_tid_rb_remove(fd, node); 88762306a36Sopenharmony_ci } 88862306a36Sopenharmony_ci } 88962306a36Sopenharmony_ci } 89062306a36Sopenharmony_ci} 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_cistatic bool tid_rb_invalidate(struct mmu_interval_notifier *mni, 89362306a36Sopenharmony_ci const struct mmu_notifier_range *range, 89462306a36Sopenharmony_ci unsigned long cur_seq) 89562306a36Sopenharmony_ci{ 89662306a36Sopenharmony_ci struct tid_rb_node *node = 89762306a36Sopenharmony_ci container_of(mni, struct tid_rb_node, notifier); 89862306a36Sopenharmony_ci struct hfi1_filedata *fdata = node->fdata; 89962306a36Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fdata->uctxt; 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci if (node->freed) 90262306a36Sopenharmony_ci return true; 
90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci /* take action only if unmapping */ 90562306a36Sopenharmony_ci if (range->event != MMU_NOTIFY_UNMAP) 90662306a36Sopenharmony_ci return true; 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_ci trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, 90962306a36Sopenharmony_ci node->notifier.interval_tree.start, 91062306a36Sopenharmony_ci node->rcventry, node->npages, node->dma_addr); 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci /* clear the hardware rcvarray entry */ 91362306a36Sopenharmony_ci __clear_tid_node(fdata, node); 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci spin_lock(&fdata->invalid_lock); 91662306a36Sopenharmony_ci if (fdata->invalid_tid_idx < uctxt->expected_count) { 91762306a36Sopenharmony_ci fdata->invalid_tids[fdata->invalid_tid_idx] = 91862306a36Sopenharmony_ci create_tid(node->rcventry - uctxt->expected_base, 91962306a36Sopenharmony_ci node->npages); 92062306a36Sopenharmony_ci if (!fdata->invalid_tid_idx) { 92162306a36Sopenharmony_ci unsigned long *ev; 92262306a36Sopenharmony_ci 92362306a36Sopenharmony_ci /* 92462306a36Sopenharmony_ci * hfi1_set_uevent_bits() sets a user event flag 92562306a36Sopenharmony_ci * for all processes. Because calling into the 92662306a36Sopenharmony_ci * driver to process TID cache invalidations is 92762306a36Sopenharmony_ci * expensive and TID cache invalidations are 92862306a36Sopenharmony_ci * handled on a per-process basis, we can 92962306a36Sopenharmony_ci * optimize this to set the flag only for the 93062306a36Sopenharmony_ci * process in question. 
93162306a36Sopenharmony_ci */ 93262306a36Sopenharmony_ci ev = uctxt->dd->events + 93362306a36Sopenharmony_ci (uctxt_offset(uctxt) + fdata->subctxt); 93462306a36Sopenharmony_ci set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); 93562306a36Sopenharmony_ci } 93662306a36Sopenharmony_ci fdata->invalid_tid_idx++; 93762306a36Sopenharmony_ci } 93862306a36Sopenharmony_ci spin_unlock(&fdata->invalid_lock); 93962306a36Sopenharmony_ci return true; 94062306a36Sopenharmony_ci} 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_cistatic bool tid_cover_invalidate(struct mmu_interval_notifier *mni, 94362306a36Sopenharmony_ci const struct mmu_notifier_range *range, 94462306a36Sopenharmony_ci unsigned long cur_seq) 94562306a36Sopenharmony_ci{ 94662306a36Sopenharmony_ci struct tid_user_buf *tidbuf = 94762306a36Sopenharmony_ci container_of(mni, struct tid_user_buf, notifier); 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci /* take action only if unmapping */ 95062306a36Sopenharmony_ci if (range->event == MMU_NOTIFY_UNMAP) { 95162306a36Sopenharmony_ci mutex_lock(&tidbuf->cover_mutex); 95262306a36Sopenharmony_ci mmu_interval_set_seq(mni, cur_seq); 95362306a36Sopenharmony_ci mutex_unlock(&tidbuf->cover_mutex); 95462306a36Sopenharmony_ci } 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci return true; 95762306a36Sopenharmony_ci} 95862306a36Sopenharmony_ci 95962306a36Sopenharmony_cistatic void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, 96062306a36Sopenharmony_ci struct tid_rb_node *tnode) 96162306a36Sopenharmony_ci{ 96262306a36Sopenharmony_ci u32 base = fdata->uctxt->expected_base; 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci fdata->entry_to_rb[tnode->rcventry - base] = NULL; 96562306a36Sopenharmony_ci clear_tid_node(fdata, tnode); 96662306a36Sopenharmony_ci} 967