18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright(c) 2020 Cornelis Networks, Inc.
38c2ecf20Sopenharmony_ci * Copyright(c) 2015-2018 Intel Corporation.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * This file is provided under a dual BSD/GPLv2 license.  When using or
68c2ecf20Sopenharmony_ci * redistributing this file, you may do so under either license.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * GPL LICENSE SUMMARY
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or modify
118c2ecf20Sopenharmony_ci * it under the terms of version 2 of the GNU General Public License as
128c2ecf20Sopenharmony_ci * published by the Free Software Foundation.
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci * This program is distributed in the hope that it will be useful, but
158c2ecf20Sopenharmony_ci * WITHOUT ANY WARRANTY; without even the implied warranty of
168c2ecf20Sopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
178c2ecf20Sopenharmony_ci * General Public License for more details.
188c2ecf20Sopenharmony_ci *
198c2ecf20Sopenharmony_ci * BSD LICENSE
208c2ecf20Sopenharmony_ci *
218c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
228c2ecf20Sopenharmony_ci * modification, are permitted provided that the following conditions
238c2ecf20Sopenharmony_ci * are met:
248c2ecf20Sopenharmony_ci *
258c2ecf20Sopenharmony_ci *  - Redistributions of source code must retain the above copyright
268c2ecf20Sopenharmony_ci *    notice, this list of conditions and the following disclaimer.
278c2ecf20Sopenharmony_ci *  - Redistributions in binary form must reproduce the above copyright
288c2ecf20Sopenharmony_ci *    notice, this list of conditions and the following disclaimer in
298c2ecf20Sopenharmony_ci *    the documentation and/or other materials provided with the
308c2ecf20Sopenharmony_ci *    distribution.
318c2ecf20Sopenharmony_ci *  - Neither the name of Intel Corporation nor the names of its
328c2ecf20Sopenharmony_ci *    contributors may be used to endorse or promote products derived
338c2ecf20Sopenharmony_ci *    from this software without specific prior written permission.
348c2ecf20Sopenharmony_ci *
358c2ecf20Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
368c2ecf20Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
378c2ecf20Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
388c2ecf20Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
398c2ecf20Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
408c2ecf20Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
418c2ecf20Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
428c2ecf20Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
438c2ecf20Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
448c2ecf20Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
458c2ecf20Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
468c2ecf20Sopenharmony_ci *
478c2ecf20Sopenharmony_ci */
488c2ecf20Sopenharmony_ci#include <asm/page.h>
498c2ecf20Sopenharmony_ci#include <linux/string.h>
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci#include "mmu_rb.h"
528c2ecf20Sopenharmony_ci#include "user_exp_rcv.h"
538c2ecf20Sopenharmony_ci#include "trace.h"
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_cistatic void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
568c2ecf20Sopenharmony_ci			    struct exp_tid_set *set,
578c2ecf20Sopenharmony_ci			    struct hfi1_filedata *fd);
588c2ecf20Sopenharmony_cistatic u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages);
598c2ecf20Sopenharmony_cistatic int set_rcvarray_entry(struct hfi1_filedata *fd,
608c2ecf20Sopenharmony_ci			      struct tid_user_buf *tbuf,
618c2ecf20Sopenharmony_ci			      u32 rcventry, struct tid_group *grp,
628c2ecf20Sopenharmony_ci			      u16 pageidx, unsigned int npages);
638c2ecf20Sopenharmony_cistatic void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
648c2ecf20Sopenharmony_ci				    struct tid_rb_node *tnode);
658c2ecf20Sopenharmony_cistatic bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
668c2ecf20Sopenharmony_ci			      const struct mmu_notifier_range *range,
678c2ecf20Sopenharmony_ci			      unsigned long cur_seq);
688c2ecf20Sopenharmony_cistatic bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
698c2ecf20Sopenharmony_ci			         const struct mmu_notifier_range *range,
708c2ecf20Sopenharmony_ci			         unsigned long cur_seq);
718c2ecf20Sopenharmony_cistatic int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
728c2ecf20Sopenharmony_ci			    struct tid_group *grp,
738c2ecf20Sopenharmony_ci			    unsigned int start, u16 count,
748c2ecf20Sopenharmony_ci			    u32 *tidlist, unsigned int *tididx,
758c2ecf20Sopenharmony_ci			    unsigned int *pmapped);
768c2ecf20Sopenharmony_cistatic int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
778c2ecf20Sopenharmony_cistatic void __clear_tid_node(struct hfi1_filedata *fd,
788c2ecf20Sopenharmony_ci			     struct tid_rb_node *node);
798c2ecf20Sopenharmony_cistatic void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
808c2ecf20Sopenharmony_ci
/*
 * MMU interval notifier callbacks. tid_cover_ops is registered over the
 * whole user buffer in hfi1_user_exp_rcv_setup() to detect an invalidation
 * racing with setup. tid_mn_ops presumably covers individual programmed TID
 * ranges (registration site is set_rcvarray_entry(), not visible in this
 * chunk — confirm).
 */
static const struct mmu_interval_notifier_ops tid_mn_ops = {
	.invalidate = tid_rb_invalidate,
};
static const struct mmu_interval_notifier_ops tid_cover_ops = {
	.invalidate = tid_cover_invalidate,
};
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci/*
898c2ecf20Sopenharmony_ci * Initialize context and file private data needed for Expected
908c2ecf20Sopenharmony_ci * receive caching. This needs to be done after the context has
918c2ecf20Sopenharmony_ci * been configured with the eager/expected RcvEntry counts.
928c2ecf20Sopenharmony_ci */
938c2ecf20Sopenharmony_ciint hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
948c2ecf20Sopenharmony_ci			   struct hfi1_ctxtdata *uctxt)
958c2ecf20Sopenharmony_ci{
968c2ecf20Sopenharmony_ci	int ret = 0;
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci	fd->entry_to_rb = kcalloc(uctxt->expected_count,
998c2ecf20Sopenharmony_ci				  sizeof(struct rb_node *),
1008c2ecf20Sopenharmony_ci				  GFP_KERNEL);
1018c2ecf20Sopenharmony_ci	if (!fd->entry_to_rb)
1028c2ecf20Sopenharmony_ci		return -ENOMEM;
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
1058c2ecf20Sopenharmony_ci		fd->invalid_tid_idx = 0;
1068c2ecf20Sopenharmony_ci		fd->invalid_tids = kcalloc(uctxt->expected_count,
1078c2ecf20Sopenharmony_ci					   sizeof(*fd->invalid_tids),
1088c2ecf20Sopenharmony_ci					   GFP_KERNEL);
1098c2ecf20Sopenharmony_ci		if (!fd->invalid_tids) {
1108c2ecf20Sopenharmony_ci			kfree(fd->entry_to_rb);
1118c2ecf20Sopenharmony_ci			fd->entry_to_rb = NULL;
1128c2ecf20Sopenharmony_ci			return -ENOMEM;
1138c2ecf20Sopenharmony_ci		}
1148c2ecf20Sopenharmony_ci		fd->use_mn = true;
1158c2ecf20Sopenharmony_ci	}
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci	/*
1188c2ecf20Sopenharmony_ci	 * PSM does not have a good way to separate, count, and
1198c2ecf20Sopenharmony_ci	 * effectively enforce a limit on RcvArray entries used by
1208c2ecf20Sopenharmony_ci	 * subctxts (when context sharing is used) when TID caching
1218c2ecf20Sopenharmony_ci	 * is enabled. To help with that, we calculate a per-process
1228c2ecf20Sopenharmony_ci	 * RcvArray entry share and enforce that.
1238c2ecf20Sopenharmony_ci	 * If TID caching is not in use, PSM deals with usage on its
1248c2ecf20Sopenharmony_ci	 * own. In that case, we allow any subctxt to take all of the
1258c2ecf20Sopenharmony_ci	 * entries.
1268c2ecf20Sopenharmony_ci	 *
1278c2ecf20Sopenharmony_ci	 * Make sure that we set the tid counts only after successful
1288c2ecf20Sopenharmony_ci	 * init.
1298c2ecf20Sopenharmony_ci	 */
1308c2ecf20Sopenharmony_ci	spin_lock(&fd->tid_lock);
1318c2ecf20Sopenharmony_ci	if (uctxt->subctxt_cnt && fd->use_mn) {
1328c2ecf20Sopenharmony_ci		u16 remainder;
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
1358c2ecf20Sopenharmony_ci		remainder = uctxt->expected_count % uctxt->subctxt_cnt;
1368c2ecf20Sopenharmony_ci		if (remainder && fd->subctxt < remainder)
1378c2ecf20Sopenharmony_ci			fd->tid_limit++;
1388c2ecf20Sopenharmony_ci	} else {
1398c2ecf20Sopenharmony_ci		fd->tid_limit = uctxt->expected_count;
1408c2ecf20Sopenharmony_ci	}
1418c2ecf20Sopenharmony_ci	spin_unlock(&fd->tid_lock);
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	return ret;
1448c2ecf20Sopenharmony_ci}
1458c2ecf20Sopenharmony_ci
/* Tear down the per-file expected-receive state set up by hfi1_user_exp_rcv_init(). */
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;

	/*
	 * Release any RcvArray entries this file still owns. The TID group
	 * lists are shared between master and subcontexts, so walk them
	 * under exp_mutex.
	 */
	mutex_lock(&uctxt->exp_mutex);
	if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
		unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
	if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
		unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
	mutex_unlock(&uctxt->exp_mutex);

	/* Free the tracking arrays only after all TIDs have been released. */
	kfree(fd->invalid_tids);
	fd->invalid_tids = NULL;

	kfree(fd->entry_to_rb);
	fd->entry_to_rb = NULL;
}
1638c2ecf20Sopenharmony_ci
/**
 * unpin_rcv_pages() - Release pinned receive buffer pages.
 * @fd: file data of the current driver instance
 * @tidbuf: tid buffer describing not-yet-mapped pages (used when !@mapped)
 * @node: tid rb node describing DMA-mapped pages (used when @mapped)
 * @idx: index of the first page to unpin
 * @npages: number of pages to unpin
 * @mapped: true if the pages have been DMA mapped, false otherwise
 *
 * If the pages have been DMA mapped (indicated by the mapped parameter),
 * their info will be passed via a struct tid_rb_node. If they haven't been
 * mapped, their info will be passed via a struct tid_user_buf.
 */
static void unpin_rcv_pages(struct hfi1_filedata *fd,
			    struct tid_user_buf *tidbuf,
			    struct tid_rb_node *node,
			    unsigned int idx,
			    unsigned int npages,
			    bool mapped)
{
	struct page **pages;
	struct hfi1_devdata *dd = fd->uctxt->dd;
	struct mm_struct *mm;

	if (mapped) {
		/* Undo the DMA mapping before releasing the pages. */
		pci_unmap_single(dd->pcidev, node->dma_addr,
				 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
		pages = &node->pages[idx];
		mm = mm_from_tid_node(node);
	} else {
		/* Never DMA mapped: page info still lives in the tid buffer. */
		pages = &tidbuf->pages[idx];
		mm = current->mm;
	}
	/*
	 * 'mapped' is forwarded to the release helper — presumably as the
	 * dirty flag; confirm against hfi1_release_user_pages().
	 */
	hfi1_release_user_pages(mm, pages, npages, mapped);
	/* Keep the per-file pinned-page accounting in sync. */
	fd->tid_n_pinned -= npages;
}
1988c2ecf20Sopenharmony_ci
/**
 * pin_rcv_pages() - Pin the pages of an expected receive buffer.
 * @fd: file data of the current driver instance
 * @tidbuf: tid buffer describing the user buffer to pin
 *
 * Return: number of pages pinned on success, negative errno (or 0 from the
 * pin helper) on failure.
 */
2028c2ecf20Sopenharmony_cistatic int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
2038c2ecf20Sopenharmony_ci{
2048c2ecf20Sopenharmony_ci	int pinned;
2058c2ecf20Sopenharmony_ci	unsigned int npages = tidbuf->npages;
2068c2ecf20Sopenharmony_ci	unsigned long vaddr = tidbuf->vaddr;
2078c2ecf20Sopenharmony_ci	struct page **pages = NULL;
2088c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = fd->uctxt->dd;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	if (npages > fd->uctxt->expected_count) {
2118c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Expected buffer too big\n");
2128c2ecf20Sopenharmony_ci		return -EINVAL;
2138c2ecf20Sopenharmony_ci	}
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ci	/* Allocate the array of struct page pointers needed for pinning */
2168c2ecf20Sopenharmony_ci	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
2178c2ecf20Sopenharmony_ci	if (!pages)
2188c2ecf20Sopenharmony_ci		return -ENOMEM;
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	/*
2218c2ecf20Sopenharmony_ci	 * Pin all the pages of the user buffer. If we can't pin all the
2228c2ecf20Sopenharmony_ci	 * pages, accept the amount pinned so far and program only that.
2238c2ecf20Sopenharmony_ci	 * User space knows how to deal with partially programmed buffers.
2248c2ecf20Sopenharmony_ci	 */
2258c2ecf20Sopenharmony_ci	if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
2268c2ecf20Sopenharmony_ci		kfree(pages);
2278c2ecf20Sopenharmony_ci		return -ENOMEM;
2288c2ecf20Sopenharmony_ci	}
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
2318c2ecf20Sopenharmony_ci	if (pinned <= 0) {
2328c2ecf20Sopenharmony_ci		kfree(pages);
2338c2ecf20Sopenharmony_ci		return pinned;
2348c2ecf20Sopenharmony_ci	}
2358c2ecf20Sopenharmony_ci	tidbuf->pages = pages;
2368c2ecf20Sopenharmony_ci	fd->tid_n_pinned += pinned;
2378c2ecf20Sopenharmony_ci	return pinned;
2388c2ecf20Sopenharmony_ci}
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci/*
2418c2ecf20Sopenharmony_ci * RcvArray entry allocation for Expected Receives is done by the
2428c2ecf20Sopenharmony_ci * following algorithm:
2438c2ecf20Sopenharmony_ci *
2448c2ecf20Sopenharmony_ci * The context keeps 3 lists of groups of RcvArray entries:
2458c2ecf20Sopenharmony_ci *   1. List of empty groups - tid_group_list
2468c2ecf20Sopenharmony_ci *      This list is created during user context creation and
2478c2ecf20Sopenharmony_ci *      contains elements which describe sets (of 8) of empty
2488c2ecf20Sopenharmony_ci *      RcvArray entries.
2498c2ecf20Sopenharmony_ci *   2. List of partially used groups - tid_used_list
2508c2ecf20Sopenharmony_ci *      This list contains sets of RcvArray entries which are
2518c2ecf20Sopenharmony_ci *      not completely used up. Another mapping request could
2528c2ecf20Sopenharmony_ci *      use some of all of the remaining entries.
2538c2ecf20Sopenharmony_ci *   3. List of full groups - tid_full_list
2548c2ecf20Sopenharmony_ci *      This is the list where sets that are completely used
2558c2ecf20Sopenharmony_ci *      up go.
2568c2ecf20Sopenharmony_ci *
2578c2ecf20Sopenharmony_ci * An attempt to optimize the usage of RcvArray entries is
2588c2ecf20Sopenharmony_ci * made by finding all sets of physically contiguous pages in a
2598c2ecf20Sopenharmony_ci * user's buffer.
2608c2ecf20Sopenharmony_ci * These physically contiguous sets are further split into
2618c2ecf20Sopenharmony_ci * sizes supported by the receive engine of the HFI. The
2628c2ecf20Sopenharmony_ci * resulting sets of pages are stored in struct tid_pageset,
2638c2ecf20Sopenharmony_ci * which describes the sets as:
2648c2ecf20Sopenharmony_ci *    * .count - number of pages in this set
2658c2ecf20Sopenharmony_ci *    * .idx - starting index into struct page ** array
2668c2ecf20Sopenharmony_ci *                    of this set
2678c2ecf20Sopenharmony_ci *
2688c2ecf20Sopenharmony_ci * From this point on, the algorithm deals with the page sets
2698c2ecf20Sopenharmony_ci * described above. The number of pagesets is divided by the
2708c2ecf20Sopenharmony_ci * RcvArray group size to produce the number of full groups
2718c2ecf20Sopenharmony_ci * needed.
2728c2ecf20Sopenharmony_ci *
2738c2ecf20Sopenharmony_ci * Groups from the 3 lists are manipulated using the following
2748c2ecf20Sopenharmony_ci * rules:
2758c2ecf20Sopenharmony_ci *   1. For each set of 8 pagesets, a complete group from
2768c2ecf20Sopenharmony_ci *      tid_group_list is taken, programmed, and moved to
2778c2ecf20Sopenharmony_ci *      the tid_full_list list.
2788c2ecf20Sopenharmony_ci *   2. For all remaining pagesets:
2798c2ecf20Sopenharmony_ci *      2.1 If the tid_used_list is empty and the tid_group_list
2808c2ecf20Sopenharmony_ci *          is empty, stop processing pageset and return only
2818c2ecf20Sopenharmony_ci *          what has been programmed up to this point.
2828c2ecf20Sopenharmony_ci *      2.2 If the tid_used_list is empty and the tid_group_list
2838c2ecf20Sopenharmony_ci *          is not empty, move a group from tid_group_list to
2848c2ecf20Sopenharmony_ci *          tid_used_list.
2858c2ecf20Sopenharmony_ci *      2.3 For each group is tid_used_group, program as much as
2868c2ecf20Sopenharmony_ci *          can fit into the group. If the group becomes fully
2878c2ecf20Sopenharmony_ci *          used, move it to tid_full_list.
2888c2ecf20Sopenharmony_ci */
/*
 * hfi1_user_exp_rcv_setup() - Pin a user buffer and program RcvArray (TID)
 * entries to cover it, per the algorithm described above.
 * @fd: file data of the current driver instance
 * @tinfo: in: vaddr/length of the user buffer; out: tidcnt and length
 *         (bytes actually mapped), with the TID list copied to user memory
 *         at tinfo->tidlist.
 *
 * Return: 0 on success, negative errno otherwise. A partially programmed
 * buffer is still a success; only the covered portion is reported back.
 */
int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
			    struct hfi1_tid_info *tinfo)
{
	int ret = 0, need_group = 0, pinned;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int ngroups, pageidx = 0, pageset_count,
		tididx = 0, mapped, mapped_pages = 0;
	u32 *tidlist = NULL;
	struct tid_user_buf *tidbuf;
	unsigned long mmu_seq = 0;

	/* Buffer must start on a page boundary and be non-empty. */
	if (!PAGE_ALIGNED(tinfo->vaddr))
		return -EINVAL;
	if (tinfo->length == 0)
		return -EINVAL;

	tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
	if (!tidbuf)
		return -ENOMEM;

	mutex_init(&tidbuf->cover_mutex);
	tidbuf->vaddr = tinfo->vaddr;
	tidbuf->length = tinfo->length;
	tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length);
	tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
				GFP_KERNEL);
	if (!tidbuf->psets) {
		ret = -ENOMEM;
		goto fail_release_mem;
	}

	/*
	 * Register a notifier covering the whole buffer and snapshot its
	 * sequence count, so an invalidation racing with this setup can be
	 * detected via mmu_interval_read_retry() below.
	 */
	if (fd->use_mn) {
		ret = mmu_interval_notifier_insert(
			&tidbuf->notifier, current->mm,
			tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
			&tid_cover_ops);
		if (ret)
			goto fail_release_mem;
		mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
	}

	pinned = pin_rcv_pages(fd, tidbuf);
	if (pinned <= 0) {
		ret = (pinned < 0) ? pinned : -ENOSPC;
		goto fail_unpin;
	}

	/* Find sets of physically contiguous pages */
	tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);

	/* Reserve the number of expected tids to be used. */
	spin_lock(&fd->tid_lock);
	if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
		pageset_count = fd->tid_limit - fd->tid_used;
	else
		pageset_count = tidbuf->n_psets;
	/* Reserved now; any unused portion is returned at the end. */
	fd->tid_used += pageset_count;
	spin_unlock(&fd->tid_lock);

	if (!pageset_count) {
		ret = -ENOSPC;
		goto fail_unreserve;
	}

	ngroups = pageset_count / dd->rcv_entries.group_size;
	tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
	if (!tidlist) {
		ret = -ENOMEM;
		goto fail_unreserve;
	}

	tididx = 0;

	/*
	 * From this point on, we are going to be using shared (between master
	 * and subcontexts) context resources. We need to take the lock.
	 */
	mutex_lock(&uctxt->exp_mutex);
	/*
	 * The first step is to program the RcvArray entries which are complete
	 * groups.
	 */
	while (ngroups && uctxt->tid_group_list.count) {
		struct tid_group *grp =
			tid_group_pop(&uctxt->tid_group_list);

		ret = program_rcvarray(fd, tidbuf, grp,
				       pageidx, dd->rcv_entries.group_size,
				       tidlist, &tididx, &mapped);
		/*
		 * If there was a failure to program the RcvArray
		 * entries for the entire group, reset the grp fields
		 * and add the grp back to the free group list.
		 */
		if (ret <= 0) {
			tid_group_add_tail(grp, &uctxt->tid_group_list);
			hfi1_cdbg(TID,
				  "Failed to program RcvArray group %d", ret);
			goto unlock;
		}

		tid_group_add_tail(grp, &uctxt->tid_full_list);
		ngroups--;
		pageidx += ret;
		mapped_pages += mapped;
	}

	/* Second step: fill partially used groups with the remaining pagesets. */
	while (pageidx < pageset_count) {
		struct tid_group *grp, *ptr;
		/*
		 * If we don't have any partially used tid groups, check
		 * if we have empty groups. If so, take one from there and
		 * put in the partially used list.
		 */
		if (!uctxt->tid_used_list.count || need_group) {
			if (!uctxt->tid_group_list.count)
				goto unlock;

			grp = tid_group_pop(&uctxt->tid_group_list);
			tid_group_add_tail(grp, &uctxt->tid_used_list);
			need_group = 0;
		}
		/*
		 * There is an optimization opportunity here - instead of
		 * fitting as many page sets as we can, check for a group
		 * later on in the list that could fit all of them.
		 */
		list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
					 list) {
			unsigned use = min_t(unsigned, pageset_count - pageidx,
					     grp->size - grp->used);

			ret = program_rcvarray(fd, tidbuf, grp,
					       pageidx, use, tidlist,
					       &tididx, &mapped);
			if (ret < 0) {
				hfi1_cdbg(TID,
					  "Failed to program RcvArray entries %d",
					  ret);
				goto unlock;
			} else if (ret > 0) {
				if (grp->used == grp->size)
					tid_group_move(grp,
						       &uctxt->tid_used_list,
						       &uctxt->tid_full_list);
				pageidx += ret;
				mapped_pages += mapped;
				need_group = 0;
				/* Check if we are done so we break out early */
				if (pageidx >= pageset_count)
					break;
			} else if (WARN_ON(ret == 0)) {
				/*
				 * If ret is 0, we did not program any entries
				 * into this group, which can only happen if
				 * we've screwed up the accounting somewhere.
				 * Warn and try to continue.
				 */
				need_group = 1;
			}
		}
	}
unlock:
	mutex_unlock(&uctxt->exp_mutex);
	hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
		  mapped_pages, ret);

	/* fail if nothing was programmed, set error if none provided */
	if (tididx == 0) {
		if (ret >= 0)
			ret = -ENOSPC;
		goto fail_unreserve;
	}

	/* adjust reserved tid_used to actual count */
	spin_lock(&fd->tid_lock);
	fd->tid_used -= pageset_count - tididx;
	spin_unlock(&fd->tid_lock);

	/* unpin all pages not covered by a TID */
	unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
			false);

	if (fd->use_mn) {
		/* check for an invalidate during setup */
		bool fail = false;

		/* cover_mutex serializes this check against the notifier callback */
		mutex_lock(&tidbuf->cover_mutex);
		fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
		mutex_unlock(&tidbuf->cover_mutex);

		if (fail) {
			ret = -EBUSY;
			goto fail_unprogram;
		}
	}

	tinfo->tidcnt = tididx;
	tinfo->length = mapped_pages * PAGE_SIZE;

	if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
			 tidlist, sizeof(tidlist[0]) * tididx)) {
		ret = -EFAULT;
		goto fail_unprogram;
	}

	/* Success: the whole-buffer notifier has served its purpose. */
	if (fd->use_mn)
		mmu_interval_notifier_remove(&tidbuf->notifier);
	kfree(tidbuf->pages);
	kfree(tidbuf->psets);
	kfree(tidbuf);
	kfree(tidlist);
	return 0;

	/*
	 * Error labels unwind in layers: unprogram falls through to
	 * unreserve, which falls through to unpin and then to releasing
	 * memory. Earlier failures jump into the appropriate layer.
	 */
fail_unprogram:
	/* unprogram, unmap, and unpin all allocated TIDs */
	tinfo->tidlist = (unsigned long)tidlist;
	hfi1_user_exp_rcv_clear(fd, tinfo);
	tinfo->tidlist = 0;
	pinned = 0;		/* nothing left to unpin */
	pageset_count = 0;	/* nothing left reserved */
fail_unreserve:
	spin_lock(&fd->tid_lock);
	fd->tid_used -= pageset_count;
	spin_unlock(&fd->tid_lock);
fail_unpin:
	if (fd->use_mn)
		mmu_interval_notifier_remove(&tidbuf->notifier);
	if (pinned > 0)
		unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
fail_release_mem:
	kfree(tidbuf->pages);
	kfree(tidbuf->psets);
	kfree(tidbuf);
	kfree(tidlist);
	return ret;
}
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ciint hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
5298c2ecf20Sopenharmony_ci			    struct hfi1_tid_info *tinfo)
5308c2ecf20Sopenharmony_ci{
5318c2ecf20Sopenharmony_ci	int ret = 0;
5328c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *uctxt = fd->uctxt;
5338c2ecf20Sopenharmony_ci	u32 *tidinfo;
5348c2ecf20Sopenharmony_ci	unsigned tididx;
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ci	if (unlikely(tinfo->tidcnt > fd->tid_used))
5378c2ecf20Sopenharmony_ci		return -EINVAL;
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci	tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist),
5408c2ecf20Sopenharmony_ci			      sizeof(tidinfo[0]) * tinfo->tidcnt);
5418c2ecf20Sopenharmony_ci	if (IS_ERR(tidinfo))
5428c2ecf20Sopenharmony_ci		return PTR_ERR(tidinfo);
5438c2ecf20Sopenharmony_ci
5448c2ecf20Sopenharmony_ci	mutex_lock(&uctxt->exp_mutex);
5458c2ecf20Sopenharmony_ci	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
5468c2ecf20Sopenharmony_ci		ret = unprogram_rcvarray(fd, tidinfo[tididx]);
5478c2ecf20Sopenharmony_ci		if (ret) {
5488c2ecf20Sopenharmony_ci			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
5498c2ecf20Sopenharmony_ci				  ret);
5508c2ecf20Sopenharmony_ci			break;
5518c2ecf20Sopenharmony_ci		}
5528c2ecf20Sopenharmony_ci	}
5538c2ecf20Sopenharmony_ci	spin_lock(&fd->tid_lock);
5548c2ecf20Sopenharmony_ci	fd->tid_used -= tididx;
5558c2ecf20Sopenharmony_ci	spin_unlock(&fd->tid_lock);
5568c2ecf20Sopenharmony_ci	tinfo->tidcnt = tididx;
5578c2ecf20Sopenharmony_ci	mutex_unlock(&uctxt->exp_mutex);
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci	kfree(tidinfo);
5608c2ecf20Sopenharmony_ci	return ret;
5618c2ecf20Sopenharmony_ci}
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_ciint hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
5648c2ecf20Sopenharmony_ci			      struct hfi1_tid_info *tinfo)
5658c2ecf20Sopenharmony_ci{
5668c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *uctxt = fd->uctxt;
5678c2ecf20Sopenharmony_ci	unsigned long *ev = uctxt->dd->events +
5688c2ecf20Sopenharmony_ci		(uctxt_offset(uctxt) + fd->subctxt);
5698c2ecf20Sopenharmony_ci	u32 *array;
5708c2ecf20Sopenharmony_ci	int ret = 0;
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_ci	/*
5738c2ecf20Sopenharmony_ci	 * copy_to_user() can sleep, which will leave the invalid_lock
5748c2ecf20Sopenharmony_ci	 * locked and cause the MMU notifier to be blocked on the lock
5758c2ecf20Sopenharmony_ci	 * for a long time.
5768c2ecf20Sopenharmony_ci	 * Copy the data to a local buffer so we can release the lock.
5778c2ecf20Sopenharmony_ci	 */
5788c2ecf20Sopenharmony_ci	array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL);
5798c2ecf20Sopenharmony_ci	if (!array)
5808c2ecf20Sopenharmony_ci		return -EFAULT;
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci	spin_lock(&fd->invalid_lock);
5838c2ecf20Sopenharmony_ci	if (fd->invalid_tid_idx) {
5848c2ecf20Sopenharmony_ci		memcpy(array, fd->invalid_tids, sizeof(*array) *
5858c2ecf20Sopenharmony_ci		       fd->invalid_tid_idx);
5868c2ecf20Sopenharmony_ci		memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) *
5878c2ecf20Sopenharmony_ci		       fd->invalid_tid_idx);
5888c2ecf20Sopenharmony_ci		tinfo->tidcnt = fd->invalid_tid_idx;
5898c2ecf20Sopenharmony_ci		fd->invalid_tid_idx = 0;
5908c2ecf20Sopenharmony_ci		/*
5918c2ecf20Sopenharmony_ci		 * Reset the user flag while still holding the lock.
5928c2ecf20Sopenharmony_ci		 * Otherwise, PSM can miss events.
5938c2ecf20Sopenharmony_ci		 */
5948c2ecf20Sopenharmony_ci		clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
5958c2ecf20Sopenharmony_ci	} else {
5968c2ecf20Sopenharmony_ci		tinfo->tidcnt = 0;
5978c2ecf20Sopenharmony_ci	}
5988c2ecf20Sopenharmony_ci	spin_unlock(&fd->invalid_lock);
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_ci	if (tinfo->tidcnt) {
6018c2ecf20Sopenharmony_ci		if (copy_to_user((void __user *)tinfo->tidlist,
6028c2ecf20Sopenharmony_ci				 array, sizeof(*array) * tinfo->tidcnt))
6038c2ecf20Sopenharmony_ci			ret = -EFAULT;
6048c2ecf20Sopenharmony_ci	}
6058c2ecf20Sopenharmony_ci	kfree(array);
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_ci	return ret;
6088c2ecf20Sopenharmony_ci}
6098c2ecf20Sopenharmony_ci
/*
 * Scan the pinned user pages and split them into "pagesets" — runs of
 * physically contiguous pages sized so the hardware can map each run
 * with a single RcvArray entry.  The resulting sets are written to
 * tidbuf->psets; the return value is the number of sets produced.
 */
static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages)
{
	unsigned pagecount, pageidx, setcount = 0, i;
	unsigned long pfn, this_pfn;
	struct page **pages = tidbuf->pages;
	struct tid_pageset *list = tidbuf->psets;

	if (!npages)
		return 0;

	/*
	 * Look for sets of physically contiguous pages in the user buffer.
	 * This will allow us to optimize Expected RcvArray entry usage by
	 * using the bigger supported sizes.
	 */
	pfn = page_to_pfn(pages[0]);
	/* i == npages is a sentinel pass (this_pfn = 0) to flush the last run */
	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
		this_pfn = i < npages ? page_to_pfn(pages[i]) : 0;

		/*
		 * If the pfn's are not sequential, pages are not physically
		 * contiguous.
		 */
		if (this_pfn != ++pfn) {
			/*
			 * At this point we have to loop over the set of
			 * physically contiguous pages and break them down it
			 * sizes supported by the HW.
			 * There are two main constraints:
			 *     1. The max buffer size is MAX_EXPECTED_BUFFER.
			 *        If the total set size is bigger than that
			 *        program only a MAX_EXPECTED_BUFFER chunk.
			 *     2. The buffer size has to be a power of two. If
			 *        it is not, round down to the closes power of
			 *        2 and program that size.
			 */
			while (pagecount) {
				int maxpages = pagecount;
				u32 bufsize = pagecount * PAGE_SIZE;

				if (bufsize > MAX_EXPECTED_BUFFER)
					maxpages =
						MAX_EXPECTED_BUFFER >>
						PAGE_SHIFT;
				else if (!is_power_of_2(bufsize))
					maxpages =
						rounddown_pow_of_two(bufsize) >>
						PAGE_SHIFT;

				/* Emit one pageset and retry with the remainder. */
				list[setcount].idx = pageidx;
				list[setcount].count = maxpages;
				pagecount -= maxpages;
				pageidx += maxpages;
				setcount++;
			}
			/* Start a new run at the current page. */
			pageidx = i;
			pagecount = 1;
			pfn = this_pfn;
		} else {
			pagecount++;
		}
	}
	return setcount;
}
6748c2ecf20Sopenharmony_ci
/**
 * program_rcvarray() - program an RcvArray group with receive buffers
 * @fd: filedata pointer
 * @tbuf: pointer to struct tid_user_buf that has the user buffer starting
 *	  virtual address, buffer length, page pointers, pagesets (array of
 *	  struct tid_pageset holding information on physically contiguous
 *	  chunks from the user buffer), and other fields.
 * @grp: RcvArray group
 * @start: starting index into sets array
 * @count: number of struct tid_pageset's to program
 * @tidlist: the array of u32 elements when the information about the
 *           programmed RcvArray entries is to be encoded.
 * @tididx: starting offset into tidlist
 * @pmapped: (output parameter) number of pages programmed into the RcvArray
 *           entries.
 *
 * This function will program up to 'count' number of RcvArray entries from the
 * group 'grp'. To make best use of write-combining writes, the function will
 * perform writes to the unused RcvArray entries which will be ignored by the
 * HW. Each RcvArray entry will be programmed with a physically contiguous
 * buffer chunk from the user's virtual buffer.
 *
 * Return:
 * -EINVAL if the requested count is larger than the size of the group,
 * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
 * number of RcvArray entries programmed.
 */
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf,
			    struct tid_group *grp,
			    unsigned int start, u16 count,
			    u32 *tidlist, unsigned int *tididx,
			    unsigned int *pmapped)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	u16 idx;
	u32 tidinfo = 0, rcventry, useidx = 0;
	int mapped = 0;

	/* Count should never be larger than the group size */
	if (count > grp->size)
		return -EINVAL;

	/*
	 * Find the first unused entry in the group, issuing "blank"
	 * write-combining fills for every used entry skipped on the way.
	 */
	for (idx = 0; idx < grp->size; idx++) {
		if (!(grp->map & (1 << idx))) {
			useidx = idx;
			break;
		}
		rcv_array_wc_fill(dd, grp->base + idx);
	}

	idx = 0;
	while (idx < count) {
		u16 npages, pageidx, setidx = start + idx;
		int ret = 0;

		/*
		 * If this entry in the group is used, move to the next one.
		 * If we go past the end of the group, exit the loop.
		 */
		if (useidx >= grp->size) {
			break;
		} else if (grp->map & (1 << useidx)) {
			rcv_array_wc_fill(dd, grp->base + useidx);
			useidx++;
			continue;
		}

		rcventry = grp->base + useidx;
		npages = tbuf->psets[setidx].count;
		pageidx = tbuf->psets[setidx].idx;

		ret = set_rcvarray_entry(fd, tbuf,
					 rcventry, grp, pageidx,
					 npages);
		if (ret)
			return ret;
		mapped += npages;

		/* Encode entry index + length for return to user space. */
		tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
			EXP_TID_SET(LEN, npages);
		tidlist[(*tididx)++] = tidinfo;
		grp->used++;
		grp->map |= 1 << useidx++;
		idx++;
	}

	/* Fill the rest of the group with "blank" writes */
	for (; useidx < grp->size; useidx++)
		rcv_array_wc_fill(dd, grp->base + useidx);
	*pmapped = mapped;
	return idx;
}
7698c2ecf20Sopenharmony_ci
/*
 * Program a single RcvArray entry: DMA-map the physically contiguous
 * run of @npages pages starting at @pageidx in @tbuf, track it with a
 * tid_rb_node (optionally registered with the MMU interval notifier),
 * and write the entry to the hardware.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EFAULT if
 * the DMA mapping or notifier insertion fails.
 */
static int set_rcvarray_entry(struct hfi1_filedata *fd,
			      struct tid_user_buf *tbuf,
			      u32 rcventry, struct tid_group *grp,
			      u16 pageidx, unsigned int npages)
{
	int ret;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct tid_rb_node *node;
	struct hfi1_devdata *dd = uctxt->dd;
	dma_addr_t phys;
	struct page **pages = tbuf->pages + pageidx;

	/*
	 * Allocate the node first so we can handle a potential
	 * failure before we've programmed anything.
	 */
	node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
		       GFP_KERNEL);
	if (!node)
		return -ENOMEM;

	/* Map the whole contiguous run with a single DMA mapping. */
	phys = pci_map_single(dd->pcidev,
			      __va(page_to_phys(pages[0])),
			      npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
	if (dma_mapping_error(&dd->pcidev->dev, phys)) {
		dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
			   phys);
		kfree(node);
		return -EFAULT;
	}

	node->fdata = fd;
	mutex_init(&node->invalidate_mutex);
	node->phys = page_to_phys(pages[0]);
	node->npages = npages;
	node->rcventry = rcventry;
	node->dma_addr = phys;
	node->grp = grp;
	node->freed = false;
	memcpy(node->pages, pages, sizeof(struct page *) * npages);

	if (fd->use_mn) {
		ret = mmu_interval_notifier_insert(
			&node->notifier, current->mm,
			tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
			&tid_mn_ops);
		if (ret)
			goto out_unmap;
	}
	/* Make the node findable by rcventry before touching the HW. */
	fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;

	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
			       node->notifier.interval_tree.start, node->phys,
			       phys);
	return 0;

out_unmap:
	hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
		  node->rcventry, node->notifier.interval_tree.start,
		  node->phys, ret);
	pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
			 PCI_DMA_FROMDEVICE);
	kfree(node);
	return -EFAULT;
}
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_cistatic int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
8388c2ecf20Sopenharmony_ci{
8398c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *uctxt = fd->uctxt;
8408c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = uctxt->dd;
8418c2ecf20Sopenharmony_ci	struct tid_rb_node *node;
8428c2ecf20Sopenharmony_ci	u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
8438c2ecf20Sopenharmony_ci	u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
8448c2ecf20Sopenharmony_ci
8458c2ecf20Sopenharmony_ci	if (tididx >= uctxt->expected_count) {
8468c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
8478c2ecf20Sopenharmony_ci			   tididx, uctxt->ctxt);
8488c2ecf20Sopenharmony_ci		return -EINVAL;
8498c2ecf20Sopenharmony_ci	}
8508c2ecf20Sopenharmony_ci
8518c2ecf20Sopenharmony_ci	if (tidctrl == 0x3)
8528c2ecf20Sopenharmony_ci		return -EINVAL;
8538c2ecf20Sopenharmony_ci
8548c2ecf20Sopenharmony_ci	rcventry = tididx + (tidctrl - 1);
8558c2ecf20Sopenharmony_ci
8568c2ecf20Sopenharmony_ci	node = fd->entry_to_rb[rcventry];
8578c2ecf20Sopenharmony_ci	if (!node || node->rcventry != (uctxt->expected_base + rcventry))
8588c2ecf20Sopenharmony_ci		return -EBADF;
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci	if (fd->use_mn)
8618c2ecf20Sopenharmony_ci		mmu_interval_notifier_remove(&node->notifier);
8628c2ecf20Sopenharmony_ci	cacheless_tid_rb_remove(fd, node);
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_ci	return 0;
8658c2ecf20Sopenharmony_ci}
8668c2ecf20Sopenharmony_ci
8678c2ecf20Sopenharmony_cistatic void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
8688c2ecf20Sopenharmony_ci{
8698c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *uctxt = fd->uctxt;
8708c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = uctxt->dd;
8718c2ecf20Sopenharmony_ci
8728c2ecf20Sopenharmony_ci	mutex_lock(&node->invalidate_mutex);
8738c2ecf20Sopenharmony_ci	if (node->freed)
8748c2ecf20Sopenharmony_ci		goto done;
8758c2ecf20Sopenharmony_ci	node->freed = true;
8768c2ecf20Sopenharmony_ci
8778c2ecf20Sopenharmony_ci	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
8788c2ecf20Sopenharmony_ci				 node->npages,
8798c2ecf20Sopenharmony_ci				 node->notifier.interval_tree.start, node->phys,
8808c2ecf20Sopenharmony_ci				 node->dma_addr);
8818c2ecf20Sopenharmony_ci
8828c2ecf20Sopenharmony_ci	/* Make sure device has seen the write before pages are unpinned */
8838c2ecf20Sopenharmony_ci	hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci	unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
8868c2ecf20Sopenharmony_cidone:
8878c2ecf20Sopenharmony_ci	mutex_unlock(&node->invalidate_mutex);
8888c2ecf20Sopenharmony_ci}
8898c2ecf20Sopenharmony_ci
8908c2ecf20Sopenharmony_cistatic void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
8918c2ecf20Sopenharmony_ci{
8928c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *uctxt = fd->uctxt;
8938c2ecf20Sopenharmony_ci
8948c2ecf20Sopenharmony_ci	__clear_tid_node(fd, node);
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci	node->grp->used--;
8978c2ecf20Sopenharmony_ci	node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
8988c2ecf20Sopenharmony_ci
8998c2ecf20Sopenharmony_ci	if (node->grp->used == node->grp->size - 1)
9008c2ecf20Sopenharmony_ci		tid_group_move(node->grp, &uctxt->tid_full_list,
9018c2ecf20Sopenharmony_ci			       &uctxt->tid_used_list);
9028c2ecf20Sopenharmony_ci	else if (!node->grp->used)
9038c2ecf20Sopenharmony_ci		tid_group_move(node->grp, &uctxt->tid_used_list,
9048c2ecf20Sopenharmony_ci			       &uctxt->tid_group_list);
9058c2ecf20Sopenharmony_ci	kfree(node);
9068c2ecf20Sopenharmony_ci}
9078c2ecf20Sopenharmony_ci
9088c2ecf20Sopenharmony_ci/*
9098c2ecf20Sopenharmony_ci * As a simple helper for hfi1_user_exp_rcv_free, this function deals with
9108c2ecf20Sopenharmony_ci * clearing nodes in the non-cached case.
9118c2ecf20Sopenharmony_ci */
9128c2ecf20Sopenharmony_cistatic void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
9138c2ecf20Sopenharmony_ci			    struct exp_tid_set *set,
9148c2ecf20Sopenharmony_ci			    struct hfi1_filedata *fd)
9158c2ecf20Sopenharmony_ci{
9168c2ecf20Sopenharmony_ci	struct tid_group *grp, *ptr;
9178c2ecf20Sopenharmony_ci	int i;
9188c2ecf20Sopenharmony_ci
9198c2ecf20Sopenharmony_ci	list_for_each_entry_safe(grp, ptr, &set->list, list) {
9208c2ecf20Sopenharmony_ci		list_del_init(&grp->list);
9218c2ecf20Sopenharmony_ci
9228c2ecf20Sopenharmony_ci		for (i = 0; i < grp->size; i++) {
9238c2ecf20Sopenharmony_ci			if (grp->map & (1 << i)) {
9248c2ecf20Sopenharmony_ci				u16 rcventry = grp->base + i;
9258c2ecf20Sopenharmony_ci				struct tid_rb_node *node;
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ci				node = fd->entry_to_rb[rcventry -
9288c2ecf20Sopenharmony_ci							  uctxt->expected_base];
9298c2ecf20Sopenharmony_ci				if (!node || node->rcventry != rcventry)
9308c2ecf20Sopenharmony_ci					continue;
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ci				if (fd->use_mn)
9338c2ecf20Sopenharmony_ci					mmu_interval_notifier_remove(
9348c2ecf20Sopenharmony_ci						&node->notifier);
9358c2ecf20Sopenharmony_ci				cacheless_tid_rb_remove(fd, node);
9368c2ecf20Sopenharmony_ci			}
9378c2ecf20Sopenharmony_ci		}
9388c2ecf20Sopenharmony_ci	}
9398c2ecf20Sopenharmony_ci}
9408c2ecf20Sopenharmony_ci
/*
 * MMU interval notifier callback for a programmed TID node.  On an
 * unmap event the HW entry is invalidated and the node is queued on
 * fdata->invalid_tids so user space can learn about it via
 * hfi1_user_exp_rcv_invalid().
 */
static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq)
{
	struct tid_rb_node *node =
		container_of(mni, struct tid_rb_node, notifier);
	struct hfi1_filedata *fdata = node->fdata;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;

	/* Already torn down; nothing left to invalidate. */
	if (node->freed)
		return true;

	/* take action only if unmapping */
	if (range->event != MMU_NOTIFY_UNMAP)
		return true;

	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
				 node->notifier.interval_tree.start,
				 node->rcventry, node->npages, node->dma_addr);

	/* clear the hardware rcvarray entry */
	__clear_tid_node(fdata, node);

	spin_lock(&fdata->invalid_lock);
	if (fdata->invalid_tid_idx < uctxt->expected_count) {
		/* Record the invalidated TID for later retrieval by PSM. */
		fdata->invalid_tids[fdata->invalid_tid_idx] =
			rcventry2tidinfo(node->rcventry - uctxt->expected_base);
		fdata->invalid_tids[fdata->invalid_tid_idx] |=
			EXP_TID_SET(LEN, node->npages);
		if (!fdata->invalid_tid_idx) {
			unsigned long *ev;

			/*
			 * hfi1_set_uevent_bits() sets a user event flag
			 * for all processes. Because calling into the
			 * driver to process TID cache invalidations is
			 * expensive and TID cache invalidations are
			 * handled on a per-process basis, we can
			 * optimize this to set the flag only for the
			 * process in question.
			 */
			ev = uctxt->dd->events +
				(uctxt_offset(uctxt) + fdata->subctxt);
			set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
		}
		fdata->invalid_tid_idx++;
	}
	spin_unlock(&fdata->invalid_lock);
	return true;
}
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_cistatic bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
9938c2ecf20Sopenharmony_ci			         const struct mmu_notifier_range *range,
9948c2ecf20Sopenharmony_ci			         unsigned long cur_seq)
9958c2ecf20Sopenharmony_ci{
9968c2ecf20Sopenharmony_ci	struct tid_user_buf *tidbuf =
9978c2ecf20Sopenharmony_ci		container_of(mni, struct tid_user_buf, notifier);
9988c2ecf20Sopenharmony_ci
9998c2ecf20Sopenharmony_ci	/* take action only if unmapping */
10008c2ecf20Sopenharmony_ci	if (range->event == MMU_NOTIFY_UNMAP) {
10018c2ecf20Sopenharmony_ci		mutex_lock(&tidbuf->cover_mutex);
10028c2ecf20Sopenharmony_ci		mmu_interval_set_seq(mni, cur_seq);
10038c2ecf20Sopenharmony_ci		mutex_unlock(&tidbuf->cover_mutex);
10048c2ecf20Sopenharmony_ci	}
10058c2ecf20Sopenharmony_ci
10068c2ecf20Sopenharmony_ci	return true;
10078c2ecf20Sopenharmony_ci}
10088c2ecf20Sopenharmony_ci
10098c2ecf20Sopenharmony_cistatic void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
10108c2ecf20Sopenharmony_ci				    struct tid_rb_node *tnode)
10118c2ecf20Sopenharmony_ci{
10128c2ecf20Sopenharmony_ci	u32 base = fdata->uctxt->expected_base;
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_ci	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
10158c2ecf20Sopenharmony_ci	clear_tid_node(fdata, tnode);
10168c2ecf20Sopenharmony_ci}
1017