18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright(c) 2015 - 2020 Intel Corporation.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * This file is provided under a dual BSD/GPLv2 license.  When using or
58c2ecf20Sopenharmony_ci * redistributing this file, you may do so under either license.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * GPL LICENSE SUMMARY
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or modify
108c2ecf20Sopenharmony_ci * it under the terms of version 2 of the GNU General Public License as
118c2ecf20Sopenharmony_ci * published by the Free Software Foundation.
128c2ecf20Sopenharmony_ci *
138c2ecf20Sopenharmony_ci * This program is distributed in the hope that it will be useful, but
148c2ecf20Sopenharmony_ci * WITHOUT ANY WARRANTY; without even the implied warranty of
158c2ecf20Sopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
168c2ecf20Sopenharmony_ci * General Public License for more details.
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci * BSD LICENSE
198c2ecf20Sopenharmony_ci *
208c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
218c2ecf20Sopenharmony_ci * modification, are permitted provided that the following conditions
228c2ecf20Sopenharmony_ci * are met:
238c2ecf20Sopenharmony_ci *
248c2ecf20Sopenharmony_ci *  - Redistributions of source code must retain the above copyright
258c2ecf20Sopenharmony_ci *    notice, this list of conditions and the following disclaimer.
268c2ecf20Sopenharmony_ci *  - Redistributions in binary form must reproduce the above copyright
278c2ecf20Sopenharmony_ci *    notice, this list of conditions and the following disclaimer in
288c2ecf20Sopenharmony_ci *    the documentation and/or other materials provided with the
298c2ecf20Sopenharmony_ci *    distribution.
308c2ecf20Sopenharmony_ci *  - Neither the name of Intel Corporation nor the names of its
318c2ecf20Sopenharmony_ci *    contributors may be used to endorse or promote products derived
328c2ecf20Sopenharmony_ci *    from this software without specific prior written permission.
338c2ecf20Sopenharmony_ci *
348c2ecf20Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
358c2ecf20Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
368c2ecf20Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
378c2ecf20Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
388c2ecf20Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
398c2ecf20Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
408c2ecf20Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
418c2ecf20Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
428c2ecf20Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
438c2ecf20Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
448c2ecf20Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
458c2ecf20Sopenharmony_ci *
468c2ecf20Sopenharmony_ci */
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci#include <linux/pci.h>
498c2ecf20Sopenharmony_ci#include <linux/netdevice.h>
508c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
518c2ecf20Sopenharmony_ci#include <linux/delay.h>
528c2ecf20Sopenharmony_ci#include <linux/xarray.h>
538c2ecf20Sopenharmony_ci#include <linux/module.h>
548c2ecf20Sopenharmony_ci#include <linux/printk.h>
558c2ecf20Sopenharmony_ci#include <linux/hrtimer.h>
568c2ecf20Sopenharmony_ci#include <linux/bitmap.h>
578c2ecf20Sopenharmony_ci#include <linux/numa.h>
588c2ecf20Sopenharmony_ci#include <rdma/rdma_vt.h>
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci#include "hfi.h"
618c2ecf20Sopenharmony_ci#include "device.h"
628c2ecf20Sopenharmony_ci#include "common.h"
638c2ecf20Sopenharmony_ci#include "trace.h"
648c2ecf20Sopenharmony_ci#include "mad.h"
658c2ecf20Sopenharmony_ci#include "sdma.h"
668c2ecf20Sopenharmony_ci#include "debugfs.h"
678c2ecf20Sopenharmony_ci#include "verbs.h"
688c2ecf20Sopenharmony_ci#include "aspm.h"
698c2ecf20Sopenharmony_ci#include "affinity.h"
708c2ecf20Sopenharmony_ci#include "vnic.h"
718c2ecf20Sopenharmony_ci#include "exp_rcv.h"
728c2ecf20Sopenharmony_ci#include "netdev.h"
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci#undef pr_fmt
758c2ecf20Sopenharmony_ci#define pr_fmt(fmt) DRIVER_NAME ": " fmt
768c2ecf20Sopenharmony_ci
/*
 * Minimum number of buffers we want each user context to have.
 * NOTE(review): the previous wording ended in "after driver", presumably
 * meaning after the driver's own buffers are accounted for -- confirm
 * against the code that uses this constant.
 */
#define HFI1_MIN_USER_CTXT_BUFCNT 7

/* Eager buffer size limits, in bytes. */
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */

#define NUM_IB_PORTS 1

/*
 * Number of user receive contexts we are configured to use (to allow for more
 * pio buffers per ctxt, etc.).  The default, -1, selects the real (non-HT)
 * CPU count, per the parameter description below.
 * NOTE(review): this comment used to say that zero means one user context
 * per CPU, which does not match the -1 default -- confirm how 0 is treated
 * where this value is consumed.
 */
int num_user_contexts = -1;
module_param_named(num_user_contexts, num_user_contexts, int, 0444);
MODULE_PARM_DESC(
	num_user_contexts, "Set max number of user contexts to use (default: -1 will use the real (non-HT) CPU count)");

/*
 * Per-VL counts of non-control kernel receive queues.  krcvqsset is the
 * count variable handed to module_param_array() for this array.
 */
uint krcvqs[RXE_NUM_DATA_VL];
int krcvqsset;
module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");

/* computed based on above array */
unsigned long n_krcvqs;

/*
 * Percentage of a context's RcvArray entries that hfi1_create_ctxtdata()
 * assigns to eager buffers.
 */
static unsigned hfi1_rcvarr_split = 25;
module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers");

/* Copied into rcd->egrbufs.size by hfi1_create_ctxtdata(). */
static uint eager_buffer_size = (8 << 20); /* 8MB */
module_param(eager_buffer_size, uint, S_IRUGO);
MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB");

/* Copied into rcd->rcvhdrq_cnt by hfi1_create_ctxtdata(). */
static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */
module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO);
MODULE_PARM_DESC(rcvhdrcnt, "Receive header queue count (default 2048)");

/* Copied into rcd->rcvhdrqentsize by hfi1_create_ctxtdata(). */
static uint hfi1_hdrq_entsize = 32;
module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, 0444);
MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B, 32 - 128B (default)");

unsigned int user_credit_return_threshold = 33;	/* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");

/*
 * Driver-wide xarray named hfi1_dev_table, defined with the
 * XA_FLAGS_ALLOC and XA_FLAGS_LOCK_IRQ flags.
 * NOTE(review): presumably maps unit numbers to devdata -- confirm
 * against the users of this table.
 */
DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_cistatic int hfi1_create_kctxt(struct hfi1_devdata *dd,
1278c2ecf20Sopenharmony_ci			     struct hfi1_pportdata *ppd)
1288c2ecf20Sopenharmony_ci{
1298c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
1308c2ecf20Sopenharmony_ci	int ret;
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci	/* Control context has to be always 0 */
1338c2ecf20Sopenharmony_ci	BUILD_BUG_ON(HFI1_CTRL_CTXT != 0);
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	ret = hfi1_create_ctxtdata(ppd, dd->node, &rcd);
1368c2ecf20Sopenharmony_ci	if (ret < 0) {
1378c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Kernel receive context allocation failed\n");
1388c2ecf20Sopenharmony_ci		return ret;
1398c2ecf20Sopenharmony_ci	}
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci	/*
1428c2ecf20Sopenharmony_ci	 * Set up the kernel context flags here and now because they use
1438c2ecf20Sopenharmony_ci	 * default values for all receive side memories.  User contexts will
1448c2ecf20Sopenharmony_ci	 * be handled as they are created.
1458c2ecf20Sopenharmony_ci	 */
1468c2ecf20Sopenharmony_ci	rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
1478c2ecf20Sopenharmony_ci		HFI1_CAP_KGET(NODROP_RHQ_FULL) |
1488c2ecf20Sopenharmony_ci		HFI1_CAP_KGET(NODROP_EGR_FULL) |
1498c2ecf20Sopenharmony_ci		HFI1_CAP_KGET(DMA_RTAIL);
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci	/* Control context must use DMA_RTAIL */
1528c2ecf20Sopenharmony_ci	if (rcd->ctxt == HFI1_CTRL_CTXT)
1538c2ecf20Sopenharmony_ci		rcd->flags |= HFI1_CAP_DMA_RTAIL;
1548c2ecf20Sopenharmony_ci	rcd->fast_handler = get_dma_rtail_setting(rcd) ?
1558c2ecf20Sopenharmony_ci				handle_receive_interrupt_dma_rtail :
1568c2ecf20Sopenharmony_ci				handle_receive_interrupt_nodma_rtail;
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	hfi1_set_seq_cnt(rcd, 1);
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
1618c2ecf20Sopenharmony_ci	if (!rcd->sc) {
1628c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Kernel send context allocation failed\n");
1638c2ecf20Sopenharmony_ci		return -ENOMEM;
1648c2ecf20Sopenharmony_ci	}
1658c2ecf20Sopenharmony_ci	hfi1_init_ctxt(rcd->sc);
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci	return 0;
1688c2ecf20Sopenharmony_ci}
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_ci/*
1718c2ecf20Sopenharmony_ci * Create the receive context array and one or more kernel contexts
1728c2ecf20Sopenharmony_ci */
1738c2ecf20Sopenharmony_ciint hfi1_create_kctxts(struct hfi1_devdata *dd)
1748c2ecf20Sopenharmony_ci{
1758c2ecf20Sopenharmony_ci	u16 i;
1768c2ecf20Sopenharmony_ci	int ret;
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	dd->rcd = kcalloc_node(dd->num_rcv_contexts, sizeof(*dd->rcd),
1798c2ecf20Sopenharmony_ci			       GFP_KERNEL, dd->node);
1808c2ecf20Sopenharmony_ci	if (!dd->rcd)
1818c2ecf20Sopenharmony_ci		return -ENOMEM;
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
1848c2ecf20Sopenharmony_ci		ret = hfi1_create_kctxt(dd, dd->pport);
1858c2ecf20Sopenharmony_ci		if (ret)
1868c2ecf20Sopenharmony_ci			goto bail;
1878c2ecf20Sopenharmony_ci	}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci	return 0;
1908c2ecf20Sopenharmony_cibail:
1918c2ecf20Sopenharmony_ci	for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i)
1928c2ecf20Sopenharmony_ci		hfi1_free_ctxt(dd->rcd[i]);
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	/* All the contexts should be freed, free the array */
1958c2ecf20Sopenharmony_ci	kfree(dd->rcd);
1968c2ecf20Sopenharmony_ci	dd->rcd = NULL;
1978c2ecf20Sopenharmony_ci	return ret;
1988c2ecf20Sopenharmony_ci}
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci/*
2018c2ecf20Sopenharmony_ci * Helper routines for the receive context reference count (rcd and uctxt).
2028c2ecf20Sopenharmony_ci */
2038c2ecf20Sopenharmony_cistatic void hfi1_rcd_init(struct hfi1_ctxtdata *rcd)
2048c2ecf20Sopenharmony_ci{
2058c2ecf20Sopenharmony_ci	kref_init(&rcd->kref);
2068c2ecf20Sopenharmony_ci}
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci/**
2098c2ecf20Sopenharmony_ci * hfi1_rcd_free - When reference is zero clean up.
2108c2ecf20Sopenharmony_ci * @kref: pointer to an initialized rcd data structure
2118c2ecf20Sopenharmony_ci *
2128c2ecf20Sopenharmony_ci */
2138c2ecf20Sopenharmony_cistatic void hfi1_rcd_free(struct kref *kref)
2148c2ecf20Sopenharmony_ci{
2158c2ecf20Sopenharmony_ci	unsigned long flags;
2168c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd =
2178c2ecf20Sopenharmony_ci		container_of(kref, struct hfi1_ctxtdata, kref);
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
2208c2ecf20Sopenharmony_ci	rcd->dd->rcd[rcd->ctxt] = NULL;
2218c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	hfi1_free_ctxtdata(rcd->dd, rcd);
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	kfree(rcd);
2268c2ecf20Sopenharmony_ci}
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci/**
2298c2ecf20Sopenharmony_ci * hfi1_rcd_put - decrement reference for rcd
2308c2ecf20Sopenharmony_ci * @rcd: pointer to an initialized rcd data structure
2318c2ecf20Sopenharmony_ci *
2328c2ecf20Sopenharmony_ci * Use this to put a reference after the init.
2338c2ecf20Sopenharmony_ci */
2348c2ecf20Sopenharmony_ciint hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
2358c2ecf20Sopenharmony_ci{
2368c2ecf20Sopenharmony_ci	if (rcd)
2378c2ecf20Sopenharmony_ci		return kref_put(&rcd->kref, hfi1_rcd_free);
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	return 0;
2408c2ecf20Sopenharmony_ci}
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci/**
2438c2ecf20Sopenharmony_ci * hfi1_rcd_get - increment reference for rcd
2448c2ecf20Sopenharmony_ci * @rcd: pointer to an initialized rcd data structure
2458c2ecf20Sopenharmony_ci *
2468c2ecf20Sopenharmony_ci * Use this to get a reference after the init.
2478c2ecf20Sopenharmony_ci *
2488c2ecf20Sopenharmony_ci * Return : reflect kref_get_unless_zero(), which returns non-zero on
2498c2ecf20Sopenharmony_ci * increment, otherwise 0.
2508c2ecf20Sopenharmony_ci */
2518c2ecf20Sopenharmony_ciint hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
2528c2ecf20Sopenharmony_ci{
2538c2ecf20Sopenharmony_ci	return kref_get_unless_zero(&rcd->kref);
2548c2ecf20Sopenharmony_ci}
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci/**
2578c2ecf20Sopenharmony_ci * allocate_rcd_index - allocate an rcd index from the rcd array
2588c2ecf20Sopenharmony_ci * @dd: pointer to a valid devdata structure
2598c2ecf20Sopenharmony_ci * @rcd: rcd data structure to assign
2608c2ecf20Sopenharmony_ci * @index: pointer to index that is allocated
2618c2ecf20Sopenharmony_ci *
2628c2ecf20Sopenharmony_ci * Find an empty index in the rcd array, and assign the given rcd to it.
2638c2ecf20Sopenharmony_ci * If the array is full, we are EBUSY.
2648c2ecf20Sopenharmony_ci *
2658c2ecf20Sopenharmony_ci */
2668c2ecf20Sopenharmony_cistatic int allocate_rcd_index(struct hfi1_devdata *dd,
2678c2ecf20Sopenharmony_ci			      struct hfi1_ctxtdata *rcd, u16 *index)
2688c2ecf20Sopenharmony_ci{
2698c2ecf20Sopenharmony_ci	unsigned long flags;
2708c2ecf20Sopenharmony_ci	u16 ctxt;
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci	spin_lock_irqsave(&dd->uctxt_lock, flags);
2738c2ecf20Sopenharmony_ci	for (ctxt = 0; ctxt < dd->num_rcv_contexts; ctxt++)
2748c2ecf20Sopenharmony_ci		if (!dd->rcd[ctxt])
2758c2ecf20Sopenharmony_ci			break;
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci	if (ctxt < dd->num_rcv_contexts) {
2788c2ecf20Sopenharmony_ci		rcd->ctxt = ctxt;
2798c2ecf20Sopenharmony_ci		dd->rcd[ctxt] = rcd;
2808c2ecf20Sopenharmony_ci		hfi1_rcd_init(rcd);
2818c2ecf20Sopenharmony_ci	}
2828c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci	if (ctxt >= dd->num_rcv_contexts)
2858c2ecf20Sopenharmony_ci		return -EBUSY;
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci	*index = ctxt;
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci	return 0;
2908c2ecf20Sopenharmony_ci}
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci/**
2938c2ecf20Sopenharmony_ci * hfi1_rcd_get_by_index_safe - validate the ctxt index before accessing the
2948c2ecf20Sopenharmony_ci * array
2958c2ecf20Sopenharmony_ci * @dd: pointer to a valid devdata structure
2968c2ecf20Sopenharmony_ci * @ctxt: the index of an possilbe rcd
2978c2ecf20Sopenharmony_ci *
2988c2ecf20Sopenharmony_ci * This is a wrapper for hfi1_rcd_get_by_index() to validate that the given
2998c2ecf20Sopenharmony_ci * ctxt index is valid.
3008c2ecf20Sopenharmony_ci *
3018c2ecf20Sopenharmony_ci * The caller is responsible for making the _put().
3028c2ecf20Sopenharmony_ci *
3038c2ecf20Sopenharmony_ci */
3048c2ecf20Sopenharmony_cistruct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
3058c2ecf20Sopenharmony_ci						 u16 ctxt)
3068c2ecf20Sopenharmony_ci{
3078c2ecf20Sopenharmony_ci	if (ctxt < dd->num_rcv_contexts)
3088c2ecf20Sopenharmony_ci		return hfi1_rcd_get_by_index(dd, ctxt);
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci	return NULL;
3118c2ecf20Sopenharmony_ci}
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci/**
3148c2ecf20Sopenharmony_ci * hfi1_rcd_get_by_index
3158c2ecf20Sopenharmony_ci * @dd: pointer to a valid devdata structure
3168c2ecf20Sopenharmony_ci * @ctxt: the index of an possilbe rcd
3178c2ecf20Sopenharmony_ci *
3188c2ecf20Sopenharmony_ci * We need to protect access to the rcd array.  If access is needed to
3198c2ecf20Sopenharmony_ci * one or more index, get the protecting spinlock and then increment the
3208c2ecf20Sopenharmony_ci * kref.
3218c2ecf20Sopenharmony_ci *
3228c2ecf20Sopenharmony_ci * The caller is responsible for making the _put().
3238c2ecf20Sopenharmony_ci *
3248c2ecf20Sopenharmony_ci */
3258c2ecf20Sopenharmony_cistruct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt)
3268c2ecf20Sopenharmony_ci{
3278c2ecf20Sopenharmony_ci	unsigned long flags;
3288c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd = NULL;
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_ci	spin_lock_irqsave(&dd->uctxt_lock, flags);
3318c2ecf20Sopenharmony_ci	if (dd->rcd[ctxt]) {
3328c2ecf20Sopenharmony_ci		rcd = dd->rcd[ctxt];
3338c2ecf20Sopenharmony_ci		if (!hfi1_rcd_get(rcd))
3348c2ecf20Sopenharmony_ci			rcd = NULL;
3358c2ecf20Sopenharmony_ci	}
3368c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci	return rcd;
3398c2ecf20Sopenharmony_ci}
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci/*
3428c2ecf20Sopenharmony_ci * Common code for user and kernel context create and setup.
3438c2ecf20Sopenharmony_ci * NOTE: the initial kref is done here (hf1_rcd_init()).
3448c2ecf20Sopenharmony_ci */
3458c2ecf20Sopenharmony_ciint hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
3468c2ecf20Sopenharmony_ci			 struct hfi1_ctxtdata **context)
3478c2ecf20Sopenharmony_ci{
3488c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = ppd->dd;
3498c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
3508c2ecf20Sopenharmony_ci	unsigned kctxt_ngroups = 0;
3518c2ecf20Sopenharmony_ci	u32 base;
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_ci	if (dd->rcv_entries.nctxt_extra >
3548c2ecf20Sopenharmony_ci	    dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)
3558c2ecf20Sopenharmony_ci		kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
3568c2ecf20Sopenharmony_ci			 (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt));
3578c2ecf20Sopenharmony_ci	rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
3588c2ecf20Sopenharmony_ci	if (rcd) {
3598c2ecf20Sopenharmony_ci		u32 rcvtids, max_entries;
3608c2ecf20Sopenharmony_ci		u16 ctxt;
3618c2ecf20Sopenharmony_ci		int ret;
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci		ret = allocate_rcd_index(dd, rcd, &ctxt);
3648c2ecf20Sopenharmony_ci		if (ret) {
3658c2ecf20Sopenharmony_ci			*context = NULL;
3668c2ecf20Sopenharmony_ci			kfree(rcd);
3678c2ecf20Sopenharmony_ci			return ret;
3688c2ecf20Sopenharmony_ci		}
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&rcd->qp_wait_list);
3718c2ecf20Sopenharmony_ci		hfi1_exp_tid_group_init(rcd);
3728c2ecf20Sopenharmony_ci		rcd->ppd = ppd;
3738c2ecf20Sopenharmony_ci		rcd->dd = dd;
3748c2ecf20Sopenharmony_ci		rcd->numa_id = numa;
3758c2ecf20Sopenharmony_ci		rcd->rcv_array_groups = dd->rcv_entries.ngroups;
3768c2ecf20Sopenharmony_ci		rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
3778c2ecf20Sopenharmony_ci		rcd->slow_handler = handle_receive_interrupt;
3788c2ecf20Sopenharmony_ci		rcd->do_interrupt = rcd->slow_handler;
3798c2ecf20Sopenharmony_ci		rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci		mutex_init(&rcd->exp_mutex);
3828c2ecf20Sopenharmony_ci		spin_lock_init(&rcd->exp_lock);
3838c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
3848c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
3858c2ecf20Sopenharmony_ci
3868c2ecf20Sopenharmony_ci		hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci		/*
3898c2ecf20Sopenharmony_ci		 * Calculate the context's RcvArray entry starting point.
3908c2ecf20Sopenharmony_ci		 * We do this here because we have to take into account all
3918c2ecf20Sopenharmony_ci		 * the RcvArray entries that previous context would have
3928c2ecf20Sopenharmony_ci		 * taken and we have to account for any extra groups assigned
3938c2ecf20Sopenharmony_ci		 * to the static (kernel) or dynamic (vnic/user) contexts.
3948c2ecf20Sopenharmony_ci		 */
3958c2ecf20Sopenharmony_ci		if (ctxt < dd->first_dyn_alloc_ctxt) {
3968c2ecf20Sopenharmony_ci			if (ctxt < kctxt_ngroups) {
3978c2ecf20Sopenharmony_ci				base = ctxt * (dd->rcv_entries.ngroups + 1);
3988c2ecf20Sopenharmony_ci				rcd->rcv_array_groups++;
3998c2ecf20Sopenharmony_ci			} else {
4008c2ecf20Sopenharmony_ci				base = kctxt_ngroups +
4018c2ecf20Sopenharmony_ci					(ctxt * dd->rcv_entries.ngroups);
4028c2ecf20Sopenharmony_ci			}
4038c2ecf20Sopenharmony_ci		} else {
4048c2ecf20Sopenharmony_ci			u16 ct = ctxt - dd->first_dyn_alloc_ctxt;
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci			base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) +
4078c2ecf20Sopenharmony_ci				kctxt_ngroups);
4088c2ecf20Sopenharmony_ci			if (ct < dd->rcv_entries.nctxt_extra) {
4098c2ecf20Sopenharmony_ci				base += ct * (dd->rcv_entries.ngroups + 1);
4108c2ecf20Sopenharmony_ci				rcd->rcv_array_groups++;
4118c2ecf20Sopenharmony_ci			} else {
4128c2ecf20Sopenharmony_ci				base += dd->rcv_entries.nctxt_extra +
4138c2ecf20Sopenharmony_ci					(ct * dd->rcv_entries.ngroups);
4148c2ecf20Sopenharmony_ci			}
4158c2ecf20Sopenharmony_ci		}
4168c2ecf20Sopenharmony_ci		rcd->eager_base = base * dd->rcv_entries.group_size;
4178c2ecf20Sopenharmony_ci
4188c2ecf20Sopenharmony_ci		rcd->rcvhdrq_cnt = rcvhdrcnt;
4198c2ecf20Sopenharmony_ci		rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
4208c2ecf20Sopenharmony_ci		rcd->rhf_offset =
4218c2ecf20Sopenharmony_ci			rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
4228c2ecf20Sopenharmony_ci		/*
4238c2ecf20Sopenharmony_ci		 * Simple Eager buffer allocation: we have already pre-allocated
4248c2ecf20Sopenharmony_ci		 * the number of RcvArray entry groups. Each ctxtdata structure
4258c2ecf20Sopenharmony_ci		 * holds the number of groups for that context.
4268c2ecf20Sopenharmony_ci		 *
4278c2ecf20Sopenharmony_ci		 * To follow CSR requirements and maintain cacheline alignment,
4288c2ecf20Sopenharmony_ci		 * make sure all sizes and bases are multiples of group_size.
4298c2ecf20Sopenharmony_ci		 *
4308c2ecf20Sopenharmony_ci		 * The expected entry count is what is left after assigning
4318c2ecf20Sopenharmony_ci		 * eager.
4328c2ecf20Sopenharmony_ci		 */
4338c2ecf20Sopenharmony_ci		max_entries = rcd->rcv_array_groups *
4348c2ecf20Sopenharmony_ci			dd->rcv_entries.group_size;
4358c2ecf20Sopenharmony_ci		rcvtids = ((max_entries * hfi1_rcvarr_split) / 100);
4368c2ecf20Sopenharmony_ci		rcd->egrbufs.count = round_down(rcvtids,
4378c2ecf20Sopenharmony_ci						dd->rcv_entries.group_size);
4388c2ecf20Sopenharmony_ci		if (rcd->egrbufs.count > MAX_EAGER_ENTRIES) {
4398c2ecf20Sopenharmony_ci			dd_dev_err(dd, "ctxt%u: requested too many RcvArray entries.\n",
4408c2ecf20Sopenharmony_ci				   rcd->ctxt);
4418c2ecf20Sopenharmony_ci			rcd->egrbufs.count = MAX_EAGER_ENTRIES;
4428c2ecf20Sopenharmony_ci		}
4438c2ecf20Sopenharmony_ci		hfi1_cdbg(PROC,
4448c2ecf20Sopenharmony_ci			  "ctxt%u: max Eager buffer RcvArray entries: %u\n",
4458c2ecf20Sopenharmony_ci			  rcd->ctxt, rcd->egrbufs.count);
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ci		/*
4488c2ecf20Sopenharmony_ci		 * Allocate array that will hold the eager buffer accounting
4498c2ecf20Sopenharmony_ci		 * data.
4508c2ecf20Sopenharmony_ci		 * This will allocate the maximum possible buffer count based
4518c2ecf20Sopenharmony_ci		 * on the value of the RcvArray split parameter.
4528c2ecf20Sopenharmony_ci		 * The resulting value will be rounded down to the closest
4538c2ecf20Sopenharmony_ci		 * multiple of dd->rcv_entries.group_size.
4548c2ecf20Sopenharmony_ci		 */
4558c2ecf20Sopenharmony_ci		rcd->egrbufs.buffers =
4568c2ecf20Sopenharmony_ci			kcalloc_node(rcd->egrbufs.count,
4578c2ecf20Sopenharmony_ci				     sizeof(*rcd->egrbufs.buffers),
4588c2ecf20Sopenharmony_ci				     GFP_KERNEL, numa);
4598c2ecf20Sopenharmony_ci		if (!rcd->egrbufs.buffers)
4608c2ecf20Sopenharmony_ci			goto bail;
4618c2ecf20Sopenharmony_ci		rcd->egrbufs.rcvtids =
4628c2ecf20Sopenharmony_ci			kcalloc_node(rcd->egrbufs.count,
4638c2ecf20Sopenharmony_ci				     sizeof(*rcd->egrbufs.rcvtids),
4648c2ecf20Sopenharmony_ci				     GFP_KERNEL, numa);
4658c2ecf20Sopenharmony_ci		if (!rcd->egrbufs.rcvtids)
4668c2ecf20Sopenharmony_ci			goto bail;
4678c2ecf20Sopenharmony_ci		rcd->egrbufs.size = eager_buffer_size;
4688c2ecf20Sopenharmony_ci		/*
4698c2ecf20Sopenharmony_ci		 * The size of the buffers programmed into the RcvArray
4708c2ecf20Sopenharmony_ci		 * entries needs to be big enough to handle the highest
4718c2ecf20Sopenharmony_ci		 * MTU supported.
4728c2ecf20Sopenharmony_ci		 */
4738c2ecf20Sopenharmony_ci		if (rcd->egrbufs.size < hfi1_max_mtu) {
4748c2ecf20Sopenharmony_ci			rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu);
4758c2ecf20Sopenharmony_ci			hfi1_cdbg(PROC,
4768c2ecf20Sopenharmony_ci				  "ctxt%u: eager bufs size too small. Adjusting to %u\n",
4778c2ecf20Sopenharmony_ci				    rcd->ctxt, rcd->egrbufs.size);
4788c2ecf20Sopenharmony_ci		}
4798c2ecf20Sopenharmony_ci		rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
4808c2ecf20Sopenharmony_ci
4818c2ecf20Sopenharmony_ci		/* Applicable only for statically created kernel contexts */
4828c2ecf20Sopenharmony_ci		if (ctxt < dd->first_dyn_alloc_ctxt) {
4838c2ecf20Sopenharmony_ci			rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
4848c2ecf20Sopenharmony_ci						    GFP_KERNEL, numa);
4858c2ecf20Sopenharmony_ci			if (!rcd->opstats)
4868c2ecf20Sopenharmony_ci				goto bail;
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci			/* Initialize TID flow generations for the context */
4898c2ecf20Sopenharmony_ci			hfi1_kern_init_ctxt_generations(rcd);
4908c2ecf20Sopenharmony_ci		}
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci		*context = rcd;
4938c2ecf20Sopenharmony_ci		return 0;
4948c2ecf20Sopenharmony_ci	}
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_cibail:
4978c2ecf20Sopenharmony_ci	*context = NULL;
4988c2ecf20Sopenharmony_ci	hfi1_free_ctxt(rcd);
4998c2ecf20Sopenharmony_ci	return -ENOMEM;
5008c2ecf20Sopenharmony_ci}
5018c2ecf20Sopenharmony_ci
/**
 * hfi1_free_ctxt - drop the initial reference on a context
 * @rcd: pointer to an initialized rcd data structure, or NULL
 *
 * This wrapper is the free function that matches hfi1_create_ctxtdata().
 * When a context (kernel or user) is no longer needed, call this for the
 * "final" put that balances the kref init done in hfi1_create_ctxtdata().
 * Other users of the context do a get/put sequence so that the structure
 * is not removed while they are using it.
 */
void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd)
{
	/* The put result is not needed here. */
	(void)hfi1_rcd_put(rcd);
}
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci/*
5188c2ecf20Sopenharmony_ci * Select the largest ccti value over all SLs to determine the intra-
5198c2ecf20Sopenharmony_ci * packet gap for the link.
5208c2ecf20Sopenharmony_ci *
5218c2ecf20Sopenharmony_ci * called with cca_timer_lock held (to protect access to cca_timer
5228c2ecf20Sopenharmony_ci * array), and rcu_read_lock() (to protect access to cc_state).
5238c2ecf20Sopenharmony_ci */
5248c2ecf20Sopenharmony_civoid set_link_ipg(struct hfi1_pportdata *ppd)
5258c2ecf20Sopenharmony_ci{
5268c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = ppd->dd;
5278c2ecf20Sopenharmony_ci	struct cc_state *cc_state;
5288c2ecf20Sopenharmony_ci	int i;
5298c2ecf20Sopenharmony_ci	u16 cce, ccti_limit, max_ccti = 0;
5308c2ecf20Sopenharmony_ci	u16 shift, mult;
5318c2ecf20Sopenharmony_ci	u64 src;
5328c2ecf20Sopenharmony_ci	u32 current_egress_rate; /* Mbits /sec */
5338c2ecf20Sopenharmony_ci	u64 max_pkt_time;
5348c2ecf20Sopenharmony_ci	/*
5358c2ecf20Sopenharmony_ci	 * max_pkt_time is the maximum packet egress time in units
5368c2ecf20Sopenharmony_ci	 * of the fabric clock period 1/(805 MHz).
5378c2ecf20Sopenharmony_ci	 */
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci	cc_state = get_cc_state(ppd);
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_ci	if (!cc_state)
5428c2ecf20Sopenharmony_ci		/*
5438c2ecf20Sopenharmony_ci		 * This should _never_ happen - rcu_read_lock() is held,
5448c2ecf20Sopenharmony_ci		 * and set_link_ipg() should not be called if cc_state
5458c2ecf20Sopenharmony_ci		 * is NULL.
5468c2ecf20Sopenharmony_ci		 */
5478c2ecf20Sopenharmony_ci		return;
5488c2ecf20Sopenharmony_ci
5498c2ecf20Sopenharmony_ci	for (i = 0; i < OPA_MAX_SLS; i++) {
5508c2ecf20Sopenharmony_ci		u16 ccti = ppd->cca_timer[i].ccti;
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci		if (ccti > max_ccti)
5538c2ecf20Sopenharmony_ci			max_ccti = ccti;
5548c2ecf20Sopenharmony_ci	}
5558c2ecf20Sopenharmony_ci
5568c2ecf20Sopenharmony_ci	ccti_limit = cc_state->cct.ccti_limit;
5578c2ecf20Sopenharmony_ci	if (max_ccti > ccti_limit)
5588c2ecf20Sopenharmony_ci		max_ccti = ccti_limit;
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci	cce = cc_state->cct.entries[max_ccti].entry;
5618c2ecf20Sopenharmony_ci	shift = (cce & 0xc000) >> 14;
5628c2ecf20Sopenharmony_ci	mult = (cce & 0x3fff);
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_ci	current_egress_rate = active_egress_rate(ppd);
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_ci	max_pkt_time = egress_cycles(ppd->ibmaxlen, current_egress_rate);
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci	src = (max_pkt_time >> shift) * mult;
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci	src &= SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SMASK;
5718c2ecf20Sopenharmony_ci	src <<= SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SHIFT;
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci	write_csr(dd, SEND_STATIC_RATE_CONTROL, src);
5748c2ecf20Sopenharmony_ci}
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_cistatic enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
5778c2ecf20Sopenharmony_ci{
5788c2ecf20Sopenharmony_ci	struct cca_timer *cca_timer;
5798c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd;
5808c2ecf20Sopenharmony_ci	int sl;
5818c2ecf20Sopenharmony_ci	u16 ccti_timer, ccti_min;
5828c2ecf20Sopenharmony_ci	struct cc_state *cc_state;
5838c2ecf20Sopenharmony_ci	unsigned long flags;
5848c2ecf20Sopenharmony_ci	enum hrtimer_restart ret = HRTIMER_NORESTART;
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_ci	cca_timer = container_of(t, struct cca_timer, hrtimer);
5878c2ecf20Sopenharmony_ci	ppd = cca_timer->ppd;
5888c2ecf20Sopenharmony_ci	sl = cca_timer->sl;
5898c2ecf20Sopenharmony_ci
5908c2ecf20Sopenharmony_ci	rcu_read_lock();
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_ci	cc_state = get_cc_state(ppd);
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci	if (!cc_state) {
5958c2ecf20Sopenharmony_ci		rcu_read_unlock();
5968c2ecf20Sopenharmony_ci		return HRTIMER_NORESTART;
5978c2ecf20Sopenharmony_ci	}
5988c2ecf20Sopenharmony_ci
5998c2ecf20Sopenharmony_ci	/*
6008c2ecf20Sopenharmony_ci	 * 1) decrement ccti for SL
6018c2ecf20Sopenharmony_ci	 * 2) calculate IPG for link (set_link_ipg())
6028c2ecf20Sopenharmony_ci	 * 3) restart timer, unless ccti is at min value
6038c2ecf20Sopenharmony_ci	 */
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci	ccti_min = cc_state->cong_setting.entries[sl].ccti_min;
6068c2ecf20Sopenharmony_ci	ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer;
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	spin_lock_irqsave(&ppd->cca_timer_lock, flags);
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_ci	if (cca_timer->ccti > ccti_min) {
6118c2ecf20Sopenharmony_ci		cca_timer->ccti--;
6128c2ecf20Sopenharmony_ci		set_link_ipg(ppd);
6138c2ecf20Sopenharmony_ci	}
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_ci	if (cca_timer->ccti > ccti_min) {
6168c2ecf20Sopenharmony_ci		unsigned long nsec = 1024 * ccti_timer;
6178c2ecf20Sopenharmony_ci		/* ccti_timer is in units of 1.024 usec */
6188c2ecf20Sopenharmony_ci		hrtimer_forward_now(t, ns_to_ktime(nsec));
6198c2ecf20Sopenharmony_ci		ret = HRTIMER_RESTART;
6208c2ecf20Sopenharmony_ci	}
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
6238c2ecf20Sopenharmony_ci	rcu_read_unlock();
6248c2ecf20Sopenharmony_ci	return ret;
6258c2ecf20Sopenharmony_ci}
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci/*
6288c2ecf20Sopenharmony_ci * Common code for initializing the physical port structure.
6298c2ecf20Sopenharmony_ci */
6308c2ecf20Sopenharmony_civoid hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
6318c2ecf20Sopenharmony_ci			 struct hfi1_devdata *dd, u8 hw_pidx, u8 port)
6328c2ecf20Sopenharmony_ci{
6338c2ecf20Sopenharmony_ci	int i;
6348c2ecf20Sopenharmony_ci	uint default_pkey_idx;
6358c2ecf20Sopenharmony_ci	struct cc_state *cc_state;
6368c2ecf20Sopenharmony_ci
6378c2ecf20Sopenharmony_ci	ppd->dd = dd;
6388c2ecf20Sopenharmony_ci	ppd->hw_pidx = hw_pidx;
6398c2ecf20Sopenharmony_ci	ppd->port = port; /* IB port number, not index */
6408c2ecf20Sopenharmony_ci	ppd->prev_link_width = LINK_WIDTH_DEFAULT;
6418c2ecf20Sopenharmony_ci	/*
6428c2ecf20Sopenharmony_ci	 * There are C_VL_COUNT number of PortVLXmitWait counters.
6438c2ecf20Sopenharmony_ci	 * Adding 1 to C_VL_COUNT to include the PortXmitWait counter.
6448c2ecf20Sopenharmony_ci	 */
6458c2ecf20Sopenharmony_ci	for (i = 0; i < C_VL_COUNT + 1; i++) {
6468c2ecf20Sopenharmony_ci		ppd->port_vl_xmit_wait_last[i] = 0;
6478c2ecf20Sopenharmony_ci		ppd->vl_xmit_flit_cnt[i] = 0;
6488c2ecf20Sopenharmony_ci	}
6498c2ecf20Sopenharmony_ci
6508c2ecf20Sopenharmony_ci	default_pkey_idx = 1;
6518c2ecf20Sopenharmony_ci
6528c2ecf20Sopenharmony_ci	ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
6538c2ecf20Sopenharmony_ci	ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
6548c2ecf20Sopenharmony_ci	ppd->pkeys[0] = 0x8001;
6558c2ecf20Sopenharmony_ci
6568c2ecf20Sopenharmony_ci	INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
6578c2ecf20Sopenharmony_ci	INIT_WORK(&ppd->link_up_work, handle_link_up);
6588c2ecf20Sopenharmony_ci	INIT_WORK(&ppd->link_down_work, handle_link_down);
6598c2ecf20Sopenharmony_ci	INIT_WORK(&ppd->freeze_work, handle_freeze);
6608c2ecf20Sopenharmony_ci	INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
6618c2ecf20Sopenharmony_ci	INIT_WORK(&ppd->sma_message_work, handle_sma_message);
6628c2ecf20Sopenharmony_ci	INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
6638c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link);
6648c2ecf20Sopenharmony_ci	INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
6658c2ecf20Sopenharmony_ci	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_ci	mutex_init(&ppd->hls_lock);
6688c2ecf20Sopenharmony_ci	spin_lock_init(&ppd->qsfp_info.qsfp_lock);
6698c2ecf20Sopenharmony_ci
6708c2ecf20Sopenharmony_ci	ppd->qsfp_info.ppd = ppd;
6718c2ecf20Sopenharmony_ci	ppd->sm_trap_qp = 0x0;
6728c2ecf20Sopenharmony_ci	ppd->sa_qp = 0x1;
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci	ppd->hfi1_wq = NULL;
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci	spin_lock_init(&ppd->cca_timer_lock);
6778c2ecf20Sopenharmony_ci
6788c2ecf20Sopenharmony_ci	for (i = 0; i < OPA_MAX_SLS; i++) {
6798c2ecf20Sopenharmony_ci		hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC,
6808c2ecf20Sopenharmony_ci			     HRTIMER_MODE_REL);
6818c2ecf20Sopenharmony_ci		ppd->cca_timer[i].ppd = ppd;
6828c2ecf20Sopenharmony_ci		ppd->cca_timer[i].sl = i;
6838c2ecf20Sopenharmony_ci		ppd->cca_timer[i].ccti = 0;
6848c2ecf20Sopenharmony_ci		ppd->cca_timer[i].hrtimer.function = cca_timer_fn;
6858c2ecf20Sopenharmony_ci	}
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_ci	ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT;
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci	spin_lock_init(&ppd->cc_state_lock);
6908c2ecf20Sopenharmony_ci	spin_lock_init(&ppd->cc_log_lock);
6918c2ecf20Sopenharmony_ci	cc_state = kzalloc(sizeof(*cc_state), GFP_KERNEL);
6928c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(ppd->cc_state, cc_state);
6938c2ecf20Sopenharmony_ci	if (!cc_state)
6948c2ecf20Sopenharmony_ci		goto bail;
6958c2ecf20Sopenharmony_ci	return;
6968c2ecf20Sopenharmony_ci
6978c2ecf20Sopenharmony_cibail:
6988c2ecf20Sopenharmony_ci	dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
6998c2ecf20Sopenharmony_ci}
7008c2ecf20Sopenharmony_ci
/*
 * Initialization that is only needed when the device is first detected,
 * not on resets.  There is currently nothing to do, so this always
 * reports success.
 */
static int loadtime_init(struct hfi1_devdata *dd)
{
	return 0;
}
7098c2ecf20Sopenharmony_ci
7108c2ecf20Sopenharmony_ci/**
7118c2ecf20Sopenharmony_ci * init_after_reset - re-initialize after a reset
7128c2ecf20Sopenharmony_ci * @dd: the hfi1_ib device
7138c2ecf20Sopenharmony_ci *
7148c2ecf20Sopenharmony_ci * sanity check at least some of the values after reset, and
7158c2ecf20Sopenharmony_ci * ensure no receive or transmit (explicitly, in case reset
7168c2ecf20Sopenharmony_ci * failed
7178c2ecf20Sopenharmony_ci */
7188c2ecf20Sopenharmony_cistatic int init_after_reset(struct hfi1_devdata *dd)
7198c2ecf20Sopenharmony_ci{
7208c2ecf20Sopenharmony_ci	int i;
7218c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
7228c2ecf20Sopenharmony_ci	/*
7238c2ecf20Sopenharmony_ci	 * Ensure chip does no sends or receives, tail updates, or
7248c2ecf20Sopenharmony_ci	 * pioavail updates while we re-initialize.  This is mostly
7258c2ecf20Sopenharmony_ci	 * for the driver data structures, not chip registers.
7268c2ecf20Sopenharmony_ci	 */
7278c2ecf20Sopenharmony_ci	for (i = 0; i < dd->num_rcv_contexts; i++) {
7288c2ecf20Sopenharmony_ci		rcd = hfi1_rcd_get_by_index(dd, i);
7298c2ecf20Sopenharmony_ci		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
7308c2ecf20Sopenharmony_ci			     HFI1_RCVCTRL_INTRAVAIL_DIS |
7318c2ecf20Sopenharmony_ci			     HFI1_RCVCTRL_TAILUPD_DIS, rcd);
7328c2ecf20Sopenharmony_ci		hfi1_rcd_put(rcd);
7338c2ecf20Sopenharmony_ci	}
7348c2ecf20Sopenharmony_ci	pio_send_control(dd, PSC_GLOBAL_DISABLE);
7358c2ecf20Sopenharmony_ci	for (i = 0; i < dd->num_send_contexts; i++)
7368c2ecf20Sopenharmony_ci		sc_disable(dd->send_contexts[i].sc);
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_ci	return 0;
7398c2ecf20Sopenharmony_ci}
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_cistatic void enable_chip(struct hfi1_devdata *dd)
7428c2ecf20Sopenharmony_ci{
7438c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
7448c2ecf20Sopenharmony_ci	u32 rcvmask;
7458c2ecf20Sopenharmony_ci	u16 i;
7468c2ecf20Sopenharmony_ci
7478c2ecf20Sopenharmony_ci	/* enable PIO send */
7488c2ecf20Sopenharmony_ci	pio_send_control(dd, PSC_GLOBAL_ENABLE);
7498c2ecf20Sopenharmony_ci
7508c2ecf20Sopenharmony_ci	/*
7518c2ecf20Sopenharmony_ci	 * Enable kernel ctxts' receive and receive interrupt.
7528c2ecf20Sopenharmony_ci	 * Other ctxts done as user opens and initializes them.
7538c2ecf20Sopenharmony_ci	 */
7548c2ecf20Sopenharmony_ci	for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
7558c2ecf20Sopenharmony_ci		rcd = hfi1_rcd_get_by_index(dd, i);
7568c2ecf20Sopenharmony_ci		if (!rcd)
7578c2ecf20Sopenharmony_ci			continue;
7588c2ecf20Sopenharmony_ci		rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
7598c2ecf20Sopenharmony_ci		rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
7608c2ecf20Sopenharmony_ci			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
7618c2ecf20Sopenharmony_ci		if (!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR))
7628c2ecf20Sopenharmony_ci			rcvmask |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
7638c2ecf20Sopenharmony_ci		if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_RHQ_FULL))
7648c2ecf20Sopenharmony_ci			rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
7658c2ecf20Sopenharmony_ci		if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
7668c2ecf20Sopenharmony_ci			rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
7678c2ecf20Sopenharmony_ci		if (HFI1_CAP_IS_KSET(TID_RDMA))
7688c2ecf20Sopenharmony_ci			rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
7698c2ecf20Sopenharmony_ci		hfi1_rcvctrl(dd, rcvmask, rcd);
7708c2ecf20Sopenharmony_ci		sc_enable(rcd->sc);
7718c2ecf20Sopenharmony_ci		hfi1_rcd_put(rcd);
7728c2ecf20Sopenharmony_ci	}
7738c2ecf20Sopenharmony_ci}
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ci/**
7768c2ecf20Sopenharmony_ci * create_workqueues - create per port workqueues
7778c2ecf20Sopenharmony_ci * @dd: the hfi1_ib device
7788c2ecf20Sopenharmony_ci */
7798c2ecf20Sopenharmony_cistatic int create_workqueues(struct hfi1_devdata *dd)
7808c2ecf20Sopenharmony_ci{
7818c2ecf20Sopenharmony_ci	int pidx;
7828c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd;
7838c2ecf20Sopenharmony_ci
7848c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
7858c2ecf20Sopenharmony_ci		ppd = dd->pport + pidx;
7868c2ecf20Sopenharmony_ci		if (!ppd->hfi1_wq) {
7878c2ecf20Sopenharmony_ci			ppd->hfi1_wq =
7888c2ecf20Sopenharmony_ci				alloc_workqueue(
7898c2ecf20Sopenharmony_ci				    "hfi%d_%d",
7908c2ecf20Sopenharmony_ci				    WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
7918c2ecf20Sopenharmony_ci				    WQ_MEM_RECLAIM,
7928c2ecf20Sopenharmony_ci				    HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
7938c2ecf20Sopenharmony_ci				    dd->unit, pidx);
7948c2ecf20Sopenharmony_ci			if (!ppd->hfi1_wq)
7958c2ecf20Sopenharmony_ci				goto wq_error;
7968c2ecf20Sopenharmony_ci		}
7978c2ecf20Sopenharmony_ci		if (!ppd->link_wq) {
7988c2ecf20Sopenharmony_ci			/*
7998c2ecf20Sopenharmony_ci			 * Make the link workqueue single-threaded to enforce
8008c2ecf20Sopenharmony_ci			 * serialization.
8018c2ecf20Sopenharmony_ci			 */
8028c2ecf20Sopenharmony_ci			ppd->link_wq =
8038c2ecf20Sopenharmony_ci				alloc_workqueue(
8048c2ecf20Sopenharmony_ci				    "hfi_link_%d_%d",
8058c2ecf20Sopenharmony_ci				    WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND,
8068c2ecf20Sopenharmony_ci				    1, /* max_active */
8078c2ecf20Sopenharmony_ci				    dd->unit, pidx);
8088c2ecf20Sopenharmony_ci			if (!ppd->link_wq)
8098c2ecf20Sopenharmony_ci				goto wq_error;
8108c2ecf20Sopenharmony_ci		}
8118c2ecf20Sopenharmony_ci	}
8128c2ecf20Sopenharmony_ci	return 0;
8138c2ecf20Sopenharmony_ciwq_error:
8148c2ecf20Sopenharmony_ci	pr_err("alloc_workqueue failed for port %d\n", pidx + 1);
8158c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
8168c2ecf20Sopenharmony_ci		ppd = dd->pport + pidx;
8178c2ecf20Sopenharmony_ci		if (ppd->hfi1_wq) {
8188c2ecf20Sopenharmony_ci			destroy_workqueue(ppd->hfi1_wq);
8198c2ecf20Sopenharmony_ci			ppd->hfi1_wq = NULL;
8208c2ecf20Sopenharmony_ci		}
8218c2ecf20Sopenharmony_ci		if (ppd->link_wq) {
8228c2ecf20Sopenharmony_ci			destroy_workqueue(ppd->link_wq);
8238c2ecf20Sopenharmony_ci			ppd->link_wq = NULL;
8248c2ecf20Sopenharmony_ci		}
8258c2ecf20Sopenharmony_ci	}
8268c2ecf20Sopenharmony_ci	return -ENOMEM;
8278c2ecf20Sopenharmony_ci}
8288c2ecf20Sopenharmony_ci
8298c2ecf20Sopenharmony_ci/**
8308c2ecf20Sopenharmony_ci * destroy_workqueues - destroy per port workqueues
8318c2ecf20Sopenharmony_ci * @dd: the hfi1_ib device
8328c2ecf20Sopenharmony_ci */
8338c2ecf20Sopenharmony_cistatic void destroy_workqueues(struct hfi1_devdata *dd)
8348c2ecf20Sopenharmony_ci{
8358c2ecf20Sopenharmony_ci	int pidx;
8368c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd;
8378c2ecf20Sopenharmony_ci
8388c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
8398c2ecf20Sopenharmony_ci		ppd = dd->pport + pidx;
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci		if (ppd->hfi1_wq) {
8428c2ecf20Sopenharmony_ci			destroy_workqueue(ppd->hfi1_wq);
8438c2ecf20Sopenharmony_ci			ppd->hfi1_wq = NULL;
8448c2ecf20Sopenharmony_ci		}
8458c2ecf20Sopenharmony_ci		if (ppd->link_wq) {
8468c2ecf20Sopenharmony_ci			destroy_workqueue(ppd->link_wq);
8478c2ecf20Sopenharmony_ci			ppd->link_wq = NULL;
8488c2ecf20Sopenharmony_ci		}
8498c2ecf20Sopenharmony_ci	}
8508c2ecf20Sopenharmony_ci}
8518c2ecf20Sopenharmony_ci
8528c2ecf20Sopenharmony_ci/**
8538c2ecf20Sopenharmony_ci * enable_general_intr() - Enable the IRQs that will be handled by the
8548c2ecf20Sopenharmony_ci * general interrupt handler.
8558c2ecf20Sopenharmony_ci * @dd: valid devdata
8568c2ecf20Sopenharmony_ci *
8578c2ecf20Sopenharmony_ci */
8588c2ecf20Sopenharmony_cistatic void enable_general_intr(struct hfi1_devdata *dd)
8598c2ecf20Sopenharmony_ci{
8608c2ecf20Sopenharmony_ci	set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
8618c2ecf20Sopenharmony_ci	set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
8628c2ecf20Sopenharmony_ci	set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
8638c2ecf20Sopenharmony_ci	set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
8648c2ecf20Sopenharmony_ci	set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
8658c2ecf20Sopenharmony_ci	set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
8668c2ecf20Sopenharmony_ci	set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
8678c2ecf20Sopenharmony_ci}
8688c2ecf20Sopenharmony_ci
8698c2ecf20Sopenharmony_ci/**
8708c2ecf20Sopenharmony_ci * hfi1_init - do the actual initialization sequence on the chip
8718c2ecf20Sopenharmony_ci * @dd: the hfi1_ib device
8728c2ecf20Sopenharmony_ci * @reinit: re-initializing, so don't allocate new memory
8738c2ecf20Sopenharmony_ci *
8748c2ecf20Sopenharmony_ci * Do the actual initialization sequence on the chip.  This is done
8758c2ecf20Sopenharmony_ci * both from the init routine called from the PCI infrastructure, and
8768c2ecf20Sopenharmony_ci * when we reset the chip, or detect that it was reset internally,
8778c2ecf20Sopenharmony_ci * or it's administratively re-enabled.
8788c2ecf20Sopenharmony_ci *
8798c2ecf20Sopenharmony_ci * Memory allocation here and in called routines is only done in
8808c2ecf20Sopenharmony_ci * the first case (reinit == 0).  We have to be careful, because even
8818c2ecf20Sopenharmony_ci * without memory allocation, we need to re-write all the chip registers
8828c2ecf20Sopenharmony_ci * TIDs, etc. after the reset or enable has completed.
8838c2ecf20Sopenharmony_ci */
8848c2ecf20Sopenharmony_ciint hfi1_init(struct hfi1_devdata *dd, int reinit)
8858c2ecf20Sopenharmony_ci{
8868c2ecf20Sopenharmony_ci	int ret = 0, pidx, lastfail = 0;
8878c2ecf20Sopenharmony_ci	unsigned long len;
8888c2ecf20Sopenharmony_ci	u16 i;
8898c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
8908c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd;
8918c2ecf20Sopenharmony_ci
8928c2ecf20Sopenharmony_ci	/* Set up send low level handlers */
8938c2ecf20Sopenharmony_ci	dd->process_pio_send = hfi1_verbs_send_pio;
8948c2ecf20Sopenharmony_ci	dd->process_dma_send = hfi1_verbs_send_dma;
8958c2ecf20Sopenharmony_ci	dd->pio_inline_send = pio_copy;
8968c2ecf20Sopenharmony_ci	dd->process_vnic_dma_send = hfi1_vnic_send_dma;
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci	if (is_ax(dd)) {
8998c2ecf20Sopenharmony_ci		atomic_set(&dd->drop_packet, DROP_PACKET_ON);
9008c2ecf20Sopenharmony_ci		dd->do_drop = true;
9018c2ecf20Sopenharmony_ci	} else {
9028c2ecf20Sopenharmony_ci		atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
9038c2ecf20Sopenharmony_ci		dd->do_drop = false;
9048c2ecf20Sopenharmony_ci	}
9058c2ecf20Sopenharmony_ci
9068c2ecf20Sopenharmony_ci	/* make sure the link is not "up" */
9078c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
9088c2ecf20Sopenharmony_ci		ppd = dd->pport + pidx;
9098c2ecf20Sopenharmony_ci		ppd->linkup = 0;
9108c2ecf20Sopenharmony_ci	}
9118c2ecf20Sopenharmony_ci
9128c2ecf20Sopenharmony_ci	if (reinit)
9138c2ecf20Sopenharmony_ci		ret = init_after_reset(dd);
9148c2ecf20Sopenharmony_ci	else
9158c2ecf20Sopenharmony_ci		ret = loadtime_init(dd);
9168c2ecf20Sopenharmony_ci	if (ret)
9178c2ecf20Sopenharmony_ci		goto done;
9188c2ecf20Sopenharmony_ci
9198c2ecf20Sopenharmony_ci	/* dd->rcd can be NULL if early initialization failed */
9208c2ecf20Sopenharmony_ci	for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
9218c2ecf20Sopenharmony_ci		/*
9228c2ecf20Sopenharmony_ci		 * Set up the (kernel) rcvhdr queue and egr TIDs.  If doing
9238c2ecf20Sopenharmony_ci		 * re-init, the simplest way to handle this is to free
9248c2ecf20Sopenharmony_ci		 * existing, and re-allocate.
9258c2ecf20Sopenharmony_ci		 * Need to re-create rest of ctxt 0 ctxtdata as well.
9268c2ecf20Sopenharmony_ci		 */
9278c2ecf20Sopenharmony_ci		rcd = hfi1_rcd_get_by_index(dd, i);
9288c2ecf20Sopenharmony_ci		if (!rcd)
9298c2ecf20Sopenharmony_ci			continue;
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci		lastfail = hfi1_create_rcvhdrq(dd, rcd);
9328c2ecf20Sopenharmony_ci		if (!lastfail)
9338c2ecf20Sopenharmony_ci			lastfail = hfi1_setup_eagerbufs(rcd);
9348c2ecf20Sopenharmony_ci		if (!lastfail)
9358c2ecf20Sopenharmony_ci			lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
9368c2ecf20Sopenharmony_ci		if (lastfail) {
9378c2ecf20Sopenharmony_ci			dd_dev_err(dd,
9388c2ecf20Sopenharmony_ci				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
9398c2ecf20Sopenharmony_ci			ret = lastfail;
9408c2ecf20Sopenharmony_ci		}
9418c2ecf20Sopenharmony_ci		/* enable IRQ */
9428c2ecf20Sopenharmony_ci		hfi1_rcd_put(rcd);
9438c2ecf20Sopenharmony_ci	}
9448c2ecf20Sopenharmony_ci
9458c2ecf20Sopenharmony_ci	/* Allocate enough memory for user event notification. */
9468c2ecf20Sopenharmony_ci	len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS *
9478c2ecf20Sopenharmony_ci			 sizeof(*dd->events));
9488c2ecf20Sopenharmony_ci	dd->events = vmalloc_user(len);
9498c2ecf20Sopenharmony_ci	if (!dd->events)
9508c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Failed to allocate user events page\n");
9518c2ecf20Sopenharmony_ci	/*
9528c2ecf20Sopenharmony_ci	 * Allocate a page for device and port status.
9538c2ecf20Sopenharmony_ci	 * Page will be shared amongst all user processes.
9548c2ecf20Sopenharmony_ci	 */
9558c2ecf20Sopenharmony_ci	dd->status = vmalloc_user(PAGE_SIZE);
9568c2ecf20Sopenharmony_ci	if (!dd->status)
9578c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Failed to allocate dev status page\n");
9588c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
9598c2ecf20Sopenharmony_ci		ppd = dd->pport + pidx;
9608c2ecf20Sopenharmony_ci		if (dd->status)
9618c2ecf20Sopenharmony_ci			/* Currently, we only have one port */
9628c2ecf20Sopenharmony_ci			ppd->statusp = &dd->status->port;
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci		set_mtu(ppd);
9658c2ecf20Sopenharmony_ci	}
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_ci	/* enable chip even if we have an error, so we can debug cause */
9688c2ecf20Sopenharmony_ci	enable_chip(dd);
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_cidone:
9718c2ecf20Sopenharmony_ci	/*
9728c2ecf20Sopenharmony_ci	 * Set status even if port serdes is not initialized
9738c2ecf20Sopenharmony_ci	 * so that diags will work.
9748c2ecf20Sopenharmony_ci	 */
9758c2ecf20Sopenharmony_ci	if (dd->status)
9768c2ecf20Sopenharmony_ci		dd->status->dev |= HFI1_STATUS_CHIP_PRESENT |
9778c2ecf20Sopenharmony_ci			HFI1_STATUS_INITTED;
9788c2ecf20Sopenharmony_ci	if (!ret) {
9798c2ecf20Sopenharmony_ci		/* enable all interrupts from the chip */
9808c2ecf20Sopenharmony_ci		enable_general_intr(dd);
9818c2ecf20Sopenharmony_ci		init_qsfp_int(dd);
9828c2ecf20Sopenharmony_ci
9838c2ecf20Sopenharmony_ci		/* chip is OK for user apps; mark it as initialized */
9848c2ecf20Sopenharmony_ci		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
9858c2ecf20Sopenharmony_ci			ppd = dd->pport + pidx;
9868c2ecf20Sopenharmony_ci
9878c2ecf20Sopenharmony_ci			/*
9888c2ecf20Sopenharmony_ci			 * start the serdes - must be after interrupts are
9898c2ecf20Sopenharmony_ci			 * enabled so we are notified when the link goes up
9908c2ecf20Sopenharmony_ci			 */
9918c2ecf20Sopenharmony_ci			lastfail = bringup_serdes(ppd);
9928c2ecf20Sopenharmony_ci			if (lastfail)
9938c2ecf20Sopenharmony_ci				dd_dev_info(dd,
9948c2ecf20Sopenharmony_ci					    "Failed to bring up port %u\n",
9958c2ecf20Sopenharmony_ci					    ppd->port);
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_ci			/*
9988c2ecf20Sopenharmony_ci			 * Set status even if port serdes is not initialized
9998c2ecf20Sopenharmony_ci			 * so that diags will work.
10008c2ecf20Sopenharmony_ci			 */
10018c2ecf20Sopenharmony_ci			if (ppd->statusp)
10028c2ecf20Sopenharmony_ci				*ppd->statusp |= HFI1_STATUS_CHIP_PRESENT |
10038c2ecf20Sopenharmony_ci							HFI1_STATUS_INITTED;
10048c2ecf20Sopenharmony_ci			if (!ppd->link_speed_enabled)
10058c2ecf20Sopenharmony_ci				continue;
10068c2ecf20Sopenharmony_ci		}
10078c2ecf20Sopenharmony_ci	}
10088c2ecf20Sopenharmony_ci
10098c2ecf20Sopenharmony_ci	/* if ret is non-zero, we probably should do some cleanup here... */
10108c2ecf20Sopenharmony_ci	return ret;
10118c2ecf20Sopenharmony_ci}
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_cistruct hfi1_devdata *hfi1_lookup(int unit)
10148c2ecf20Sopenharmony_ci{
10158c2ecf20Sopenharmony_ci	return xa_load(&hfi1_dev_table, unit);
10168c2ecf20Sopenharmony_ci}
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci/*
10198c2ecf20Sopenharmony_ci * Stop the timers during unit shutdown, or after an error late
10208c2ecf20Sopenharmony_ci * in initialization.
10218c2ecf20Sopenharmony_ci */
10228c2ecf20Sopenharmony_cistatic void stop_timers(struct hfi1_devdata *dd)
10238c2ecf20Sopenharmony_ci{
10248c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd;
10258c2ecf20Sopenharmony_ci	int pidx;
10268c2ecf20Sopenharmony_ci
10278c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
10288c2ecf20Sopenharmony_ci		ppd = dd->pport + pidx;
10298c2ecf20Sopenharmony_ci		if (ppd->led_override_timer.function) {
10308c2ecf20Sopenharmony_ci			del_timer_sync(&ppd->led_override_timer);
10318c2ecf20Sopenharmony_ci			atomic_set(&ppd->led_override_timer_active, 0);
10328c2ecf20Sopenharmony_ci		}
10338c2ecf20Sopenharmony_ci	}
10348c2ecf20Sopenharmony_ci}
10358c2ecf20Sopenharmony_ci
10368c2ecf20Sopenharmony_ci/**
10378c2ecf20Sopenharmony_ci * shutdown_device - shut down a device
10388c2ecf20Sopenharmony_ci * @dd: the hfi1_ib device
10398c2ecf20Sopenharmony_ci *
10408c2ecf20Sopenharmony_ci * This is called to make the device quiet when we are about to
10418c2ecf20Sopenharmony_ci * unload the driver, and also when the device is administratively
10428c2ecf20Sopenharmony_ci * disabled.   It does not free any data structures.
10438c2ecf20Sopenharmony_ci * Everything it does has to be setup again by hfi1_init(dd, 1)
10448c2ecf20Sopenharmony_ci */
10458c2ecf20Sopenharmony_cistatic void shutdown_device(struct hfi1_devdata *dd)
10468c2ecf20Sopenharmony_ci{
10478c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd;
10488c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
10498c2ecf20Sopenharmony_ci	unsigned pidx;
10508c2ecf20Sopenharmony_ci	int i;
10518c2ecf20Sopenharmony_ci
10528c2ecf20Sopenharmony_ci	if (dd->flags & HFI1_SHUTDOWN)
10538c2ecf20Sopenharmony_ci		return;
10548c2ecf20Sopenharmony_ci	dd->flags |= HFI1_SHUTDOWN;
10558c2ecf20Sopenharmony_ci
10568c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
10578c2ecf20Sopenharmony_ci		ppd = dd->pport + pidx;
10588c2ecf20Sopenharmony_ci
10598c2ecf20Sopenharmony_ci		ppd->linkup = 0;
10608c2ecf20Sopenharmony_ci		if (ppd->statusp)
10618c2ecf20Sopenharmony_ci			*ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
10628c2ecf20Sopenharmony_ci					   HFI1_STATUS_IB_READY);
10638c2ecf20Sopenharmony_ci	}
10648c2ecf20Sopenharmony_ci	dd->flags &= ~HFI1_INITTED;
10658c2ecf20Sopenharmony_ci
10668c2ecf20Sopenharmony_ci	/* mask and clean up interrupts */
10678c2ecf20Sopenharmony_ci	set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
10688c2ecf20Sopenharmony_ci	msix_clean_up_interrupts(dd);
10698c2ecf20Sopenharmony_ci
10708c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
10718c2ecf20Sopenharmony_ci		ppd = dd->pport + pidx;
10728c2ecf20Sopenharmony_ci		for (i = 0; i < dd->num_rcv_contexts; i++) {
10738c2ecf20Sopenharmony_ci			rcd = hfi1_rcd_get_by_index(dd, i);
10748c2ecf20Sopenharmony_ci			hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS |
10758c2ecf20Sopenharmony_ci				     HFI1_RCVCTRL_CTXT_DIS |
10768c2ecf20Sopenharmony_ci				     HFI1_RCVCTRL_INTRAVAIL_DIS |
10778c2ecf20Sopenharmony_ci				     HFI1_RCVCTRL_PKEY_DIS |
10788c2ecf20Sopenharmony_ci				     HFI1_RCVCTRL_ONE_PKT_EGR_DIS, rcd);
10798c2ecf20Sopenharmony_ci			hfi1_rcd_put(rcd);
10808c2ecf20Sopenharmony_ci		}
10818c2ecf20Sopenharmony_ci		/*
10828c2ecf20Sopenharmony_ci		 * Gracefully stop all sends allowing any in progress to
10838c2ecf20Sopenharmony_ci		 * trickle out first.
10848c2ecf20Sopenharmony_ci		 */
10858c2ecf20Sopenharmony_ci		for (i = 0; i < dd->num_send_contexts; i++)
10868c2ecf20Sopenharmony_ci			sc_flush(dd->send_contexts[i].sc);
10878c2ecf20Sopenharmony_ci	}
10888c2ecf20Sopenharmony_ci
10898c2ecf20Sopenharmony_ci	/*
10908c2ecf20Sopenharmony_ci	 * Enough for anything that's going to trickle out to have actually
10918c2ecf20Sopenharmony_ci	 * done so.
10928c2ecf20Sopenharmony_ci	 */
10938c2ecf20Sopenharmony_ci	udelay(20);
10948c2ecf20Sopenharmony_ci
10958c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
10968c2ecf20Sopenharmony_ci		ppd = dd->pport + pidx;
10978c2ecf20Sopenharmony_ci
10988c2ecf20Sopenharmony_ci		/* disable all contexts */
10998c2ecf20Sopenharmony_ci		for (i = 0; i < dd->num_send_contexts; i++)
11008c2ecf20Sopenharmony_ci			sc_disable(dd->send_contexts[i].sc);
11018c2ecf20Sopenharmony_ci		/* disable the send device */
11028c2ecf20Sopenharmony_ci		pio_send_control(dd, PSC_GLOBAL_DISABLE);
11038c2ecf20Sopenharmony_ci
11048c2ecf20Sopenharmony_ci		shutdown_led_override(ppd);
11058c2ecf20Sopenharmony_ci
11068c2ecf20Sopenharmony_ci		/*
11078c2ecf20Sopenharmony_ci		 * Clear SerdesEnable.
11088c2ecf20Sopenharmony_ci		 * We can't count on interrupts since we are stopping.
11098c2ecf20Sopenharmony_ci		 */
11108c2ecf20Sopenharmony_ci		hfi1_quiet_serdes(ppd);
11118c2ecf20Sopenharmony_ci		if (ppd->hfi1_wq)
11128c2ecf20Sopenharmony_ci			flush_workqueue(ppd->hfi1_wq);
11138c2ecf20Sopenharmony_ci		if (ppd->link_wq)
11148c2ecf20Sopenharmony_ci			flush_workqueue(ppd->link_wq);
11158c2ecf20Sopenharmony_ci	}
11168c2ecf20Sopenharmony_ci	sdma_exit(dd);
11178c2ecf20Sopenharmony_ci}
11188c2ecf20Sopenharmony_ci
11198c2ecf20Sopenharmony_ci/**
11208c2ecf20Sopenharmony_ci * hfi1_free_ctxtdata - free a context's allocated data
11218c2ecf20Sopenharmony_ci * @dd: the hfi1_ib device
11228c2ecf20Sopenharmony_ci * @rcd: the ctxtdata structure
11238c2ecf20Sopenharmony_ci *
11248c2ecf20Sopenharmony_ci * free up any allocated data for a context
11258c2ecf20Sopenharmony_ci * It should never change any chip state, or global driver state.
11268c2ecf20Sopenharmony_ci */
11278c2ecf20Sopenharmony_civoid hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
11288c2ecf20Sopenharmony_ci{
11298c2ecf20Sopenharmony_ci	u32 e;
11308c2ecf20Sopenharmony_ci
11318c2ecf20Sopenharmony_ci	if (!rcd)
11328c2ecf20Sopenharmony_ci		return;
11338c2ecf20Sopenharmony_ci
11348c2ecf20Sopenharmony_ci	if (rcd->rcvhdrq) {
11358c2ecf20Sopenharmony_ci		dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
11368c2ecf20Sopenharmony_ci				  rcd->rcvhdrq, rcd->rcvhdrq_dma);
11378c2ecf20Sopenharmony_ci		rcd->rcvhdrq = NULL;
11388c2ecf20Sopenharmony_ci		if (hfi1_rcvhdrtail_kvaddr(rcd)) {
11398c2ecf20Sopenharmony_ci			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
11408c2ecf20Sopenharmony_ci					  (void *)hfi1_rcvhdrtail_kvaddr(rcd),
11418c2ecf20Sopenharmony_ci					  rcd->rcvhdrqtailaddr_dma);
11428c2ecf20Sopenharmony_ci			rcd->rcvhdrtail_kvaddr = NULL;
11438c2ecf20Sopenharmony_ci		}
11448c2ecf20Sopenharmony_ci	}
11458c2ecf20Sopenharmony_ci
11468c2ecf20Sopenharmony_ci	/* all the RcvArray entries should have been cleared by now */
11478c2ecf20Sopenharmony_ci	kfree(rcd->egrbufs.rcvtids);
11488c2ecf20Sopenharmony_ci	rcd->egrbufs.rcvtids = NULL;
11498c2ecf20Sopenharmony_ci
11508c2ecf20Sopenharmony_ci	for (e = 0; e < rcd->egrbufs.alloced; e++) {
11518c2ecf20Sopenharmony_ci		if (rcd->egrbufs.buffers[e].addr)
11528c2ecf20Sopenharmony_ci			dma_free_coherent(&dd->pcidev->dev,
11538c2ecf20Sopenharmony_ci					  rcd->egrbufs.buffers[e].len,
11548c2ecf20Sopenharmony_ci					  rcd->egrbufs.buffers[e].addr,
11558c2ecf20Sopenharmony_ci					  rcd->egrbufs.buffers[e].dma);
11568c2ecf20Sopenharmony_ci	}
11578c2ecf20Sopenharmony_ci	kfree(rcd->egrbufs.buffers);
11588c2ecf20Sopenharmony_ci	rcd->egrbufs.alloced = 0;
11598c2ecf20Sopenharmony_ci	rcd->egrbufs.buffers = NULL;
11608c2ecf20Sopenharmony_ci
11618c2ecf20Sopenharmony_ci	sc_free(rcd->sc);
11628c2ecf20Sopenharmony_ci	rcd->sc = NULL;
11638c2ecf20Sopenharmony_ci
11648c2ecf20Sopenharmony_ci	vfree(rcd->subctxt_uregbase);
11658c2ecf20Sopenharmony_ci	vfree(rcd->subctxt_rcvegrbuf);
11668c2ecf20Sopenharmony_ci	vfree(rcd->subctxt_rcvhdr_base);
11678c2ecf20Sopenharmony_ci	kfree(rcd->opstats);
11688c2ecf20Sopenharmony_ci
11698c2ecf20Sopenharmony_ci	rcd->subctxt_uregbase = NULL;
11708c2ecf20Sopenharmony_ci	rcd->subctxt_rcvegrbuf = NULL;
11718c2ecf20Sopenharmony_ci	rcd->subctxt_rcvhdr_base = NULL;
11728c2ecf20Sopenharmony_ci	rcd->opstats = NULL;
11738c2ecf20Sopenharmony_ci}
11748c2ecf20Sopenharmony_ci
11758c2ecf20Sopenharmony_ci/*
11768c2ecf20Sopenharmony_ci * Release our hold on the shared asic data.  If we are the last one,
11778c2ecf20Sopenharmony_ci * return the structure to be finalized outside the lock.  Must be
11788c2ecf20Sopenharmony_ci * holding hfi1_dev_table lock.
11798c2ecf20Sopenharmony_ci */
11808c2ecf20Sopenharmony_cistatic struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd)
11818c2ecf20Sopenharmony_ci{
11828c2ecf20Sopenharmony_ci	struct hfi1_asic_data *ad;
11838c2ecf20Sopenharmony_ci	int other;
11848c2ecf20Sopenharmony_ci
11858c2ecf20Sopenharmony_ci	if (!dd->asic_data)
11868c2ecf20Sopenharmony_ci		return NULL;
11878c2ecf20Sopenharmony_ci	dd->asic_data->dds[dd->hfi1_id] = NULL;
11888c2ecf20Sopenharmony_ci	other = dd->hfi1_id ? 0 : 1;
11898c2ecf20Sopenharmony_ci	ad = dd->asic_data;
11908c2ecf20Sopenharmony_ci	dd->asic_data = NULL;
11918c2ecf20Sopenharmony_ci	/* return NULL if the other dd still has a link */
11928c2ecf20Sopenharmony_ci	return ad->dds[other] ? NULL : ad;
11938c2ecf20Sopenharmony_ci}
11948c2ecf20Sopenharmony_ci
/*
 * Final teardown of the shared asic data: clean up its i2c state via
 * clean_up_i2c(), then free the structure itself.
 */
static void finalize_asic_data(struct hfi1_devdata *dd,
			       struct hfi1_asic_data *ad)
{
	clean_up_i2c(dd, ad);

	kfree(ad);
}
12018c2ecf20Sopenharmony_ci
12028c2ecf20Sopenharmony_ci/**
12038c2ecf20Sopenharmony_ci * hfi1_free_devdata - cleans up and frees per-unit data structure
12048c2ecf20Sopenharmony_ci * @dd: pointer to a valid devdata structure
12058c2ecf20Sopenharmony_ci *
12068c2ecf20Sopenharmony_ci * It cleans up and frees all data structures set up by
12078c2ecf20Sopenharmony_ci * by hfi1_alloc_devdata().
12088c2ecf20Sopenharmony_ci */
12098c2ecf20Sopenharmony_civoid hfi1_free_devdata(struct hfi1_devdata *dd)
12108c2ecf20Sopenharmony_ci{
12118c2ecf20Sopenharmony_ci	struct hfi1_asic_data *ad;
12128c2ecf20Sopenharmony_ci	unsigned long flags;
12138c2ecf20Sopenharmony_ci
12148c2ecf20Sopenharmony_ci	xa_lock_irqsave(&hfi1_dev_table, flags);
12158c2ecf20Sopenharmony_ci	__xa_erase(&hfi1_dev_table, dd->unit);
12168c2ecf20Sopenharmony_ci	ad = release_asic_data(dd);
12178c2ecf20Sopenharmony_ci	xa_unlock_irqrestore(&hfi1_dev_table, flags);
12188c2ecf20Sopenharmony_ci
12198c2ecf20Sopenharmony_ci	finalize_asic_data(dd, ad);
12208c2ecf20Sopenharmony_ci	free_platform_config(dd);
12218c2ecf20Sopenharmony_ci	rcu_barrier(); /* wait for rcu callbacks to complete */
12228c2ecf20Sopenharmony_ci	free_percpu(dd->int_counter);
12238c2ecf20Sopenharmony_ci	free_percpu(dd->rcv_limit);
12248c2ecf20Sopenharmony_ci	free_percpu(dd->send_schedule);
12258c2ecf20Sopenharmony_ci	free_percpu(dd->tx_opstats);
12268c2ecf20Sopenharmony_ci	dd->int_counter   = NULL;
12278c2ecf20Sopenharmony_ci	dd->rcv_limit     = NULL;
12288c2ecf20Sopenharmony_ci	dd->send_schedule = NULL;
12298c2ecf20Sopenharmony_ci	dd->tx_opstats    = NULL;
12308c2ecf20Sopenharmony_ci	kfree(dd->comp_vect);
12318c2ecf20Sopenharmony_ci	dd->comp_vect = NULL;
12328c2ecf20Sopenharmony_ci	if (dd->rcvhdrtail_dummy_kvaddr)
12338c2ecf20Sopenharmony_ci		dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
12348c2ecf20Sopenharmony_ci				  (void *)dd->rcvhdrtail_dummy_kvaddr,
12358c2ecf20Sopenharmony_ci				  dd->rcvhdrtail_dummy_dma);
12368c2ecf20Sopenharmony_ci	dd->rcvhdrtail_dummy_kvaddr = NULL;
12378c2ecf20Sopenharmony_ci	sdma_clean(dd, dd->num_sdma);
12388c2ecf20Sopenharmony_ci	rvt_dealloc_device(&dd->verbs_dev.rdi);
12398c2ecf20Sopenharmony_ci}
12408c2ecf20Sopenharmony_ci
12418c2ecf20Sopenharmony_ci/**
12428c2ecf20Sopenharmony_ci * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
12438c2ecf20Sopenharmony_ci * @pdev: Valid PCI device
12448c2ecf20Sopenharmony_ci * @extra: How many bytes to alloc past the default
12458c2ecf20Sopenharmony_ci *
12468c2ecf20Sopenharmony_ci * Must be done via verbs allocator, because the verbs cleanup process
12478c2ecf20Sopenharmony_ci * both does cleanup and free of the data structure.
12488c2ecf20Sopenharmony_ci * "extra" is for chip-specific data.
12498c2ecf20Sopenharmony_ci */
12508c2ecf20Sopenharmony_cistatic struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
12518c2ecf20Sopenharmony_ci					       size_t extra)
12528c2ecf20Sopenharmony_ci{
12538c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd;
12548c2ecf20Sopenharmony_ci	int ret, nports;
12558c2ecf20Sopenharmony_ci
12568c2ecf20Sopenharmony_ci	/* extra is * number of ports */
12578c2ecf20Sopenharmony_ci	nports = extra / sizeof(struct hfi1_pportdata);
12588c2ecf20Sopenharmony_ci
12598c2ecf20Sopenharmony_ci	dd = (struct hfi1_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
12608c2ecf20Sopenharmony_ci						     nports);
12618c2ecf20Sopenharmony_ci	if (!dd)
12628c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
12638c2ecf20Sopenharmony_ci	dd->num_pports = nports;
12648c2ecf20Sopenharmony_ci	dd->pport = (struct hfi1_pportdata *)(dd + 1);
12658c2ecf20Sopenharmony_ci	dd->pcidev = pdev;
12668c2ecf20Sopenharmony_ci	pci_set_drvdata(pdev, dd);
12678c2ecf20Sopenharmony_ci
12688c2ecf20Sopenharmony_ci	ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
12698c2ecf20Sopenharmony_ci			GFP_KERNEL);
12708c2ecf20Sopenharmony_ci	if (ret < 0) {
12718c2ecf20Sopenharmony_ci		dev_err(&pdev->dev,
12728c2ecf20Sopenharmony_ci			"Could not allocate unit ID: error %d\n", -ret);
12738c2ecf20Sopenharmony_ci		goto bail;
12748c2ecf20Sopenharmony_ci	}
12758c2ecf20Sopenharmony_ci	rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
12768c2ecf20Sopenharmony_ci	/*
12778c2ecf20Sopenharmony_ci	 * If the BIOS does not have the NUMA node information set, select
12788c2ecf20Sopenharmony_ci	 * NUMA 0 so we get consistent performance.
12798c2ecf20Sopenharmony_ci	 */
12808c2ecf20Sopenharmony_ci	dd->node = pcibus_to_node(pdev->bus);
12818c2ecf20Sopenharmony_ci	if (dd->node == NUMA_NO_NODE) {
12828c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
12838c2ecf20Sopenharmony_ci		dd->node = 0;
12848c2ecf20Sopenharmony_ci	}
12858c2ecf20Sopenharmony_ci
12868c2ecf20Sopenharmony_ci	/*
12878c2ecf20Sopenharmony_ci	 * Initialize all locks for the device. This needs to be as early as
12888c2ecf20Sopenharmony_ci	 * possible so locks are usable.
12898c2ecf20Sopenharmony_ci	 */
12908c2ecf20Sopenharmony_ci	spin_lock_init(&dd->sc_lock);
12918c2ecf20Sopenharmony_ci	spin_lock_init(&dd->sendctrl_lock);
12928c2ecf20Sopenharmony_ci	spin_lock_init(&dd->rcvctrl_lock);
12938c2ecf20Sopenharmony_ci	spin_lock_init(&dd->uctxt_lock);
12948c2ecf20Sopenharmony_ci	spin_lock_init(&dd->hfi1_diag_trans_lock);
12958c2ecf20Sopenharmony_ci	spin_lock_init(&dd->sc_init_lock);
12968c2ecf20Sopenharmony_ci	spin_lock_init(&dd->dc8051_memlock);
12978c2ecf20Sopenharmony_ci	seqlock_init(&dd->sc2vl_lock);
12988c2ecf20Sopenharmony_ci	spin_lock_init(&dd->sde_map_lock);
12998c2ecf20Sopenharmony_ci	spin_lock_init(&dd->pio_map_lock);
13008c2ecf20Sopenharmony_ci	mutex_init(&dd->dc8051_lock);
13018c2ecf20Sopenharmony_ci	init_waitqueue_head(&dd->event_queue);
13028c2ecf20Sopenharmony_ci	spin_lock_init(&dd->irq_src_lock);
13038c2ecf20Sopenharmony_ci
13048c2ecf20Sopenharmony_ci	dd->int_counter = alloc_percpu(u64);
13058c2ecf20Sopenharmony_ci	if (!dd->int_counter) {
13068c2ecf20Sopenharmony_ci		ret = -ENOMEM;
13078c2ecf20Sopenharmony_ci		goto bail;
13088c2ecf20Sopenharmony_ci	}
13098c2ecf20Sopenharmony_ci
13108c2ecf20Sopenharmony_ci	dd->rcv_limit = alloc_percpu(u64);
13118c2ecf20Sopenharmony_ci	if (!dd->rcv_limit) {
13128c2ecf20Sopenharmony_ci		ret = -ENOMEM;
13138c2ecf20Sopenharmony_ci		goto bail;
13148c2ecf20Sopenharmony_ci	}
13158c2ecf20Sopenharmony_ci
13168c2ecf20Sopenharmony_ci	dd->send_schedule = alloc_percpu(u64);
13178c2ecf20Sopenharmony_ci	if (!dd->send_schedule) {
13188c2ecf20Sopenharmony_ci		ret = -ENOMEM;
13198c2ecf20Sopenharmony_ci		goto bail;
13208c2ecf20Sopenharmony_ci	}
13218c2ecf20Sopenharmony_ci
13228c2ecf20Sopenharmony_ci	dd->tx_opstats = alloc_percpu(struct hfi1_opcode_stats_perctx);
13238c2ecf20Sopenharmony_ci	if (!dd->tx_opstats) {
13248c2ecf20Sopenharmony_ci		ret = -ENOMEM;
13258c2ecf20Sopenharmony_ci		goto bail;
13268c2ecf20Sopenharmony_ci	}
13278c2ecf20Sopenharmony_ci
13288c2ecf20Sopenharmony_ci	dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL);
13298c2ecf20Sopenharmony_ci	if (!dd->comp_vect) {
13308c2ecf20Sopenharmony_ci		ret = -ENOMEM;
13318c2ecf20Sopenharmony_ci		goto bail;
13328c2ecf20Sopenharmony_ci	}
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_ci	/* allocate dummy tail memory for all receive contexts */
13358c2ecf20Sopenharmony_ci	dd->rcvhdrtail_dummy_kvaddr =
13368c2ecf20Sopenharmony_ci		dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
13378c2ecf20Sopenharmony_ci				   &dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
13388c2ecf20Sopenharmony_ci	if (!dd->rcvhdrtail_dummy_kvaddr) {
13398c2ecf20Sopenharmony_ci		ret = -ENOMEM;
13408c2ecf20Sopenharmony_ci		goto bail;
13418c2ecf20Sopenharmony_ci	}
13428c2ecf20Sopenharmony_ci
13438c2ecf20Sopenharmony_ci	atomic_set(&dd->ipoib_rsm_usr_num, 0);
13448c2ecf20Sopenharmony_ci	return dd;
13458c2ecf20Sopenharmony_ci
13468c2ecf20Sopenharmony_cibail:
13478c2ecf20Sopenharmony_ci	hfi1_free_devdata(dd);
13488c2ecf20Sopenharmony_ci	return ERR_PTR(ret);
13498c2ecf20Sopenharmony_ci}
13508c2ecf20Sopenharmony_ci
13518c2ecf20Sopenharmony_ci/*
13528c2ecf20Sopenharmony_ci * Called from freeze mode handlers, and from PCI error
13538c2ecf20Sopenharmony_ci * reporting code.  Should be paranoid about state of
13548c2ecf20Sopenharmony_ci * system and data structures.
13558c2ecf20Sopenharmony_ci */
13568c2ecf20Sopenharmony_civoid hfi1_disable_after_error(struct hfi1_devdata *dd)
13578c2ecf20Sopenharmony_ci{
13588c2ecf20Sopenharmony_ci	if (dd->flags & HFI1_INITTED) {
13598c2ecf20Sopenharmony_ci		u32 pidx;
13608c2ecf20Sopenharmony_ci
13618c2ecf20Sopenharmony_ci		dd->flags &= ~HFI1_INITTED;
13628c2ecf20Sopenharmony_ci		if (dd->pport)
13638c2ecf20Sopenharmony_ci			for (pidx = 0; pidx < dd->num_pports; ++pidx) {
13648c2ecf20Sopenharmony_ci				struct hfi1_pportdata *ppd;
13658c2ecf20Sopenharmony_ci
13668c2ecf20Sopenharmony_ci				ppd = dd->pport + pidx;
13678c2ecf20Sopenharmony_ci				if (dd->flags & HFI1_PRESENT)
13688c2ecf20Sopenharmony_ci					set_link_state(ppd, HLS_DN_DISABLE);
13698c2ecf20Sopenharmony_ci
13708c2ecf20Sopenharmony_ci				if (ppd->statusp)
13718c2ecf20Sopenharmony_ci					*ppd->statusp &= ~HFI1_STATUS_IB_READY;
13728c2ecf20Sopenharmony_ci			}
13738c2ecf20Sopenharmony_ci	}
13748c2ecf20Sopenharmony_ci
13758c2ecf20Sopenharmony_ci	/*
13768c2ecf20Sopenharmony_ci	 * Mark as having had an error for driver, and also
13778c2ecf20Sopenharmony_ci	 * for /sys and status word mapped to user programs.
13788c2ecf20Sopenharmony_ci	 * This marks unit as not usable, until reset.
13798c2ecf20Sopenharmony_ci	 */
13808c2ecf20Sopenharmony_ci	if (dd->status)
13818c2ecf20Sopenharmony_ci		dd->status->dev |= HFI1_STATUS_HWERROR;
13828c2ecf20Sopenharmony_ci}
13838c2ecf20Sopenharmony_ci
/*
 * PCI driver callbacks.  They are defined further down in this file but
 * are referenced by hfi1_pci_driver below, hence the forward declarations.
 */
static void remove_one(struct pci_dev *);
static int init_one(struct pci_dev *, const struct pci_device_id *);
static void shutdown_one(struct pci_dev *);

/* string prefixes built from DRIVER_NAME by compile-time concatenation */
#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: "
#define PFX DRIVER_NAME ": "
13908c2ecf20Sopenharmony_ci
/*
 * PCI IDs claimed by this driver; used as hfi1_pci_driver.id_table.
 * init_one() re-checks ent->device against these same two device IDs.
 */
const struct pci_device_id hfi1_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL0) },
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL1) },
	{ 0, }	/* all-zero entry terminates the table */
};

/* publish the ID table in the module's device-table information */
MODULE_DEVICE_TABLE(pci, hfi1_pci_tbl);
13988c2ecf20Sopenharmony_ci
/*
 * PCI driver descriptor: registered in hfi1_mod_init() and unregistered
 * in hfi1_mod_cleanup().
 */
static struct pci_driver hfi1_pci_driver = {
	.name = DRIVER_NAME,
	.probe = init_one,		/* per-device bring-up */
	.remove = remove_one,		/* per-device teardown */
	.shutdown = shutdown_one,	/* only calls shutdown_device() */
	.id_table = hfi1_pci_tbl,
	.err_handler = &hfi1_pci_err_handler,
};
14078c2ecf20Sopenharmony_ci
14088c2ecf20Sopenharmony_cistatic void __init compute_krcvqs(void)
14098c2ecf20Sopenharmony_ci{
14108c2ecf20Sopenharmony_ci	int i;
14118c2ecf20Sopenharmony_ci
14128c2ecf20Sopenharmony_ci	for (i = 0; i < krcvqsset; i++)
14138c2ecf20Sopenharmony_ci		n_krcvqs += krcvqs[i];
14148c2ecf20Sopenharmony_ci}
14158c2ecf20Sopenharmony_ci
14168c2ecf20Sopenharmony_ci/*
14178c2ecf20Sopenharmony_ci * Do all the generic driver unit- and chip-independent memory
14188c2ecf20Sopenharmony_ci * allocation and initialization.
14198c2ecf20Sopenharmony_ci */
14208c2ecf20Sopenharmony_cistatic int __init hfi1_mod_init(void)
14218c2ecf20Sopenharmony_ci{
14228c2ecf20Sopenharmony_ci	int ret;
14238c2ecf20Sopenharmony_ci
14248c2ecf20Sopenharmony_ci	ret = dev_init();
14258c2ecf20Sopenharmony_ci	if (ret)
14268c2ecf20Sopenharmony_ci		goto bail;
14278c2ecf20Sopenharmony_ci
14288c2ecf20Sopenharmony_ci	ret = node_affinity_init();
14298c2ecf20Sopenharmony_ci	if (ret)
14308c2ecf20Sopenharmony_ci		goto bail;
14318c2ecf20Sopenharmony_ci
14328c2ecf20Sopenharmony_ci	/* validate max MTU before any devices start */
14338c2ecf20Sopenharmony_ci	if (!valid_opa_max_mtu(hfi1_max_mtu)) {
14348c2ecf20Sopenharmony_ci		pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n",
14358c2ecf20Sopenharmony_ci		       hfi1_max_mtu, HFI1_DEFAULT_MAX_MTU);
14368c2ecf20Sopenharmony_ci		hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
14378c2ecf20Sopenharmony_ci	}
14388c2ecf20Sopenharmony_ci	/* valid CUs run from 1-128 in powers of 2 */
14398c2ecf20Sopenharmony_ci	if (hfi1_cu > 128 || !is_power_of_2(hfi1_cu))
14408c2ecf20Sopenharmony_ci		hfi1_cu = 1;
14418c2ecf20Sopenharmony_ci	/* valid credit return threshold is 0-100, variable is unsigned */
14428c2ecf20Sopenharmony_ci	if (user_credit_return_threshold > 100)
14438c2ecf20Sopenharmony_ci		user_credit_return_threshold = 100;
14448c2ecf20Sopenharmony_ci
14458c2ecf20Sopenharmony_ci	compute_krcvqs();
14468c2ecf20Sopenharmony_ci	/*
14478c2ecf20Sopenharmony_ci	 * sanitize receive interrupt count, time must wait until after
14488c2ecf20Sopenharmony_ci	 * the hardware type is known
14498c2ecf20Sopenharmony_ci	 */
14508c2ecf20Sopenharmony_ci	if (rcv_intr_count > RCV_HDR_HEAD_COUNTER_MASK)
14518c2ecf20Sopenharmony_ci		rcv_intr_count = RCV_HDR_HEAD_COUNTER_MASK;
14528c2ecf20Sopenharmony_ci	/* reject invalid combinations */
14538c2ecf20Sopenharmony_ci	if (rcv_intr_count == 0 && rcv_intr_timeout == 0) {
14548c2ecf20Sopenharmony_ci		pr_err("Invalid mode: both receive interrupt count and available timeout are zero - setting interrupt count to 1\n");
14558c2ecf20Sopenharmony_ci		rcv_intr_count = 1;
14568c2ecf20Sopenharmony_ci	}
14578c2ecf20Sopenharmony_ci	if (rcv_intr_count > 1 && rcv_intr_timeout == 0) {
14588c2ecf20Sopenharmony_ci		/*
14598c2ecf20Sopenharmony_ci		 * Avoid indefinite packet delivery by requiring a timeout
14608c2ecf20Sopenharmony_ci		 * if count is > 1.
14618c2ecf20Sopenharmony_ci		 */
14628c2ecf20Sopenharmony_ci		pr_err("Invalid mode: receive interrupt count greater than 1 and available timeout is zero - setting available timeout to 1\n");
14638c2ecf20Sopenharmony_ci		rcv_intr_timeout = 1;
14648c2ecf20Sopenharmony_ci	}
14658c2ecf20Sopenharmony_ci	if (rcv_intr_dynamic && !(rcv_intr_count > 1 && rcv_intr_timeout > 0)) {
14668c2ecf20Sopenharmony_ci		/*
14678c2ecf20Sopenharmony_ci		 * The dynamic algorithm expects a non-zero timeout
14688c2ecf20Sopenharmony_ci		 * and a count > 1.
14698c2ecf20Sopenharmony_ci		 */
14708c2ecf20Sopenharmony_ci		pr_err("Invalid mode: dynamic receive interrupt mitigation with invalid count and timeout - turning dynamic off\n");
14718c2ecf20Sopenharmony_ci		rcv_intr_dynamic = 0;
14728c2ecf20Sopenharmony_ci	}
14738c2ecf20Sopenharmony_ci
14748c2ecf20Sopenharmony_ci	/* sanitize link CRC options */
14758c2ecf20Sopenharmony_ci	link_crc_mask &= SUPPORTED_CRCS;
14768c2ecf20Sopenharmony_ci
14778c2ecf20Sopenharmony_ci	ret = opfn_init();
14788c2ecf20Sopenharmony_ci	if (ret < 0) {
14798c2ecf20Sopenharmony_ci		pr_err("Failed to allocate opfn_wq");
14808c2ecf20Sopenharmony_ci		goto bail_dev;
14818c2ecf20Sopenharmony_ci	}
14828c2ecf20Sopenharmony_ci
14838c2ecf20Sopenharmony_ci	/*
14848c2ecf20Sopenharmony_ci	 * These must be called before the driver is registered with
14858c2ecf20Sopenharmony_ci	 * the PCI subsystem.
14868c2ecf20Sopenharmony_ci	 */
14878c2ecf20Sopenharmony_ci	hfi1_dbg_init();
14888c2ecf20Sopenharmony_ci	ret = pci_register_driver(&hfi1_pci_driver);
14898c2ecf20Sopenharmony_ci	if (ret < 0) {
14908c2ecf20Sopenharmony_ci		pr_err("Unable to register driver: error %d\n", -ret);
14918c2ecf20Sopenharmony_ci		goto bail_dev;
14928c2ecf20Sopenharmony_ci	}
14938c2ecf20Sopenharmony_ci	goto bail; /* all OK */
14948c2ecf20Sopenharmony_ci
14958c2ecf20Sopenharmony_cibail_dev:
14968c2ecf20Sopenharmony_ci	hfi1_dbg_exit();
14978c2ecf20Sopenharmony_ci	dev_cleanup();
14988c2ecf20Sopenharmony_cibail:
14998c2ecf20Sopenharmony_ci	return ret;
15008c2ecf20Sopenharmony_ci}
15018c2ecf20Sopenharmony_ci
15028c2ecf20Sopenharmony_cimodule_init(hfi1_mod_init);
15038c2ecf20Sopenharmony_ci
/*
 * Do the non-unit driver cleanup, memory free, etc. at unload.
 *
 * The PCI driver is unregistered first; the module-wide facilities set up
 * in hfi1_mod_init() are released afterwards.
 */
static void __exit hfi1_mod_cleanup(void)
{
	pci_unregister_driver(&hfi1_pci_driver);
	opfn_exit();
	node_affinity_destroy_all();
	hfi1_dbg_exit();

	/* every unit should have erased itself in hfi1_free_devdata() by now */
	WARN_ON(!xa_empty(&hfi1_dev_table));
	dispose_firmware();	/* asymmetric with obtain_firmware() */
	dev_cleanup();
}

module_exit(hfi1_mod_cleanup);
15208c2ecf20Sopenharmony_ci
15218c2ecf20Sopenharmony_ci/* this can only be called after a successful initialization */
15228c2ecf20Sopenharmony_cistatic void cleanup_device_data(struct hfi1_devdata *dd)
15238c2ecf20Sopenharmony_ci{
15248c2ecf20Sopenharmony_ci	int ctxt;
15258c2ecf20Sopenharmony_ci	int pidx;
15268c2ecf20Sopenharmony_ci
15278c2ecf20Sopenharmony_ci	/* users can't do anything more with chip */
15288c2ecf20Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
15298c2ecf20Sopenharmony_ci		struct hfi1_pportdata *ppd = &dd->pport[pidx];
15308c2ecf20Sopenharmony_ci		struct cc_state *cc_state;
15318c2ecf20Sopenharmony_ci		int i;
15328c2ecf20Sopenharmony_ci
15338c2ecf20Sopenharmony_ci		if (ppd->statusp)
15348c2ecf20Sopenharmony_ci			*ppd->statusp &= ~HFI1_STATUS_CHIP_PRESENT;
15358c2ecf20Sopenharmony_ci
15368c2ecf20Sopenharmony_ci		for (i = 0; i < OPA_MAX_SLS; i++)
15378c2ecf20Sopenharmony_ci			hrtimer_cancel(&ppd->cca_timer[i].hrtimer);
15388c2ecf20Sopenharmony_ci
15398c2ecf20Sopenharmony_ci		spin_lock(&ppd->cc_state_lock);
15408c2ecf20Sopenharmony_ci		cc_state = get_cc_state_protected(ppd);
15418c2ecf20Sopenharmony_ci		RCU_INIT_POINTER(ppd->cc_state, NULL);
15428c2ecf20Sopenharmony_ci		spin_unlock(&ppd->cc_state_lock);
15438c2ecf20Sopenharmony_ci
15448c2ecf20Sopenharmony_ci		if (cc_state)
15458c2ecf20Sopenharmony_ci			kfree_rcu(cc_state, rcu);
15468c2ecf20Sopenharmony_ci	}
15478c2ecf20Sopenharmony_ci
15488c2ecf20Sopenharmony_ci	free_credit_return(dd);
15498c2ecf20Sopenharmony_ci
15508c2ecf20Sopenharmony_ci	/*
15518c2ecf20Sopenharmony_ci	 * Free any resources still in use (usually just kernel contexts)
15528c2ecf20Sopenharmony_ci	 * at unload; we do for ctxtcnt, because that's what we allocate.
15538c2ecf20Sopenharmony_ci	 */
15548c2ecf20Sopenharmony_ci	for (ctxt = 0; dd->rcd && ctxt < dd->num_rcv_contexts; ctxt++) {
15558c2ecf20Sopenharmony_ci		struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
15568c2ecf20Sopenharmony_ci
15578c2ecf20Sopenharmony_ci		if (rcd) {
15588c2ecf20Sopenharmony_ci			hfi1_free_ctxt_rcv_groups(rcd);
15598c2ecf20Sopenharmony_ci			hfi1_free_ctxt(rcd);
15608c2ecf20Sopenharmony_ci		}
15618c2ecf20Sopenharmony_ci	}
15628c2ecf20Sopenharmony_ci
15638c2ecf20Sopenharmony_ci	kfree(dd->rcd);
15648c2ecf20Sopenharmony_ci	dd->rcd = NULL;
15658c2ecf20Sopenharmony_ci
15668c2ecf20Sopenharmony_ci	free_pio_map(dd);
15678c2ecf20Sopenharmony_ci	/* must follow rcv context free - need to remove rcv's hooks */
15688c2ecf20Sopenharmony_ci	for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++)
15698c2ecf20Sopenharmony_ci		sc_free(dd->send_contexts[ctxt].sc);
15708c2ecf20Sopenharmony_ci	dd->num_send_contexts = 0;
15718c2ecf20Sopenharmony_ci	kfree(dd->send_contexts);
15728c2ecf20Sopenharmony_ci	dd->send_contexts = NULL;
15738c2ecf20Sopenharmony_ci	kfree(dd->hw_to_sw);
15748c2ecf20Sopenharmony_ci	dd->hw_to_sw = NULL;
15758c2ecf20Sopenharmony_ci	kfree(dd->boardname);
15768c2ecf20Sopenharmony_ci	vfree(dd->events);
15778c2ecf20Sopenharmony_ci	vfree(dd->status);
15788c2ecf20Sopenharmony_ci}
15798c2ecf20Sopenharmony_ci
/*
 * Clean up on unit shutdown, or error during unit load after
 * successful initialization.
 *
 * dd is released by the final hfi1_free_devdata() call and must not be
 * used by the caller afterwards.
 */
static void postinit_cleanup(struct hfi1_devdata *dd)
{
	hfi1_start_cleanup(dd);
	hfi1_comp_vectors_clean_up(dd);
	hfi1_dev_affinity_clean_up(dd);

	/* PCIe-level teardown */
	hfi1_pcie_ddcleanup(dd);
	hfi1_pcie_cleanup(dd->pcidev);

	/* driver-owned memory hanging off dd */
	cleanup_device_data(dd);

	/* finally the devdata itself */
	hfi1_free_devdata(dd);
}
15978c2ecf20Sopenharmony_ci
15988c2ecf20Sopenharmony_cistatic int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
15998c2ecf20Sopenharmony_ci{
16008c2ecf20Sopenharmony_ci	int ret = 0, j, pidx, initfail;
16018c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd;
16028c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd;
16038c2ecf20Sopenharmony_ci
16048c2ecf20Sopenharmony_ci	/* First, lock the non-writable module parameters */
16058c2ecf20Sopenharmony_ci	HFI1_CAP_LOCK();
16068c2ecf20Sopenharmony_ci
16078c2ecf20Sopenharmony_ci	/* Validate dev ids */
16088c2ecf20Sopenharmony_ci	if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
16098c2ecf20Sopenharmony_ci	      ent->device == PCI_DEVICE_ID_INTEL1)) {
16108c2ecf20Sopenharmony_ci		dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
16118c2ecf20Sopenharmony_ci			ent->device);
16128c2ecf20Sopenharmony_ci		ret = -ENODEV;
16138c2ecf20Sopenharmony_ci		goto bail;
16148c2ecf20Sopenharmony_ci	}
16158c2ecf20Sopenharmony_ci
16168c2ecf20Sopenharmony_ci	/* Allocate the dd so we can get to work */
16178c2ecf20Sopenharmony_ci	dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
16188c2ecf20Sopenharmony_ci				sizeof(struct hfi1_pportdata));
16198c2ecf20Sopenharmony_ci	if (IS_ERR(dd)) {
16208c2ecf20Sopenharmony_ci		ret = PTR_ERR(dd);
16218c2ecf20Sopenharmony_ci		goto bail;
16228c2ecf20Sopenharmony_ci	}
16238c2ecf20Sopenharmony_ci
16248c2ecf20Sopenharmony_ci	/* Validate some global module parameters */
16258c2ecf20Sopenharmony_ci	ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
16268c2ecf20Sopenharmony_ci	if (ret)
16278c2ecf20Sopenharmony_ci		goto bail;
16288c2ecf20Sopenharmony_ci
16298c2ecf20Sopenharmony_ci	/* use the encoding function as a sanitization check */
16308c2ecf20Sopenharmony_ci	if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
16318c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
16328c2ecf20Sopenharmony_ci			   hfi1_hdrq_entsize);
16338c2ecf20Sopenharmony_ci		ret = -EINVAL;
16348c2ecf20Sopenharmony_ci		goto bail;
16358c2ecf20Sopenharmony_ci	}
16368c2ecf20Sopenharmony_ci
16378c2ecf20Sopenharmony_ci	/* The receive eager buffer size must be set before the receive
16388c2ecf20Sopenharmony_ci	 * contexts are created.
16398c2ecf20Sopenharmony_ci	 *
16408c2ecf20Sopenharmony_ci	 * Set the eager buffer size.  Validate that it falls in a range
16418c2ecf20Sopenharmony_ci	 * allowed by the hardware - all powers of 2 between the min and
16428c2ecf20Sopenharmony_ci	 * max.  The maximum valid MTU is within the eager buffer range
16438c2ecf20Sopenharmony_ci	 * so we do not need to cap the max_mtu by an eager buffer size
16448c2ecf20Sopenharmony_ci	 * setting.
16458c2ecf20Sopenharmony_ci	 */
16468c2ecf20Sopenharmony_ci	if (eager_buffer_size) {
16478c2ecf20Sopenharmony_ci		if (!is_power_of_2(eager_buffer_size))
16488c2ecf20Sopenharmony_ci			eager_buffer_size =
16498c2ecf20Sopenharmony_ci				roundup_pow_of_two(eager_buffer_size);
16508c2ecf20Sopenharmony_ci		eager_buffer_size =
16518c2ecf20Sopenharmony_ci			clamp_val(eager_buffer_size,
16528c2ecf20Sopenharmony_ci				  MIN_EAGER_BUFFER * 8,
16538c2ecf20Sopenharmony_ci				  MAX_EAGER_BUFFER_TOTAL);
16548c2ecf20Sopenharmony_ci		dd_dev_info(dd, "Eager buffer size %u\n",
16558c2ecf20Sopenharmony_ci			    eager_buffer_size);
16568c2ecf20Sopenharmony_ci	} else {
16578c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
16588c2ecf20Sopenharmony_ci		ret = -EINVAL;
16598c2ecf20Sopenharmony_ci		goto bail;
16608c2ecf20Sopenharmony_ci	}
16618c2ecf20Sopenharmony_ci
16628c2ecf20Sopenharmony_ci	/* restrict value of hfi1_rcvarr_split */
16638c2ecf20Sopenharmony_ci	hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
16648c2ecf20Sopenharmony_ci
16658c2ecf20Sopenharmony_ci	ret = hfi1_pcie_init(dd);
16668c2ecf20Sopenharmony_ci	if (ret)
16678c2ecf20Sopenharmony_ci		goto bail;
16688c2ecf20Sopenharmony_ci
16698c2ecf20Sopenharmony_ci	/*
16708c2ecf20Sopenharmony_ci	 * Do device-specific initialization, function table setup, dd
16718c2ecf20Sopenharmony_ci	 * allocation, etc.
16728c2ecf20Sopenharmony_ci	 */
16738c2ecf20Sopenharmony_ci	ret = hfi1_init_dd(dd);
16748c2ecf20Sopenharmony_ci	if (ret)
16758c2ecf20Sopenharmony_ci		goto clean_bail; /* error already printed */
16768c2ecf20Sopenharmony_ci
16778c2ecf20Sopenharmony_ci	ret = create_workqueues(dd);
16788c2ecf20Sopenharmony_ci	if (ret)
16798c2ecf20Sopenharmony_ci		goto clean_bail;
16808c2ecf20Sopenharmony_ci
16818c2ecf20Sopenharmony_ci	/* do the generic initialization */
16828c2ecf20Sopenharmony_ci	initfail = hfi1_init(dd, 0);
16838c2ecf20Sopenharmony_ci
16848c2ecf20Sopenharmony_ci	ret = hfi1_register_ib_device(dd);
16858c2ecf20Sopenharmony_ci
16868c2ecf20Sopenharmony_ci	/*
16878c2ecf20Sopenharmony_ci	 * Now ready for use.  this should be cleared whenever we
16888c2ecf20Sopenharmony_ci	 * detect a reset, or initiate one.  If earlier failure,
16898c2ecf20Sopenharmony_ci	 * we still create devices, so diags, etc. can be used
16908c2ecf20Sopenharmony_ci	 * to determine cause of problem.
16918c2ecf20Sopenharmony_ci	 */
16928c2ecf20Sopenharmony_ci	if (!initfail && !ret) {
16938c2ecf20Sopenharmony_ci		dd->flags |= HFI1_INITTED;
16948c2ecf20Sopenharmony_ci		/* create debufs files after init and ib register */
16958c2ecf20Sopenharmony_ci		hfi1_dbg_ibdev_init(&dd->verbs_dev);
16968c2ecf20Sopenharmony_ci	}
16978c2ecf20Sopenharmony_ci
16988c2ecf20Sopenharmony_ci	j = hfi1_device_create(dd);
16998c2ecf20Sopenharmony_ci	if (j)
17008c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
17018c2ecf20Sopenharmony_ci
17028c2ecf20Sopenharmony_ci	if (initfail || ret) {
17038c2ecf20Sopenharmony_ci		msix_clean_up_interrupts(dd);
17048c2ecf20Sopenharmony_ci		stop_timers(dd);
17058c2ecf20Sopenharmony_ci		flush_workqueue(ib_wq);
17068c2ecf20Sopenharmony_ci		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
17078c2ecf20Sopenharmony_ci			hfi1_quiet_serdes(dd->pport + pidx);
17088c2ecf20Sopenharmony_ci			ppd = dd->pport + pidx;
17098c2ecf20Sopenharmony_ci			if (ppd->hfi1_wq) {
17108c2ecf20Sopenharmony_ci				destroy_workqueue(ppd->hfi1_wq);
17118c2ecf20Sopenharmony_ci				ppd->hfi1_wq = NULL;
17128c2ecf20Sopenharmony_ci			}
17138c2ecf20Sopenharmony_ci			if (ppd->link_wq) {
17148c2ecf20Sopenharmony_ci				destroy_workqueue(ppd->link_wq);
17158c2ecf20Sopenharmony_ci				ppd->link_wq = NULL;
17168c2ecf20Sopenharmony_ci			}
17178c2ecf20Sopenharmony_ci		}
17188c2ecf20Sopenharmony_ci		if (!j)
17198c2ecf20Sopenharmony_ci			hfi1_device_remove(dd);
17208c2ecf20Sopenharmony_ci		if (!ret)
17218c2ecf20Sopenharmony_ci			hfi1_unregister_ib_device(dd);
17228c2ecf20Sopenharmony_ci		postinit_cleanup(dd);
17238c2ecf20Sopenharmony_ci		if (initfail)
17248c2ecf20Sopenharmony_ci			ret = initfail;
17258c2ecf20Sopenharmony_ci		goto bail;	/* everything already cleaned */
17268c2ecf20Sopenharmony_ci	}
17278c2ecf20Sopenharmony_ci
17288c2ecf20Sopenharmony_ci	sdma_start(dd);
17298c2ecf20Sopenharmony_ci
17308c2ecf20Sopenharmony_ci	return 0;
17318c2ecf20Sopenharmony_ci
17328c2ecf20Sopenharmony_ciclean_bail:
17338c2ecf20Sopenharmony_ci	hfi1_pcie_cleanup(pdev);
17348c2ecf20Sopenharmony_cibail:
17358c2ecf20Sopenharmony_ci	return ret;
17368c2ecf20Sopenharmony_ci}
17378c2ecf20Sopenharmony_ci
/*
 * Block until dd->user_comp has been completed.
 *
 * Drops one count from dd->user_refcount; if that was the last one, the
 * completion is signalled here so the wait below returns immediately.
 */
static void wait_for_clients(struct hfi1_devdata *dd)
{
	/*
	 * Remove the device init value and complete the device if there is
	 * no clients or wait for active clients to finish.
	 */
	if (atomic_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	wait_for_completion(&dd->user_comp);
}
17498c2ecf20Sopenharmony_ci
/*
 * remove_one - PCI remove callback (see hfi1_pci_driver)
 * @pdev: the PCI device being removed
 *
 * Takes the user- and IB-facing interfaces down first, then quiesces the
 * hardware and finally releases everything through postinit_cleanup(),
 * which also frees the devdata.
 */
static void remove_one(struct pci_dev *pdev)
{
	struct hfi1_devdata *dd = pci_get_drvdata(pdev);

	/* close debugfs files before ib unregister */
	hfi1_dbg_ibdev_exit(&dd->verbs_dev);

	/* remove the /dev hfi1 interface */
	hfi1_device_remove(dd);

	/* wait for existing user space clients to finish */
	wait_for_clients(dd);

	/* unregister from IB core */
	hfi1_unregister_ib_device(dd);

	/* free netdev data */
	hfi1_netdev_free(dd);

	/*
	 * Disable the IB link, disable interrupts on the device,
	 * clear dma engines, etc.
	 */
	shutdown_device(dd);
	destroy_workqueues(dd);

	stop_timers(dd);

	/* wait until all of our (qsfp) queue_work() calls complete */
	flush_workqueue(ib_wq);

	postinit_cleanup(dd);
}
17838c2ecf20Sopenharmony_ci
/*
 * shutdown_one - PCI shutdown callback (see hfi1_pci_driver)
 * @pdev: the PCI device being shut down
 *
 * Only calls shutdown_device() on the devdata stored as the PCI driver
 * data; nothing is freed.
 */
static void shutdown_one(struct pci_dev *pdev)
{
	shutdown_device(pci_get_drvdata(pdev));
}
17908c2ecf20Sopenharmony_ci
17918c2ecf20Sopenharmony_ci/**
17928c2ecf20Sopenharmony_ci * hfi1_create_rcvhdrq - create a receive header queue
17938c2ecf20Sopenharmony_ci * @dd: the hfi1_ib device
17948c2ecf20Sopenharmony_ci * @rcd: the context data
17958c2ecf20Sopenharmony_ci *
17968c2ecf20Sopenharmony_ci * This must be contiguous memory (from an i/o perspective), and must be
17978c2ecf20Sopenharmony_ci * DMA'able (which means for some systems, it will go through an IOMMU,
17988c2ecf20Sopenharmony_ci * or be forced into a low address range).
17998c2ecf20Sopenharmony_ci */
18008c2ecf20Sopenharmony_ciint hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
18018c2ecf20Sopenharmony_ci{
18028c2ecf20Sopenharmony_ci	unsigned amt;
18038c2ecf20Sopenharmony_ci
18048c2ecf20Sopenharmony_ci	if (!rcd->rcvhdrq) {
18058c2ecf20Sopenharmony_ci		gfp_t gfp_flags;
18068c2ecf20Sopenharmony_ci
18078c2ecf20Sopenharmony_ci		amt = rcvhdrq_size(rcd);
18088c2ecf20Sopenharmony_ci
18098c2ecf20Sopenharmony_ci		if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
18108c2ecf20Sopenharmony_ci			gfp_flags = GFP_KERNEL;
18118c2ecf20Sopenharmony_ci		else
18128c2ecf20Sopenharmony_ci			gfp_flags = GFP_USER;
18138c2ecf20Sopenharmony_ci		rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
18148c2ecf20Sopenharmony_ci						  &rcd->rcvhdrq_dma,
18158c2ecf20Sopenharmony_ci						  gfp_flags | __GFP_COMP);
18168c2ecf20Sopenharmony_ci
18178c2ecf20Sopenharmony_ci		if (!rcd->rcvhdrq) {
18188c2ecf20Sopenharmony_ci			dd_dev_err(dd,
18198c2ecf20Sopenharmony_ci				   "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
18208c2ecf20Sopenharmony_ci				   amt, rcd->ctxt);
18218c2ecf20Sopenharmony_ci			goto bail;
18228c2ecf20Sopenharmony_ci		}
18238c2ecf20Sopenharmony_ci
18248c2ecf20Sopenharmony_ci		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
18258c2ecf20Sopenharmony_ci		    HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) {
18268c2ecf20Sopenharmony_ci			rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
18278c2ecf20Sopenharmony_ci								    PAGE_SIZE,
18288c2ecf20Sopenharmony_ci								    &rcd->rcvhdrqtailaddr_dma,
18298c2ecf20Sopenharmony_ci								    gfp_flags);
18308c2ecf20Sopenharmony_ci			if (!rcd->rcvhdrtail_kvaddr)
18318c2ecf20Sopenharmony_ci				goto bail_free;
18328c2ecf20Sopenharmony_ci		}
18338c2ecf20Sopenharmony_ci	}
18348c2ecf20Sopenharmony_ci
18358c2ecf20Sopenharmony_ci	set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
18368c2ecf20Sopenharmony_ci		      rcd->rcvhdrq_cnt);
18378c2ecf20Sopenharmony_ci
18388c2ecf20Sopenharmony_ci	return 0;
18398c2ecf20Sopenharmony_ci
18408c2ecf20Sopenharmony_cibail_free:
18418c2ecf20Sopenharmony_ci	dd_dev_err(dd,
18428c2ecf20Sopenharmony_ci		   "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
18438c2ecf20Sopenharmony_ci		   rcd->ctxt);
18448c2ecf20Sopenharmony_ci	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
18458c2ecf20Sopenharmony_ci			  rcd->rcvhdrq_dma);
18468c2ecf20Sopenharmony_ci	rcd->rcvhdrq = NULL;
18478c2ecf20Sopenharmony_cibail:
18488c2ecf20Sopenharmony_ci	return -ENOMEM;
18498c2ecf20Sopenharmony_ci}
18508c2ecf20Sopenharmony_ci
18518c2ecf20Sopenharmony_ci/**
18528c2ecf20Sopenharmony_ci * allocate eager buffers, both kernel and user contexts.
18538c2ecf20Sopenharmony_ci * @rcd: the context we are setting up.
18548c2ecf20Sopenharmony_ci *
18558c2ecf20Sopenharmony_ci * Allocate the eager TID buffers and program them into hip.
18568c2ecf20Sopenharmony_ci * They are no longer completely contiguous, we do multiple allocation
18578c2ecf20Sopenharmony_ci * calls.  Otherwise we get the OOM code involved, by asking for too
18588c2ecf20Sopenharmony_ci * much per call, with disastrous results on some kernels.
18598c2ecf20Sopenharmony_ci */
18608c2ecf20Sopenharmony_ciint hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
18618c2ecf20Sopenharmony_ci{
18628c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = rcd->dd;
18638c2ecf20Sopenharmony_ci	u32 max_entries, egrtop, alloced_bytes = 0;
18648c2ecf20Sopenharmony_ci	gfp_t gfp_flags;
18658c2ecf20Sopenharmony_ci	u16 order, idx = 0;
18668c2ecf20Sopenharmony_ci	int ret = 0;
18678c2ecf20Sopenharmony_ci	u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
18688c2ecf20Sopenharmony_ci
18698c2ecf20Sopenharmony_ci	/*
18708c2ecf20Sopenharmony_ci	 * GFP_USER, but without GFP_FS, so buffer cache can be
18718c2ecf20Sopenharmony_ci	 * coalesced (we hope); otherwise, even at order 4,
18728c2ecf20Sopenharmony_ci	 * heavy filesystem activity makes these fail, and we can
18738c2ecf20Sopenharmony_ci	 * use compound pages.
18748c2ecf20Sopenharmony_ci	 */
18758c2ecf20Sopenharmony_ci	gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
18768c2ecf20Sopenharmony_ci
18778c2ecf20Sopenharmony_ci	/*
18788c2ecf20Sopenharmony_ci	 * The minimum size of the eager buffers is a groups of MTU-sized
18798c2ecf20Sopenharmony_ci	 * buffers.
18808c2ecf20Sopenharmony_ci	 * The global eager_buffer_size parameter is checked against the
18818c2ecf20Sopenharmony_ci	 * theoretical lower limit of the value. Here, we check against the
18828c2ecf20Sopenharmony_ci	 * MTU.
18838c2ecf20Sopenharmony_ci	 */
18848c2ecf20Sopenharmony_ci	if (rcd->egrbufs.size < (round_mtu * dd->rcv_entries.group_size))
18858c2ecf20Sopenharmony_ci		rcd->egrbufs.size = round_mtu * dd->rcv_entries.group_size;
18868c2ecf20Sopenharmony_ci	/*
18878c2ecf20Sopenharmony_ci	 * If using one-pkt-per-egr-buffer, lower the eager buffer
18888c2ecf20Sopenharmony_ci	 * size to the max MTU (page-aligned).
18898c2ecf20Sopenharmony_ci	 */
18908c2ecf20Sopenharmony_ci	if (!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR))
18918c2ecf20Sopenharmony_ci		rcd->egrbufs.rcvtid_size = round_mtu;
18928c2ecf20Sopenharmony_ci
18938c2ecf20Sopenharmony_ci	/*
18948c2ecf20Sopenharmony_ci	 * Eager buffers sizes of 1MB or less require smaller TID sizes
18958c2ecf20Sopenharmony_ci	 * to satisfy the "multiple of 8 RcvArray entries" requirement.
18968c2ecf20Sopenharmony_ci	 */
18978c2ecf20Sopenharmony_ci	if (rcd->egrbufs.size <= (1 << 20))
18988c2ecf20Sopenharmony_ci		rcd->egrbufs.rcvtid_size = max((unsigned long)round_mtu,
18998c2ecf20Sopenharmony_ci			rounddown_pow_of_two(rcd->egrbufs.size / 8));
19008c2ecf20Sopenharmony_ci
19018c2ecf20Sopenharmony_ci	while (alloced_bytes < rcd->egrbufs.size &&
19028c2ecf20Sopenharmony_ci	       rcd->egrbufs.alloced < rcd->egrbufs.count) {
19038c2ecf20Sopenharmony_ci		rcd->egrbufs.buffers[idx].addr =
19048c2ecf20Sopenharmony_ci			dma_alloc_coherent(&dd->pcidev->dev,
19058c2ecf20Sopenharmony_ci					   rcd->egrbufs.rcvtid_size,
19068c2ecf20Sopenharmony_ci					   &rcd->egrbufs.buffers[idx].dma,
19078c2ecf20Sopenharmony_ci					   gfp_flags);
19088c2ecf20Sopenharmony_ci		if (rcd->egrbufs.buffers[idx].addr) {
19098c2ecf20Sopenharmony_ci			rcd->egrbufs.buffers[idx].len =
19108c2ecf20Sopenharmony_ci				rcd->egrbufs.rcvtid_size;
19118c2ecf20Sopenharmony_ci			rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr =
19128c2ecf20Sopenharmony_ci				rcd->egrbufs.buffers[idx].addr;
19138c2ecf20Sopenharmony_ci			rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma =
19148c2ecf20Sopenharmony_ci				rcd->egrbufs.buffers[idx].dma;
19158c2ecf20Sopenharmony_ci			rcd->egrbufs.alloced++;
19168c2ecf20Sopenharmony_ci			alloced_bytes += rcd->egrbufs.rcvtid_size;
19178c2ecf20Sopenharmony_ci			idx++;
19188c2ecf20Sopenharmony_ci		} else {
19198c2ecf20Sopenharmony_ci			u32 new_size, i, j;
19208c2ecf20Sopenharmony_ci			u64 offset = 0;
19218c2ecf20Sopenharmony_ci
19228c2ecf20Sopenharmony_ci			/*
19238c2ecf20Sopenharmony_ci			 * Fail the eager buffer allocation if:
19248c2ecf20Sopenharmony_ci			 *   - we are already using the lowest acceptable size
19258c2ecf20Sopenharmony_ci			 *   - we are using one-pkt-per-egr-buffer (this implies
19268c2ecf20Sopenharmony_ci			 *     that we are accepting only one size)
19278c2ecf20Sopenharmony_ci			 */
19288c2ecf20Sopenharmony_ci			if (rcd->egrbufs.rcvtid_size == round_mtu ||
19298c2ecf20Sopenharmony_ci			    !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
19308c2ecf20Sopenharmony_ci				dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
19318c2ecf20Sopenharmony_ci					   rcd->ctxt);
19328c2ecf20Sopenharmony_ci				ret = -ENOMEM;
19338c2ecf20Sopenharmony_ci				goto bail_rcvegrbuf_phys;
19348c2ecf20Sopenharmony_ci			}
19358c2ecf20Sopenharmony_ci
19368c2ecf20Sopenharmony_ci			new_size = rcd->egrbufs.rcvtid_size / 2;
19378c2ecf20Sopenharmony_ci
19388c2ecf20Sopenharmony_ci			/*
19398c2ecf20Sopenharmony_ci			 * If the first attempt to allocate memory failed, don't
19408c2ecf20Sopenharmony_ci			 * fail everything but continue with the next lower
19418c2ecf20Sopenharmony_ci			 * size.
19428c2ecf20Sopenharmony_ci			 */
19438c2ecf20Sopenharmony_ci			if (idx == 0) {
19448c2ecf20Sopenharmony_ci				rcd->egrbufs.rcvtid_size = new_size;
19458c2ecf20Sopenharmony_ci				continue;
19468c2ecf20Sopenharmony_ci			}
19478c2ecf20Sopenharmony_ci
19488c2ecf20Sopenharmony_ci			/*
19498c2ecf20Sopenharmony_ci			 * Re-partition already allocated buffers to a smaller
19508c2ecf20Sopenharmony_ci			 * size.
19518c2ecf20Sopenharmony_ci			 */
19528c2ecf20Sopenharmony_ci			rcd->egrbufs.alloced = 0;
19538c2ecf20Sopenharmony_ci			for (i = 0, j = 0, offset = 0; j < idx; i++) {
19548c2ecf20Sopenharmony_ci				if (i >= rcd->egrbufs.count)
19558c2ecf20Sopenharmony_ci					break;
19568c2ecf20Sopenharmony_ci				rcd->egrbufs.rcvtids[i].dma =
19578c2ecf20Sopenharmony_ci					rcd->egrbufs.buffers[j].dma + offset;
19588c2ecf20Sopenharmony_ci				rcd->egrbufs.rcvtids[i].addr =
19598c2ecf20Sopenharmony_ci					rcd->egrbufs.buffers[j].addr + offset;
19608c2ecf20Sopenharmony_ci				rcd->egrbufs.alloced++;
19618c2ecf20Sopenharmony_ci				if ((rcd->egrbufs.buffers[j].dma + offset +
19628c2ecf20Sopenharmony_ci				     new_size) ==
19638c2ecf20Sopenharmony_ci				    (rcd->egrbufs.buffers[j].dma +
19648c2ecf20Sopenharmony_ci				     rcd->egrbufs.buffers[j].len)) {
19658c2ecf20Sopenharmony_ci					j++;
19668c2ecf20Sopenharmony_ci					offset = 0;
19678c2ecf20Sopenharmony_ci				} else {
19688c2ecf20Sopenharmony_ci					offset += new_size;
19698c2ecf20Sopenharmony_ci				}
19708c2ecf20Sopenharmony_ci			}
19718c2ecf20Sopenharmony_ci			rcd->egrbufs.rcvtid_size = new_size;
19728c2ecf20Sopenharmony_ci		}
19738c2ecf20Sopenharmony_ci	}
19748c2ecf20Sopenharmony_ci	rcd->egrbufs.numbufs = idx;
19758c2ecf20Sopenharmony_ci	rcd->egrbufs.size = alloced_bytes;
19768c2ecf20Sopenharmony_ci
19778c2ecf20Sopenharmony_ci	hfi1_cdbg(PROC,
19788c2ecf20Sopenharmony_ci		  "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB\n",
19798c2ecf20Sopenharmony_ci		  rcd->ctxt, rcd->egrbufs.alloced,
19808c2ecf20Sopenharmony_ci		  rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024);
19818c2ecf20Sopenharmony_ci
19828c2ecf20Sopenharmony_ci	/*
19838c2ecf20Sopenharmony_ci	 * Set the contexts rcv array head update threshold to the closest
19848c2ecf20Sopenharmony_ci	 * power of 2 (so we can use a mask instead of modulo) below half
19858c2ecf20Sopenharmony_ci	 * the allocated entries.
19868c2ecf20Sopenharmony_ci	 */
19878c2ecf20Sopenharmony_ci	rcd->egrbufs.threshold =
19888c2ecf20Sopenharmony_ci		rounddown_pow_of_two(rcd->egrbufs.alloced / 2);
19898c2ecf20Sopenharmony_ci	/*
19908c2ecf20Sopenharmony_ci	 * Compute the expected RcvArray entry base. This is done after
19918c2ecf20Sopenharmony_ci	 * allocating the eager buffers in order to maximize the
19928c2ecf20Sopenharmony_ci	 * expected RcvArray entries for the context.
19938c2ecf20Sopenharmony_ci	 */
19948c2ecf20Sopenharmony_ci	max_entries = rcd->rcv_array_groups * dd->rcv_entries.group_size;
19958c2ecf20Sopenharmony_ci	egrtop = roundup(rcd->egrbufs.alloced, dd->rcv_entries.group_size);
19968c2ecf20Sopenharmony_ci	rcd->expected_count = max_entries - egrtop;
19978c2ecf20Sopenharmony_ci	if (rcd->expected_count > MAX_TID_PAIR_ENTRIES * 2)
19988c2ecf20Sopenharmony_ci		rcd->expected_count = MAX_TID_PAIR_ENTRIES * 2;
19998c2ecf20Sopenharmony_ci
20008c2ecf20Sopenharmony_ci	rcd->expected_base = rcd->eager_base + egrtop;
20018c2ecf20Sopenharmony_ci	hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u\n",
20028c2ecf20Sopenharmony_ci		  rcd->ctxt, rcd->egrbufs.alloced, rcd->expected_count,
20038c2ecf20Sopenharmony_ci		  rcd->eager_base, rcd->expected_base);
20048c2ecf20Sopenharmony_ci
20058c2ecf20Sopenharmony_ci	if (!hfi1_rcvbuf_validate(rcd->egrbufs.rcvtid_size, PT_EAGER, &order)) {
20068c2ecf20Sopenharmony_ci		hfi1_cdbg(PROC,
20078c2ecf20Sopenharmony_ci			  "ctxt%u: current Eager buffer size is invalid %u\n",
20088c2ecf20Sopenharmony_ci			  rcd->ctxt, rcd->egrbufs.rcvtid_size);
20098c2ecf20Sopenharmony_ci		ret = -EINVAL;
20108c2ecf20Sopenharmony_ci		goto bail_rcvegrbuf_phys;
20118c2ecf20Sopenharmony_ci	}
20128c2ecf20Sopenharmony_ci
20138c2ecf20Sopenharmony_ci	for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
20148c2ecf20Sopenharmony_ci		hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
20158c2ecf20Sopenharmony_ci			     rcd->egrbufs.rcvtids[idx].dma, order);
20168c2ecf20Sopenharmony_ci		cond_resched();
20178c2ecf20Sopenharmony_ci	}
20188c2ecf20Sopenharmony_ci
20198c2ecf20Sopenharmony_ci	return 0;
20208c2ecf20Sopenharmony_ci
20218c2ecf20Sopenharmony_cibail_rcvegrbuf_phys:
20228c2ecf20Sopenharmony_ci	for (idx = 0; idx < rcd->egrbufs.alloced &&
20238c2ecf20Sopenharmony_ci	     rcd->egrbufs.buffers[idx].addr;
20248c2ecf20Sopenharmony_ci	     idx++) {
20258c2ecf20Sopenharmony_ci		dma_free_coherent(&dd->pcidev->dev,
20268c2ecf20Sopenharmony_ci				  rcd->egrbufs.buffers[idx].len,
20278c2ecf20Sopenharmony_ci				  rcd->egrbufs.buffers[idx].addr,
20288c2ecf20Sopenharmony_ci				  rcd->egrbufs.buffers[idx].dma);
20298c2ecf20Sopenharmony_ci		rcd->egrbufs.buffers[idx].addr = NULL;
20308c2ecf20Sopenharmony_ci		rcd->egrbufs.buffers[idx].dma = 0;
20318c2ecf20Sopenharmony_ci		rcd->egrbufs.buffers[idx].len = 0;
20328c2ecf20Sopenharmony_ci	}
20338c2ecf20Sopenharmony_ci
20348c2ecf20Sopenharmony_ci	return ret;
20358c2ecf20Sopenharmony_ci}
2036