162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright(c) 2015 - 2020 Intel Corporation.
462306a36Sopenharmony_ci * Copyright(c) 2021 Cornelis Networks.
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include <linux/pci.h>
862306a36Sopenharmony_ci#include <linux/netdevice.h>
962306a36Sopenharmony_ci#include <linux/vmalloc.h>
1062306a36Sopenharmony_ci#include <linux/delay.h>
1162306a36Sopenharmony_ci#include <linux/xarray.h>
1262306a36Sopenharmony_ci#include <linux/module.h>
1362306a36Sopenharmony_ci#include <linux/printk.h>
1462306a36Sopenharmony_ci#include <linux/hrtimer.h>
1562306a36Sopenharmony_ci#include <linux/bitmap.h>
1662306a36Sopenharmony_ci#include <linux/numa.h>
1762306a36Sopenharmony_ci#include <rdma/rdma_vt.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include "hfi.h"
2062306a36Sopenharmony_ci#include "device.h"
2162306a36Sopenharmony_ci#include "common.h"
2262306a36Sopenharmony_ci#include "trace.h"
2362306a36Sopenharmony_ci#include "mad.h"
2462306a36Sopenharmony_ci#include "sdma.h"
2562306a36Sopenharmony_ci#include "debugfs.h"
2662306a36Sopenharmony_ci#include "verbs.h"
2762306a36Sopenharmony_ci#include "aspm.h"
2862306a36Sopenharmony_ci#include "affinity.h"
2962306a36Sopenharmony_ci#include "vnic.h"
3062306a36Sopenharmony_ci#include "exp_rcv.h"
3162306a36Sopenharmony_ci#include "netdev.h"
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#undef pr_fmt
3462306a36Sopenharmony_ci#define pr_fmt(fmt) DRIVER_NAME ": " fmt
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci/*
3762306a36Sopenharmony_ci * min buffers we want to have per context, after driver
3862306a36Sopenharmony_ci */
3962306a36Sopenharmony_ci#define HFI1_MIN_USER_CTXT_BUFCNT 7
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
4262306a36Sopenharmony_ci#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci#define NUM_IB_PORTS 1
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci/*
4762306a36Sopenharmony_ci * Number of user receive contexts we are configured to use (to allow for more
4862306a36Sopenharmony_ci * pio buffers per ctxt, etc.)  Zero means use one user context per CPU.
4962306a36Sopenharmony_ci */
5062306a36Sopenharmony_ciint num_user_contexts = -1;
5162306a36Sopenharmony_cimodule_param_named(num_user_contexts, num_user_contexts, int, 0444);
5262306a36Sopenharmony_ciMODULE_PARM_DESC(
5362306a36Sopenharmony_ci	num_user_contexts, "Set max number of user contexts to use (default: -1 will use the real (non-HT) CPU count)");
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ciuint krcvqs[RXE_NUM_DATA_VL];
5662306a36Sopenharmony_ciint krcvqsset;
5762306a36Sopenharmony_cimodule_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
5862306a36Sopenharmony_ciMODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci/* computed based on above array */
6162306a36Sopenharmony_ciunsigned long n_krcvqs;
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_cistatic unsigned hfi1_rcvarr_split = 25;
6462306a36Sopenharmony_cimodule_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
6562306a36Sopenharmony_ciMODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers");
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_cistatic uint eager_buffer_size = (8 << 20); /* 8MB */
6862306a36Sopenharmony_cimodule_param(eager_buffer_size, uint, S_IRUGO);
6962306a36Sopenharmony_ciMODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB");
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_cistatic uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */
7262306a36Sopenharmony_cimodule_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO);
7362306a36Sopenharmony_ciMODULE_PARM_DESC(rcvhdrcnt, "Receive header queue count (default 2048)");
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_cistatic uint hfi1_hdrq_entsize = 32;
7662306a36Sopenharmony_cimodule_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, 0444);
7762306a36Sopenharmony_ciMODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B, 32 - 128B (default)");
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ciunsigned int user_credit_return_threshold = 33;	/* default is 33% */
8062306a36Sopenharmony_cimodule_param(user_credit_return_threshold, uint, S_IRUGO);
8162306a36Sopenharmony_ciMODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ciDEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_cistatic int hfi1_create_kctxt(struct hfi1_devdata *dd,
8662306a36Sopenharmony_ci			     struct hfi1_pportdata *ppd)
8762306a36Sopenharmony_ci{
8862306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
8962306a36Sopenharmony_ci	int ret;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	/* Control context has to be always 0 */
9262306a36Sopenharmony_ci	BUILD_BUG_ON(HFI1_CTRL_CTXT != 0);
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	ret = hfi1_create_ctxtdata(ppd, dd->node, &rcd);
9562306a36Sopenharmony_ci	if (ret < 0) {
9662306a36Sopenharmony_ci		dd_dev_err(dd, "Kernel receive context allocation failed\n");
9762306a36Sopenharmony_ci		return ret;
9862306a36Sopenharmony_ci	}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	/*
10162306a36Sopenharmony_ci	 * Set up the kernel context flags here and now because they use
10262306a36Sopenharmony_ci	 * default values for all receive side memories.  User contexts will
10362306a36Sopenharmony_ci	 * be handled as they are created.
10462306a36Sopenharmony_ci	 */
10562306a36Sopenharmony_ci	rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
10662306a36Sopenharmony_ci		HFI1_CAP_KGET(NODROP_RHQ_FULL) |
10762306a36Sopenharmony_ci		HFI1_CAP_KGET(NODROP_EGR_FULL) |
10862306a36Sopenharmony_ci		HFI1_CAP_KGET(DMA_RTAIL);
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	/* Control context must use DMA_RTAIL */
11162306a36Sopenharmony_ci	if (rcd->ctxt == HFI1_CTRL_CTXT)
11262306a36Sopenharmony_ci		rcd->flags |= HFI1_CAP_DMA_RTAIL;
11362306a36Sopenharmony_ci	rcd->fast_handler = get_dma_rtail_setting(rcd) ?
11462306a36Sopenharmony_ci				handle_receive_interrupt_dma_rtail :
11562306a36Sopenharmony_ci				handle_receive_interrupt_nodma_rtail;
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	hfi1_set_seq_cnt(rcd, 1);
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
12062306a36Sopenharmony_ci	if (!rcd->sc) {
12162306a36Sopenharmony_ci		dd_dev_err(dd, "Kernel send context allocation failed\n");
12262306a36Sopenharmony_ci		return -ENOMEM;
12362306a36Sopenharmony_ci	}
12462306a36Sopenharmony_ci	hfi1_init_ctxt(rcd->sc);
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	return 0;
12762306a36Sopenharmony_ci}
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci/*
13062306a36Sopenharmony_ci * Create the receive context array and one or more kernel contexts
13162306a36Sopenharmony_ci */
13262306a36Sopenharmony_ciint hfi1_create_kctxts(struct hfi1_devdata *dd)
13362306a36Sopenharmony_ci{
13462306a36Sopenharmony_ci	u16 i;
13562306a36Sopenharmony_ci	int ret;
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	dd->rcd = kcalloc_node(dd->num_rcv_contexts, sizeof(*dd->rcd),
13862306a36Sopenharmony_ci			       GFP_KERNEL, dd->node);
13962306a36Sopenharmony_ci	if (!dd->rcd)
14062306a36Sopenharmony_ci		return -ENOMEM;
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
14362306a36Sopenharmony_ci		ret = hfi1_create_kctxt(dd, dd->pport);
14462306a36Sopenharmony_ci		if (ret)
14562306a36Sopenharmony_ci			goto bail;
14662306a36Sopenharmony_ci	}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	return 0;
14962306a36Sopenharmony_cibail:
15062306a36Sopenharmony_ci	for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i)
15162306a36Sopenharmony_ci		hfi1_free_ctxt(dd->rcd[i]);
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	/* All the contexts should be freed, free the array */
15462306a36Sopenharmony_ci	kfree(dd->rcd);
15562306a36Sopenharmony_ci	dd->rcd = NULL;
15662306a36Sopenharmony_ci	return ret;
15762306a36Sopenharmony_ci}
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci/*
16062306a36Sopenharmony_ci * Helper routines for the receive context reference count (rcd and uctxt).
16162306a36Sopenharmony_ci */
16262306a36Sopenharmony_cistatic void hfi1_rcd_init(struct hfi1_ctxtdata *rcd)
16362306a36Sopenharmony_ci{
16462306a36Sopenharmony_ci	kref_init(&rcd->kref);
16562306a36Sopenharmony_ci}
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci/**
16862306a36Sopenharmony_ci * hfi1_rcd_free - When reference is zero clean up.
16962306a36Sopenharmony_ci * @kref: pointer to an initialized rcd data structure
17062306a36Sopenharmony_ci *
17162306a36Sopenharmony_ci */
17262306a36Sopenharmony_cistatic void hfi1_rcd_free(struct kref *kref)
17362306a36Sopenharmony_ci{
17462306a36Sopenharmony_ci	unsigned long flags;
17562306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd =
17662306a36Sopenharmony_ci		container_of(kref, struct hfi1_ctxtdata, kref);
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
17962306a36Sopenharmony_ci	rcd->dd->rcd[rcd->ctxt] = NULL;
18062306a36Sopenharmony_ci	spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	hfi1_free_ctxtdata(rcd->dd, rcd);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	kfree(rcd);
18562306a36Sopenharmony_ci}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci/**
18862306a36Sopenharmony_ci * hfi1_rcd_put - decrement reference for rcd
18962306a36Sopenharmony_ci * @rcd: pointer to an initialized rcd data structure
19062306a36Sopenharmony_ci *
19162306a36Sopenharmony_ci * Use this to put a reference after the init.
19262306a36Sopenharmony_ci */
19362306a36Sopenharmony_ciint hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
19462306a36Sopenharmony_ci{
19562306a36Sopenharmony_ci	if (rcd)
19662306a36Sopenharmony_ci		return kref_put(&rcd->kref, hfi1_rcd_free);
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	return 0;
19962306a36Sopenharmony_ci}
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci/**
20262306a36Sopenharmony_ci * hfi1_rcd_get - increment reference for rcd
20362306a36Sopenharmony_ci * @rcd: pointer to an initialized rcd data structure
20462306a36Sopenharmony_ci *
20562306a36Sopenharmony_ci * Use this to get a reference after the init.
20662306a36Sopenharmony_ci *
20762306a36Sopenharmony_ci * Return : reflect kref_get_unless_zero(), which returns non-zero on
20862306a36Sopenharmony_ci * increment, otherwise 0.
20962306a36Sopenharmony_ci */
21062306a36Sopenharmony_ciint hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
21162306a36Sopenharmony_ci{
21262306a36Sopenharmony_ci	return kref_get_unless_zero(&rcd->kref);
21362306a36Sopenharmony_ci}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci/**
21662306a36Sopenharmony_ci * allocate_rcd_index - allocate an rcd index from the rcd array
21762306a36Sopenharmony_ci * @dd: pointer to a valid devdata structure
21862306a36Sopenharmony_ci * @rcd: rcd data structure to assign
21962306a36Sopenharmony_ci * @index: pointer to index that is allocated
22062306a36Sopenharmony_ci *
22162306a36Sopenharmony_ci * Find an empty index in the rcd array, and assign the given rcd to it.
22262306a36Sopenharmony_ci * If the array is full, we are EBUSY.
22362306a36Sopenharmony_ci *
22462306a36Sopenharmony_ci */
22562306a36Sopenharmony_cistatic int allocate_rcd_index(struct hfi1_devdata *dd,
22662306a36Sopenharmony_ci			      struct hfi1_ctxtdata *rcd, u16 *index)
22762306a36Sopenharmony_ci{
22862306a36Sopenharmony_ci	unsigned long flags;
22962306a36Sopenharmony_ci	u16 ctxt;
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	spin_lock_irqsave(&dd->uctxt_lock, flags);
23262306a36Sopenharmony_ci	for (ctxt = 0; ctxt < dd->num_rcv_contexts; ctxt++)
23362306a36Sopenharmony_ci		if (!dd->rcd[ctxt])
23462306a36Sopenharmony_ci			break;
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	if (ctxt < dd->num_rcv_contexts) {
23762306a36Sopenharmony_ci		rcd->ctxt = ctxt;
23862306a36Sopenharmony_ci		dd->rcd[ctxt] = rcd;
23962306a36Sopenharmony_ci		hfi1_rcd_init(rcd);
24062306a36Sopenharmony_ci	}
24162306a36Sopenharmony_ci	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	if (ctxt >= dd->num_rcv_contexts)
24462306a36Sopenharmony_ci		return -EBUSY;
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	*index = ctxt;
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	return 0;
24962306a36Sopenharmony_ci}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci/**
25262306a36Sopenharmony_ci * hfi1_rcd_get_by_index_safe - validate the ctxt index before accessing the
25362306a36Sopenharmony_ci * array
25462306a36Sopenharmony_ci * @dd: pointer to a valid devdata structure
25562306a36Sopenharmony_ci * @ctxt: the index of an possilbe rcd
25662306a36Sopenharmony_ci *
25762306a36Sopenharmony_ci * This is a wrapper for hfi1_rcd_get_by_index() to validate that the given
25862306a36Sopenharmony_ci * ctxt index is valid.
25962306a36Sopenharmony_ci *
26062306a36Sopenharmony_ci * The caller is responsible for making the _put().
26162306a36Sopenharmony_ci *
26262306a36Sopenharmony_ci */
26362306a36Sopenharmony_cistruct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
26462306a36Sopenharmony_ci						 u16 ctxt)
26562306a36Sopenharmony_ci{
26662306a36Sopenharmony_ci	if (ctxt < dd->num_rcv_contexts)
26762306a36Sopenharmony_ci		return hfi1_rcd_get_by_index(dd, ctxt);
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	return NULL;
27062306a36Sopenharmony_ci}
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci/**
27362306a36Sopenharmony_ci * hfi1_rcd_get_by_index - get by index
27462306a36Sopenharmony_ci * @dd: pointer to a valid devdata structure
27562306a36Sopenharmony_ci * @ctxt: the index of an possilbe rcd
27662306a36Sopenharmony_ci *
27762306a36Sopenharmony_ci * We need to protect access to the rcd array.  If access is needed to
27862306a36Sopenharmony_ci * one or more index, get the protecting spinlock and then increment the
27962306a36Sopenharmony_ci * kref.
28062306a36Sopenharmony_ci *
28162306a36Sopenharmony_ci * The caller is responsible for making the _put().
28262306a36Sopenharmony_ci *
28362306a36Sopenharmony_ci */
28462306a36Sopenharmony_cistruct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt)
28562306a36Sopenharmony_ci{
28662306a36Sopenharmony_ci	unsigned long flags;
28762306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd = NULL;
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	spin_lock_irqsave(&dd->uctxt_lock, flags);
29062306a36Sopenharmony_ci	if (dd->rcd[ctxt]) {
29162306a36Sopenharmony_ci		rcd = dd->rcd[ctxt];
29262306a36Sopenharmony_ci		if (!hfi1_rcd_get(rcd))
29362306a36Sopenharmony_ci			rcd = NULL;
29462306a36Sopenharmony_ci	}
29562306a36Sopenharmony_ci	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	return rcd;
29862306a36Sopenharmony_ci}
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci/*
30162306a36Sopenharmony_ci * Common code for user and kernel context create and setup.
30262306a36Sopenharmony_ci * NOTE: the initial kref is done here (hf1_rcd_init()).
30362306a36Sopenharmony_ci */
30462306a36Sopenharmony_ciint hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
30562306a36Sopenharmony_ci			 struct hfi1_ctxtdata **context)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	struct hfi1_devdata *dd = ppd->dd;
30862306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
30962306a36Sopenharmony_ci	unsigned kctxt_ngroups = 0;
31062306a36Sopenharmony_ci	u32 base;
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	if (dd->rcv_entries.nctxt_extra >
31362306a36Sopenharmony_ci	    dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)
31462306a36Sopenharmony_ci		kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
31562306a36Sopenharmony_ci			 (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt));
31662306a36Sopenharmony_ci	rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
31762306a36Sopenharmony_ci	if (rcd) {
31862306a36Sopenharmony_ci		u32 rcvtids, max_entries;
31962306a36Sopenharmony_ci		u16 ctxt;
32062306a36Sopenharmony_ci		int ret;
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci		ret = allocate_rcd_index(dd, rcd, &ctxt);
32362306a36Sopenharmony_ci		if (ret) {
32462306a36Sopenharmony_ci			*context = NULL;
32562306a36Sopenharmony_ci			kfree(rcd);
32662306a36Sopenharmony_ci			return ret;
32762306a36Sopenharmony_ci		}
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci		INIT_LIST_HEAD(&rcd->qp_wait_list);
33062306a36Sopenharmony_ci		hfi1_exp_tid_group_init(rcd);
33162306a36Sopenharmony_ci		rcd->ppd = ppd;
33262306a36Sopenharmony_ci		rcd->dd = dd;
33362306a36Sopenharmony_ci		rcd->numa_id = numa;
33462306a36Sopenharmony_ci		rcd->rcv_array_groups = dd->rcv_entries.ngroups;
33562306a36Sopenharmony_ci		rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
33662306a36Sopenharmony_ci		rcd->slow_handler = handle_receive_interrupt;
33762306a36Sopenharmony_ci		rcd->do_interrupt = rcd->slow_handler;
33862306a36Sopenharmony_ci		rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci		mutex_init(&rcd->exp_mutex);
34162306a36Sopenharmony_ci		spin_lock_init(&rcd->exp_lock);
34262306a36Sopenharmony_ci		INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
34362306a36Sopenharmony_ci		INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci		hfi1_cdbg(PROC, "setting up context %u", rcd->ctxt);
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci		/*
34862306a36Sopenharmony_ci		 * Calculate the context's RcvArray entry starting point.
34962306a36Sopenharmony_ci		 * We do this here because we have to take into account all
35062306a36Sopenharmony_ci		 * the RcvArray entries that previous context would have
35162306a36Sopenharmony_ci		 * taken and we have to account for any extra groups assigned
35262306a36Sopenharmony_ci		 * to the static (kernel) or dynamic (vnic/user) contexts.
35362306a36Sopenharmony_ci		 */
35462306a36Sopenharmony_ci		if (ctxt < dd->first_dyn_alloc_ctxt) {
35562306a36Sopenharmony_ci			if (ctxt < kctxt_ngroups) {
35662306a36Sopenharmony_ci				base = ctxt * (dd->rcv_entries.ngroups + 1);
35762306a36Sopenharmony_ci				rcd->rcv_array_groups++;
35862306a36Sopenharmony_ci			} else {
35962306a36Sopenharmony_ci				base = kctxt_ngroups +
36062306a36Sopenharmony_ci					(ctxt * dd->rcv_entries.ngroups);
36162306a36Sopenharmony_ci			}
36262306a36Sopenharmony_ci		} else {
36362306a36Sopenharmony_ci			u16 ct = ctxt - dd->first_dyn_alloc_ctxt;
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci			base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) +
36662306a36Sopenharmony_ci				kctxt_ngroups);
36762306a36Sopenharmony_ci			if (ct < dd->rcv_entries.nctxt_extra) {
36862306a36Sopenharmony_ci				base += ct * (dd->rcv_entries.ngroups + 1);
36962306a36Sopenharmony_ci				rcd->rcv_array_groups++;
37062306a36Sopenharmony_ci			} else {
37162306a36Sopenharmony_ci				base += dd->rcv_entries.nctxt_extra +
37262306a36Sopenharmony_ci					(ct * dd->rcv_entries.ngroups);
37362306a36Sopenharmony_ci			}
37462306a36Sopenharmony_ci		}
37562306a36Sopenharmony_ci		rcd->eager_base = base * dd->rcv_entries.group_size;
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci		rcd->rcvhdrq_cnt = rcvhdrcnt;
37862306a36Sopenharmony_ci		rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
37962306a36Sopenharmony_ci		rcd->rhf_offset =
38062306a36Sopenharmony_ci			rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
38162306a36Sopenharmony_ci		/*
38262306a36Sopenharmony_ci		 * Simple Eager buffer allocation: we have already pre-allocated
38362306a36Sopenharmony_ci		 * the number of RcvArray entry groups. Each ctxtdata structure
38462306a36Sopenharmony_ci		 * holds the number of groups for that context.
38562306a36Sopenharmony_ci		 *
38662306a36Sopenharmony_ci		 * To follow CSR requirements and maintain cacheline alignment,
38762306a36Sopenharmony_ci		 * make sure all sizes and bases are multiples of group_size.
38862306a36Sopenharmony_ci		 *
38962306a36Sopenharmony_ci		 * The expected entry count is what is left after assigning
39062306a36Sopenharmony_ci		 * eager.
39162306a36Sopenharmony_ci		 */
39262306a36Sopenharmony_ci		max_entries = rcd->rcv_array_groups *
39362306a36Sopenharmony_ci			dd->rcv_entries.group_size;
39462306a36Sopenharmony_ci		rcvtids = ((max_entries * hfi1_rcvarr_split) / 100);
39562306a36Sopenharmony_ci		rcd->egrbufs.count = round_down(rcvtids,
39662306a36Sopenharmony_ci						dd->rcv_entries.group_size);
39762306a36Sopenharmony_ci		if (rcd->egrbufs.count > MAX_EAGER_ENTRIES) {
39862306a36Sopenharmony_ci			dd_dev_err(dd, "ctxt%u: requested too many RcvArray entries.\n",
39962306a36Sopenharmony_ci				   rcd->ctxt);
40062306a36Sopenharmony_ci			rcd->egrbufs.count = MAX_EAGER_ENTRIES;
40162306a36Sopenharmony_ci		}
40262306a36Sopenharmony_ci		hfi1_cdbg(PROC,
40362306a36Sopenharmony_ci			  "ctxt%u: max Eager buffer RcvArray entries: %u",
40462306a36Sopenharmony_ci			  rcd->ctxt, rcd->egrbufs.count);
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci		/*
40762306a36Sopenharmony_ci		 * Allocate array that will hold the eager buffer accounting
40862306a36Sopenharmony_ci		 * data.
40962306a36Sopenharmony_ci		 * This will allocate the maximum possible buffer count based
41062306a36Sopenharmony_ci		 * on the value of the RcvArray split parameter.
41162306a36Sopenharmony_ci		 * The resulting value will be rounded down to the closest
41262306a36Sopenharmony_ci		 * multiple of dd->rcv_entries.group_size.
41362306a36Sopenharmony_ci		 */
41462306a36Sopenharmony_ci		rcd->egrbufs.buffers =
41562306a36Sopenharmony_ci			kcalloc_node(rcd->egrbufs.count,
41662306a36Sopenharmony_ci				     sizeof(*rcd->egrbufs.buffers),
41762306a36Sopenharmony_ci				     GFP_KERNEL, numa);
41862306a36Sopenharmony_ci		if (!rcd->egrbufs.buffers)
41962306a36Sopenharmony_ci			goto bail;
42062306a36Sopenharmony_ci		rcd->egrbufs.rcvtids =
42162306a36Sopenharmony_ci			kcalloc_node(rcd->egrbufs.count,
42262306a36Sopenharmony_ci				     sizeof(*rcd->egrbufs.rcvtids),
42362306a36Sopenharmony_ci				     GFP_KERNEL, numa);
42462306a36Sopenharmony_ci		if (!rcd->egrbufs.rcvtids)
42562306a36Sopenharmony_ci			goto bail;
42662306a36Sopenharmony_ci		rcd->egrbufs.size = eager_buffer_size;
42762306a36Sopenharmony_ci		/*
42862306a36Sopenharmony_ci		 * The size of the buffers programmed into the RcvArray
42962306a36Sopenharmony_ci		 * entries needs to be big enough to handle the highest
43062306a36Sopenharmony_ci		 * MTU supported.
43162306a36Sopenharmony_ci		 */
43262306a36Sopenharmony_ci		if (rcd->egrbufs.size < hfi1_max_mtu) {
43362306a36Sopenharmony_ci			rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu);
43462306a36Sopenharmony_ci			hfi1_cdbg(PROC,
43562306a36Sopenharmony_ci				  "ctxt%u: eager bufs size too small. Adjusting to %u",
43662306a36Sopenharmony_ci				    rcd->ctxt, rcd->egrbufs.size);
43762306a36Sopenharmony_ci		}
43862306a36Sopenharmony_ci		rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci		/* Applicable only for statically created kernel contexts */
44162306a36Sopenharmony_ci		if (ctxt < dd->first_dyn_alloc_ctxt) {
44262306a36Sopenharmony_ci			rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
44362306a36Sopenharmony_ci						    GFP_KERNEL, numa);
44462306a36Sopenharmony_ci			if (!rcd->opstats)
44562306a36Sopenharmony_ci				goto bail;
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci			/* Initialize TID flow generations for the context */
44862306a36Sopenharmony_ci			hfi1_kern_init_ctxt_generations(rcd);
44962306a36Sopenharmony_ci		}
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci		*context = rcd;
45262306a36Sopenharmony_ci		return 0;
45362306a36Sopenharmony_ci	}
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_cibail:
45662306a36Sopenharmony_ci	*context = NULL;
45762306a36Sopenharmony_ci	hfi1_free_ctxt(rcd);
45862306a36Sopenharmony_ci	return -ENOMEM;
45962306a36Sopenharmony_ci}
46062306a36Sopenharmony_ci
/**
 * hfi1_free_ctxt - free context
 * @rcd: pointer to an initialized rcd data structure
 *
 * Counterpart of hfi1_create_ctxtdata().  Once a context (kernel or user)
 * is no longer needed, call this to do the "final" put that balances the
 * kref init done by hfi1_create_ctxtdata().  Other users of the context
 * bracket their accesses with a get/put pair so that the structure is not
 * removed while in use.
 */
void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd)
{
	/* The put result is deliberately not reported to the caller */
	(void)hfi1_rcd_put(rcd);
}
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci/*
47762306a36Sopenharmony_ci * Select the largest ccti value over all SLs to determine the intra-
47862306a36Sopenharmony_ci * packet gap for the link.
47962306a36Sopenharmony_ci *
48062306a36Sopenharmony_ci * called with cca_timer_lock held (to protect access to cca_timer
48162306a36Sopenharmony_ci * array), and rcu_read_lock() (to protect access to cc_state).
48262306a36Sopenharmony_ci */
48362306a36Sopenharmony_civoid set_link_ipg(struct hfi1_pportdata *ppd)
48462306a36Sopenharmony_ci{
48562306a36Sopenharmony_ci	struct hfi1_devdata *dd = ppd->dd;
48662306a36Sopenharmony_ci	struct cc_state *cc_state;
48762306a36Sopenharmony_ci	int i;
48862306a36Sopenharmony_ci	u16 cce, ccti_limit, max_ccti = 0;
48962306a36Sopenharmony_ci	u16 shift, mult;
49062306a36Sopenharmony_ci	u64 src;
49162306a36Sopenharmony_ci	u32 current_egress_rate; /* Mbits /sec */
49262306a36Sopenharmony_ci	u64 max_pkt_time;
49362306a36Sopenharmony_ci	/*
49462306a36Sopenharmony_ci	 * max_pkt_time is the maximum packet egress time in units
49562306a36Sopenharmony_ci	 * of the fabric clock period 1/(805 MHz).
49662306a36Sopenharmony_ci	 */
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci	cc_state = get_cc_state(ppd);
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_ci	if (!cc_state)
50162306a36Sopenharmony_ci		/*
50262306a36Sopenharmony_ci		 * This should _never_ happen - rcu_read_lock() is held,
50362306a36Sopenharmony_ci		 * and set_link_ipg() should not be called if cc_state
50462306a36Sopenharmony_ci		 * is NULL.
50562306a36Sopenharmony_ci		 */
50662306a36Sopenharmony_ci		return;
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci	for (i = 0; i < OPA_MAX_SLS; i++) {
50962306a36Sopenharmony_ci		u16 ccti = ppd->cca_timer[i].ccti;
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci		if (ccti > max_ccti)
51262306a36Sopenharmony_ci			max_ccti = ccti;
51362306a36Sopenharmony_ci	}
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	ccti_limit = cc_state->cct.ccti_limit;
51662306a36Sopenharmony_ci	if (max_ccti > ccti_limit)
51762306a36Sopenharmony_ci		max_ccti = ccti_limit;
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci	cce = cc_state->cct.entries[max_ccti].entry;
52062306a36Sopenharmony_ci	shift = (cce & 0xc000) >> 14;
52162306a36Sopenharmony_ci	mult = (cce & 0x3fff);
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci	current_egress_rate = active_egress_rate(ppd);
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci	max_pkt_time = egress_cycles(ppd->ibmaxlen, current_egress_rate);
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	src = (max_pkt_time >> shift) * mult;
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ci	src &= SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SMASK;
53062306a36Sopenharmony_ci	src <<= SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SHIFT;
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci	write_csr(dd, SEND_STATIC_RATE_CONTROL, src);
53362306a36Sopenharmony_ci}
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_cistatic enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
53662306a36Sopenharmony_ci{
53762306a36Sopenharmony_ci	struct cca_timer *cca_timer;
53862306a36Sopenharmony_ci	struct hfi1_pportdata *ppd;
53962306a36Sopenharmony_ci	int sl;
54062306a36Sopenharmony_ci	u16 ccti_timer, ccti_min;
54162306a36Sopenharmony_ci	struct cc_state *cc_state;
54262306a36Sopenharmony_ci	unsigned long flags;
54362306a36Sopenharmony_ci	enum hrtimer_restart ret = HRTIMER_NORESTART;
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	cca_timer = container_of(t, struct cca_timer, hrtimer);
54662306a36Sopenharmony_ci	ppd = cca_timer->ppd;
54762306a36Sopenharmony_ci	sl = cca_timer->sl;
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	rcu_read_lock();
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci	cc_state = get_cc_state(ppd);
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci	if (!cc_state) {
55462306a36Sopenharmony_ci		rcu_read_unlock();
55562306a36Sopenharmony_ci		return HRTIMER_NORESTART;
55662306a36Sopenharmony_ci	}
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci	/*
55962306a36Sopenharmony_ci	 * 1) decrement ccti for SL
56062306a36Sopenharmony_ci	 * 2) calculate IPG for link (set_link_ipg())
56162306a36Sopenharmony_ci	 * 3) restart timer, unless ccti is at min value
56262306a36Sopenharmony_ci	 */
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	ccti_min = cc_state->cong_setting.entries[sl].ccti_min;
56562306a36Sopenharmony_ci	ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer;
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci	spin_lock_irqsave(&ppd->cca_timer_lock, flags);
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci	if (cca_timer->ccti > ccti_min) {
57062306a36Sopenharmony_ci		cca_timer->ccti--;
57162306a36Sopenharmony_ci		set_link_ipg(ppd);
57262306a36Sopenharmony_ci	}
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	if (cca_timer->ccti > ccti_min) {
57562306a36Sopenharmony_ci		unsigned long nsec = 1024 * ccti_timer;
57662306a36Sopenharmony_ci		/* ccti_timer is in units of 1.024 usec */
57762306a36Sopenharmony_ci		hrtimer_forward_now(t, ns_to_ktime(nsec));
57862306a36Sopenharmony_ci		ret = HRTIMER_RESTART;
57962306a36Sopenharmony_ci	}
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
58262306a36Sopenharmony_ci	rcu_read_unlock();
58362306a36Sopenharmony_ci	return ret;
58462306a36Sopenharmony_ci}
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci/*
58762306a36Sopenharmony_ci * Common code for initializing the physical port structure.
58862306a36Sopenharmony_ci */
58962306a36Sopenharmony_civoid hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
59062306a36Sopenharmony_ci			 struct hfi1_devdata *dd, u8 hw_pidx, u32 port)
59162306a36Sopenharmony_ci{
59262306a36Sopenharmony_ci	int i;
59362306a36Sopenharmony_ci	uint default_pkey_idx;
59462306a36Sopenharmony_ci	struct cc_state *cc_state;
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	ppd->dd = dd;
59762306a36Sopenharmony_ci	ppd->hw_pidx = hw_pidx;
59862306a36Sopenharmony_ci	ppd->port = port; /* IB port number, not index */
59962306a36Sopenharmony_ci	ppd->prev_link_width = LINK_WIDTH_DEFAULT;
60062306a36Sopenharmony_ci	/*
60162306a36Sopenharmony_ci	 * There are C_VL_COUNT number of PortVLXmitWait counters.
60262306a36Sopenharmony_ci	 * Adding 1 to C_VL_COUNT to include the PortXmitWait counter.
60362306a36Sopenharmony_ci	 */
60462306a36Sopenharmony_ci	for (i = 0; i < C_VL_COUNT + 1; i++) {
60562306a36Sopenharmony_ci		ppd->port_vl_xmit_wait_last[i] = 0;
60662306a36Sopenharmony_ci		ppd->vl_xmit_flit_cnt[i] = 0;
60762306a36Sopenharmony_ci	}
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci	default_pkey_idx = 1;
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci	ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
61262306a36Sopenharmony_ci	ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
61362306a36Sopenharmony_ci	ppd->pkeys[0] = 0x8001;
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
61662306a36Sopenharmony_ci	INIT_WORK(&ppd->link_up_work, handle_link_up);
61762306a36Sopenharmony_ci	INIT_WORK(&ppd->link_down_work, handle_link_down);
61862306a36Sopenharmony_ci	INIT_WORK(&ppd->freeze_work, handle_freeze);
61962306a36Sopenharmony_ci	INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
62062306a36Sopenharmony_ci	INIT_WORK(&ppd->sma_message_work, handle_sma_message);
62162306a36Sopenharmony_ci	INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
62262306a36Sopenharmony_ci	INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link);
62362306a36Sopenharmony_ci	INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
62462306a36Sopenharmony_ci	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci	mutex_init(&ppd->hls_lock);
62762306a36Sopenharmony_ci	spin_lock_init(&ppd->qsfp_info.qsfp_lock);
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	ppd->qsfp_info.ppd = ppd;
63062306a36Sopenharmony_ci	ppd->sm_trap_qp = 0x0;
63162306a36Sopenharmony_ci	ppd->sa_qp = 0x1;
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci	ppd->hfi1_wq = NULL;
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci	spin_lock_init(&ppd->cca_timer_lock);
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci	for (i = 0; i < OPA_MAX_SLS; i++) {
63862306a36Sopenharmony_ci		hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC,
63962306a36Sopenharmony_ci			     HRTIMER_MODE_REL);
64062306a36Sopenharmony_ci		ppd->cca_timer[i].ppd = ppd;
64162306a36Sopenharmony_ci		ppd->cca_timer[i].sl = i;
64262306a36Sopenharmony_ci		ppd->cca_timer[i].ccti = 0;
64362306a36Sopenharmony_ci		ppd->cca_timer[i].hrtimer.function = cca_timer_fn;
64462306a36Sopenharmony_ci	}
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci	ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT;
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	spin_lock_init(&ppd->cc_state_lock);
64962306a36Sopenharmony_ci	spin_lock_init(&ppd->cc_log_lock);
65062306a36Sopenharmony_ci	cc_state = kzalloc(sizeof(*cc_state), GFP_KERNEL);
65162306a36Sopenharmony_ci	RCU_INIT_POINTER(ppd->cc_state, cc_state);
65262306a36Sopenharmony_ci	if (!cc_state)
65362306a36Sopenharmony_ci		goto bail;
65462306a36Sopenharmony_ci	return;
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_cibail:
65762306a36Sopenharmony_ci	dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
65862306a36Sopenharmony_ci}
65962306a36Sopenharmony_ci
/*
 * Device initialization that is needed only on first detect and never
 * on a reset.  There is currently nothing to do for @dd, so this
 * always reports success (0).
 */
static int loadtime_init(struct hfi1_devdata *dd)
{
	return 0;
}
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci/**
67062306a36Sopenharmony_ci * init_after_reset - re-initialize after a reset
67162306a36Sopenharmony_ci * @dd: the hfi1_ib device
67262306a36Sopenharmony_ci *
67362306a36Sopenharmony_ci * sanity check at least some of the values after reset, and
67462306a36Sopenharmony_ci * ensure no receive or transmit (explicitly, in case reset
67562306a36Sopenharmony_ci * failed
67662306a36Sopenharmony_ci */
67762306a36Sopenharmony_cistatic int init_after_reset(struct hfi1_devdata *dd)
67862306a36Sopenharmony_ci{
67962306a36Sopenharmony_ci	int i;
68062306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
68162306a36Sopenharmony_ci	/*
68262306a36Sopenharmony_ci	 * Ensure chip does no sends or receives, tail updates, or
68362306a36Sopenharmony_ci	 * pioavail updates while we re-initialize.  This is mostly
68462306a36Sopenharmony_ci	 * for the driver data structures, not chip registers.
68562306a36Sopenharmony_ci	 */
68662306a36Sopenharmony_ci	for (i = 0; i < dd->num_rcv_contexts; i++) {
68762306a36Sopenharmony_ci		rcd = hfi1_rcd_get_by_index(dd, i);
68862306a36Sopenharmony_ci		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
68962306a36Sopenharmony_ci			     HFI1_RCVCTRL_INTRAVAIL_DIS |
69062306a36Sopenharmony_ci			     HFI1_RCVCTRL_TAILUPD_DIS, rcd);
69162306a36Sopenharmony_ci		hfi1_rcd_put(rcd);
69262306a36Sopenharmony_ci	}
69362306a36Sopenharmony_ci	pio_send_control(dd, PSC_GLOBAL_DISABLE);
69462306a36Sopenharmony_ci	for (i = 0; i < dd->num_send_contexts; i++)
69562306a36Sopenharmony_ci		sc_disable(dd->send_contexts[i].sc);
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	return 0;
69862306a36Sopenharmony_ci}
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_cistatic void enable_chip(struct hfi1_devdata *dd)
70162306a36Sopenharmony_ci{
70262306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
70362306a36Sopenharmony_ci	u32 rcvmask;
70462306a36Sopenharmony_ci	u16 i;
70562306a36Sopenharmony_ci
70662306a36Sopenharmony_ci	/* enable PIO send */
70762306a36Sopenharmony_ci	pio_send_control(dd, PSC_GLOBAL_ENABLE);
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_ci	/*
71062306a36Sopenharmony_ci	 * Enable kernel ctxts' receive and receive interrupt.
71162306a36Sopenharmony_ci	 * Other ctxts done as user opens and initializes them.
71262306a36Sopenharmony_ci	 */
71362306a36Sopenharmony_ci	for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
71462306a36Sopenharmony_ci		rcd = hfi1_rcd_get_by_index(dd, i);
71562306a36Sopenharmony_ci		if (!rcd)
71662306a36Sopenharmony_ci			continue;
71762306a36Sopenharmony_ci		rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
71862306a36Sopenharmony_ci		rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
71962306a36Sopenharmony_ci			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
72062306a36Sopenharmony_ci		if (!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR))
72162306a36Sopenharmony_ci			rcvmask |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
72262306a36Sopenharmony_ci		if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_RHQ_FULL))
72362306a36Sopenharmony_ci			rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
72462306a36Sopenharmony_ci		if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
72562306a36Sopenharmony_ci			rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
72662306a36Sopenharmony_ci		if (HFI1_CAP_IS_KSET(TID_RDMA))
72762306a36Sopenharmony_ci			rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
72862306a36Sopenharmony_ci		hfi1_rcvctrl(dd, rcvmask, rcd);
72962306a36Sopenharmony_ci		sc_enable(rcd->sc);
73062306a36Sopenharmony_ci		hfi1_rcd_put(rcd);
73162306a36Sopenharmony_ci	}
73262306a36Sopenharmony_ci}
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci/**
73562306a36Sopenharmony_ci * create_workqueues - create per port workqueues
73662306a36Sopenharmony_ci * @dd: the hfi1_ib device
73762306a36Sopenharmony_ci */
73862306a36Sopenharmony_cistatic int create_workqueues(struct hfi1_devdata *dd)
73962306a36Sopenharmony_ci{
74062306a36Sopenharmony_ci	int pidx;
74162306a36Sopenharmony_ci	struct hfi1_pportdata *ppd;
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
74462306a36Sopenharmony_ci		ppd = dd->pport + pidx;
74562306a36Sopenharmony_ci		if (!ppd->hfi1_wq) {
74662306a36Sopenharmony_ci			ppd->hfi1_wq =
74762306a36Sopenharmony_ci				alloc_workqueue(
74862306a36Sopenharmony_ci				    "hfi%d_%d",
74962306a36Sopenharmony_ci				    WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
75062306a36Sopenharmony_ci				    WQ_MEM_RECLAIM,
75162306a36Sopenharmony_ci				    HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
75262306a36Sopenharmony_ci				    dd->unit, pidx);
75362306a36Sopenharmony_ci			if (!ppd->hfi1_wq)
75462306a36Sopenharmony_ci				goto wq_error;
75562306a36Sopenharmony_ci		}
75662306a36Sopenharmony_ci		if (!ppd->link_wq) {
75762306a36Sopenharmony_ci			/*
75862306a36Sopenharmony_ci			 * Make the link workqueue single-threaded to enforce
75962306a36Sopenharmony_ci			 * serialization.
76062306a36Sopenharmony_ci			 */
76162306a36Sopenharmony_ci			ppd->link_wq =
76262306a36Sopenharmony_ci				alloc_workqueue(
76362306a36Sopenharmony_ci				    "hfi_link_%d_%d",
76462306a36Sopenharmony_ci				    WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND,
76562306a36Sopenharmony_ci				    1, /* max_active */
76662306a36Sopenharmony_ci				    dd->unit, pidx);
76762306a36Sopenharmony_ci			if (!ppd->link_wq)
76862306a36Sopenharmony_ci				goto wq_error;
76962306a36Sopenharmony_ci		}
77062306a36Sopenharmony_ci	}
77162306a36Sopenharmony_ci	return 0;
77262306a36Sopenharmony_ciwq_error:
77362306a36Sopenharmony_ci	pr_err("alloc_workqueue failed for port %d\n", pidx + 1);
77462306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
77562306a36Sopenharmony_ci		ppd = dd->pport + pidx;
77662306a36Sopenharmony_ci		if (ppd->hfi1_wq) {
77762306a36Sopenharmony_ci			destroy_workqueue(ppd->hfi1_wq);
77862306a36Sopenharmony_ci			ppd->hfi1_wq = NULL;
77962306a36Sopenharmony_ci		}
78062306a36Sopenharmony_ci		if (ppd->link_wq) {
78162306a36Sopenharmony_ci			destroy_workqueue(ppd->link_wq);
78262306a36Sopenharmony_ci			ppd->link_wq = NULL;
78362306a36Sopenharmony_ci		}
78462306a36Sopenharmony_ci	}
78562306a36Sopenharmony_ci	return -ENOMEM;
78662306a36Sopenharmony_ci}
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci/**
78962306a36Sopenharmony_ci * destroy_workqueues - destroy per port workqueues
79062306a36Sopenharmony_ci * @dd: the hfi1_ib device
79162306a36Sopenharmony_ci */
79262306a36Sopenharmony_cistatic void destroy_workqueues(struct hfi1_devdata *dd)
79362306a36Sopenharmony_ci{
79462306a36Sopenharmony_ci	int pidx;
79562306a36Sopenharmony_ci	struct hfi1_pportdata *ppd;
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
79862306a36Sopenharmony_ci		ppd = dd->pport + pidx;
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci		if (ppd->hfi1_wq) {
80162306a36Sopenharmony_ci			destroy_workqueue(ppd->hfi1_wq);
80262306a36Sopenharmony_ci			ppd->hfi1_wq = NULL;
80362306a36Sopenharmony_ci		}
80462306a36Sopenharmony_ci		if (ppd->link_wq) {
80562306a36Sopenharmony_ci			destroy_workqueue(ppd->link_wq);
80662306a36Sopenharmony_ci			ppd->link_wq = NULL;
80762306a36Sopenharmony_ci		}
80862306a36Sopenharmony_ci	}
80962306a36Sopenharmony_ci}
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci/**
81262306a36Sopenharmony_ci * enable_general_intr() - Enable the IRQs that will be handled by the
81362306a36Sopenharmony_ci * general interrupt handler.
81462306a36Sopenharmony_ci * @dd: valid devdata
81562306a36Sopenharmony_ci *
81662306a36Sopenharmony_ci */
81762306a36Sopenharmony_cistatic void enable_general_intr(struct hfi1_devdata *dd)
81862306a36Sopenharmony_ci{
81962306a36Sopenharmony_ci	set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
82062306a36Sopenharmony_ci	set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
82162306a36Sopenharmony_ci	set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
82262306a36Sopenharmony_ci	set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
82362306a36Sopenharmony_ci	set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
82462306a36Sopenharmony_ci	set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
82562306a36Sopenharmony_ci	set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
82662306a36Sopenharmony_ci}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci/**
82962306a36Sopenharmony_ci * hfi1_init - do the actual initialization sequence on the chip
83062306a36Sopenharmony_ci * @dd: the hfi1_ib device
83162306a36Sopenharmony_ci * @reinit: re-initializing, so don't allocate new memory
83262306a36Sopenharmony_ci *
83362306a36Sopenharmony_ci * Do the actual initialization sequence on the chip.  This is done
83462306a36Sopenharmony_ci * both from the init routine called from the PCI infrastructure, and
83562306a36Sopenharmony_ci * when we reset the chip, or detect that it was reset internally,
83662306a36Sopenharmony_ci * or it's administratively re-enabled.
83762306a36Sopenharmony_ci *
83862306a36Sopenharmony_ci * Memory allocation here and in called routines is only done in
83962306a36Sopenharmony_ci * the first case (reinit == 0).  We have to be careful, because even
84062306a36Sopenharmony_ci * without memory allocation, we need to re-write all the chip registers
84162306a36Sopenharmony_ci * TIDs, etc. after the reset or enable has completed.
84262306a36Sopenharmony_ci */
84362306a36Sopenharmony_ciint hfi1_init(struct hfi1_devdata *dd, int reinit)
84462306a36Sopenharmony_ci{
84562306a36Sopenharmony_ci	int ret = 0, pidx, lastfail = 0;
84662306a36Sopenharmony_ci	unsigned long len;
84762306a36Sopenharmony_ci	u16 i;
84862306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
84962306a36Sopenharmony_ci	struct hfi1_pportdata *ppd;
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_ci	/* Set up send low level handlers */
85262306a36Sopenharmony_ci	dd->process_pio_send = hfi1_verbs_send_pio;
85362306a36Sopenharmony_ci	dd->process_dma_send = hfi1_verbs_send_dma;
85462306a36Sopenharmony_ci	dd->pio_inline_send = pio_copy;
85562306a36Sopenharmony_ci	dd->process_vnic_dma_send = hfi1_vnic_send_dma;
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci	if (is_ax(dd)) {
85862306a36Sopenharmony_ci		atomic_set(&dd->drop_packet, DROP_PACKET_ON);
85962306a36Sopenharmony_ci		dd->do_drop = true;
86062306a36Sopenharmony_ci	} else {
86162306a36Sopenharmony_ci		atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
86262306a36Sopenharmony_ci		dd->do_drop = false;
86362306a36Sopenharmony_ci	}
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci	/* make sure the link is not "up" */
86662306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
86762306a36Sopenharmony_ci		ppd = dd->pport + pidx;
86862306a36Sopenharmony_ci		ppd->linkup = 0;
86962306a36Sopenharmony_ci	}
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	if (reinit)
87262306a36Sopenharmony_ci		ret = init_after_reset(dd);
87362306a36Sopenharmony_ci	else
87462306a36Sopenharmony_ci		ret = loadtime_init(dd);
87562306a36Sopenharmony_ci	if (ret)
87662306a36Sopenharmony_ci		goto done;
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	/* dd->rcd can be NULL if early initialization failed */
87962306a36Sopenharmony_ci	for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
88062306a36Sopenharmony_ci		/*
88162306a36Sopenharmony_ci		 * Set up the (kernel) rcvhdr queue and egr TIDs.  If doing
88262306a36Sopenharmony_ci		 * re-init, the simplest way to handle this is to free
88362306a36Sopenharmony_ci		 * existing, and re-allocate.
88462306a36Sopenharmony_ci		 * Need to re-create rest of ctxt 0 ctxtdata as well.
88562306a36Sopenharmony_ci		 */
88662306a36Sopenharmony_ci		rcd = hfi1_rcd_get_by_index(dd, i);
88762306a36Sopenharmony_ci		if (!rcd)
88862306a36Sopenharmony_ci			continue;
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci		lastfail = hfi1_create_rcvhdrq(dd, rcd);
89162306a36Sopenharmony_ci		if (!lastfail)
89262306a36Sopenharmony_ci			lastfail = hfi1_setup_eagerbufs(rcd);
89362306a36Sopenharmony_ci		if (!lastfail)
89462306a36Sopenharmony_ci			lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
89562306a36Sopenharmony_ci		if (lastfail) {
89662306a36Sopenharmony_ci			dd_dev_err(dd,
89762306a36Sopenharmony_ci				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
89862306a36Sopenharmony_ci			ret = lastfail;
89962306a36Sopenharmony_ci		}
90062306a36Sopenharmony_ci		/* enable IRQ */
90162306a36Sopenharmony_ci		hfi1_rcd_put(rcd);
90262306a36Sopenharmony_ci	}
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_ci	/* Allocate enough memory for user event notification. */
90562306a36Sopenharmony_ci	len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS *
90662306a36Sopenharmony_ci			 sizeof(*dd->events));
90762306a36Sopenharmony_ci	dd->events = vmalloc_user(len);
90862306a36Sopenharmony_ci	if (!dd->events)
90962306a36Sopenharmony_ci		dd_dev_err(dd, "Failed to allocate user events page\n");
91062306a36Sopenharmony_ci	/*
91162306a36Sopenharmony_ci	 * Allocate a page for device and port status.
91262306a36Sopenharmony_ci	 * Page will be shared amongst all user processes.
91362306a36Sopenharmony_ci	 */
91462306a36Sopenharmony_ci	dd->status = vmalloc_user(PAGE_SIZE);
91562306a36Sopenharmony_ci	if (!dd->status)
91662306a36Sopenharmony_ci		dd_dev_err(dd, "Failed to allocate dev status page\n");
91762306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
91862306a36Sopenharmony_ci		ppd = dd->pport + pidx;
91962306a36Sopenharmony_ci		if (dd->status)
92062306a36Sopenharmony_ci			/* Currently, we only have one port */
92162306a36Sopenharmony_ci			ppd->statusp = &dd->status->port;
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci		set_mtu(ppd);
92462306a36Sopenharmony_ci	}
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	/* enable chip even if we have an error, so we can debug cause */
92762306a36Sopenharmony_ci	enable_chip(dd);
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_cidone:
93062306a36Sopenharmony_ci	/*
93162306a36Sopenharmony_ci	 * Set status even if port serdes is not initialized
93262306a36Sopenharmony_ci	 * so that diags will work.
93362306a36Sopenharmony_ci	 */
93462306a36Sopenharmony_ci	if (dd->status)
93562306a36Sopenharmony_ci		dd->status->dev |= HFI1_STATUS_CHIP_PRESENT |
93662306a36Sopenharmony_ci			HFI1_STATUS_INITTED;
93762306a36Sopenharmony_ci	if (!ret) {
93862306a36Sopenharmony_ci		/* enable all interrupts from the chip */
93962306a36Sopenharmony_ci		enable_general_intr(dd);
94062306a36Sopenharmony_ci		init_qsfp_int(dd);
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci		/* chip is OK for user apps; mark it as initialized */
94362306a36Sopenharmony_ci		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
94462306a36Sopenharmony_ci			ppd = dd->pport + pidx;
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci			/*
94762306a36Sopenharmony_ci			 * start the serdes - must be after interrupts are
94862306a36Sopenharmony_ci			 * enabled so we are notified when the link goes up
94962306a36Sopenharmony_ci			 */
95062306a36Sopenharmony_ci			lastfail = bringup_serdes(ppd);
95162306a36Sopenharmony_ci			if (lastfail)
95262306a36Sopenharmony_ci				dd_dev_info(dd,
95362306a36Sopenharmony_ci					    "Failed to bring up port %u\n",
95462306a36Sopenharmony_ci					    ppd->port);
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci			/*
95762306a36Sopenharmony_ci			 * Set status even if port serdes is not initialized
95862306a36Sopenharmony_ci			 * so that diags will work.
95962306a36Sopenharmony_ci			 */
96062306a36Sopenharmony_ci			if (ppd->statusp)
96162306a36Sopenharmony_ci				*ppd->statusp |= HFI1_STATUS_CHIP_PRESENT |
96262306a36Sopenharmony_ci							HFI1_STATUS_INITTED;
96362306a36Sopenharmony_ci			if (!ppd->link_speed_enabled)
96462306a36Sopenharmony_ci				continue;
96562306a36Sopenharmony_ci		}
96662306a36Sopenharmony_ci	}
96762306a36Sopenharmony_ci
96862306a36Sopenharmony_ci	/* if ret is non-zero, we probably should do some cleanup here... */
96962306a36Sopenharmony_ci	return ret;
97062306a36Sopenharmony_ci}
97162306a36Sopenharmony_ci
97262306a36Sopenharmony_cistruct hfi1_devdata *hfi1_lookup(int unit)
97362306a36Sopenharmony_ci{
97462306a36Sopenharmony_ci	return xa_load(&hfi1_dev_table, unit);
97562306a36Sopenharmony_ci}
97662306a36Sopenharmony_ci
97762306a36Sopenharmony_ci/*
97862306a36Sopenharmony_ci * Stop the timers during unit shutdown, or after an error late
97962306a36Sopenharmony_ci * in initialization.
98062306a36Sopenharmony_ci */
98162306a36Sopenharmony_cistatic void stop_timers(struct hfi1_devdata *dd)
98262306a36Sopenharmony_ci{
98362306a36Sopenharmony_ci	struct hfi1_pportdata *ppd;
98462306a36Sopenharmony_ci	int pidx;
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
98762306a36Sopenharmony_ci		ppd = dd->pport + pidx;
98862306a36Sopenharmony_ci		if (ppd->led_override_timer.function) {
98962306a36Sopenharmony_ci			del_timer_sync(&ppd->led_override_timer);
99062306a36Sopenharmony_ci			atomic_set(&ppd->led_override_timer_active, 0);
99162306a36Sopenharmony_ci		}
99262306a36Sopenharmony_ci	}
99362306a36Sopenharmony_ci}
99462306a36Sopenharmony_ci
99562306a36Sopenharmony_ci/**
99662306a36Sopenharmony_ci * shutdown_device - shut down a device
99762306a36Sopenharmony_ci * @dd: the hfi1_ib device
99862306a36Sopenharmony_ci *
99962306a36Sopenharmony_ci * This is called to make the device quiet when we are about to
100062306a36Sopenharmony_ci * unload the driver, and also when the device is administratively
100162306a36Sopenharmony_ci * disabled.   It does not free any data structures.
100262306a36Sopenharmony_ci * Everything it does has to be setup again by hfi1_init(dd, 1)
100362306a36Sopenharmony_ci */
100462306a36Sopenharmony_cistatic void shutdown_device(struct hfi1_devdata *dd)
100562306a36Sopenharmony_ci{
100662306a36Sopenharmony_ci	struct hfi1_pportdata *ppd;
100762306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd;
100862306a36Sopenharmony_ci	unsigned pidx;
100962306a36Sopenharmony_ci	int i;
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci	if (dd->flags & HFI1_SHUTDOWN)
101262306a36Sopenharmony_ci		return;
101362306a36Sopenharmony_ci	dd->flags |= HFI1_SHUTDOWN;
101462306a36Sopenharmony_ci
101562306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
101662306a36Sopenharmony_ci		ppd = dd->pport + pidx;
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci		ppd->linkup = 0;
101962306a36Sopenharmony_ci		if (ppd->statusp)
102062306a36Sopenharmony_ci			*ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
102162306a36Sopenharmony_ci					   HFI1_STATUS_IB_READY);
102262306a36Sopenharmony_ci	}
102362306a36Sopenharmony_ci	dd->flags &= ~HFI1_INITTED;
102462306a36Sopenharmony_ci
102562306a36Sopenharmony_ci	/* mask and clean up interrupts */
102662306a36Sopenharmony_ci	set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
102762306a36Sopenharmony_ci	msix_clean_up_interrupts(dd);
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
103062306a36Sopenharmony_ci		ppd = dd->pport + pidx;
103162306a36Sopenharmony_ci		for (i = 0; i < dd->num_rcv_contexts; i++) {
103262306a36Sopenharmony_ci			rcd = hfi1_rcd_get_by_index(dd, i);
103362306a36Sopenharmony_ci			hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS |
103462306a36Sopenharmony_ci				     HFI1_RCVCTRL_CTXT_DIS |
103562306a36Sopenharmony_ci				     HFI1_RCVCTRL_INTRAVAIL_DIS |
103662306a36Sopenharmony_ci				     HFI1_RCVCTRL_PKEY_DIS |
103762306a36Sopenharmony_ci				     HFI1_RCVCTRL_ONE_PKT_EGR_DIS, rcd);
103862306a36Sopenharmony_ci			hfi1_rcd_put(rcd);
103962306a36Sopenharmony_ci		}
104062306a36Sopenharmony_ci		/*
104162306a36Sopenharmony_ci		 * Gracefully stop all sends allowing any in progress to
104262306a36Sopenharmony_ci		 * trickle out first.
104362306a36Sopenharmony_ci		 */
104462306a36Sopenharmony_ci		for (i = 0; i < dd->num_send_contexts; i++)
104562306a36Sopenharmony_ci			sc_flush(dd->send_contexts[i].sc);
104662306a36Sopenharmony_ci	}
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci	/*
104962306a36Sopenharmony_ci	 * Enough for anything that's going to trickle out to have actually
105062306a36Sopenharmony_ci	 * done so.
105162306a36Sopenharmony_ci	 */
105262306a36Sopenharmony_ci	udelay(20);
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
105562306a36Sopenharmony_ci		ppd = dd->pport + pidx;
105662306a36Sopenharmony_ci
105762306a36Sopenharmony_ci		/* disable all contexts */
105862306a36Sopenharmony_ci		for (i = 0; i < dd->num_send_contexts; i++)
105962306a36Sopenharmony_ci			sc_disable(dd->send_contexts[i].sc);
106062306a36Sopenharmony_ci		/* disable the send device */
106162306a36Sopenharmony_ci		pio_send_control(dd, PSC_GLOBAL_DISABLE);
106262306a36Sopenharmony_ci
106362306a36Sopenharmony_ci		shutdown_led_override(ppd);
106462306a36Sopenharmony_ci
106562306a36Sopenharmony_ci		/*
106662306a36Sopenharmony_ci		 * Clear SerdesEnable.
106762306a36Sopenharmony_ci		 * We can't count on interrupts since we are stopping.
106862306a36Sopenharmony_ci		 */
106962306a36Sopenharmony_ci		hfi1_quiet_serdes(ppd);
107062306a36Sopenharmony_ci		if (ppd->hfi1_wq)
107162306a36Sopenharmony_ci			flush_workqueue(ppd->hfi1_wq);
107262306a36Sopenharmony_ci		if (ppd->link_wq)
107362306a36Sopenharmony_ci			flush_workqueue(ppd->link_wq);
107462306a36Sopenharmony_ci	}
107562306a36Sopenharmony_ci	sdma_exit(dd);
107662306a36Sopenharmony_ci}
107762306a36Sopenharmony_ci
107862306a36Sopenharmony_ci/**
107962306a36Sopenharmony_ci * hfi1_free_ctxtdata - free a context's allocated data
108062306a36Sopenharmony_ci * @dd: the hfi1_ib device
108162306a36Sopenharmony_ci * @rcd: the ctxtdata structure
108262306a36Sopenharmony_ci *
108362306a36Sopenharmony_ci * free up any allocated data for a context
108462306a36Sopenharmony_ci * It should never change any chip state, or global driver state.
108562306a36Sopenharmony_ci */
108662306a36Sopenharmony_civoid hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
108762306a36Sopenharmony_ci{
108862306a36Sopenharmony_ci	u32 e;
108962306a36Sopenharmony_ci
109062306a36Sopenharmony_ci	if (!rcd)
109162306a36Sopenharmony_ci		return;
109262306a36Sopenharmony_ci
109362306a36Sopenharmony_ci	if (rcd->rcvhdrq) {
109462306a36Sopenharmony_ci		dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
109562306a36Sopenharmony_ci				  rcd->rcvhdrq, rcd->rcvhdrq_dma);
109662306a36Sopenharmony_ci		rcd->rcvhdrq = NULL;
109762306a36Sopenharmony_ci		if (hfi1_rcvhdrtail_kvaddr(rcd)) {
109862306a36Sopenharmony_ci			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
109962306a36Sopenharmony_ci					  (void *)hfi1_rcvhdrtail_kvaddr(rcd),
110062306a36Sopenharmony_ci					  rcd->rcvhdrqtailaddr_dma);
110162306a36Sopenharmony_ci			rcd->rcvhdrtail_kvaddr = NULL;
110262306a36Sopenharmony_ci		}
110362306a36Sopenharmony_ci	}
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	/* all the RcvArray entries should have been cleared by now */
110662306a36Sopenharmony_ci	kfree(rcd->egrbufs.rcvtids);
110762306a36Sopenharmony_ci	rcd->egrbufs.rcvtids = NULL;
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci	for (e = 0; e < rcd->egrbufs.alloced; e++) {
111062306a36Sopenharmony_ci		if (rcd->egrbufs.buffers[e].addr)
111162306a36Sopenharmony_ci			dma_free_coherent(&dd->pcidev->dev,
111262306a36Sopenharmony_ci					  rcd->egrbufs.buffers[e].len,
111362306a36Sopenharmony_ci					  rcd->egrbufs.buffers[e].addr,
111462306a36Sopenharmony_ci					  rcd->egrbufs.buffers[e].dma);
111562306a36Sopenharmony_ci	}
111662306a36Sopenharmony_ci	kfree(rcd->egrbufs.buffers);
111762306a36Sopenharmony_ci	rcd->egrbufs.alloced = 0;
111862306a36Sopenharmony_ci	rcd->egrbufs.buffers = NULL;
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_ci	sc_free(rcd->sc);
112162306a36Sopenharmony_ci	rcd->sc = NULL;
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci	vfree(rcd->subctxt_uregbase);
112462306a36Sopenharmony_ci	vfree(rcd->subctxt_rcvegrbuf);
112562306a36Sopenharmony_ci	vfree(rcd->subctxt_rcvhdr_base);
112662306a36Sopenharmony_ci	kfree(rcd->opstats);
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci	rcd->subctxt_uregbase = NULL;
112962306a36Sopenharmony_ci	rcd->subctxt_rcvegrbuf = NULL;
113062306a36Sopenharmony_ci	rcd->subctxt_rcvhdr_base = NULL;
113162306a36Sopenharmony_ci	rcd->opstats = NULL;
113262306a36Sopenharmony_ci}
113362306a36Sopenharmony_ci
113462306a36Sopenharmony_ci/*
113562306a36Sopenharmony_ci * Release our hold on the shared asic data.  If we are the last one,
113662306a36Sopenharmony_ci * return the structure to be finalized outside the lock.  Must be
113762306a36Sopenharmony_ci * holding hfi1_dev_table lock.
113862306a36Sopenharmony_ci */
113962306a36Sopenharmony_cistatic struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd)
114062306a36Sopenharmony_ci{
114162306a36Sopenharmony_ci	struct hfi1_asic_data *ad;
114262306a36Sopenharmony_ci	int other;
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ci	if (!dd->asic_data)
114562306a36Sopenharmony_ci		return NULL;
114662306a36Sopenharmony_ci	dd->asic_data->dds[dd->hfi1_id] = NULL;
114762306a36Sopenharmony_ci	other = dd->hfi1_id ? 0 : 1;
114862306a36Sopenharmony_ci	ad = dd->asic_data;
114962306a36Sopenharmony_ci	dd->asic_data = NULL;
115062306a36Sopenharmony_ci	/* return NULL if the other dd still has a link */
115162306a36Sopenharmony_ci	return ad->dds[other] ? NULL : ad;
115262306a36Sopenharmony_ci}
115362306a36Sopenharmony_ci
/*
 * Final teardown of shared asic data that is no longer referenced:
 * hand it to clean_up_i2c() and then free the structure itself.
 */
static void finalize_asic_data(struct hfi1_devdata *dd,
			       struct hfi1_asic_data *ad)
{
	clean_up_i2c(dd, ad);
	kfree(ad);
}
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_ci/**
116262306a36Sopenharmony_ci * hfi1_free_devdata - cleans up and frees per-unit data structure
116362306a36Sopenharmony_ci * @dd: pointer to a valid devdata structure
116462306a36Sopenharmony_ci *
116562306a36Sopenharmony_ci * It cleans up and frees all data structures set up by
116662306a36Sopenharmony_ci * by hfi1_alloc_devdata().
116762306a36Sopenharmony_ci */
116862306a36Sopenharmony_civoid hfi1_free_devdata(struct hfi1_devdata *dd)
116962306a36Sopenharmony_ci{
117062306a36Sopenharmony_ci	struct hfi1_asic_data *ad;
117162306a36Sopenharmony_ci	unsigned long flags;
117262306a36Sopenharmony_ci
117362306a36Sopenharmony_ci	xa_lock_irqsave(&hfi1_dev_table, flags);
117462306a36Sopenharmony_ci	__xa_erase(&hfi1_dev_table, dd->unit);
117562306a36Sopenharmony_ci	ad = release_asic_data(dd);
117662306a36Sopenharmony_ci	xa_unlock_irqrestore(&hfi1_dev_table, flags);
117762306a36Sopenharmony_ci
117862306a36Sopenharmony_ci	finalize_asic_data(dd, ad);
117962306a36Sopenharmony_ci	free_platform_config(dd);
118062306a36Sopenharmony_ci	rcu_barrier(); /* wait for rcu callbacks to complete */
118162306a36Sopenharmony_ci	free_percpu(dd->int_counter);
118262306a36Sopenharmony_ci	free_percpu(dd->rcv_limit);
118362306a36Sopenharmony_ci	free_percpu(dd->send_schedule);
118462306a36Sopenharmony_ci	free_percpu(dd->tx_opstats);
118562306a36Sopenharmony_ci	dd->int_counter   = NULL;
118662306a36Sopenharmony_ci	dd->rcv_limit     = NULL;
118762306a36Sopenharmony_ci	dd->send_schedule = NULL;
118862306a36Sopenharmony_ci	dd->tx_opstats    = NULL;
118962306a36Sopenharmony_ci	kfree(dd->comp_vect);
119062306a36Sopenharmony_ci	dd->comp_vect = NULL;
119162306a36Sopenharmony_ci	if (dd->rcvhdrtail_dummy_kvaddr)
119262306a36Sopenharmony_ci		dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
119362306a36Sopenharmony_ci				  (void *)dd->rcvhdrtail_dummy_kvaddr,
119462306a36Sopenharmony_ci				  dd->rcvhdrtail_dummy_dma);
119562306a36Sopenharmony_ci	dd->rcvhdrtail_dummy_kvaddr = NULL;
119662306a36Sopenharmony_ci	sdma_clean(dd, dd->num_sdma);
119762306a36Sopenharmony_ci	rvt_dealloc_device(&dd->verbs_dev.rdi);
119862306a36Sopenharmony_ci}
119962306a36Sopenharmony_ci
120062306a36Sopenharmony_ci/**
120162306a36Sopenharmony_ci * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
120262306a36Sopenharmony_ci * @pdev: Valid PCI device
120362306a36Sopenharmony_ci * @extra: How many bytes to alloc past the default
120462306a36Sopenharmony_ci *
120562306a36Sopenharmony_ci * Must be done via verbs allocator, because the verbs cleanup process
120662306a36Sopenharmony_ci * both does cleanup and free of the data structure.
120762306a36Sopenharmony_ci * "extra" is for chip-specific data.
120862306a36Sopenharmony_ci */
120962306a36Sopenharmony_cistatic struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
121062306a36Sopenharmony_ci					       size_t extra)
121162306a36Sopenharmony_ci{
121262306a36Sopenharmony_ci	struct hfi1_devdata *dd;
121362306a36Sopenharmony_ci	int ret, nports;
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	/* extra is * number of ports */
121662306a36Sopenharmony_ci	nports = extra / sizeof(struct hfi1_pportdata);
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	dd = (struct hfi1_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
121962306a36Sopenharmony_ci						     nports);
122062306a36Sopenharmony_ci	if (!dd)
122162306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
122262306a36Sopenharmony_ci	dd->num_pports = nports;
122362306a36Sopenharmony_ci	dd->pport = (struct hfi1_pportdata *)(dd + 1);
122462306a36Sopenharmony_ci	dd->pcidev = pdev;
122562306a36Sopenharmony_ci	pci_set_drvdata(pdev, dd);
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci	ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
122862306a36Sopenharmony_ci			GFP_KERNEL);
122962306a36Sopenharmony_ci	if (ret < 0) {
123062306a36Sopenharmony_ci		dev_err(&pdev->dev,
123162306a36Sopenharmony_ci			"Could not allocate unit ID: error %d\n", -ret);
123262306a36Sopenharmony_ci		goto bail;
123362306a36Sopenharmony_ci	}
123462306a36Sopenharmony_ci	rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
123562306a36Sopenharmony_ci	/*
123662306a36Sopenharmony_ci	 * If the BIOS does not have the NUMA node information set, select
123762306a36Sopenharmony_ci	 * NUMA 0 so we get consistent performance.
123862306a36Sopenharmony_ci	 */
123962306a36Sopenharmony_ci	dd->node = pcibus_to_node(pdev->bus);
124062306a36Sopenharmony_ci	if (dd->node == NUMA_NO_NODE) {
124162306a36Sopenharmony_ci		dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
124262306a36Sopenharmony_ci		dd->node = 0;
124362306a36Sopenharmony_ci	}
124462306a36Sopenharmony_ci
124562306a36Sopenharmony_ci	/*
124662306a36Sopenharmony_ci	 * Initialize all locks for the device. This needs to be as early as
124762306a36Sopenharmony_ci	 * possible so locks are usable.
124862306a36Sopenharmony_ci	 */
124962306a36Sopenharmony_ci	spin_lock_init(&dd->sc_lock);
125062306a36Sopenharmony_ci	spin_lock_init(&dd->sendctrl_lock);
125162306a36Sopenharmony_ci	spin_lock_init(&dd->rcvctrl_lock);
125262306a36Sopenharmony_ci	spin_lock_init(&dd->uctxt_lock);
125362306a36Sopenharmony_ci	spin_lock_init(&dd->hfi1_diag_trans_lock);
125462306a36Sopenharmony_ci	spin_lock_init(&dd->sc_init_lock);
125562306a36Sopenharmony_ci	spin_lock_init(&dd->dc8051_memlock);
125662306a36Sopenharmony_ci	seqlock_init(&dd->sc2vl_lock);
125762306a36Sopenharmony_ci	spin_lock_init(&dd->sde_map_lock);
125862306a36Sopenharmony_ci	spin_lock_init(&dd->pio_map_lock);
125962306a36Sopenharmony_ci	mutex_init(&dd->dc8051_lock);
126062306a36Sopenharmony_ci	init_waitqueue_head(&dd->event_queue);
126162306a36Sopenharmony_ci	spin_lock_init(&dd->irq_src_lock);
126262306a36Sopenharmony_ci
126362306a36Sopenharmony_ci	dd->int_counter = alloc_percpu(u64);
126462306a36Sopenharmony_ci	if (!dd->int_counter) {
126562306a36Sopenharmony_ci		ret = -ENOMEM;
126662306a36Sopenharmony_ci		goto bail;
126762306a36Sopenharmony_ci	}
126862306a36Sopenharmony_ci
126962306a36Sopenharmony_ci	dd->rcv_limit = alloc_percpu(u64);
127062306a36Sopenharmony_ci	if (!dd->rcv_limit) {
127162306a36Sopenharmony_ci		ret = -ENOMEM;
127262306a36Sopenharmony_ci		goto bail;
127362306a36Sopenharmony_ci	}
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_ci	dd->send_schedule = alloc_percpu(u64);
127662306a36Sopenharmony_ci	if (!dd->send_schedule) {
127762306a36Sopenharmony_ci		ret = -ENOMEM;
127862306a36Sopenharmony_ci		goto bail;
127962306a36Sopenharmony_ci	}
128062306a36Sopenharmony_ci
128162306a36Sopenharmony_ci	dd->tx_opstats = alloc_percpu(struct hfi1_opcode_stats_perctx);
128262306a36Sopenharmony_ci	if (!dd->tx_opstats) {
128362306a36Sopenharmony_ci		ret = -ENOMEM;
128462306a36Sopenharmony_ci		goto bail;
128562306a36Sopenharmony_ci	}
128662306a36Sopenharmony_ci
128762306a36Sopenharmony_ci	dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL);
128862306a36Sopenharmony_ci	if (!dd->comp_vect) {
128962306a36Sopenharmony_ci		ret = -ENOMEM;
129062306a36Sopenharmony_ci		goto bail;
129162306a36Sopenharmony_ci	}
129262306a36Sopenharmony_ci
129362306a36Sopenharmony_ci	/* allocate dummy tail memory for all receive contexts */
129462306a36Sopenharmony_ci	dd->rcvhdrtail_dummy_kvaddr =
129562306a36Sopenharmony_ci		dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
129662306a36Sopenharmony_ci				   &dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
129762306a36Sopenharmony_ci	if (!dd->rcvhdrtail_dummy_kvaddr) {
129862306a36Sopenharmony_ci		ret = -ENOMEM;
129962306a36Sopenharmony_ci		goto bail;
130062306a36Sopenharmony_ci	}
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci	atomic_set(&dd->ipoib_rsm_usr_num, 0);
130362306a36Sopenharmony_ci	return dd;
130462306a36Sopenharmony_ci
130562306a36Sopenharmony_cibail:
130662306a36Sopenharmony_ci	hfi1_free_devdata(dd);
130762306a36Sopenharmony_ci	return ERR_PTR(ret);
130862306a36Sopenharmony_ci}
130962306a36Sopenharmony_ci
131062306a36Sopenharmony_ci/*
131162306a36Sopenharmony_ci * Called from freeze mode handlers, and from PCI error
131262306a36Sopenharmony_ci * reporting code.  Should be paranoid about state of
131362306a36Sopenharmony_ci * system and data structures.
131462306a36Sopenharmony_ci */
131562306a36Sopenharmony_civoid hfi1_disable_after_error(struct hfi1_devdata *dd)
131662306a36Sopenharmony_ci{
131762306a36Sopenharmony_ci	if (dd->flags & HFI1_INITTED) {
131862306a36Sopenharmony_ci		u32 pidx;
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_ci		dd->flags &= ~HFI1_INITTED;
132162306a36Sopenharmony_ci		if (dd->pport)
132262306a36Sopenharmony_ci			for (pidx = 0; pidx < dd->num_pports; ++pidx) {
132362306a36Sopenharmony_ci				struct hfi1_pportdata *ppd;
132462306a36Sopenharmony_ci
132562306a36Sopenharmony_ci				ppd = dd->pport + pidx;
132662306a36Sopenharmony_ci				if (dd->flags & HFI1_PRESENT)
132762306a36Sopenharmony_ci					set_link_state(ppd, HLS_DN_DISABLE);
132862306a36Sopenharmony_ci
132962306a36Sopenharmony_ci				if (ppd->statusp)
133062306a36Sopenharmony_ci					*ppd->statusp &= ~HFI1_STATUS_IB_READY;
133162306a36Sopenharmony_ci			}
133262306a36Sopenharmony_ci	}
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ci	/*
133562306a36Sopenharmony_ci	 * Mark as having had an error for driver, and also
133662306a36Sopenharmony_ci	 * for /sys and status word mapped to user programs.
133762306a36Sopenharmony_ci	 * This marks unit as not usable, until reset.
133862306a36Sopenharmony_ci	 */
133962306a36Sopenharmony_ci	if (dd->status)
134062306a36Sopenharmony_ci		dd->status->dev |= HFI1_STATUS_HWERROR;
134162306a36Sopenharmony_ci}
134262306a36Sopenharmony_ci
134362306a36Sopenharmony_cistatic void remove_one(struct pci_dev *);
134462306a36Sopenharmony_cistatic int init_one(struct pci_dev *, const struct pci_device_id *);
134562306a36Sopenharmony_cistatic void shutdown_one(struct pci_dev *);
134662306a36Sopenharmony_ci
134762306a36Sopenharmony_ci#define DRIVER_LOAD_MSG "Cornelis " DRIVER_NAME " loaded: "
134862306a36Sopenharmony_ci#define PFX DRIVER_NAME ": "
134962306a36Sopenharmony_ci
135062306a36Sopenharmony_ciconst struct pci_device_id hfi1_pci_tbl[] = {
135162306a36Sopenharmony_ci	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL0) },
135262306a36Sopenharmony_ci	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL1) },
135362306a36Sopenharmony_ci	{ 0, }
135462306a36Sopenharmony_ci};
135562306a36Sopenharmony_ci
135662306a36Sopenharmony_ciMODULE_DEVICE_TABLE(pci, hfi1_pci_tbl);
135762306a36Sopenharmony_ci
135862306a36Sopenharmony_cistatic struct pci_driver hfi1_pci_driver = {
135962306a36Sopenharmony_ci	.name = DRIVER_NAME,
136062306a36Sopenharmony_ci	.probe = init_one,
136162306a36Sopenharmony_ci	.remove = remove_one,
136262306a36Sopenharmony_ci	.shutdown = shutdown_one,
136362306a36Sopenharmony_ci	.id_table = hfi1_pci_tbl,
136462306a36Sopenharmony_ci	.err_handler = &hfi1_pci_err_handler,
136562306a36Sopenharmony_ci};
136662306a36Sopenharmony_ci
136762306a36Sopenharmony_cistatic void __init compute_krcvqs(void)
136862306a36Sopenharmony_ci{
136962306a36Sopenharmony_ci	int i;
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci	for (i = 0; i < krcvqsset; i++)
137262306a36Sopenharmony_ci		n_krcvqs += krcvqs[i];
137362306a36Sopenharmony_ci}
137462306a36Sopenharmony_ci
137562306a36Sopenharmony_ci/*
137662306a36Sopenharmony_ci * Do all the generic driver unit- and chip-independent memory
137762306a36Sopenharmony_ci * allocation and initialization.
137862306a36Sopenharmony_ci */
137962306a36Sopenharmony_cistatic int __init hfi1_mod_init(void)
138062306a36Sopenharmony_ci{
138162306a36Sopenharmony_ci	int ret;
138262306a36Sopenharmony_ci
138362306a36Sopenharmony_ci	ret = dev_init();
138462306a36Sopenharmony_ci	if (ret)
138562306a36Sopenharmony_ci		goto bail;
138662306a36Sopenharmony_ci
138762306a36Sopenharmony_ci	ret = node_affinity_init();
138862306a36Sopenharmony_ci	if (ret)
138962306a36Sopenharmony_ci		goto bail;
139062306a36Sopenharmony_ci
139162306a36Sopenharmony_ci	/* validate max MTU before any devices start */
139262306a36Sopenharmony_ci	if (!valid_opa_max_mtu(hfi1_max_mtu)) {
139362306a36Sopenharmony_ci		pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n",
139462306a36Sopenharmony_ci		       hfi1_max_mtu, HFI1_DEFAULT_MAX_MTU);
139562306a36Sopenharmony_ci		hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
139662306a36Sopenharmony_ci	}
139762306a36Sopenharmony_ci	/* valid CUs run from 1-128 in powers of 2 */
139862306a36Sopenharmony_ci	if (hfi1_cu > 128 || !is_power_of_2(hfi1_cu))
139962306a36Sopenharmony_ci		hfi1_cu = 1;
140062306a36Sopenharmony_ci	/* valid credit return threshold is 0-100, variable is unsigned */
140162306a36Sopenharmony_ci	if (user_credit_return_threshold > 100)
140262306a36Sopenharmony_ci		user_credit_return_threshold = 100;
140362306a36Sopenharmony_ci
140462306a36Sopenharmony_ci	compute_krcvqs();
140562306a36Sopenharmony_ci	/*
140662306a36Sopenharmony_ci	 * sanitize receive interrupt count, time must wait until after
140762306a36Sopenharmony_ci	 * the hardware type is known
140862306a36Sopenharmony_ci	 */
140962306a36Sopenharmony_ci	if (rcv_intr_count > RCV_HDR_HEAD_COUNTER_MASK)
141062306a36Sopenharmony_ci		rcv_intr_count = RCV_HDR_HEAD_COUNTER_MASK;
141162306a36Sopenharmony_ci	/* reject invalid combinations */
141262306a36Sopenharmony_ci	if (rcv_intr_count == 0 && rcv_intr_timeout == 0) {
141362306a36Sopenharmony_ci		pr_err("Invalid mode: both receive interrupt count and available timeout are zero - setting interrupt count to 1\n");
141462306a36Sopenharmony_ci		rcv_intr_count = 1;
141562306a36Sopenharmony_ci	}
141662306a36Sopenharmony_ci	if (rcv_intr_count > 1 && rcv_intr_timeout == 0) {
141762306a36Sopenharmony_ci		/*
141862306a36Sopenharmony_ci		 * Avoid indefinite packet delivery by requiring a timeout
141962306a36Sopenharmony_ci		 * if count is > 1.
142062306a36Sopenharmony_ci		 */
142162306a36Sopenharmony_ci		pr_err("Invalid mode: receive interrupt count greater than 1 and available timeout is zero - setting available timeout to 1\n");
142262306a36Sopenharmony_ci		rcv_intr_timeout = 1;
142362306a36Sopenharmony_ci	}
142462306a36Sopenharmony_ci	if (rcv_intr_dynamic && !(rcv_intr_count > 1 && rcv_intr_timeout > 0)) {
142562306a36Sopenharmony_ci		/*
142662306a36Sopenharmony_ci		 * The dynamic algorithm expects a non-zero timeout
142762306a36Sopenharmony_ci		 * and a count > 1.
142862306a36Sopenharmony_ci		 */
142962306a36Sopenharmony_ci		pr_err("Invalid mode: dynamic receive interrupt mitigation with invalid count and timeout - turning dynamic off\n");
143062306a36Sopenharmony_ci		rcv_intr_dynamic = 0;
143162306a36Sopenharmony_ci	}
143262306a36Sopenharmony_ci
143362306a36Sopenharmony_ci	/* sanitize link CRC options */
143462306a36Sopenharmony_ci	link_crc_mask &= SUPPORTED_CRCS;
143562306a36Sopenharmony_ci
143662306a36Sopenharmony_ci	ret = opfn_init();
143762306a36Sopenharmony_ci	if (ret < 0) {
143862306a36Sopenharmony_ci		pr_err("Failed to allocate opfn_wq");
143962306a36Sopenharmony_ci		goto bail_dev;
144062306a36Sopenharmony_ci	}
144162306a36Sopenharmony_ci
144262306a36Sopenharmony_ci	/*
144362306a36Sopenharmony_ci	 * These must be called before the driver is registered with
144462306a36Sopenharmony_ci	 * the PCI subsystem.
144562306a36Sopenharmony_ci	 */
144662306a36Sopenharmony_ci	hfi1_dbg_init();
144762306a36Sopenharmony_ci	ret = pci_register_driver(&hfi1_pci_driver);
144862306a36Sopenharmony_ci	if (ret < 0) {
144962306a36Sopenharmony_ci		pr_err("Unable to register driver: error %d\n", -ret);
145062306a36Sopenharmony_ci		goto bail_dev;
145162306a36Sopenharmony_ci	}
145262306a36Sopenharmony_ci	goto bail; /* all OK */
145362306a36Sopenharmony_ci
145462306a36Sopenharmony_cibail_dev:
145562306a36Sopenharmony_ci	hfi1_dbg_exit();
145662306a36Sopenharmony_ci	dev_cleanup();
145762306a36Sopenharmony_cibail:
145862306a36Sopenharmony_ci	return ret;
145962306a36Sopenharmony_ci}
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_cimodule_init(hfi1_mod_init);
146262306a36Sopenharmony_ci
146362306a36Sopenharmony_ci/*
146462306a36Sopenharmony_ci * Do the non-unit driver cleanup, memory free, etc. at unload.
146562306a36Sopenharmony_ci */
146662306a36Sopenharmony_cistatic void __exit hfi1_mod_cleanup(void)
146762306a36Sopenharmony_ci{
146862306a36Sopenharmony_ci	pci_unregister_driver(&hfi1_pci_driver);
146962306a36Sopenharmony_ci	opfn_exit();
147062306a36Sopenharmony_ci	node_affinity_destroy_all();
147162306a36Sopenharmony_ci	hfi1_dbg_exit();
147262306a36Sopenharmony_ci
147362306a36Sopenharmony_ci	WARN_ON(!xa_empty(&hfi1_dev_table));
147462306a36Sopenharmony_ci	dispose_firmware();	/* asymmetric with obtain_firmware() */
147562306a36Sopenharmony_ci	dev_cleanup();
147662306a36Sopenharmony_ci}
147762306a36Sopenharmony_ci
147862306a36Sopenharmony_cimodule_exit(hfi1_mod_cleanup);
147962306a36Sopenharmony_ci
148062306a36Sopenharmony_ci/* this can only be called after a successful initialization */
148162306a36Sopenharmony_cistatic void cleanup_device_data(struct hfi1_devdata *dd)
148262306a36Sopenharmony_ci{
148362306a36Sopenharmony_ci	int ctxt;
148462306a36Sopenharmony_ci	int pidx;
148562306a36Sopenharmony_ci
148662306a36Sopenharmony_ci	/* users can't do anything more with chip */
148762306a36Sopenharmony_ci	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
148862306a36Sopenharmony_ci		struct hfi1_pportdata *ppd = &dd->pport[pidx];
148962306a36Sopenharmony_ci		struct cc_state *cc_state;
149062306a36Sopenharmony_ci		int i;
149162306a36Sopenharmony_ci
149262306a36Sopenharmony_ci		if (ppd->statusp)
149362306a36Sopenharmony_ci			*ppd->statusp &= ~HFI1_STATUS_CHIP_PRESENT;
149462306a36Sopenharmony_ci
149562306a36Sopenharmony_ci		for (i = 0; i < OPA_MAX_SLS; i++)
149662306a36Sopenharmony_ci			hrtimer_cancel(&ppd->cca_timer[i].hrtimer);
149762306a36Sopenharmony_ci
149862306a36Sopenharmony_ci		spin_lock(&ppd->cc_state_lock);
149962306a36Sopenharmony_ci		cc_state = get_cc_state_protected(ppd);
150062306a36Sopenharmony_ci		RCU_INIT_POINTER(ppd->cc_state, NULL);
150162306a36Sopenharmony_ci		spin_unlock(&ppd->cc_state_lock);
150262306a36Sopenharmony_ci
150362306a36Sopenharmony_ci		if (cc_state)
150462306a36Sopenharmony_ci			kfree_rcu(cc_state, rcu);
150562306a36Sopenharmony_ci	}
150662306a36Sopenharmony_ci
150762306a36Sopenharmony_ci	free_credit_return(dd);
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci	/*
151062306a36Sopenharmony_ci	 * Free any resources still in use (usually just kernel contexts)
151162306a36Sopenharmony_ci	 * at unload; we do for ctxtcnt, because that's what we allocate.
151262306a36Sopenharmony_ci	 */
151362306a36Sopenharmony_ci	for (ctxt = 0; dd->rcd && ctxt < dd->num_rcv_contexts; ctxt++) {
151462306a36Sopenharmony_ci		struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
151562306a36Sopenharmony_ci
151662306a36Sopenharmony_ci		if (rcd) {
151762306a36Sopenharmony_ci			hfi1_free_ctxt_rcv_groups(rcd);
151862306a36Sopenharmony_ci			hfi1_free_ctxt(rcd);
151962306a36Sopenharmony_ci		}
152062306a36Sopenharmony_ci	}
152162306a36Sopenharmony_ci
152262306a36Sopenharmony_ci	kfree(dd->rcd);
152362306a36Sopenharmony_ci	dd->rcd = NULL;
152462306a36Sopenharmony_ci
152562306a36Sopenharmony_ci	free_pio_map(dd);
152662306a36Sopenharmony_ci	/* must follow rcv context free - need to remove rcv's hooks */
152762306a36Sopenharmony_ci	for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++)
152862306a36Sopenharmony_ci		sc_free(dd->send_contexts[ctxt].sc);
152962306a36Sopenharmony_ci	dd->num_send_contexts = 0;
153062306a36Sopenharmony_ci	kfree(dd->send_contexts);
153162306a36Sopenharmony_ci	dd->send_contexts = NULL;
153262306a36Sopenharmony_ci	kfree(dd->hw_to_sw);
153362306a36Sopenharmony_ci	dd->hw_to_sw = NULL;
153462306a36Sopenharmony_ci	kfree(dd->boardname);
153562306a36Sopenharmony_ci	vfree(dd->events);
153662306a36Sopenharmony_ci	vfree(dd->status);
153762306a36Sopenharmony_ci}
153862306a36Sopenharmony_ci
153962306a36Sopenharmony_ci/*
154062306a36Sopenharmony_ci * Clean up on unit shutdown, or error during unit load after
154162306a36Sopenharmony_ci * successful initialization.
154262306a36Sopenharmony_ci */
154362306a36Sopenharmony_cistatic void postinit_cleanup(struct hfi1_devdata *dd)
154462306a36Sopenharmony_ci{
154562306a36Sopenharmony_ci	hfi1_start_cleanup(dd);
154662306a36Sopenharmony_ci	hfi1_comp_vectors_clean_up(dd);
154762306a36Sopenharmony_ci	hfi1_dev_affinity_clean_up(dd);
154862306a36Sopenharmony_ci
154962306a36Sopenharmony_ci	hfi1_pcie_ddcleanup(dd);
155062306a36Sopenharmony_ci	hfi1_pcie_cleanup(dd->pcidev);
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_ci	cleanup_device_data(dd);
155362306a36Sopenharmony_ci
155462306a36Sopenharmony_ci	hfi1_free_devdata(dd);
155562306a36Sopenharmony_ci}
155662306a36Sopenharmony_ci
155762306a36Sopenharmony_cistatic int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
155862306a36Sopenharmony_ci{
155962306a36Sopenharmony_ci	int ret = 0, j, pidx, initfail;
156062306a36Sopenharmony_ci	struct hfi1_devdata *dd;
156162306a36Sopenharmony_ci	struct hfi1_pportdata *ppd;
156262306a36Sopenharmony_ci
156362306a36Sopenharmony_ci	/* First, lock the non-writable module parameters */
156462306a36Sopenharmony_ci	HFI1_CAP_LOCK();
156562306a36Sopenharmony_ci
156662306a36Sopenharmony_ci	/* Validate dev ids */
156762306a36Sopenharmony_ci	if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
156862306a36Sopenharmony_ci	      ent->device == PCI_DEVICE_ID_INTEL1)) {
156962306a36Sopenharmony_ci		dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
157062306a36Sopenharmony_ci			ent->device);
157162306a36Sopenharmony_ci		ret = -ENODEV;
157262306a36Sopenharmony_ci		goto bail;
157362306a36Sopenharmony_ci	}
157462306a36Sopenharmony_ci
157562306a36Sopenharmony_ci	/* Allocate the dd so we can get to work */
157662306a36Sopenharmony_ci	dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
157762306a36Sopenharmony_ci				sizeof(struct hfi1_pportdata));
157862306a36Sopenharmony_ci	if (IS_ERR(dd)) {
157962306a36Sopenharmony_ci		ret = PTR_ERR(dd);
158062306a36Sopenharmony_ci		goto bail;
158162306a36Sopenharmony_ci	}
158262306a36Sopenharmony_ci
158362306a36Sopenharmony_ci	/* Validate some global module parameters */
158462306a36Sopenharmony_ci	ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
158562306a36Sopenharmony_ci	if (ret)
158662306a36Sopenharmony_ci		goto bail;
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci	/* use the encoding function as a sanitization check */
158962306a36Sopenharmony_ci	if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
159062306a36Sopenharmony_ci		dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
159162306a36Sopenharmony_ci			   hfi1_hdrq_entsize);
159262306a36Sopenharmony_ci		ret = -EINVAL;
159362306a36Sopenharmony_ci		goto bail;
159462306a36Sopenharmony_ci	}
159562306a36Sopenharmony_ci
159662306a36Sopenharmony_ci	/* The receive eager buffer size must be set before the receive
159762306a36Sopenharmony_ci	 * contexts are created.
159862306a36Sopenharmony_ci	 *
159962306a36Sopenharmony_ci	 * Set the eager buffer size.  Validate that it falls in a range
160062306a36Sopenharmony_ci	 * allowed by the hardware - all powers of 2 between the min and
160162306a36Sopenharmony_ci	 * max.  The maximum valid MTU is within the eager buffer range
160262306a36Sopenharmony_ci	 * so we do not need to cap the max_mtu by an eager buffer size
160362306a36Sopenharmony_ci	 * setting.
160462306a36Sopenharmony_ci	 */
160562306a36Sopenharmony_ci	if (eager_buffer_size) {
160662306a36Sopenharmony_ci		if (!is_power_of_2(eager_buffer_size))
160762306a36Sopenharmony_ci			eager_buffer_size =
160862306a36Sopenharmony_ci				roundup_pow_of_two(eager_buffer_size);
160962306a36Sopenharmony_ci		eager_buffer_size =
161062306a36Sopenharmony_ci			clamp_val(eager_buffer_size,
161162306a36Sopenharmony_ci				  MIN_EAGER_BUFFER * 8,
161262306a36Sopenharmony_ci				  MAX_EAGER_BUFFER_TOTAL);
161362306a36Sopenharmony_ci		dd_dev_info(dd, "Eager buffer size %u\n",
161462306a36Sopenharmony_ci			    eager_buffer_size);
161562306a36Sopenharmony_ci	} else {
161662306a36Sopenharmony_ci		dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
161762306a36Sopenharmony_ci		ret = -EINVAL;
161862306a36Sopenharmony_ci		goto bail;
161962306a36Sopenharmony_ci	}
162062306a36Sopenharmony_ci
162162306a36Sopenharmony_ci	/* restrict value of hfi1_rcvarr_split */
162262306a36Sopenharmony_ci	hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
162362306a36Sopenharmony_ci
162462306a36Sopenharmony_ci	ret = hfi1_pcie_init(dd);
162562306a36Sopenharmony_ci	if (ret)
162662306a36Sopenharmony_ci		goto bail;
162762306a36Sopenharmony_ci
162862306a36Sopenharmony_ci	/*
162962306a36Sopenharmony_ci	 * Do device-specific initialization, function table setup, dd
163062306a36Sopenharmony_ci	 * allocation, etc.
163162306a36Sopenharmony_ci	 */
163262306a36Sopenharmony_ci	ret = hfi1_init_dd(dd);
163362306a36Sopenharmony_ci	if (ret)
163462306a36Sopenharmony_ci		goto clean_bail; /* error already printed */
163562306a36Sopenharmony_ci
163662306a36Sopenharmony_ci	ret = create_workqueues(dd);
163762306a36Sopenharmony_ci	if (ret)
163862306a36Sopenharmony_ci		goto clean_bail;
163962306a36Sopenharmony_ci
164062306a36Sopenharmony_ci	/* do the generic initialization */
164162306a36Sopenharmony_ci	initfail = hfi1_init(dd, 0);
164262306a36Sopenharmony_ci
164362306a36Sopenharmony_ci	ret = hfi1_register_ib_device(dd);
164462306a36Sopenharmony_ci
164562306a36Sopenharmony_ci	/*
164662306a36Sopenharmony_ci	 * Now ready for use.  this should be cleared whenever we
164762306a36Sopenharmony_ci	 * detect a reset, or initiate one.  If earlier failure,
164862306a36Sopenharmony_ci	 * we still create devices, so diags, etc. can be used
164962306a36Sopenharmony_ci	 * to determine cause of problem.
165062306a36Sopenharmony_ci	 */
165162306a36Sopenharmony_ci	if (!initfail && !ret) {
165262306a36Sopenharmony_ci		dd->flags |= HFI1_INITTED;
165362306a36Sopenharmony_ci		/* create debufs files after init and ib register */
165462306a36Sopenharmony_ci		hfi1_dbg_ibdev_init(&dd->verbs_dev);
165562306a36Sopenharmony_ci	}
165662306a36Sopenharmony_ci
165762306a36Sopenharmony_ci	j = hfi1_device_create(dd);
165862306a36Sopenharmony_ci	if (j)
165962306a36Sopenharmony_ci		dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
166062306a36Sopenharmony_ci
166162306a36Sopenharmony_ci	if (initfail || ret) {
166262306a36Sopenharmony_ci		msix_clean_up_interrupts(dd);
166362306a36Sopenharmony_ci		stop_timers(dd);
166462306a36Sopenharmony_ci		flush_workqueue(ib_wq);
166562306a36Sopenharmony_ci		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
166662306a36Sopenharmony_ci			hfi1_quiet_serdes(dd->pport + pidx);
166762306a36Sopenharmony_ci			ppd = dd->pport + pidx;
166862306a36Sopenharmony_ci			if (ppd->hfi1_wq) {
166962306a36Sopenharmony_ci				destroy_workqueue(ppd->hfi1_wq);
167062306a36Sopenharmony_ci				ppd->hfi1_wq = NULL;
167162306a36Sopenharmony_ci			}
167262306a36Sopenharmony_ci			if (ppd->link_wq) {
167362306a36Sopenharmony_ci				destroy_workqueue(ppd->link_wq);
167462306a36Sopenharmony_ci				ppd->link_wq = NULL;
167562306a36Sopenharmony_ci			}
167662306a36Sopenharmony_ci		}
167762306a36Sopenharmony_ci		if (!j)
167862306a36Sopenharmony_ci			hfi1_device_remove(dd);
167962306a36Sopenharmony_ci		if (!ret)
168062306a36Sopenharmony_ci			hfi1_unregister_ib_device(dd);
168162306a36Sopenharmony_ci		postinit_cleanup(dd);
168262306a36Sopenharmony_ci		if (initfail)
168362306a36Sopenharmony_ci			ret = initfail;
168462306a36Sopenharmony_ci		goto bail;	/* everything already cleaned */
168562306a36Sopenharmony_ci	}
168662306a36Sopenharmony_ci
168762306a36Sopenharmony_ci	sdma_start(dd);
168862306a36Sopenharmony_ci
168962306a36Sopenharmony_ci	return 0;
169062306a36Sopenharmony_ci
169162306a36Sopenharmony_ciclean_bail:
169262306a36Sopenharmony_ci	hfi1_pcie_cleanup(pdev);
169362306a36Sopenharmony_cibail:
169462306a36Sopenharmony_ci	return ret;
169562306a36Sopenharmony_ci}
169662306a36Sopenharmony_ci
169762306a36Sopenharmony_cistatic void wait_for_clients(struct hfi1_devdata *dd)
169862306a36Sopenharmony_ci{
169962306a36Sopenharmony_ci	/*
170062306a36Sopenharmony_ci	 * Remove the device init value and complete the device if there is
170162306a36Sopenharmony_ci	 * no clients or wait for active clients to finish.
170262306a36Sopenharmony_ci	 */
170362306a36Sopenharmony_ci	if (refcount_dec_and_test(&dd->user_refcount))
170462306a36Sopenharmony_ci		complete(&dd->user_comp);
170562306a36Sopenharmony_ci
170662306a36Sopenharmony_ci	wait_for_completion(&dd->user_comp);
170762306a36Sopenharmony_ci}
170862306a36Sopenharmony_ci
170962306a36Sopenharmony_cistatic void remove_one(struct pci_dev *pdev)
171062306a36Sopenharmony_ci{
171162306a36Sopenharmony_ci	struct hfi1_devdata *dd = pci_get_drvdata(pdev);
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ci	/* close debugfs files before ib unregister */
171462306a36Sopenharmony_ci	hfi1_dbg_ibdev_exit(&dd->verbs_dev);
171562306a36Sopenharmony_ci
171662306a36Sopenharmony_ci	/* remove the /dev hfi1 interface */
171762306a36Sopenharmony_ci	hfi1_device_remove(dd);
171862306a36Sopenharmony_ci
171962306a36Sopenharmony_ci	/* wait for existing user space clients to finish */
172062306a36Sopenharmony_ci	wait_for_clients(dd);
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_ci	/* unregister from IB core */
172362306a36Sopenharmony_ci	hfi1_unregister_ib_device(dd);
172462306a36Sopenharmony_ci
172562306a36Sopenharmony_ci	/* free netdev data */
172662306a36Sopenharmony_ci	hfi1_free_rx(dd);
172762306a36Sopenharmony_ci
172862306a36Sopenharmony_ci	/*
172962306a36Sopenharmony_ci	 * Disable the IB link, disable interrupts on the device,
173062306a36Sopenharmony_ci	 * clear dma engines, etc.
173162306a36Sopenharmony_ci	 */
173262306a36Sopenharmony_ci	shutdown_device(dd);
173362306a36Sopenharmony_ci	destroy_workqueues(dd);
173462306a36Sopenharmony_ci
173562306a36Sopenharmony_ci	stop_timers(dd);
173662306a36Sopenharmony_ci
173762306a36Sopenharmony_ci	/* wait until all of our (qsfp) queue_work() calls complete */
173862306a36Sopenharmony_ci	flush_workqueue(ib_wq);
173962306a36Sopenharmony_ci
174062306a36Sopenharmony_ci	postinit_cleanup(dd);
174162306a36Sopenharmony_ci}
174262306a36Sopenharmony_ci
/* PCI shutdown callback: shut down the device bound to @pdev. */
static void shutdown_one(struct pci_dev *pdev)
{
	shutdown_device(pci_get_drvdata(pdev));
}
174962306a36Sopenharmony_ci
175062306a36Sopenharmony_ci/**
175162306a36Sopenharmony_ci * hfi1_create_rcvhdrq - create a receive header queue
175262306a36Sopenharmony_ci * @dd: the hfi1_ib device
175362306a36Sopenharmony_ci * @rcd: the context data
175462306a36Sopenharmony_ci *
175562306a36Sopenharmony_ci * This must be contiguous memory (from an i/o perspective), and must be
175662306a36Sopenharmony_ci * DMA'able (which means for some systems, it will go through an IOMMU,
175762306a36Sopenharmony_ci * or be forced into a low address range).
175862306a36Sopenharmony_ci */
175962306a36Sopenharmony_ciint hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
176062306a36Sopenharmony_ci{
176162306a36Sopenharmony_ci	unsigned amt;
176262306a36Sopenharmony_ci
176362306a36Sopenharmony_ci	if (!rcd->rcvhdrq) {
176462306a36Sopenharmony_ci		amt = rcvhdrq_size(rcd);
176562306a36Sopenharmony_ci
176662306a36Sopenharmony_ci		rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
176762306a36Sopenharmony_ci						  &rcd->rcvhdrq_dma,
176862306a36Sopenharmony_ci						  GFP_KERNEL);
176962306a36Sopenharmony_ci
177062306a36Sopenharmony_ci		if (!rcd->rcvhdrq) {
177162306a36Sopenharmony_ci			dd_dev_err(dd,
177262306a36Sopenharmony_ci				   "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
177362306a36Sopenharmony_ci				   amt, rcd->ctxt);
177462306a36Sopenharmony_ci			goto bail;
177562306a36Sopenharmony_ci		}
177662306a36Sopenharmony_ci
177762306a36Sopenharmony_ci		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
177862306a36Sopenharmony_ci		    HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) {
177962306a36Sopenharmony_ci			rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
178062306a36Sopenharmony_ci								    PAGE_SIZE,
178162306a36Sopenharmony_ci								    &rcd->rcvhdrqtailaddr_dma,
178262306a36Sopenharmony_ci								    GFP_KERNEL);
178362306a36Sopenharmony_ci			if (!rcd->rcvhdrtail_kvaddr)
178462306a36Sopenharmony_ci				goto bail_free;
178562306a36Sopenharmony_ci		}
178662306a36Sopenharmony_ci	}
178762306a36Sopenharmony_ci
178862306a36Sopenharmony_ci	set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
178962306a36Sopenharmony_ci		      rcd->rcvhdrq_cnt);
179062306a36Sopenharmony_ci
179162306a36Sopenharmony_ci	return 0;
179262306a36Sopenharmony_ci
179362306a36Sopenharmony_cibail_free:
179462306a36Sopenharmony_ci	dd_dev_err(dd,
179562306a36Sopenharmony_ci		   "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
179662306a36Sopenharmony_ci		   rcd->ctxt);
179762306a36Sopenharmony_ci	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
179862306a36Sopenharmony_ci			  rcd->rcvhdrq_dma);
179962306a36Sopenharmony_ci	rcd->rcvhdrq = NULL;
180062306a36Sopenharmony_cibail:
180162306a36Sopenharmony_ci	return -ENOMEM;
180262306a36Sopenharmony_ci}
180362306a36Sopenharmony_ci
180462306a36Sopenharmony_ci/**
180562306a36Sopenharmony_ci * hfi1_setup_eagerbufs - allocate eager buffers, both kernel and user
180662306a36Sopenharmony_ci * contexts.
180762306a36Sopenharmony_ci * @rcd: the context we are setting up.
180862306a36Sopenharmony_ci *
180962306a36Sopenharmony_ci * Allocate the eager TID buffers and program them into the chip.
181062306a36Sopenharmony_ci * They are no longer completely contiguous, we do multiple allocation
181162306a36Sopenharmony_ci * calls.  Otherwise we get the OOM code involved, by asking for too
181262306a36Sopenharmony_ci * much per call, with disastrous results on some kernels.
181362306a36Sopenharmony_ci */
181462306a36Sopenharmony_ciint hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
181562306a36Sopenharmony_ci{
181662306a36Sopenharmony_ci	struct hfi1_devdata *dd = rcd->dd;
181762306a36Sopenharmony_ci	u32 max_entries, egrtop, alloced_bytes = 0;
181862306a36Sopenharmony_ci	u16 order, idx = 0;
181962306a36Sopenharmony_ci	int ret = 0;
182062306a36Sopenharmony_ci	u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
182162306a36Sopenharmony_ci
182262306a36Sopenharmony_ci	/*
182362306a36Sopenharmony_ci	 * The minimum size of the eager buffers is a groups of MTU-sized
182462306a36Sopenharmony_ci	 * buffers.
182562306a36Sopenharmony_ci	 * The global eager_buffer_size parameter is checked against the
182662306a36Sopenharmony_ci	 * theoretical lower limit of the value. Here, we check against the
182762306a36Sopenharmony_ci	 * MTU.
182862306a36Sopenharmony_ci	 */
182962306a36Sopenharmony_ci	if (rcd->egrbufs.size < (round_mtu * dd->rcv_entries.group_size))
183062306a36Sopenharmony_ci		rcd->egrbufs.size = round_mtu * dd->rcv_entries.group_size;
183162306a36Sopenharmony_ci	/*
183262306a36Sopenharmony_ci	 * If using one-pkt-per-egr-buffer, lower the eager buffer
183362306a36Sopenharmony_ci	 * size to the max MTU (page-aligned).
183462306a36Sopenharmony_ci	 */
183562306a36Sopenharmony_ci	if (!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR))
183662306a36Sopenharmony_ci		rcd->egrbufs.rcvtid_size = round_mtu;
183762306a36Sopenharmony_ci
183862306a36Sopenharmony_ci	/*
183962306a36Sopenharmony_ci	 * Eager buffers sizes of 1MB or less require smaller TID sizes
184062306a36Sopenharmony_ci	 * to satisfy the "multiple of 8 RcvArray entries" requirement.
184162306a36Sopenharmony_ci	 */
184262306a36Sopenharmony_ci	if (rcd->egrbufs.size <= (1 << 20))
184362306a36Sopenharmony_ci		rcd->egrbufs.rcvtid_size = max((unsigned long)round_mtu,
184462306a36Sopenharmony_ci			rounddown_pow_of_two(rcd->egrbufs.size / 8));
184562306a36Sopenharmony_ci
184662306a36Sopenharmony_ci	while (alloced_bytes < rcd->egrbufs.size &&
184762306a36Sopenharmony_ci	       rcd->egrbufs.alloced < rcd->egrbufs.count) {
184862306a36Sopenharmony_ci		rcd->egrbufs.buffers[idx].addr =
184962306a36Sopenharmony_ci			dma_alloc_coherent(&dd->pcidev->dev,
185062306a36Sopenharmony_ci					   rcd->egrbufs.rcvtid_size,
185162306a36Sopenharmony_ci					   &rcd->egrbufs.buffers[idx].dma,
185262306a36Sopenharmony_ci					   GFP_KERNEL);
185362306a36Sopenharmony_ci		if (rcd->egrbufs.buffers[idx].addr) {
185462306a36Sopenharmony_ci			rcd->egrbufs.buffers[idx].len =
185562306a36Sopenharmony_ci				rcd->egrbufs.rcvtid_size;
185662306a36Sopenharmony_ci			rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr =
185762306a36Sopenharmony_ci				rcd->egrbufs.buffers[idx].addr;
185862306a36Sopenharmony_ci			rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma =
185962306a36Sopenharmony_ci				rcd->egrbufs.buffers[idx].dma;
186062306a36Sopenharmony_ci			rcd->egrbufs.alloced++;
186162306a36Sopenharmony_ci			alloced_bytes += rcd->egrbufs.rcvtid_size;
186262306a36Sopenharmony_ci			idx++;
186362306a36Sopenharmony_ci		} else {
186462306a36Sopenharmony_ci			u32 new_size, i, j;
186562306a36Sopenharmony_ci			u64 offset = 0;
186662306a36Sopenharmony_ci
186762306a36Sopenharmony_ci			/*
186862306a36Sopenharmony_ci			 * Fail the eager buffer allocation if:
186962306a36Sopenharmony_ci			 *   - we are already using the lowest acceptable size
187062306a36Sopenharmony_ci			 *   - we are using one-pkt-per-egr-buffer (this implies
187162306a36Sopenharmony_ci			 *     that we are accepting only one size)
187262306a36Sopenharmony_ci			 */
187362306a36Sopenharmony_ci			if (rcd->egrbufs.rcvtid_size == round_mtu ||
187462306a36Sopenharmony_ci			    !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
187562306a36Sopenharmony_ci				dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
187662306a36Sopenharmony_ci					   rcd->ctxt);
187762306a36Sopenharmony_ci				ret = -ENOMEM;
187862306a36Sopenharmony_ci				goto bail_rcvegrbuf_phys;
187962306a36Sopenharmony_ci			}
188062306a36Sopenharmony_ci
188162306a36Sopenharmony_ci			new_size = rcd->egrbufs.rcvtid_size / 2;
188262306a36Sopenharmony_ci
188362306a36Sopenharmony_ci			/*
188462306a36Sopenharmony_ci			 * If the first attempt to allocate memory failed, don't
188562306a36Sopenharmony_ci			 * fail everything but continue with the next lower
188662306a36Sopenharmony_ci			 * size.
188762306a36Sopenharmony_ci			 */
188862306a36Sopenharmony_ci			if (idx == 0) {
188962306a36Sopenharmony_ci				rcd->egrbufs.rcvtid_size = new_size;
189062306a36Sopenharmony_ci				continue;
189162306a36Sopenharmony_ci			}
189262306a36Sopenharmony_ci
189362306a36Sopenharmony_ci			/*
189462306a36Sopenharmony_ci			 * Re-partition already allocated buffers to a smaller
189562306a36Sopenharmony_ci			 * size.
189662306a36Sopenharmony_ci			 */
189762306a36Sopenharmony_ci			rcd->egrbufs.alloced = 0;
189862306a36Sopenharmony_ci			for (i = 0, j = 0, offset = 0; j < idx; i++) {
189962306a36Sopenharmony_ci				if (i >= rcd->egrbufs.count)
190062306a36Sopenharmony_ci					break;
190162306a36Sopenharmony_ci				rcd->egrbufs.rcvtids[i].dma =
190262306a36Sopenharmony_ci					rcd->egrbufs.buffers[j].dma + offset;
190362306a36Sopenharmony_ci				rcd->egrbufs.rcvtids[i].addr =
190462306a36Sopenharmony_ci					rcd->egrbufs.buffers[j].addr + offset;
190562306a36Sopenharmony_ci				rcd->egrbufs.alloced++;
190662306a36Sopenharmony_ci				if ((rcd->egrbufs.buffers[j].dma + offset +
190762306a36Sopenharmony_ci				     new_size) ==
190862306a36Sopenharmony_ci				    (rcd->egrbufs.buffers[j].dma +
190962306a36Sopenharmony_ci				     rcd->egrbufs.buffers[j].len)) {
191062306a36Sopenharmony_ci					j++;
191162306a36Sopenharmony_ci					offset = 0;
191262306a36Sopenharmony_ci				} else {
191362306a36Sopenharmony_ci					offset += new_size;
191462306a36Sopenharmony_ci				}
191562306a36Sopenharmony_ci			}
191662306a36Sopenharmony_ci			rcd->egrbufs.rcvtid_size = new_size;
191762306a36Sopenharmony_ci		}
191862306a36Sopenharmony_ci	}
191962306a36Sopenharmony_ci	rcd->egrbufs.numbufs = idx;
192062306a36Sopenharmony_ci	rcd->egrbufs.size = alloced_bytes;
192162306a36Sopenharmony_ci
192262306a36Sopenharmony_ci	hfi1_cdbg(PROC,
192362306a36Sopenharmony_ci		  "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB",
192462306a36Sopenharmony_ci		  rcd->ctxt, rcd->egrbufs.alloced,
192562306a36Sopenharmony_ci		  rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024);
192662306a36Sopenharmony_ci
192762306a36Sopenharmony_ci	/*
192862306a36Sopenharmony_ci	 * Set the contexts rcv array head update threshold to the closest
192962306a36Sopenharmony_ci	 * power of 2 (so we can use a mask instead of modulo) below half
193062306a36Sopenharmony_ci	 * the allocated entries.
193162306a36Sopenharmony_ci	 */
193262306a36Sopenharmony_ci	rcd->egrbufs.threshold =
193362306a36Sopenharmony_ci		rounddown_pow_of_two(rcd->egrbufs.alloced / 2);
193462306a36Sopenharmony_ci	/*
193562306a36Sopenharmony_ci	 * Compute the expected RcvArray entry base. This is done after
193662306a36Sopenharmony_ci	 * allocating the eager buffers in order to maximize the
193762306a36Sopenharmony_ci	 * expected RcvArray entries for the context.
193862306a36Sopenharmony_ci	 */
193962306a36Sopenharmony_ci	max_entries = rcd->rcv_array_groups * dd->rcv_entries.group_size;
194062306a36Sopenharmony_ci	egrtop = roundup(rcd->egrbufs.alloced, dd->rcv_entries.group_size);
194162306a36Sopenharmony_ci	rcd->expected_count = max_entries - egrtop;
194262306a36Sopenharmony_ci	if (rcd->expected_count > MAX_TID_PAIR_ENTRIES * 2)
194362306a36Sopenharmony_ci		rcd->expected_count = MAX_TID_PAIR_ENTRIES * 2;
194462306a36Sopenharmony_ci
194562306a36Sopenharmony_ci	rcd->expected_base = rcd->eager_base + egrtop;
194662306a36Sopenharmony_ci	hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u",
194762306a36Sopenharmony_ci		  rcd->ctxt, rcd->egrbufs.alloced, rcd->expected_count,
194862306a36Sopenharmony_ci		  rcd->eager_base, rcd->expected_base);
194962306a36Sopenharmony_ci
195062306a36Sopenharmony_ci	if (!hfi1_rcvbuf_validate(rcd->egrbufs.rcvtid_size, PT_EAGER, &order)) {
195162306a36Sopenharmony_ci		hfi1_cdbg(PROC,
195262306a36Sopenharmony_ci			  "ctxt%u: current Eager buffer size is invalid %u",
195362306a36Sopenharmony_ci			  rcd->ctxt, rcd->egrbufs.rcvtid_size);
195462306a36Sopenharmony_ci		ret = -EINVAL;
195562306a36Sopenharmony_ci		goto bail_rcvegrbuf_phys;
195662306a36Sopenharmony_ci	}
195762306a36Sopenharmony_ci
195862306a36Sopenharmony_ci	for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
195962306a36Sopenharmony_ci		hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
196062306a36Sopenharmony_ci			     rcd->egrbufs.rcvtids[idx].dma, order);
196162306a36Sopenharmony_ci		cond_resched();
196262306a36Sopenharmony_ci	}
196362306a36Sopenharmony_ci
196462306a36Sopenharmony_ci	return 0;
196562306a36Sopenharmony_ci
196662306a36Sopenharmony_cibail_rcvegrbuf_phys:
196762306a36Sopenharmony_ci	for (idx = 0; idx < rcd->egrbufs.alloced &&
196862306a36Sopenharmony_ci	     rcd->egrbufs.buffers[idx].addr;
196962306a36Sopenharmony_ci	     idx++) {
197062306a36Sopenharmony_ci		dma_free_coherent(&dd->pcidev->dev,
197162306a36Sopenharmony_ci				  rcd->egrbufs.buffers[idx].len,
197262306a36Sopenharmony_ci				  rcd->egrbufs.buffers[idx].addr,
197362306a36Sopenharmony_ci				  rcd->egrbufs.buffers[idx].dma);
197462306a36Sopenharmony_ci		rcd->egrbufs.buffers[idx].addr = NULL;
197562306a36Sopenharmony_ci		rcd->egrbufs.buffers[idx].dma = 0;
197662306a36Sopenharmony_ci		rcd->egrbufs.buffers[idx].len = 0;
197762306a36Sopenharmony_ci	}
197862306a36Sopenharmony_ci
197962306a36Sopenharmony_ci	return ret;
198062306a36Sopenharmony_ci}
1981