#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
 * Copyright(c) 2020 Cornelis Networks, Inc.
 * Copyright(c) 2015-2020 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/fs.h>
#include <linux/completion.h>
#include <linux/kref.h>
#include <linux/sched.h>
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
#include <linux/xarray.h>
#include <rdma/ib_hdrs.h>
#include <rdma/opa_addr.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <rdma/rdma_vt.h>

#include "chip_registers.h"
#include "common.h"
#include "opfn.h"
#include "verbs.h"
#include "pio.h"
#include "chip.h"
#include "mad.h"
#include "qsfp.h"
#include "platform.h"
#include "affinity.h"
#include "msix.h"

/* bumped 1 from s/w major version of TrueScale */
#define HFI1_CHIP_VERS_MAJ 3U

/* don't care about this except printing */
#define HFI1_CHIP_VERS_MIN 0U

/* The Organization Unique Identifier (Mfg code), and its position in GUID */
#define HFI1_OUI 0x001175
#define HFI1_OUI_LSB 40

#define DROP_PACKET_OFF		0
#define DROP_PACKET_ON		1

#define NEIGHBOR_TYPE_HFI		0
#define NEIGHBOR_TYPE_SWITCH	1

#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5

extern unsigned long hfi1_cap_mask;
#define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap)
#define HFI1_CAP_UGET_MASK(mask, cap) \
	(((mask) >> HFI1_CAP_USER_SHIFT) & HFI1_CAP_##cap)
#define HFI1_CAP_KGET(cap) (HFI1_CAP_KGET_MASK(hfi1_cap_mask, cap))
#define HFI1_CAP_UGET(cap) (HFI1_CAP_UGET_MASK(hfi1_cap_mask, cap))
#define HFI1_CAP_IS_KSET(cap) (!!HFI1_CAP_KGET(cap))
#define HFI1_CAP_IS_USET(cap) (!!HFI1_CAP_UGET(cap))
#define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \
			HFI1_CAP_MISC_MASK)
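
/*
 * Usage sketch (illustrative only, not a driver API): the kernel- and
 * user-visible halves of hfi1_cap_mask are queried through the
 * accessors above, e.g.
 *
 *	if (HFI1_CAP_IS_KSET(DMA_RTAIL))
 *		...kernel contexts may use the DMA'ed tail update...
 *	if (HFI1_CAP_IS_USET(SDMA))
 *		...user contexts are opened with SDMA enabled...
 */
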
/* Offline Disabled Reason is 4-bits */
#define HFI1_ODR_MASK(rsn) ((rsn) & OPA_PI_MASK_OFFLINE_REASON)

/*
 * Control context is always 0 and handles the error packets.
 * It also handles the VL15 and multicast packets.
 */
#define HFI1_CTRL_CTXT    0

/*
 * Driver context will store software counters for each of the events
 * associated with these status registers
 */
#define NUM_CCE_ERR_STATUS_COUNTERS 41
#define NUM_RCV_ERR_STATUS_COUNTERS 64
#define NUM_MISC_ERR_STATUS_COUNTERS 13
#define NUM_SEND_PIO_ERR_STATUS_COUNTERS 36
#define NUM_SEND_DMA_ERR_STATUS_COUNTERS 4
#define NUM_SEND_EGRESS_ERR_STATUS_COUNTERS 64
#define NUM_SEND_ERR_STATUS_COUNTERS 3
#define NUM_SEND_CTXT_ERR_STATUS_COUNTERS 5
#define NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS 24

/*
 * Per-driver stats that are either not device- or port-specific, or are
 * summed over all of the devices and ports.
 * They are described by name via the ipathfs filesystem, so layout
 * and number of elements can change without breaking compatibility.
 * If members are added or deleted, hfi1_statnames[] in debugfs.c must
 * change to match.
 */
struct hfi1_ib_stats {
	__u64 sps_ints; /* number of interrupts handled */
	__u64 sps_errints; /* number of error interrupts */
	__u64 sps_txerrs; /* tx-related packet errors */
	__u64 sps_rcverrs; /* non-crc rcv packet errors */
	__u64 sps_hwerrs; /* hardware errors reported (parity, etc.) */
	__u64 sps_nopiobufs; /* no pio bufs avail from kernel */
	__u64 sps_ctxts; /* number of contexts currently open */
	__u64 sps_lenerrs; /* number of kernel packets where RHF != LRH len */
	__u64 sps_buffull;
	__u64 sps_hdrfull;
};

extern struct hfi1_ib_stats hfi1_stats;
extern const struct pci_error_handlers hfi1_pci_err_handler;

extern int num_driver_cntrs;

/*
 * First-cut criterion for "device is active" is
 * two thousand dwords combined Tx, Rx traffic per
 * 5-second interval. SMA packets are 64 dwords,
 * and occur "a few per second", presumably each way.
 */
#define HFI1_TRAFFIC_ACTIVE_THRESHOLD (2000)

/*
 * Below contains all data related to a single context (formerly called port).
 */

struct hfi1_opcode_stats_perctx;

struct ctxt_eager_bufs {
	struct eager_buffer {
		void *addr;
		dma_addr_t dma;
		ssize_t len;
	} *buffers;
	struct {
		void *addr;
		dma_addr_t dma;
	} *rcvtids;
	u32 size;                /* total size of eager buffers */
	u32 rcvtid_size;         /* size of each eager rcv tid */
	u16 count;               /* size of buffers array */
	u16 numbufs;             /* number of buffers allocated */
	u16 alloced;             /* number of rcvarray entries used */
	u16 threshold;           /* head update threshold */
};

struct exp_tid_set {
	struct list_head list;
	u32 count;
};

struct hfi1_ctxtdata;
typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data);
typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);

struct tid_queue {
	struct list_head queue_head;
			/* queue head for QP TID resource waiters */
	u32 enqueue;	/* count of tid enqueues */
	u32 dequeue;	/* count of tid dequeues */
};

struct hfi1_ctxtdata {
	/* rcvhdrq base, needs mmap before useful */
	void *rcvhdrq;
	/* kernel virtual address where hdrqtail is updated */
	volatile __le64 *rcvhdrtail_kvaddr;
	/* so functions that need physical port can get it easily */
	struct hfi1_pportdata *ppd;
	/* so file ops can get at unit */
	struct hfi1_devdata *dd;
	/* this receive context's assigned PIO ACK send context */
	struct send_context *sc;
	/* per context recv functions */
	const rhf_rcv_function_ptr *rhf_rcv_function_map;
	/*
	 * The interrupt handler for a particular receive context can vary
	 * throughout its lifetime. This is not a lock-protected data member,
	 * so it must be updated atomically and the previous and new values
	 * must always be valid. Worst case is we process an extra interrupt
	 * and up to 64 packets with the wrong interrupt handler.
	 */
	intr_handler do_interrupt;
	/** fast handler after autoactive */
	intr_handler fast_handler;
	/** slow handler */
	intr_handler slow_handler;
	/* napi pointer associated with netdev */
	struct napi_struct *napi;
	/* verbs rx_stats per rcd */
	struct hfi1_opcode_stats_perctx *opstats;
	/* clear interrupt mask */
	u64 imask;
	/* ctxt rcvhdrq head offset */
	u32 head;
	/* number of rcvhdrq entries */
	u16 rcvhdrq_cnt;
	u8 ireg;	/* clear interrupt register */
	/* receive packet sequence counter */
	u8 seq_cnt;
	/* size of each of the rcvhdrq entries */
	u8 rcvhdrqentsize;
	/* offset of RHF within receive header entry */
	u8 rhf_offset;
	/* dynamic receive available interrupt timeout */
	u8 rcvavail_timeout;
	/* Indicates that this is a vnic context */
	bool is_vnic;
	/* vnic queue index this context is mapped to */
	u8 vnic_q_idx;
	/* Is ASPM interrupt supported for this context */
	bool aspm_intr_supported;
	/* ASPM state (enabled/disabled) for this context */
	bool aspm_enabled;
	/* Is ASPM processing enabled for this context (in intr context) */
	bool aspm_intr_enable;
	struct ctxt_eager_bufs egrbufs;
	/* QPs waiting for context processing */
	struct list_head qp_wait_list;
	/* tid allocation lists */
	struct exp_tid_set tid_group_list;
	struct exp_tid_set tid_used_list;
	struct exp_tid_set tid_full_list;

	/* Timer for re-enabling ASPM if interrupt activity quiets down */
	struct timer_list aspm_timer;
	/* per-context configuration flags */
	unsigned long flags;
	/* array of tid_groups */
	struct tid_group  *groups;
	/* mmap of hdrq, must fit in 44 bits */
	dma_addr_t rcvhdrq_dma;
	dma_addr_t rcvhdrqtailaddr_dma;
	/* Last interrupt timestamp */
	ktime_t aspm_ts_last_intr;
	/* Last timestamp at which we scheduled a timer for this context */
	ktime_t aspm_ts_timer_sched;
	/* Lock to serialize between intr, timer intr and user threads */
	spinlock_t aspm_lock;
	/* Reference count the base context usage */
	struct kref kref;
	/* numa node of this context */
	int numa_id;
	/* associated msix interrupt. */
	s16 msix_intr;
	/* job key */
	u16 jkey;
	/* number of RcvArray groups for this context. */
	u16 rcv_array_groups;
	/* index of first eager TID entry. */
	u16 eager_base;
	/* number of expected TID entries */
	u16 expected_count;
	/* index of first expected TID entry. */
	u16 expected_base;
	/* Device context index */
	u8 ctxt;

	/* PSM Specific fields */
	/* lock protecting all Expected TID data */
	struct mutex exp_mutex;
	/* lock protecting all Expected TID data of kernel contexts */
	spinlock_t exp_lock;
	/* Queue for QPs waiting for HW TID flows */
	struct tid_queue flow_queue;
	/* Queue for QPs waiting for HW receive array entries */
	struct tid_queue rarr_queue;
	/* when waiting for rcv or pioavail */
	wait_queue_head_t wait;
	/* uuid from PSM */
	u8 uuid[16];
	/* same size as task_struct .comm[], command that opened context */
	char comm[TASK_COMM_LEN];
	/* Bitmask of in use context(s) */
	DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
	/* per-context event flags for fileops/intr communication */
	unsigned long event_flags;
	/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
	void *subctxt_uregbase;
	/* An array of pages for the eager receive buffers * N */
	void *subctxt_rcvegrbuf;
	/* An array of pages for the eager header queue entries * N */
	void *subctxt_rcvhdr_base;
	/* total number of polled urgent packets */
	u32 urgent;
	/* saved total number of polled urgent packets for poll edge trigger */
	u32 urgent_poll;
	/* Type of packets or conditions we want to poll for */
	u16 poll_type;
	/* non-zero if ctxt is being shared. */
	u16 subctxt_id;
	/* The version of the library which opened this ctxt */
	u32 userversion;
	/*
	 * non-zero if ctxt can be shared, and defines the maximum number of
	 * sub-contexts for this device context.
	 */
	u8 subctxt_cnt;

	/* Bit mask to track free TID RDMA HW flows */
	unsigned long flow_mask;
	struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
};

/**
 * rcvhdrq_size - return total size in bytes for header queue
 * @rcd: the receive context
 *
 * rcvhdrqentsize is in DWs, so we have to convert to bytes
 */
static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd)
{
	return PAGE_ALIGN(rcd->rcvhdrq_cnt *
			  rcd->rcvhdrqentsize * sizeof(u32));
}
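
/*
 * Worked example (illustrative numbers only): with rcvhdrq_cnt == 2048
 * and rcvhdrqentsize == 32 DWs, the queue needs 2048 * 32 * 4 = 262144
 * bytes; PAGE_ALIGN() leaves that at exactly 256 KiB on a 4 KiB-page
 * system.
 */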

/*
 * Represents a single packet at a high level. Put commonly computed things in
 * here so we do not have to keep doing them over and over. The rule of thumb is
 * if something is used one time to derive some value, store that something in
 * here. If it is used multiple times, then store the result of that derivation
 * in here.
 */
struct hfi1_packet {
	void *ebuf;
	void *hdr;
	void *payload;
	struct hfi1_ctxtdata *rcd;
	__le32 *rhf_addr;
	struct rvt_qp *qp;
	struct ib_other_headers *ohdr;
	struct ib_grh *grh;
	struct opa_16b_mgmt *mgmt;
	u64 rhf;
	u32 maxcnt;
	u32 rhqoff;
	u32 dlid;
	u32 slid;
	int numpkt;
	u16 tlen;
	s16 etail;
	u16 pkey;
	u8 hlen;
	u8 rsize;
	u8 updegr;
	u8 etype;
	u8 extra_byte;
	u8 pad;
	u8 sc;
	u8 sl;
	u8 opcode;
	bool migrated;
};

/* Packet types */
#define HFI1_PKT_TYPE_9B  0
#define HFI1_PKT_TYPE_16B 1

/*
 * OPA 16B Header
 */
#define OPA_16B_L4_MASK		0xFFull
#define OPA_16B_SC_MASK		0x1F00000ull
#define OPA_16B_SC_SHIFT	20
#define OPA_16B_LID_MASK	0xFFFFFull
#define OPA_16B_DLID_MASK	0xF000ull
#define OPA_16B_DLID_SHIFT	20
#define OPA_16B_DLID_HIGH_SHIFT	12
#define OPA_16B_SLID_MASK	0xF00ull
#define OPA_16B_SLID_SHIFT	20
#define OPA_16B_SLID_HIGH_SHIFT	8
#define OPA_16B_BECN_MASK       0x80000000ull
#define OPA_16B_BECN_SHIFT      31
#define OPA_16B_FECN_MASK       0x10000000ull
#define OPA_16B_FECN_SHIFT      28
#define OPA_16B_L2_MASK		0x60000000ull
#define OPA_16B_L2_SHIFT	29
#define OPA_16B_PKEY_MASK	0xFFFF0000ull
#define OPA_16B_PKEY_SHIFT	16
#define OPA_16B_LEN_MASK	0x7FF00000ull
#define OPA_16B_LEN_SHIFT	20
#define OPA_16B_RC_MASK		0xE000000ull
#define OPA_16B_RC_SHIFT	25
#define OPA_16B_AGE_MASK	0xFF0000ull
#define OPA_16B_AGE_SHIFT	16
#define OPA_16B_ENTROPY_MASK	0xFFFFull

/*
 * OPA 16B L2/L4 Encodings
 */
#define OPA_16B_L4_9B		0x00
#define OPA_16B_L2_TYPE		0x02
#define OPA_16B_L4_FM		0x08
#define OPA_16B_L4_IB_LOCAL	0x09
#define OPA_16B_L4_IB_GLOBAL	0x0A
#define OPA_16B_L4_ETHR		OPA_VNIC_L4_ETHR

/*
 * OPA 16B Management
 */
#define OPA_16B_L4_FM_PAD	3  /* fixed 3B pad */
#define OPA_16B_L4_FM_HLEN	24 /* 16B(16) + L4_FM(8) */

static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr)
{
	return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK);
}

static inline u8 hfi1_16B_get_sc(struct hfi1_16b_header *hdr)
{
	return (u8)((hdr->lrh[1] & OPA_16B_SC_MASK) >> OPA_16B_SC_SHIFT);
}

static inline u32 hfi1_16B_get_dlid(struct hfi1_16b_header *hdr)
{
	return (u32)((hdr->lrh[1] & OPA_16B_LID_MASK) |
		     (((hdr->lrh[2] & OPA_16B_DLID_MASK) >>
		     OPA_16B_DLID_HIGH_SHIFT) << OPA_16B_DLID_SHIFT));
}

static inline u32 hfi1_16B_get_slid(struct hfi1_16b_header *hdr)
{
	return (u32)((hdr->lrh[0] & OPA_16B_LID_MASK) |
		     (((hdr->lrh[2] & OPA_16B_SLID_MASK) >>
		     OPA_16B_SLID_HIGH_SHIFT) << OPA_16B_SLID_SHIFT));
}
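
/*
 * Worked example (illustrative): a 16B DLID is 24 bits split across
 * the header - the low 20 bits live in lrh[1], the high 4 bits in
 * lrh[2].  For DLID 0x123456, hfi1_16B_get_dlid() reassembles
 * 0x23456 (from lrh[1]) | (0x1 << OPA_16B_DLID_SHIFT) == 0x123456.
 * hfi1_16B_get_slid() does the same for the SLID using lrh[0].
 */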

static inline u8 hfi1_16B_get_becn(struct hfi1_16b_header *hdr)
{
	return (u8)((hdr->lrh[0] & OPA_16B_BECN_MASK) >> OPA_16B_BECN_SHIFT);
}

static inline u8 hfi1_16B_get_fecn(struct hfi1_16b_header *hdr)
{
	return (u8)((hdr->lrh[1] & OPA_16B_FECN_MASK) >> OPA_16B_FECN_SHIFT);
}

static inline u8 hfi1_16B_get_l2(struct hfi1_16b_header *hdr)
{
	return (u8)((hdr->lrh[1] & OPA_16B_L2_MASK) >> OPA_16B_L2_SHIFT);
}

static inline u16 hfi1_16B_get_pkey(struct hfi1_16b_header *hdr)
{
	return (u16)((hdr->lrh[2] & OPA_16B_PKEY_MASK) >> OPA_16B_PKEY_SHIFT);
}

static inline u8 hfi1_16B_get_rc(struct hfi1_16b_header *hdr)
{
	return (u8)((hdr->lrh[1] & OPA_16B_RC_MASK) >> OPA_16B_RC_SHIFT);
}

static inline u8 hfi1_16B_get_age(struct hfi1_16b_header *hdr)
{
	return (u8)((hdr->lrh[3] & OPA_16B_AGE_MASK) >> OPA_16B_AGE_SHIFT);
}

static inline u16 hfi1_16B_get_len(struct hfi1_16b_header *hdr)
{
	return (u16)((hdr->lrh[0] & OPA_16B_LEN_MASK) >> OPA_16B_LEN_SHIFT);
}

static inline u16 hfi1_16B_get_entropy(struct hfi1_16b_header *hdr)
{
	return (u16)(hdr->lrh[3] & OPA_16B_ENTROPY_MASK);
}

#define OPA_16B_MAKE_QW(low_dw, high_dw) (((u64)(high_dw) << 32) | (low_dw))

/*
 * BTH
 */
#define OPA_16B_BTH_PAD_MASK	7
static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr)
{
	return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) &
		   OPA_16B_BTH_PAD_MASK);
}

/*
 * 16B Management
 */
#define OPA_16B_MGMT_QPN_MASK	0xFFFFFF
static inline u32 hfi1_16B_get_dest_qpn(struct opa_16b_mgmt *mgmt)
{
	return be32_to_cpu(mgmt->dest_qpn) & OPA_16B_MGMT_QPN_MASK;
}

static inline u32 hfi1_16B_get_src_qpn(struct opa_16b_mgmt *mgmt)
{
	return be32_to_cpu(mgmt->src_qpn) & OPA_16B_MGMT_QPN_MASK;
}

static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt,
				    u32 dest_qp, u32 src_qp)
{
	mgmt->dest_qpn = cpu_to_be32(dest_qp & OPA_16B_MGMT_QPN_MASK);
	mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK);
}

/**
 * hfi1_get_rc_ohdr - get extended header
 * @opah: the opa header
 */
static inline struct ib_other_headers *
hfi1_get_rc_ohdr(struct hfi1_opa_header *opah)
{
	struct ib_other_headers *ohdr;
	struct ib_header *hdr = NULL;
	struct hfi1_16b_header *hdr_16b = NULL;

	/* Find out where the BTH is */
	if (opah->hdr_type == HFI1_PKT_TYPE_9B) {
		hdr = &opah->ibh;
		if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
			ohdr = &hdr->u.oth;
		else
			ohdr = &hdr->u.l.oth;
	} else {
		u8 l4;

		hdr_16b = &opah->opah;
		l4  = hfi1_16B_get_l4(hdr_16b);
		if (l4 == OPA_16B_L4_IB_LOCAL)
			ohdr = &hdr_16b->u.oth;
		else
			ohdr = &hdr_16b->u.l.oth;
	}
	return ohdr;
}

struct rvt_sge_state;

/*
 * Get/Set IB link-level config parameters for f_get/set_ib_cfg()
 * Mostly for MADs that set or query link parameters, also ipath
 * config interfaces
 */
#define HFI1_IB_CFG_LIDLMC 0 /* LID (LS16b) and Mask (MS16b) */
#define HFI1_IB_CFG_LWID_DG_ENB 1 /* allowed Link-width downgrade */
#define HFI1_IB_CFG_LWID_ENB 2 /* allowed Link-width */
#define HFI1_IB_CFG_LWID 3 /* currently active Link-width */
#define HFI1_IB_CFG_SPD_ENB 4 /* allowed Link speeds */
#define HFI1_IB_CFG_SPD 5 /* current Link spd */
#define HFI1_IB_CFG_RXPOL_ENB 6 /* Auto-RX-polarity enable */
#define HFI1_IB_CFG_LREV_ENB 7 /* Auto-Lane-reversal enable */
#define HFI1_IB_CFG_LINKLATENCY 8 /* Link Latency (IB1.2 only) */
#define HFI1_IB_CFG_HRTBT 9 /* IB heartbeat off/enable/auto; DDR/QDR only */
#define HFI1_IB_CFG_OP_VLS 10 /* operational VLs */
#define HFI1_IB_CFG_VL_HIGH_CAP 11 /* num of VL high priority weights */
#define HFI1_IB_CFG_VL_LOW_CAP 12 /* num of VL low priority weights */
#define HFI1_IB_CFG_OVERRUN_THRESH 13 /* IB overrun threshold */
#define HFI1_IB_CFG_PHYERR_THRESH 14 /* IB PHY error threshold */
#define HFI1_IB_CFG_LINKDEFAULT 15 /* IB link default (sleep/poll) */
#define HFI1_IB_CFG_PKEYS 16 /* update partition keys */
#define HFI1_IB_CFG_MTU 17 /* update MTU in IBC */
#define HFI1_IB_CFG_VL_HIGH_LIMIT 19
#define HFI1_IB_CFG_PMA_TICKS 20 /* PMA sample tick resolution */
#define HFI1_IB_CFG_PORT 21 /* switch port we are connected to */

/*
 * HFI or Host Link States
 *
 * These describe the states the driver thinks the logical and physical
 * states are in.  Used as an argument to set_link_state().  Implemented
 * as bits for easy multi-state checking.  The actual state can only be
 * one.
 */
#define __HLS_UP_INIT_BP	0
#define __HLS_UP_ARMED_BP	1
#define __HLS_UP_ACTIVE_BP	2
#define __HLS_DN_DOWNDEF_BP	3	/* link down default */
#define __HLS_DN_POLL_BP	4
#define __HLS_DN_DISABLE_BP	5
#define __HLS_DN_OFFLINE_BP	6
#define __HLS_VERIFY_CAP_BP	7
#define __HLS_GOING_UP_BP	8
#define __HLS_GOING_OFFLINE_BP  9
#define __HLS_LINK_COOLDOWN_BP 10

#define HLS_UP_INIT	  BIT(__HLS_UP_INIT_BP)
#define HLS_UP_ARMED	  BIT(__HLS_UP_ARMED_BP)
#define HLS_UP_ACTIVE	  BIT(__HLS_UP_ACTIVE_BP)
#define HLS_DN_DOWNDEF	  BIT(__HLS_DN_DOWNDEF_BP) /* link down default */
#define HLS_DN_POLL	  BIT(__HLS_DN_POLL_BP)
#define HLS_DN_DISABLE	  BIT(__HLS_DN_DISABLE_BP)
#define HLS_DN_OFFLINE	  BIT(__HLS_DN_OFFLINE_BP)
#define HLS_VERIFY_CAP	  BIT(__HLS_VERIFY_CAP_BP)
#define HLS_GOING_UP	  BIT(__HLS_GOING_UP_BP)
#define HLS_GOING_OFFLINE BIT(__HLS_GOING_OFFLINE_BP)
#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)

#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
#define HLS_DOWN ~(HLS_UP)

#define HLS_DEFAULT HLS_DN_POLL
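
/*
 * Because each state is a single bit, multi-state checks collapse to
 * one mask test, e.g. (sketch, not lifted from the driver):
 *
 *	if (ppd->host_link_state & HLS_UP)
 *		...the link is in Init, Armed, or Active...
 */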

/* use this MTU size if none other is given */
#define HFI1_DEFAULT_ACTIVE_MTU 10240
/* use this MTU size as the default maximum */
#define HFI1_DEFAULT_MAX_MTU 10240
/* default partition key */
#define DEFAULT_PKEY 0xffff

/*
 * Possible fabric manager config parameters for fm_{get,set}_table()
 */
#define FM_TBL_VL_HIGH_ARB		1 /* Get/set VL high prio weights */
#define FM_TBL_VL_LOW_ARB		2 /* Get/set VL low prio weights */
#define FM_TBL_BUFFER_CONTROL		3 /* Get/set Buffer Control */
#define FM_TBL_SC2VLNT			4 /* Get/set SC->VLnt */
#define FM_TBL_VL_PREEMPT_ELEMS		5 /* Get (no set) VL preempt elems */
#define FM_TBL_VL_PREEMPT_MATRIX	6 /* Get (no set) VL preempt matrix */

/*
 * Possible "operations" for f_rcvctrl(ppd, op, ctxt)
 * these are bits so they can be combined, e.g.
 * HFI1_RCVCTRL_INTRAVAIL_ENB | HFI1_RCVCTRL_CTXT_ENB
 */
#define HFI1_RCVCTRL_TAILUPD_ENB 0x01
#define HFI1_RCVCTRL_TAILUPD_DIS 0x02
#define HFI1_RCVCTRL_CTXT_ENB 0x04
#define HFI1_RCVCTRL_CTXT_DIS 0x08
#define HFI1_RCVCTRL_INTRAVAIL_ENB 0x10
#define HFI1_RCVCTRL_INTRAVAIL_DIS 0x20
#define HFI1_RCVCTRL_PKEY_ENB 0x40  /* Note, default is enabled */
#define HFI1_RCVCTRL_PKEY_DIS 0x80
#define HFI1_RCVCTRL_TIDFLOW_ENB 0x0400
#define HFI1_RCVCTRL_TIDFLOW_DIS 0x0800
#define HFI1_RCVCTRL_ONE_PKT_EGR_ENB 0x1000
#define HFI1_RCVCTRL_ONE_PKT_EGR_DIS 0x2000
#define HFI1_RCVCTRL_NO_RHQ_DROP_ENB 0x4000
#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
#define HFI1_RCVCTRL_URGENT_ENB 0x40000
#define HFI1_RCVCTRL_URGENT_DIS 0x80000

/* partition enforcement flags */
#define HFI1_PART_ENFORCE_IN	0x1
#define HFI1_PART_ENFORCE_OUT	0x2

/* how often we check for synthetic counter wrap around */
#define SYNTH_CNT_TIME 3

/* Counter flags */
#define CNTR_NORMAL		0x0 /* Normal counters, just read register */
#define CNTR_SYNTH		0x1 /* Synthetic counters, saturate at all 1s */
#define CNTR_DISABLED		0x2 /* Disable this counter */
#define CNTR_32BIT		0x4 /* Simulate 64 bits for this counter */
#define CNTR_VL			0x8 /* Per VL counter */
#define CNTR_SDMA              0x10
#define CNTR_INVALID_VL		-1  /* Specifies invalid VL */
#define CNTR_MODE_W		0x0
#define CNTR_MODE_R		0x1

/* VLs Supported/Operational */
#define HFI1_MIN_VLS_SUPPORTED 1
#define HFI1_MAX_VLS_SUPPORTED 8

#define HFI1_GUIDS_PER_PORT  5
#define HFI1_PORT_GUID_INDEX 0

static inline void incr_cntr64(u64 *cntr)
{
	if (*cntr < (u64)-1LL)
		(*cntr)++;
}

static inline void incr_cntr32(u32 *cntr)
{
	if (*cntr < (u32)-1LL)
		(*cntr)++;
}

#define MAX_NAME_SIZE 64
struct hfi1_msix_entry {
	enum irq_type type;
	int irq;
	void *arg;
	cpumask_t mask;
	struct irq_affinity_notify notify;
};

struct hfi1_msix_info {
	/* lock to synchronize in_use_msix access */
	spinlock_t msix_lock;
	DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
	struct hfi1_msix_entry *msix_entries;
	u16 max_requested;
};

/* per-SL CCA information */
struct cca_timer {
	struct hrtimer hrtimer;
	struct hfi1_pportdata *ppd; /* read-only */
	int sl; /* read-only */
	u16 ccti; /* read/write - current value of CCTI */
};

struct link_down_reason {
	/*
	 * SMA-facing value.  Should be set from .latest when
	 * HLS_UP_* -> HLS_DN_* transition actually occurs.
	 */
	u8 sma;
	u8 latest;
};

enum {
	LO_PRIO_TABLE,
	HI_PRIO_TABLE,
	MAX_PRIO_TABLE
};

struct vl_arb_cache {
	/* protect vl arb cache */
	spinlock_t lock;
	struct ib_vl_weight_elem table[VL_ARB_TABLE_SIZE];
};

/*
 * The structure below encapsulates data relevant to a physical IB Port.
 * Current chips support only one such port, but the separation
 * clarifies things a bit. Note that to conform to IB conventions,
 * port-numbers are one-based. The first or only port is port1.
 */
struct hfi1_pportdata {
	struct hfi1_ibport ibport_data;

	struct hfi1_devdata *dd;
	struct kobject pport_cc_kobj;
	struct kobject sc2vl_kobj;
	struct kobject sl2sc_kobj;
	struct kobject vl2mtu_kobj;

	/* PHY support */
	struct qsfp_data qsfp_info;
	/* Values for SI tuning of SerDes */
	u32 port_type;
	u32 tx_preset_eq;
	u32 tx_preset_noeq;
	u32 rx_preset;
	u8  local_atten;
	u8  remote_atten;
	u8  default_atten;
	u8  max_power_class;

	/* did we read platform config from scratch registers? */
	bool config_from_scratch;

	/* GUIDs for this interface, in host order, guids[0] is a port guid */
	u64 guids[HFI1_GUIDS_PER_PORT];

	/* GUID for peer interface, in host order */
	u64 neighbor_guid;

	/* up or down physical link state */
	u32 linkup;

	/*
	 * this address is mapped read-only into user processes so they can
	 * get status cheaply, whenever they want.  One qword of status per port
	 */
	u64 *statusp;

	/* SendDMA related entries */

	struct workqueue_struct *hfi1_wq;
	struct workqueue_struct *link_wq;

	/* move out of interrupt context */
	struct work_struct link_vc_work;
	struct work_struct link_up_work;
	struct work_struct link_down_work;
	struct work_struct sma_message_work;
	struct work_struct freeze_work;
	struct work_struct link_downgrade_work;
	struct work_struct link_bounce_work;
	struct delayed_work start_link_work;
	/* host link state variables */
	struct mutex hls_lock;
	u32 host_link_state;

	/* these are the "32 bit" regs */

	u32 ibmtu; /* The MTU programmed for this unit */
	/*
	 * Current max size IB packet (in bytes) including IB headers, that
	 * we can send. Changes when ibmtu changes.
	 */
	u32 ibmaxlen;
	u32 current_egress_rate; /* units [10^6 bits/sec] */
	/* LID programmed for this instance */
	u32 lid;
	/* list of pkeys programmed; 0 if not set */
	u16 pkeys[MAX_PKEY_VALUES];
	u16 link_width_supported;
	u16 link_width_downgrade_supported;
	u16 link_speed_supported;
	u16 link_width_enabled;
	u16 link_width_downgrade_enabled;
	u16 link_speed_enabled;
	u16 link_width_active;
	u16 link_width_downgrade_tx_active;
	u16 link_width_downgrade_rx_active;
	u16 link_speed_active;
	u8 vls_supported;
	u8 vls_operational;
	u8 actual_vls_operational;
	/* LID mask control */
	u8 lmc;
	/* Rx Polarity inversion (compensate for ~tx on partner) */
	u8 rx_pol_inv;

	u8 hw_pidx;     /* physical port index */
	u8 port;        /* IB port number and index into dd->pports - 1 */
	/* type of neighbor node */
	u8 neighbor_type;
	u8 neighbor_normal;
	u8 neighbor_fm_security; /* 1 if firmware checking is disabled */
	u8 neighbor_port_number;
	u8 is_sm_config_started;
	u8 offline_disabled_reason;
	u8 is_active_optimize_enabled;
	u8 driver_link_ready;	/* driver ready for active link */
	u8 link_enabled;	/* link enabled? */
	u8 linkinit_reason;
	u8 local_tx_rate;	/* rate given to 8051 firmware */
	u8 qsfp_retry_count;

	/* placeholders for IB MAD packet settings */
	u8 overrun_threshold;
	u8 phy_error_threshold;
	unsigned int is_link_down_queued;

	/* Used to override LED behavior for things like maintenance beaconing */
	/*
	 * Alternates per phase of blink
	 * [0] holds LED off duration, [1] holds LED on duration
	 */
	unsigned long led_override_vals[2];
	u8 led_override_phase; /* LSB picks from vals[] */
	atomic_t led_override_timer_active;
	/* Used to flash LEDs in override mode */
	struct timer_list led_override_timer;

	u32 sm_trap_qp;
	u32 sa_qp;

	/*
	 * cca_timer_lock protects access to the per-SL cca_timer
	 * structures (specifically the ccti member).
	 */
	spinlock_t cca_timer_lock ____cacheline_aligned_in_smp;
	struct cca_timer cca_timer[OPA_MAX_SLS];

	/* List of congestion control table entries */
	struct ib_cc_table_entry_shadow ccti_entries[CC_TABLE_SHADOW_MAX];

	/* congestion entries, each entry corresponding to a SL */
	struct opa_congestion_setting_entry_shadow
		congestion_entries[OPA_MAX_SLS];

	/*
	 * cc_state_lock protects (write) access to the per-port
	 * struct cc_state.
	 */
	spinlock_t cc_state_lock ____cacheline_aligned_in_smp;

	struct cc_state __rcu *cc_state;

	/* Total number of congestion control table entries */
	u16 total_cct_entry;

	/* Bit map identifying service level */
	u32 cc_sl_control_map;

	/* CA's max number of 64 entry units in the congestion control table */
	u8 cc_max_table_entries;

	/*
	 * begin congestion log related entries
	 * cc_log_lock protects all congestion log related data
	 */
	spinlock_t cc_log_lock ____cacheline_aligned_in_smp;
	u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
	u16 threshold_event_counter;
	struct opa_hfi1_cong_log_event_internal cc_events[OPA_CONG_LOG_ELEMS];
	int cc_log_idx; /* index for logging events */
	int cc_mad_idx; /* index for reporting events */
	/* end congestion log related entries */

	struct vl_arb_cache vl_arb_cache[MAX_PRIO_TABLE];

	/* port relative counter buffer */
	u64 *cntrs;
	/* port relative synthetic counter buffer */
	u64 *scntrs;
	/* port_xmit_discards are synthesized from different egress errors */
	u64 port_xmit_discards;
	u64 port_xmit_discards_vl[C_VL_COUNT];
	u64 port_xmit_constraint_errors;
	u64 port_rcv_constraint_errors;
	/* count of 'link_err' interrupts from DC */
	u64 link_downed;
	/* number of times link retrained successfully */
	u64 link_up;
	/* number of times a link unknown frame was reported */
	u64 unknown_frame_count;
	/* port_ltp_crc_mode is returned in 'portinfo' MADs */
	u16 port_ltp_crc_mode;
	/* port_crc_mode_enabled is the crc we support */
	u8 port_crc_mode_enabled;
	/* mgmt_allowed is also returned in 'portinfo' MADs */
	u8 mgmt_allowed;
	u8 part_enforce; /* partition enforcement flags */
	struct link_down_reason local_link_down_reason;
	struct link_down_reason neigh_link_down_reason;
	/* Value to be sent to link peer on LinkDown. */
	u8 remote_link_down_reason;
	/* Error events that will cause a port bounce. */
	u32 port_error_action;
	struct work_struct linkstate_active_work;
	/* Does this port need to prescan for FECNs */
	bool cc_prescan;
	/*
	 * Sample sendWaitCnt & sendWaitVlCnt during link transition
	 * and counter request.
	 */
	u64 port_vl_xmit_wait_last[C_VL_COUNT + 1];
	u16 prev_link_width;
	u64 vl_xmit_flit_cnt[C_VL_COUNT + 1];
};

typedef void (*opcode_handler)(struct hfi1_packet *packet);
typedef void (*hfi1_make_req)(struct rvt_qp *qp,
			      struct hfi1_pkt_state *ps,
			      struct rvt_swqe *wqe);
extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];

/* return values for the RHF receive functions */
#define RHF_RCV_CONTINUE  0	/* keep going */
#define RHF_RCV_DONE	  1	/* stop, this packet processed */
#define RHF_RCV_REPROCESS 2	/* stop. retain this packet */

struct rcv_array_data {
	u16 ngroups;
	u16 nctxt_extra;
	u8 group_size;
};

struct per_vl_data {
	u16 mtu;
	struct send_context *sc;
};

/* 16 to directly index */
#define PER_VL_SEND_CONTEXTS 16

struct err_info_rcvport {
	u8 status_and_code;
	u64 packet_flit1;
	u64 packet_flit2;
};

struct err_info_constraint {
	u8 status;
	u16 pkey;
	u32 slid;
};

struct hfi1_temp {
	unsigned int curr;       /* current temperature */
	unsigned int lo_lim;     /* low temperature limit */
	unsigned int hi_lim;     /* high temperature limit */
	unsigned int crit_lim;   /* critical temperature limit */
	u8 triggers;      /* temperature triggers */
};

struct hfi1_i2c_bus {
	struct hfi1_devdata *controlling_dd; /* current controlling device */
	struct i2c_adapter adapter;	/* bus details */
	struct i2c_algo_bit_data algo;	/* bus algorithm details */
	int num;			/* bus number, 0 or 1 */
};

/* common data between shared ASIC HFIs */
struct hfi1_asic_data {
	struct hfi1_devdata *dds[2];	/* back pointers */
	struct mutex asic_resource_mutex;
	struct hfi1_i2c_bus *i2c_bus0;
	struct hfi1_i2c_bus *i2c_bus1;
};

/* sizes for both the QP and RSM map tables */
#define NUM_MAP_ENTRIES	 256
#define NUM_MAP_REGS      32

/* Virtual NIC information */
struct hfi1_vnic_data {
	struct kmem_cache *txreq_cache;
	u8 num_vports;
};

struct hfi1_vnic_vport_info;

/* device data struct now contains only "general per-device" info.
 * fields related to a physical IB port are in a hfi1_pportdata struct.
 */
struct sdma_engine;
struct sdma_vl_map;

#define BOARD_VERS_MAX 96 /* how long the version string can be */
#define SERIAL_MAX 16 /* length of the serial number */

typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64);
struct hfi1_devdata {
	struct hfi1_ibdev verbs_dev;     /* must be first */
	/* pointers to related structs for this device */
	/* pci access data structure */
	struct pci_dev *pcidev;
	struct cdev user_cdev;
	struct cdev diag_cdev;
	struct cdev ui_cdev;
	struct device *user_device;
	struct device *diag_device;
	struct device *ui_device;

	/* first mapping up to RcvArray */
	u8 __iomem *kregbase1;
	resource_size_t physaddr;

	/* second uncached mapping from RcvArray to pio send buffers */
	u8 __iomem *kregbase2;
	/* for detecting offset above kregbase2 address */
	u32 base2_start;

	/* Per VL data. Enough for all VLs but not all elements are set/used. */
	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
	/* send context data */
	struct send_context_info *send_contexts;
	/* map hardware send contexts to software index */
	u8 *hw_to_sw;
	/* spinlock for allocating and releasing send context resources */
	spinlock_t sc_lock;
	/* lock for pio_map */
	spinlock_t pio_map_lock;
	/* Send Context initialization lock. */
	spinlock_t sc_init_lock;
	/* lock for sdma_map */
	spinlock_t                          sde_map_lock;
	/* array of kernel send contexts */
	struct send_context **kernel_send_context;
	/* array of vl maps */
	struct pio_vl_map __rcu *pio_map;
	/* default flags to last descriptor */
	u64 default_desc1;

	/* fields common to all SDMA engines */

	volatile __le64                    *sdma_heads_dma; /* DMA'ed by chip */
	dma_addr_t                          sdma_heads_phys;
	void                               *sdma_pad_dma; /* DMA'ed by chip */
	dma_addr_t                          sdma_pad_phys;
	/* for deallocation */
	size_t                              sdma_heads_size;
	/* num used */
	u32                                 num_sdma;
	/* array of engines sized by num_sdma */
	struct sdma_engine                 *per_sdma;
	/* array of vl maps */
	struct sdma_vl_map __rcu           *sdma_map;
	/* SPC freeze waitqueue and variable */
	wait_queue_head_t		  sdma_unfreeze_wq;
	atomic_t			  sdma_unfreeze_count;

	u32 lcb_access_count;		/* count of LCB users */

	/* common data between shared ASIC HFIs in this OS */
	struct hfi1_asic_data *asic_data;

	/* mem-mapped pointer to base of PIO buffers */
	void __iomem *piobase;
	/*
	 * write-combining mem-mapped pointer to base of RcvArray
	 * memory.
	 */
	void __iomem *rcvarray_wc;
	/*
	 * credit return base - a per-NUMA range of DMA address that
	 * the chip will use to update the per-context free counter
	 */
	struct credit_return_base *cr_base;

	/* send context numbers and sizes for each type */
	struct sc_config_sizes sc_sizes[SC_MAX];

	char *boardname; /* human readable board info */

	u64 ctx0_seq_drop;

	/* reset value */
	u64 z_int_counter;
	u64 z_rcv_limit;
	u64 z_send_schedule;

	u64 __percpu *send_schedule;
	/* number of reserved contexts for netdev usage */
	u16 num_netdev_contexts;
	/* number of receive contexts in use by the driver */
	u32 num_rcv_contexts;
	/* number of pio send contexts in use by the driver */
	u32 num_send_contexts;
	/*
	 * number of ctxts available for PSM open
	 */
	u32 freectxts;
	/* total number of available user/PSM contexts */
	u32 num_user_contexts;
	/* base receive interrupt timeout, in CSR units */
	u32 rcv_intr_timeout_csr;

	spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
	spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
	spinlock_t uctxt_lock; /* protect rcd changes */
	struct mutex dc8051_lock; /* exclusive access to 8051 */
	struct workqueue_struct *update_cntr_wq;
	struct work_struct update_cntr_work;
	/* exclusive access to 8051 memory */
	spinlock_t dc8051_memlock;
	int dc8051_timed_out;	/* remember if the 8051 timed out */
	/*
	 * A page that will hold event notification bitmaps for all
	 * contexts. This page will be mapped into all processes.
	 */
	unsigned long *events;
	/*
	 * per unit status, see also portdata statusp
	 * mapped read-only into user processes so they can get unit and
	 * IB link status cheaply
	 */
	struct hfi1_status *status;

	/* revision register shadow */
	u64 revision;
	/* Base GUID for device (network order) */
	u64 base_guid;

	/* both sides of the PCIe link are gen3 capable */
	u8 link_gen3_capable;
	u8 dc_shutdown;
	/* localbus width (1, 2, 4, 8, 16, 32) from config space */
	u32 lbus_width;
	/* localbus speed in MHz */
	u32 lbus_speed;
	int unit; /* unit # of this chip */
	int node; /* home node of this chip */

	/* save these PCI fields to restore after a reset */
	u32 pcibar0;
	u32 pcibar1;
	u32 pci_rom;
	u16 pci_command;
	u16 pcie_devctl;
	u16 pcie_lnkctl;
	u16 pcie_devctl2;
	u32 pci_msix0;
	u32 pci_tph2;

	/*
	 * ASCII serial number, from flash, large enough for original
	 * all digit strings, and longer serial number format
	 */
	u8 serial[SERIAL_MAX];
	/* human readable board version */
	u8 boardversion[BOARD_VERS_MAX];
	u8 lbus_info[32]; /* human readable localbus info */
	/* chip major rev, from CceRevision */
	u8 majrev;
	/* chip minor rev, from CceRevision */
	u8 minrev;
	/* hardware ID */
	u8 hfi1_id;
	/* implementation code */
	u8 icode;
	/* vAU of this device */
	u8 vau;
	/* vCU of this device */
	u8 vcu;
	/* link credits of this device */
	u16 link_credits;
	/* initial vl15 credits to use */
	u16 vl15_init;

	/*
	 * Cached value for vl15buf, read during verify cap interrupt. VL15
	 * credits are to be kept at 0 and set when handling the link-up
	 * interrupt. This removes the possibility of receiving VL15 MAD
	 * packets before this HFI is ready.
	 */
	u16 vl15buf_cached;

	/* Misc small ints */
	u8 n_krcv_queues;
	u8 qos_shift;

	u16 irev;	/* implementation revision */
	u32 dc8051_ver; /* 8051 firmware version */

	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
	struct platform_config platform_config;
	struct platform_config_cache pcfg_cache;

	struct diag_client *diag_client;

	/* general interrupt: mask of handled interrupts */
	u64 gi_mask[CCE_NUM_INT_CSRS];

	struct rcv_array_data rcv_entries;

	/* cycle length of PS* counters in HW (in picoseconds) */
	u16 psxmitwait_check_rate;

	/*
	 * 64 bit synthetic counters
	 */
	struct timer_list synth_stats_timer;

	/* MSI-X information */
	struct hfi1_msix_info msix_info;

	/*
	 * device counters
	 */
	char *cntrnames;
	size_t cntrnameslen;
	size_t ndevcntrs;
	u64 *cntrs;
	u64 *scntrs;

	/*
	 * remembered values for synthetic counters
	 */
	u64 last_tx;
	u64 last_rx;

	/*
	 * per-port counters
	 */
	size_t nportcntrs;
	char *portcntrnames;
	size_t portcntrnameslen;

	struct err_info_rcvport err_info_rcvport;
	struct err_info_constraint err_info_rcv_constraint;
	struct err_info_constraint err_info_xmit_constraint;

	atomic_t drop_packet;
	bool do_drop;
	u8 err_info_uncorrectable;
	u8 err_info_fmconfig;

	/*
	 * Software counters for the status bits defined by the
	 * associated error status registers
	 */
	u64 cce_err_status_cnt[NUM_CCE_ERR_STATUS_COUNTERS];
	u64 rcv_err_status_cnt[NUM_RCV_ERR_STATUS_COUNTERS];
	u64 misc_err_status_cnt[NUM_MISC_ERR_STATUS_COUNTERS];
	u64 send_pio_err_status_cnt[NUM_SEND_PIO_ERR_STATUS_COUNTERS];
	u64 send_dma_err_status_cnt[NUM_SEND_DMA_ERR_STATUS_COUNTERS];
	u64 send_egress_err_status_cnt[NUM_SEND_EGRESS_ERR_STATUS_COUNTERS];
	u64 send_err_status_cnt[NUM_SEND_ERR_STATUS_COUNTERS];

	/* Software counter that spans all contexts */
	u64 sw_ctxt_err_status_cnt[NUM_SEND_CTXT_ERR_STATUS_COUNTERS];
	/* Software counter that spans all DMA engines */
	u64 sw_send_dma_eng_err_status_cnt[
		NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS];
	/* Software counter that aggregates all cce_err_status errors */
	u64 sw_cce_err_status_aggregate;
	/* Software counter that aggregates all bypass packet rcv errors */
	u64 sw_rcv_bypass_packet_errors;

	/* Save the enabled LCB error bits */
	u64 lcb_err_en;
	struct cpu_mask_set *comp_vect;
	int *comp_vect_mappings;
	u32 comp_vect_possible_cpus;

	/*
	 * Capability to have different send engines simply by changing a
	 * pointer value.
	 */
	send_routine process_pio_send ____cacheline_aligned_in_smp;
	send_routine process_dma_send;
	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
				u64 pbc, const void *from, size_t count);
	int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
				     struct hfi1_vnic_vport_info *vinfo,
				     struct sk_buff *skb, u64 pbc, u8 plen);
	/* hfi1_pportdata, points to array of (physical) port-specific
	 * data structs, indexed by pidx (0..n-1)
	 */
	struct hfi1_pportdata *pport;
	/* receive context data */
	struct hfi1_ctxtdata **rcd;
	u64 __percpu *int_counter;
	/* verbs tx opcode stats */
	struct hfi1_opcode_stats_perctx __percpu *tx_opstats;
	/* device (not port) flags, basically device capabilities */
	u16 flags;
	/* Number of physical ports available */
	u8 num_pports;
	/* Lowest context number which can be used by user processes or VNIC */
	u8 first_dyn_alloc_ctxt;
	/* adding a new field here would make it part of this cacheline */

	/* seqlock for sc2vl */
	seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
	u64 sc2vl[4];
	u64 __percpu *rcv_limit;
	/* adding a new field here would make it part of this cacheline */

	/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
	u8 oui1;
	u8 oui2;
	u8 oui3;

	/* Timer and counter used to detect RcvBufOvflCnt changes */
	struct timer_list rcverr_timer;

	wait_queue_head_t event_queue;

	/* receive context tail dummy address */
	__le64 *rcvhdrtail_dummy_kvaddr;
	dma_addr_t rcvhdrtail_dummy_dma;

	u32 rcv_ovfl_cnt;
	/* Serialize ASPM enable/disable between multiple verbs contexts */
	spinlock_t aspm_lock;
	/* Number of verbs contexts which have disabled ASPM */
	atomic_t aspm_disabled_cnt;
	/* Keeps track of user space clients */
	atomic_t user_refcount;
	/* Used to wait for outstanding user space clients before dev removal */
	struct completion user_comp;

	bool eprom_available;	/* true if EPROM is available for this device */
	bool aspm_supported;	/* Does HW support ASPM */
	bool aspm_enabled;	/* ASPM state: enabled/disabled */
	struct rhashtable *sdma_rht;

	/* vnic data */
	struct hfi1_vnic_data vnic;
	/* Lock to protect IRQ SRC register access */
	spinlock_t irq_src_lock;
	int vnic_num_vports;
	struct net_device *dummy_netdev;
	struct hfi1_affinity_node *affinity_entry;

	/* Keeps track of IPoIB RSM rule users */
	atomic_t ipoib_rsm_usr_num;
};

/* 8051 firmware version helper */
#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
#define dc8051_ver_patch(a) ((a) & 0x0000ff)
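
/*
 * Worked example: dc8051_ver(1, 27, 0) packs to 0x011b00;
 * dc8051_ver_maj(), dc8051_ver_min() and dc8051_ver_patch() recover
 * 1, 27 and 0 from that value.
 */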

/* f_put_tid types */
#define PT_EXPECTED       0
#define PT_EAGER          1
#define PT_INVALID_FLUSH  2
#define PT_INVALID        3

struct tid_rb_node;
struct mmu_rb_node;
struct mmu_rb_handler;

/* Private data for file operations */
struct hfi1_filedata {
	struct srcu_struct pq_srcu;
	struct hfi1_devdata *dd;
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_user_sdma_comp_q *cq;
	/* update side lock for SRCU */
	spinlock_t pq_rcu_lock;
	struct hfi1_user_sdma_pkt_q __rcu *pq;
	u16 subctxt;
	/* for cpu affinity; -1 if none */
	int rec_cpu_num;
	u32 tid_n_pinned;
	bool use_mn;
	struct tid_rb_node **entry_to_rb;
	spinlock_t tid_lock; /* protect tid_[limit,used] counters */
	u32 tid_limit;
	u32 tid_used;
	u32 *invalid_tids;
	u32 invalid_tid_idx;
	/* protect invalid_tids array and invalid_tid_idx */
	spinlock_t invalid_lock;
};

extern struct xarray hfi1_dev_table;
struct hfi1_devdata *hfi1_lookup(int unit);

static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt)
{
	return (uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
		HFI1_MAX_SHARED_CTXTS;
}
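
/*
 * Worked example (illustrative values): assuming HFI1_MAX_SHARED_CTXTS
 * is 8 and first_dyn_alloc_ctxt is 3, context 5 gets offset
 * (5 - 3) * 8 == 16, i.e. the third group of 8 per-subcontext event
 * slots.
 */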

int hfi1_init(struct hfi1_devdata *dd, int reinit);
int hfi1_count_active_units(void);

int hfi1_diag_add(struct hfi1_devdata *dd);
void hfi1_diag_remove(struct hfi1_devdata *dd);
void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup);

void handle_user_interrupt(struct hfi1_ctxtdata *rcd);

int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd);
int hfi1_create_kctxts(struct hfi1_devdata *dd);
int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
			 struct hfi1_ctxtdata **rcd);
void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd);
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
			 struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
int hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
						 u16 ctxt);
struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget);
int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget);
void set_all_slowpath(struct hfi1_devdata *dd);

extern const struct pci_device_id hfi1_pci_tbl[];
void hfi1_make_ud_req_9B(struct rvt_qp *qp,
			 struct hfi1_pkt_state *ps,
			 struct rvt_swqe *wqe);

void hfi1_make_ud_req_16B(struct rvt_qp *qp,
			  struct hfi1_pkt_state *ps,
			  struct rvt_swqe *wqe);

/* receive packet handler dispositions */
#define RCV_PKT_OK      0x0 /* keep going */
#define RCV_PKT_LIMIT   0x1 /* stop, hit limit, start thread */
#define RCV_PKT_DONE    0x2 /* stop, no more packets detected */

/**
 * hfi1_rcd_head - accessor for rcd head
 * @rcd: the context
 */
static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd)
{
	return rcd->head;
}

/**
 * hfi1_set_rcd_head - accessor to set rcd head
 * @rcd: the context
 * @head: the new head
 */
static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head)
{
	rcd->head = head;
}

/* calculate the current RHF address */
static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
{
	return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
}

/* return DMA_RTAIL configuration */
static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd)
{
	return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL);
}

/**
 * hfi1_seq_incr_wrap - wrapping increment for sequence
 * @seq: the current sequence number
 *
 * Returns: the incremented seq
 */
static inline u8 hfi1_seq_incr_wrap(u8 seq)
{
	if (++seq > RHF_MAX_SEQ)
		seq = 1;
	return seq;
}
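
/*
 * Note the wrap goes back to 1, not 0, so a valid sequence number is
 * never 0: the counter runs 1, 2, ..., RHF_MAX_SEQ, 1, 2, ...
 */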

/**
 * hfi1_seq_cnt - return seq_cnt member
 * @rcd: the receive context
 *
 * Return seq_cnt member
 */
static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd)
{
	return rcd->seq_cnt;
}

/**
 * hfi1_set_seq_cnt - set seq_cnt member
 * @rcd: the receive context
 * @cnt: the new seq_cnt value
 *
 * Set seq_cnt member
 */
static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt)
{
	rcd->seq_cnt = cnt;
}

/**
 * last_rcv_seq - check for the last packet
 * @rcd: the receive context
 * @seq: sequence
 *
 * Return true if @seq no longer matches the context's expected
 * sequence count, i.e. the last packet has been received.
 */
static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq)
{
	return seq != rcd->seq_cnt;
}

/**
 * hfi1_seq_incr - increment context sequence number
 * @rcd: the receive context
 * @seq: the current sequence number
 *
 * Returns: true if this was the last packet
 */
static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq)
{
	rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt);
	return last_rcv_seq(rcd, seq);
}

/**
 * get_hdrqentsize - return hdrq entry size
 * @rcd: the receive context
 */
static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd)
{
	return rcd->rcvhdrqentsize;
}

/**
 * get_hdrq_cnt - return hdrq count
 * @rcd: the receive context
 */
static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd)
{
	return rcd->rcvhdrq_cnt;
}

/**
 * hfi1_is_slowpath - check if this context is slow path
 * @rcd: the receive context
 */
static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd)
{
	return rcd->do_interrupt == rcd->slow_handler;
}

/**
 * hfi1_is_fastpath - check if this context is fast path
 * @rcd: the receive context
 */
static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd)
{
	if (rcd->ctxt == HFI1_CTRL_CTXT)
		return false;

	return rcd->do_interrupt == rcd->fast_handler;
}

/**
 * hfi1_set_fast - change to the fast handler
 * @rcd: the receive context
 */
static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd)
{
	if (unlikely(!rcd))
		return;
	if (unlikely(!hfi1_is_fastpath(rcd)))
		rcd->do_interrupt = rcd->fast_handler;
}

int hfi1_reset_device(int);

void receive_interrupt_work(struct work_struct *work);

/* extract service channel from header and rhf */
static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf)
{
	return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4);
}

#define HFI1_JKEY_WIDTH       16
#define HFI1_JKEY_MASK        (BIT(16) - 1)
#define HFI1_ADMIN_JKEY_RANGE 32

/*
 * J_KEYs are split and allocated in the following groups:
 *   0 - 31    - users with administrator privileges
 *  32 - 63    - kernel protocols using KDETH packets
 *  64 - 65535 - all other users using KDETH packets
 */
static inline u16 generate_jkey(kuid_t uid)
{
	u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK;

	if (capable(CAP_SYS_ADMIN))
		jkey &= HFI1_ADMIN_JKEY_RANGE - 1;
	else if (jkey < 64)
		jkey |= BIT(HFI1_JKEY_WIDTH - 1);

	return jkey;
}
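
/*
 * Worked examples (illustrative uids): a non-privileged uid of 1000
 * maps to J_KEY 1000 (already >= 64, left as is); a non-privileged uid
 * that masks to 40 would land in the kernel range, so bit 15 is set,
 * giving 0x8028; a CAP_SYS_ADMIN opener is folded into the 0 - 31
 * administrator range.
 */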
1684
1685/*
1686 * active_egress_rate
1687 *
1688 * returns the active egress rate in units of [10^6 bits/sec]
1689 */
1690static inline u32 active_egress_rate(struct hfi1_pportdata *ppd)
1691{
1692	u16 link_speed = ppd->link_speed_active;
1693	u16 link_width = ppd->link_width_active;
1694	u32 egress_rate;
1695
1696	if (link_speed == OPA_LINK_SPEED_25G)
1697		egress_rate = 25000;
1698	else /* assume OPA_LINK_SPEED_12_5G */
1699		egress_rate = 12500;
1700
1701	switch (link_width) {
1702	case OPA_LINK_WIDTH_4X:
1703		egress_rate *= 4;
1704		break;
1705	case OPA_LINK_WIDTH_3X:
1706		egress_rate *= 3;
1707		break;
1708	case OPA_LINK_WIDTH_2X:
1709		egress_rate *= 2;
1710		break;
1711	default:
1712		/* assume IB_WIDTH_1X */
1713		break;
1714	}
1715
1716	return egress_rate;
1717}

/*
 * egress_cycles
 *
 * Returns the number of 'fabric clock cycles' to egress a packet
 * of length 'len' bytes, at 'rate' Mbit/s. Since the fabric clock
 * rate is (approximately) 805 MHz, the units of the returned value
 * are (1/805 MHz).
 */
static inline u32 egress_cycles(u32 len, u32 rate)
{
	u32 cycles;

	/*
	 * cycles is:
	 *
	 *          (length) [bits] / (rate) [bits/sec]
	 *  ---------------------------------------------------
	 *  fabric_clock_period == 1 /(805 * 10^6) [cycles/sec]
	 */

	cycles = len * 8; /* bits */
	cycles *= 805;
	cycles /= rate;

	return cycles;
}
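
/*
 * Worked example (illustrative): with the 100000 Mbit/s rate computed
 * above, a 4096-byte packet takes
 *
 *	egress_cycles(4096, 100000) == (4096 * 8 * 805) / 100000 ~= 263
 *
 * fabric clock cycles.  Note that the intermediate product must fit in
 * a u32, which holds for any MTU-sized length (len * 8 * 805 overflows
 * only beyond roughly 650 KB).
 */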

void set_link_ipg(struct hfi1_pportdata *ppd);
void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
		  u32 rqpn, u8 svc_type);
void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
		u16 pkey, u32 slid, u32 dlid, u8 sc5,
		const struct ib_grh *old_grh);
void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
		    u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
		    u8 sc5, const struct ib_grh *old_grh);
typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp,
				u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
				u8 sc5, const struct ib_grh *old_grh);

#define PKEY_CHECK_INVALID -1
int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey,
		      u8 sc5, int8_t s_pkey_index);

#define PACKET_EGRESS_TIMEOUT 350
static inline void pause_for_credit_return(struct hfi1_devdata *dd)
{
	/* Pause at least 1us, to ensure chip returns all credits */
	u32 usec = cclock_to_ns(dd, PACKET_EGRESS_TIMEOUT) / 1000;

	udelay(usec ? usec : 1);
}
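
/*
 * Worked example (illustrative): if cclock_to_ns(dd, 350) comes out
 * below 1000 ns, the integer division yields usec == 0 and the
 * "usec ? usec : 1" expression still enforces the 1 us minimum pause.
 */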

/**
 * sc_to_vlt() - reverse lookup sc to vl
 * @dd: devdata
 * @sc5: 5 bit sc
 */
static inline u8 sc_to_vlt(struct hfi1_devdata *dd, u8 sc5)
{
	unsigned int seq;
	u8 rval;

	if (sc5 >= OPA_MAX_SCS)
		return (u8)(0xff);

	do {
		seq = read_seqbegin(&dd->sc2vl_lock);
		rval = *(((u8 *)dd->sc2vl) + sc5);
	} while (read_seqretry(&dd->sc2vl_lock, seq));

	return rval;
}
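
/*
 * Example (illustrative) caller pattern: the seqlock retry loop above
 * makes the lookup safe against concurrent sc2vl table updates, so a
 * caller only needs to handle the "no mapping" sentinel:
 *
 *	u8 vl = sc_to_vlt(dd, sc5);
 *
 *	if (vl == 0xff)
 *		return;        (invalid/unmapped service class)
 */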

#define PKEY_MEMBER_MASK 0x8000
#define PKEY_LOW_15_MASK 0x7fff

/*
 * ingress_pkey_matches_entry - return 1 if the pkey matches ent (ent
 * being an entry from the ingress partition key table), return 0
 * otherwise. Use the matching criteria for ingress partition keys
 * specified in the OPAv1 spec., section 9.10.14.
 */
static inline int ingress_pkey_matches_entry(u16 pkey, u16 ent)
{
	u16 mkey = pkey & PKEY_LOW_15_MASK;
	u16 ment = ent & PKEY_LOW_15_MASK;

	if (mkey == ment) {
		/*
		 * If pkey[15] is clear (limited partition member),
		 * match only if the corresponding table entry is a
		 * full member (bit 15 set); two limited members
		 * never match.
		 */
		if (!(pkey & PKEY_MEMBER_MASK))
			return !!(ent & PKEY_MEMBER_MASK);
		return 1;
	}
	return 0;
}
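
/*
 * Worked example (illustrative): pkey 0x0001 (limited member of
 * partition 1) matches table entry 0x8001 (full member) and returns 1,
 * but against entry 0x0001 (also limited) it returns 0.  A full-member
 * pkey 0x8001 matches either form of the entry.
 */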

/*
 * ingress_pkey_table_search - search the entire pkey table for
 * an entry which matches 'pkey'. return 0 if a match is found,
 * and 1 otherwise.
 */
static inline int ingress_pkey_table_search(struct hfi1_pportdata *ppd,
					    u16 pkey)
{
	int i;

	for (i = 0; i < MAX_PKEY_VALUES; i++) {
		if (ingress_pkey_matches_entry(pkey, ppd->pkeys[i]))
			return 0;
	}
	return 1;
}

/*
 * ingress_pkey_table_fail - record a failure of ingress pkey validation,
 * i.e., increment port_rcv_constraint_errors for the port, and record
 * the 'error info' for this failure.
 */
static inline void ingress_pkey_table_fail(struct hfi1_pportdata *ppd,
					   u16 pkey, u32 slid)
{
	struct hfi1_devdata *dd = ppd->dd;

	incr_cntr64(&ppd->port_rcv_constraint_errors);
	if (!(dd->err_info_rcv_constraint.status & OPA_EI_STATUS_SMASK)) {
		dd->err_info_rcv_constraint.status |= OPA_EI_STATUS_SMASK;
		dd->err_info_rcv_constraint.slid = slid;
		dd->err_info_rcv_constraint.pkey = pkey;
	}
}

/*
 * ingress_pkey_check - Return 0 if the ingress pkey is valid, return 1
 * otherwise. Use the criteria in the OPAv1 spec, section 9.10.14. idx
 * is a hint as to the best place in the partition key table to begin
 * searching. This function should not be called on the data path for
 * performance reasons: there, the pkey check is expected to be done by
 * hardware, and rcv_pkey_check() should be called instead.
 */
static inline int ingress_pkey_check(struct hfi1_pportdata *ppd, u16 pkey,
				     u8 sc5, u8 idx, u32 slid, bool force)
{
	if (!(force) && !(ppd->part_enforce & HFI1_PART_ENFORCE_IN))
		return 0;

	/* If SC15, pkey[0:14] must be 0x7fff */
	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
		goto bad;

	/* Is the pkey = 0x0, or 0x8000? */
	if ((pkey & PKEY_LOW_15_MASK) == 0)
		goto bad;

	/* The most likely matching pkey has index 'idx' */
	if (ingress_pkey_matches_entry(pkey, ppd->pkeys[idx]))
		return 0;

	/* no match - try the whole table */
	if (!ingress_pkey_table_search(ppd, pkey))
		return 0;

bad:
	ingress_pkey_table_fail(ppd, pkey, slid);
	return 1;
}

/*
 * rcv_pkey_check - Return 0 if the ingress pkey is valid, return 1
 * otherwise. It only ensures the pkey is valid for QP0. This function
 * should be called on the data path instead of ingress_pkey_check()
 * because there the pkey check is done by hardware (except for QP0).
 */
static inline int rcv_pkey_check(struct hfi1_pportdata *ppd, u16 pkey,
				 u8 sc5, u16 slid)
{
	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_IN))
		return 0;

	/* If SC15, pkey[0:14] must be 0x7fff */
	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
		goto bad;

	return 0;
bad:
	ingress_pkey_table_fail(ppd, pkey, slid);
	return 1;
}
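
/*
 * Example (illustrative) of choosing between the two checks: a receive
 * handler on the data path relies on the hardware check and only calls
 * the cheap variant:
 *
 *	if (rcv_pkey_check(ppd, pkey, sc5, slid))
 *		goto drop;     (constraint error already recorded)
 */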

/* MTU handling */

/* MTU enumeration, 256-4k match IB */
#define OPA_MTU_0     0
#define OPA_MTU_256   1
#define OPA_MTU_512   2
#define OPA_MTU_1024  3
#define OPA_MTU_2048  4
#define OPA_MTU_4096  5

u32 lrh_max_header_bytes(struct hfi1_devdata *dd);
int mtu_to_enum(u32 mtu, int default_if_bad);
u16 enum_to_mtu(int mtu);
static inline int valid_ib_mtu(unsigned int mtu)
{
	return mtu == 256 || mtu == 512 ||
		mtu == 1024 || mtu == 2048 ||
		mtu == 4096;
}

static inline int valid_opa_max_mtu(unsigned int mtu)
{
	return mtu >= 2048 &&
		(valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240);
}
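
/*
 * Worked example (illustrative): valid_opa_max_mtu() accepts 2048,
 * 4096, 8192 and 10240 bytes, while a valid IB MTU such as 1024 is
 * rejected as an OPA maximum because it falls below the 2048-byte
 * floor.
 */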

int set_mtu(struct hfi1_pportdata *ppd);

int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc);
void hfi1_disable_after_error(struct hfi1_devdata *dd);
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit);
int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode);

int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t);
int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t);

void set_up_vau(struct hfi1_devdata *dd, u8 vau);
void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf);
void reset_link_credits(struct hfi1_devdata *dd);
void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);

int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);

static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
{
	return ppd->dd;
}

static inline struct hfi1_devdata *dd_from_dev(struct hfi1_ibdev *dev)
{
	return container_of(dev, struct hfi1_devdata, verbs_dev);
}

static inline struct hfi1_devdata *dd_from_ibdev(struct ib_device *ibdev)
{
	return dd_from_dev(to_idev(ibdev));
}

static inline struct hfi1_pportdata *ppd_from_ibp(struct hfi1_ibport *ibp)
{
	return container_of(ibp, struct hfi1_pportdata, ibport_data);
}

static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi)
{
	return container_of(rdi, struct hfi1_ibdev, rdi);
}

static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
{
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	unsigned int pidx = port - 1; /* IB numbers ports from 1, hdw from 0 */

	WARN_ON(pidx >= dd->num_pports);
	return &dd->pport[pidx].ibport_data;
}

static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd)
{
	return &rcd->ppd->ibport_data;
}

/**
 * hfi1_may_ecn - Check whether FECN or BECN processing should be done
 * @pkt: the packet to be evaluated
 *
 * Check whether the FECN or BECN bits in the packet's header are
 * enabled, depending on packet type.
 *
 * This function only checks for FECN and BECN bits. Additional checks
 * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to
 * ensure correct handling.
 */
static inline bool hfi1_may_ecn(struct hfi1_packet *pkt)
{
	bool fecn, becn;

	if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
		fecn = hfi1_16B_get_fecn(pkt->hdr);
		becn = hfi1_16B_get_becn(pkt->hdr);
	} else {
		fecn = ib_bth_get_fecn(pkt->ohdr);
		becn = ib_bth_get_becn(pkt->ohdr);
	}
	return fecn || becn;
}

bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
			       bool prescan);
static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt)
{
	bool do_work;

	do_work = hfi1_may_ecn(pkt);
	if (unlikely(do_work))
		return hfi1_process_ecn_slowpath(qp, pkt, false);
	return false;
}

/*
 * Return the indexed PKEY from the port PKEY table.
 */
static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned int index)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u16 ret;

	if (index >= ARRAY_SIZE(ppd->pkeys))
		ret = 0;
	else
		ret = ppd->pkeys[index];

	return ret;
}

/*
 * Return the indexed GUID from the port GUIDs table.
 */
static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	WARN_ON(index >= HFI1_GUIDS_PER_PORT);
	return cpu_to_be64(ppd->guids[index]);
}

/*
 * Called by readers of cc_state only, must call under rcu_read_lock().
 */
static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
{
	return rcu_dereference(ppd->cc_state);
}

/*
 * Called by writers of cc_state only, must call under cc_state_lock.
 */
static inline
struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
{
	return rcu_dereference_protected(ppd->cc_state,
					 lockdep_is_held(&ppd->cc_state_lock));
}
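
/*
 * Example (illustrative) reader pattern for cc_state; the RCU read
 * lock must cover every dereference of the returned pointer:
 *
 *	rcu_read_lock();
 *	cc_state = get_cc_state(ppd);
 *	if (cc_state)
 *		read the congestion table/settings through cc_state
 *	rcu_read_unlock();
 */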

/*
 * values for dd->flags (_device_ related flags)
 */
#define HFI1_INITTED           0x1    /* chip and driver up and initted */
#define HFI1_PRESENT           0x2    /* chip accesses can be done */
#define HFI1_FROZEN            0x4    /* chip in SPC freeze */
#define HFI1_HAS_SDMA_TIMEOUT  0x8
#define HFI1_HAS_SEND_DMA      0x10   /* Supports Send DMA */
#define HFI1_FORCED_FREEZE     0x80   /* driver forced freeze mode */
#define HFI1_SHUTDOWN          0x100  /* device is shutting down */

/* IB dword length mask in PBC (lower 11 bits); same for all chips */
#define HFI1_PBC_LENGTH_MASK                     ((1 << 11) - 1)

/* ctxt_flag bit offsets */
/* base context has not finished initializing */
#define HFI1_CTXT_BASE_UNINIT 1
/* base context initialization failed */
#define HFI1_CTXT_BASE_FAILED 2
/* waiting for a packet to arrive */
#define HFI1_CTXT_WAITING_RCV 3
/* waiting for an urgent packet to arrive */
#define HFI1_CTXT_WAITING_URG 4

/* free up any allocated data at close */
int hfi1_init_dd(struct hfi1_devdata *dd);
void hfi1_free_devdata(struct hfi1_devdata *dd);

/* LED beaconing functions */
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
			     unsigned int timeoff);
void shutdown_led_override(struct hfi1_pportdata *ppd);

#define HFI1_CREDIT_RETURN_RATE (100)

/*
 * The number of words for the KDETH protocol field.  If this is
 * larger than the actual field used, then part of the payload
 * will be in the header.
 *
 * Optimally, we want this sized so that a typical case will
 * use full cache lines.  The typical local KDETH header would
 * be:
 *
 *	Bytes	Field
 *	  8	LRH
 *	 12	BTH
 *	 ??	KDETH
 *	  8	RHF
 *	---
 *	 28 + KDETH
 *
 * For a 64-byte cache line, KDETH would need to be 36 bytes or 9 DWORDS
 */
#define DEFAULT_RCVHDRSIZE 9

/*
 * Maximal header byte count:
 *
 *	Bytes	Field
 *	  8	LRH
 *	 40	GRH (optional)
 *	 12	BTH
 *	 ??	KDETH
 *	  8	RHF
 *	---
 *	 68 + KDETH
 *
 * We also want to maintain a cache line alignment to assist DMA'ing
 * of the header bytes.  Round up to a good size.
 */
#define DEFAULT_RCVHDR_ENTSIZE 32
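
/*
 * Worked example (illustrative): 32 DWORDs is 128 bytes, i.e. two
 * 64-byte cache lines, which comfortably covers the 68 + 36 = 104 byte
 * worst case above (GRH present plus a 9-DWORD KDETH field).
 */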

bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
			u32 nlocked, u32 npages);
int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr,
			    size_t npages, bool writable, struct page **pages);
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
			     size_t npages, bool dirty);

/**
 * hfi1_rcvhdrtail_kvaddr - return tail kvaddr
 * @rcd: the receive context
 */
static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd)
{
	return (__le64 *)rcd->rcvhdrtail_kvaddr;
}

static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
	u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd);

	if (kv)
		*kv = 0ULL;
}

static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
	/*
	 * volatile because it's a DMA target from the chip, routine is
	 * inlined, and don't want register caching or reordering.
	 */
	return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd));
}

static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd)
{
	if (likely(!rcd->rcvhdrtail_kvaddr)) {
		u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));

		return !last_rcv_seq(rcd, seq);
	}
	return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd);
}
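
/*
 * Example (illustrative) polling sketch, assuming a hypothetical
 * per-packet handler that advances the context head elsewhere:
 *
 *	while (hfi1_packet_present(rcd))
 *		handle_one_packet(rcd);    (hypothetical helper)
 */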

/*
 * sysfs interface.
 */

extern const char ib_hfi1_version[];
extern const struct attribute_group ib_hfi1_attr_group;

int hfi1_device_create(struct hfi1_devdata *dd);
void hfi1_device_remove(struct hfi1_devdata *dd);

int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
			   struct kobject *kobj);
int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd);
void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);

int hfi1_pcie_init(struct hfi1_devdata *dd);
void hfi1_pcie_cleanup(struct pci_dev *pdev);
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd);
int pcie_speeds(struct hfi1_devdata *dd);
int restore_pci_variables(struct hfi1_devdata *dd);
int save_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
void tune_pcie_caps(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
int get_platform_config_field(struct hfi1_devdata *dd,
			      enum platform_config_table_type_encoding
			      table_type, int table_index, int field_index,
			      u32 *data, u32 len);

struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);

/*
 * Flush write combining store buffers (if present) and perform a write
 * barrier.
 */
static inline void flush_wc(void)
{
	asm volatile("sfence" : : : "memory");
}
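
/*
 * Example (illustrative) use: after copying a packet into a
 * write-combining mapped PIO send buffer, call flush_wc() before any
 * subsequent CSR write so the WC buffers are pushed out in order:
 *
 *	memcpy_toio(piobuf, hdr, hdrlen);
 *	flush_wc();
 */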

void handle_eflags(struct hfi1_packet *packet);
void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd);

/* global module parameter variables */
extern unsigned int hfi1_max_mtu;
extern unsigned int hfi1_cu;
extern unsigned int user_credit_return_threshold;
extern int num_user_contexts;
extern unsigned long n_krcvqs;
extern uint krcvqs[];
extern int krcvqsset;
extern uint loopback;
extern uint quick_linkup;
extern uint rcv_intr_timeout;
extern uint rcv_intr_count;
extern uint rcv_intr_dynamic;
extern ushort link_crc_mask;

extern struct mutex hfi1_mutex;

/* Number of seconds before our card status check... */
#define STATUS_TIMEOUT 60

#define DRIVER_NAME		"hfi1"
#define HFI1_USER_MINOR_BASE     0
#define HFI1_TRACE_MINOR         127
#define HFI1_NMINORS             255

#define PCI_VENDOR_ID_INTEL 0x8086
#define PCI_DEVICE_ID_INTEL0 0x24f0
#define PCI_DEVICE_ID_INTEL1 0x24f1

#define HFI1_PKT_USER_SC_INTEGRITY					    \
	(SEND_CTXT_CHECK_ENABLE_DISALLOW_NON_KDETH_PACKETS_SMASK	    \
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK		\
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_SMASK		    \
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_GRH_SMASK)

#define HFI1_PKT_KERNEL_SC_INTEGRITY					    \
	(SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK)

static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
						  u16 ctxt_type)
{
	u64 base_sc_integrity;

	/* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
	if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
		return 0;

	base_sc_integrity =
	SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
#ifndef CONFIG_FAULT_INJECTION
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK
#endif
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK
	| SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_SMASK
	| SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK
	| SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
	| SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK
	| SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK
	| SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK
	| SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;

	if (ctxt_type == SC_USER)
		base_sc_integrity |=
#ifndef CONFIG_FAULT_INJECTION
			SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
#endif
			HFI1_PKT_USER_SC_INTEGRITY;
	else
		base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;

	/* turn on send-side job key checks if !A0 */
	if (!is_ax(dd))
		base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;

	return base_sc_integrity;
}
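
/*
 * Example (illustrative): the computed mask is meant to be programmed
 * into the per-context SEND_CTXT_CHECK_ENABLE CSR; a hypothetical call
 * site (the real one lives in the PIO code) would look like:
 *
 *	write_kctxt_csr(dd, hw_context, SEND_CTXT_CHECK_ENABLE,
 *			hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
 */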

static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
{
	u64 base_sdma_integrity;

	/* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
	if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
		return 0;

	base_sdma_integrity =
	SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
	| SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK
	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK
	| SEND_DMA_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK
	| SEND_DMA_CHECK_ENABLE_DISALLOW_RAW_SMASK
	| SEND_DMA_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK
	| SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
	| SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK
	| SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK
	| SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK
	| SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK;

	if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
		base_sdma_integrity |=
		SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK;

	/* turn on send-side job key checks if !A0 */
	if (!is_ax(dd))
		base_sdma_integrity |=
			SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;

	return base_sdma_integrity;
}

#define dd_dev_emerg(dd, fmt, ...) \
	dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
		  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)

#define dd_dev_err(dd, fmt, ...) \
	dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
		rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)

#define dd_dev_err_ratelimited(dd, fmt, ...) \
	dev_err_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
			    rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
			    ##__VA_ARGS__)

#define dd_dev_warn(dd, fmt, ...) \
	dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
		 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)

#define dd_dev_warn_ratelimited(dd, fmt, ...) \
	dev_warn_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
			     rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
			     ##__VA_ARGS__)

#define dd_dev_info(dd, fmt, ...) \
	dev_info(&(dd)->pcidev->dev, "%s: " fmt, \
		 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)

#define dd_dev_info_ratelimited(dd, fmt, ...) \
	dev_info_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
			     rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
			     ##__VA_ARGS__)

#define dd_dev_dbg(dd, fmt, ...) \
	dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \
		rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)

#define hfi1_dev_porterr(dd, port, fmt, ...) \
	dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
		rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__)

/*
 * this is used for formatting hw error messages...
 */
struct hfi1_hwerror_msgs {
	u64 mask;
	const char *msg;
	size_t sz;
};

/* in intr.c... */
void hfi1_format_hwerrors(u64 hwerrs,
			  const struct hfi1_hwerror_msgs *hwerrmsgs,
			  size_t nhwerrmsgs, char *msg, size_t lmsg);

#define USER_OPCODE_CHECK_VAL 0xC0
#define USER_OPCODE_CHECK_MASK 0xC0
#define OPCODE_CHECK_VAL_DISABLED 0x0
#define OPCODE_CHECK_MASK_DISABLED 0x0

static inline void hfi1_reset_cpu_counters(struct hfi1_devdata *dd)
{
	struct hfi1_pportdata *ppd;
	int i;

	dd->z_int_counter = get_all_cpu_total(dd->int_counter);
	dd->z_rcv_limit = get_all_cpu_total(dd->rcv_limit);
	dd->z_send_schedule = get_all_cpu_total(dd->send_schedule);

	ppd = (struct hfi1_pportdata *)(dd + 1);
	for (i = 0; i < dd->num_pports; i++, ppd++) {
		ppd->ibport_data.rvp.z_rc_acks =
			get_all_cpu_total(ppd->ibport_data.rvp.rc_acks);
		ppd->ibport_data.rvp.z_rc_qacks =
			get_all_cpu_total(ppd->ibport_data.rvp.rc_qacks);
	}
}

/* Control LED state */
static inline void setextled(struct hfi1_devdata *dd, u32 on)
{
	if (on)
		write_csr(dd, DCC_CFG_LED_CNTRL, 0x1F);
	else
		write_csr(dd, DCC_CFG_LED_CNTRL, 0x10);
}

/* return the i2c resource given the target */
static inline u32 i2c_target(u32 target)
{
	return target ? CR_I2C2 : CR_I2C1;
}

/* return the i2c chain chip resource that this HFI uses for QSFP */
static inline u32 qsfp_resource(struct hfi1_devdata *dd)
{
	return i2c_target(dd->hfi1_id);
}
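
/*
 * Worked example (illustrative): an HFI with hfi1_id 0 resolves to
 * CR_I2C1 for its QSFP chain, while hfi1_id 1 resolves to CR_I2C2.
 */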

/* Is this device integrated or discrete? */
static inline bool is_integrated(struct hfi1_devdata *dd)
{
	return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
}

/**
 * hfi1_need_drop - detect need for drop
 * @dd: the device
 *
 * In some cases, the first packet needs to be dropped.
 *
 * Return true if the current packet needs to be dropped and false otherwise.
 */
static inline bool hfi1_need_drop(struct hfi1_devdata *dd)
{
	if (unlikely(dd->do_drop &&
		     atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
		     DROP_PACKET_ON)) {
		dd->do_drop = false;
		return true;
	}
	return false;
}

int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);

#define DD_DEV_ENTRY(dd)       __string(dev, dev_name(&(dd)->pcidev->dev))
#define DD_DEV_ASSIGN(dd)      __assign_str(dev, dev_name(&(dd)->pcidev->dev))

static inline void hfi1_update_ah_attr(struct ib_device *ibdev,
				       struct rdma_ah_attr *attr)
{
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	u32 dlid = rdma_ah_get_dlid(attr);

	/*
	 * Kernel clients may not have set up GRH information;
	 * set that here.
	 */
	ibp = to_iport(ibdev, rdma_ah_get_port_num(attr));
	ppd = ppd_from_ibp(ibp);
	if ((((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) ||
	      (ppd->lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))) &&
	    (dlid != be32_to_cpu(OPA_LID_PERMISSIVE)) &&
	    (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) &&
	    (!(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))) ||
	    (rdma_ah_get_make_grd(attr))) {
		rdma_ah_set_ah_flags(attr, IB_AH_GRH);
		rdma_ah_set_interface_id(attr, OPA_MAKE_ID(dlid));
		rdma_ah_set_subnet_prefix(attr, ibp->rvp.gid_prefix);
	}
}

/*
 * hfi1_check_mcast - Check if the given lid is
 * in the OPA multicast range.
 *
 * The LID might either reside in ah.dlid or might be
 * in the GRH of the address handle as DGID if extended
 * addresses are in use.
 */
static inline bool hfi1_check_mcast(u32 lid)
{
	return ((lid >= opa_get_mcast_base(OPA_MCAST_NR)) &&
		(lid != be32_to_cpu(OPA_LID_PERMISSIVE)));
}

#define opa_get_lid(lid, format)	\
	__opa_get_lid(lid, OPA_PORT_PACKET_FORMAT_##format)

/* Convert a lid to a specific lid space */
static inline u32 __opa_get_lid(u32 lid, u8 format)
{
	bool is_mcast = hfi1_check_mcast(lid);

	switch (format) {
	case OPA_PORT_PACKET_FORMAT_8B:
	case OPA_PORT_PACKET_FORMAT_10B:
		if (is_mcast)
			return (lid - opa_get_mcast_base(OPA_MCAST_NR) +
				0xF0000);
		return lid & 0xFFFFF;
	case OPA_PORT_PACKET_FORMAT_16B:
		if (is_mcast)
			return (lid - opa_get_mcast_base(OPA_MCAST_NR) +
				0xF00000);
		return lid & 0xFFFFFF;
	case OPA_PORT_PACKET_FORMAT_9B:
		if (is_mcast)
			return (lid -
				opa_get_mcast_base(OPA_MCAST_NR) +
				be16_to_cpu(IB_MULTICAST_LID_BASE));
		else
			return lid & 0xFFFF;
	default:
		return lid;
	}
}
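
/*
 * Worked example (illustrative, assuming OPA_MCAST_NR == 4 so the
 * 32-bit multicast base is 0xF0000000): the multicast LID 0xF0000001
 * converts to 0xF00001 in the 24-bit 16B space and to 0xC001 in the
 * 16-bit 9B space (IB_MULTICAST_LID_BASE is 0xC000), while a unicast
 * LID such as 0x1234 is simply masked to the target width.
 */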

/* Return true if the given lid is in the OPA 16B multicast range */
static inline bool hfi1_is_16B_mcast(u32 lid)
{
	return ((lid >=
		opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 16B)) &&
		(lid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B)));
}

static inline void hfi1_make_opa_lid(struct rdma_ah_attr *attr)
{
	const struct ib_global_route *grh = rdma_ah_read_grh(attr);
	u32 dlid = rdma_ah_get_dlid(attr);

	/* Modify ah_attr.dlid to be in the 32 bit LID space.
	 * This is how the address will be laid out:
	 * Assuming MCAST_NR to be 4,
	 * 32 bit permissive LID = 0xFFFFFFFF
	 * Multicast LID range = 0xFFFFFFFE to 0xF0000000
	 * Unicast LID range = 0xEFFFFFFF to 1
	 * Invalid LID = 0
	 */
	if (ib_is_opa_gid(&grh->dgid))
		dlid = opa_get_lid_from_gid(&grh->dgid);
	else if ((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
		 (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) &&
		 (dlid != be32_to_cpu(OPA_LID_PERMISSIVE)))
		dlid = dlid - be16_to_cpu(IB_MULTICAST_LID_BASE) +
			opa_get_mcast_base(OPA_MCAST_NR);
	else if (dlid == be16_to_cpu(IB_LID_PERMISSIVE))
		dlid = be32_to_cpu(OPA_LID_PERMISSIVE);

	rdma_ah_set_dlid(attr, dlid);
}

static inline u8 hfi1_get_packet_type(u32 lid)
{
	/* 9B if lid > 0xF0000000 */
	if (lid >= opa_get_mcast_base(OPA_MCAST_NR))
		return HFI1_PKT_TYPE_9B;

	/* 16B if lid > 0xC000 */
	if (lid >= opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 9B))
		return HFI1_PKT_TYPE_16B;

	return HFI1_PKT_TYPE_9B;
}

static inline u8 hfi1_get_hdr_type(u32 lid, struct rdma_ah_attr *attr)
{
	/*
	 * If there was an incoming 16B packet with permissive
	 * LIDs, OPA GIDs would have been programmed when those
	 * packets were received. A 16B packet will have to
	 * be sent in response to that packet. Return a 16B
	 * header type if that's the case.
	 */
	if (rdma_ah_get_dlid(attr) == be32_to_cpu(OPA_LID_PERMISSIVE))
		return (ib_is_opa_gid(&rdma_ah_read_grh(attr)->dgid)) ?
			HFI1_PKT_TYPE_16B : HFI1_PKT_TYPE_9B;

	/*
	 * Return a 16B header type if either the destination
	 * or source lid is extended.
	 */
	if (hfi1_get_packet_type(rdma_ah_get_dlid(attr)) == HFI1_PKT_TYPE_16B)
		return HFI1_PKT_TYPE_16B;

	return hfi1_get_packet_type(lid);
}

static inline void hfi1_make_ext_grh(struct hfi1_packet *packet,
				     struct ib_grh *grh, u32 slid,
				     u32 dlid)
{
	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	if (!ibp)
		return;

	grh->hop_limit = 1;
	grh->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
	if (slid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))
		grh->sgid.global.interface_id =
			OPA_MAKE_ID(be32_to_cpu(OPA_LID_PERMISSIVE));
	else
		grh->sgid.global.interface_id = OPA_MAKE_ID(slid);

	/*
	 * Upper layers (like mad) may compare the dgid in the
	 * wc that is obtained here with the sgid_index in
	 * the wr. Since sgid_index in wr is always 0 for
	 * extended lids, set the dgid here to the default
	 * IB gid.
	 */
	grh->dgid.global.subnet_prefix = ibp->rvp.gid_prefix;
	grh->dgid.global.interface_id =
		cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]);
}

static inline int hfi1_get_16b_padding(u32 hdr_size, u32 payload)
{
	return -(hdr_size + payload + (SIZE_OF_CRC << 2) +
		     SIZE_OF_LT) & 0x7;
}
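
/*
 * Worked example (illustrative, assuming SIZE_OF_CRC is one dword and
 * SIZE_OF_LT is one byte as used elsewhere in the driver): with a
 * 40-byte header and an 11-byte payload the running total is
 * 40 + 11 + 4 + 1 = 56, already a multiple of 8, so the padding is
 * -(56) & 0x7 == 0; a 9-byte payload gives 54, so 2 pad bytes are
 * needed to reach the next 8-byte boundary.
 */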

static inline void hfi1_make_ib_hdr(struct ib_header *hdr,
				    u16 lrh0, u16 len,
				    u16 dlid, u16 slid)
{
	hdr->lrh[0] = cpu_to_be16(lrh0);
	hdr->lrh[1] = cpu_to_be16(dlid);
	hdr->lrh[2] = cpu_to_be16(len);
	hdr->lrh[3] = cpu_to_be16(slid);
}

static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr,
				     u32 slid, u32 dlid,
				     u16 len, u16 pkey,
				     bool becn, bool fecn, u8 l4,
				     u8 sc)
{
	u32 lrh0 = 0;
	u32 lrh1 = 0x40000000;
	u32 lrh2 = 0;
	u32 lrh3 = 0;

	lrh0 = (lrh0 & ~OPA_16B_BECN_MASK) | (becn << OPA_16B_BECN_SHIFT);
	lrh0 = (lrh0 & ~OPA_16B_LEN_MASK) | (len << OPA_16B_LEN_SHIFT);
	lrh0 = (lrh0 & ~OPA_16B_LID_MASK) | (slid & OPA_16B_LID_MASK);
	lrh1 = (lrh1 & ~OPA_16B_FECN_MASK) | (fecn << OPA_16B_FECN_SHIFT);
	lrh1 = (lrh1 & ~OPA_16B_SC_MASK) | (sc << OPA_16B_SC_SHIFT);
	lrh1 = (lrh1 & ~OPA_16B_LID_MASK) | (dlid & OPA_16B_LID_MASK);
	lrh2 = (lrh2 & ~OPA_16B_SLID_MASK) |
		((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT);
	lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) |
		((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT);
	lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | ((u32)pkey << OPA_16B_PKEY_SHIFT);
	lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4;

	hdr->lrh[0] = lrh0;
	hdr->lrh[1] = lrh1;
	hdr->lrh[2] = lrh2;
	hdr->lrh[3] = lrh3;
}
#endif                          /* _HFI1_KERNEL_H */