#ifndef _HFI1_SDMA_H
#define _HFI1_SDMA_H
/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/types.h>
#include <linux/list.h>
#include <asm/byteorder.h>
#include <linux/workqueue.h>
#include <linux/rculist.h>

#include "hfi.h"
#include "verbs.h"
#include "sdma_txreq.h"

/* Hardware limit */
#define MAX_DESC 64
/* Hardware limit for SDMA packet size */
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)

#define SDMA_MAP_NONE          0
#define SDMA_MAP_SINGLE        1
#define SDMA_MAP_PAGE          2

#define SDMA_AHG_VALUE_MASK          0xffff
#define SDMA_AHG_VALUE_SHIFT         0
#define SDMA_AHG_INDEX_MASK          0xf
#define SDMA_AHG_INDEX_SHIFT         16
#define SDMA_AHG_FIELD_LEN_MASK      0xf
#define SDMA_AHG_FIELD_LEN_SHIFT     20
#define SDMA_AHG_FIELD_START_MASK    0x1f
#define SDMA_AHG_FIELD_START_SHIFT   24
#define SDMA_AHG_UPDATE_ENABLE_MASK  0x1
#define SDMA_AHG_UPDATE_ENABLE_SHIFT 31

/* AHG modes */

/*
 * Be aware that the ordering and values
 * for SDMA_AHG_APPLY_UPDATE[123]
 * are assumed when generating a skip
 * count in submit_tx() in sdma.c
 */
#define SDMA_AHG_NO_AHG              0
#define SDMA_AHG_COPY                1
#define SDMA_AHG_APPLY_UPDATE1       2
#define SDMA_AHG_APPLY_UPDATE2       3
#define SDMA_AHG_APPLY_UPDATE3       4

/*
 * Bits defined in the send DMA descriptor.
 */
#define SDMA_DESC0_FIRST_DESC_FLAG      BIT_ULL(63)
#define SDMA_DESC0_LAST_DESC_FLAG       BIT_ULL(62)
#define SDMA_DESC0_BYTE_COUNT_SHIFT     48
#define SDMA_DESC0_BYTE_COUNT_WIDTH     14
#define SDMA_DESC0_BYTE_COUNT_MASK \
	((1ULL << SDMA_DESC0_BYTE_COUNT_WIDTH) - 1)
#define SDMA_DESC0_BYTE_COUNT_SMASK \
	(SDMA_DESC0_BYTE_COUNT_MASK << SDMA_DESC0_BYTE_COUNT_SHIFT)
#define SDMA_DESC0_PHY_ADDR_SHIFT       0
#define SDMA_DESC0_PHY_ADDR_WIDTH       48
#define SDMA_DESC0_PHY_ADDR_MASK \
	((1ULL << SDMA_DESC0_PHY_ADDR_WIDTH) - 1)
#define SDMA_DESC0_PHY_ADDR_SMASK \
	(SDMA_DESC0_PHY_ADDR_MASK << SDMA_DESC0_PHY_ADDR_SHIFT)

#define SDMA_DESC1_HEADER_UPDATE1_SHIFT 32
#define SDMA_DESC1_HEADER_UPDATE1_WIDTH 32
#define SDMA_DESC1_HEADER_UPDATE1_MASK \
	((1ULL << SDMA_DESC1_HEADER_UPDATE1_WIDTH) - 1)
#define SDMA_DESC1_HEADER_UPDATE1_SMASK \
	(SDMA_DESC1_HEADER_UPDATE1_MASK << SDMA_DESC1_HEADER_UPDATE1_SHIFT)
#define SDMA_DESC1_HEADER_MODE_SHIFT    13
#define SDMA_DESC1_HEADER_MODE_WIDTH    3
#define SDMA_DESC1_HEADER_MODE_MASK \
	((1ULL << SDMA_DESC1_HEADER_MODE_WIDTH) - 1)
#define SDMA_DESC1_HEADER_MODE_SMASK \
	(SDMA_DESC1_HEADER_MODE_MASK << SDMA_DESC1_HEADER_MODE_SHIFT)
#define SDMA_DESC1_HEADER_INDEX_SHIFT   8
#define SDMA_DESC1_HEADER_INDEX_WIDTH   5
#define SDMA_DESC1_HEADER_INDEX_MASK \
	((1ULL << SDMA_DESC1_HEADER_INDEX_WIDTH) - 1)
#define SDMA_DESC1_HEADER_INDEX_SMASK \
	(SDMA_DESC1_HEADER_INDEX_MASK << SDMA_DESC1_HEADER_INDEX_SHIFT)
#define SDMA_DESC1_HEADER_DWS_SHIFT     4
#define SDMA_DESC1_HEADER_DWS_WIDTH     4
#define SDMA_DESC1_HEADER_DWS_MASK \
	((1ULL << SDMA_DESC1_HEADER_DWS_WIDTH) - 1)
#define SDMA_DESC1_HEADER_DWS_SMASK \
	(SDMA_DESC1_HEADER_DWS_MASK << SDMA_DESC1_HEADER_DWS_SHIFT)
#define SDMA_DESC1_GENERATION_SHIFT     2
#define SDMA_DESC1_GENERATION_WIDTH     2
#define SDMA_DESC1_GENERATION_MASK \
	((1ULL << SDMA_DESC1_GENERATION_WIDTH) - 1)
#define SDMA_DESC1_GENERATION_SMASK \
	(SDMA_DESC1_GENERATION_MASK << SDMA_DESC1_GENERATION_SHIFT)
#define SDMA_DESC1_INT_REQ_FLAG         BIT_ULL(1)
#define SDMA_DESC1_HEAD_TO_HOST_FLAG    BIT_ULL(0)

enum sdma_states {
	sdma_state_s00_hw_down,
	sdma_state_s10_hw_start_up_halt_wait,
	sdma_state_s15_hw_start_up_clean_wait,
	sdma_state_s20_idle,
	sdma_state_s30_sw_clean_up_wait,
	sdma_state_s40_hw_clean_up_wait,
	sdma_state_s50_hw_halt_wait,
	sdma_state_s60_idle_halt_wait,
	sdma_state_s80_hw_freeze,
	sdma_state_s82_freeze_sw_clean,
	sdma_state_s99_running,
};

enum sdma_events {
	sdma_event_e00_go_hw_down,
	sdma_event_e10_go_hw_start,
	sdma_event_e15_hw_halt_done,
	sdma_event_e25_hw_clean_up_done,
	sdma_event_e30_go_running,
	sdma_event_e40_sw_cleaned,
	sdma_event_e50_hw_cleaned,
	sdma_event_e60_hw_halted,
	sdma_event_e70_go_idle,
	sdma_event_e80_hw_freeze,
	sdma_event_e81_hw_frozen,
	sdma_event_e82_hw_unfreeze,
	sdma_event_e85_link_down,
	sdma_event_e90_sw_halted,
};

struct sdma_set_state_action {
	unsigned op_enable:1;
	unsigned op_intenable:1;
	unsigned op_halt:1;
	unsigned op_cleanup:1;
	unsigned go_s99_running_tofalse:1;
	unsigned go_s99_running_totrue:1;
};

struct sdma_state {
	struct kref          kref;
	struct completion    comp;
	enum sdma_states current_state;
	unsigned             current_op;
	unsigned             go_s99_running;
	/* debugging/development */
	enum sdma_states previous_state;
	unsigned             previous_op;
	enum sdma_events last_event;
};

/**
 * DOC: sdma exported routines
 *
 * These sdma routines fit into three categories:
 * - The SDMA API for building and submitting packets
 *   to the ring
 *
 * - Initialization and tear down routines to build up
 *   and tear down SDMA
 *
 * - ISR entrances to handle interrupts, state changes
 *   and errors
 */

/**
 * DOC: sdma PSM/verbs API
 *
 * The sdma API is designed to be used by both PSM
 * and verbs to supply packets to the SDMA ring.
 *
 * The usage of the API is as follows:
 *
 * Embed a struct iowait in the QP or
 * PQ.  The iowait should be initialized with a
 * call to iowait_init().
 *
 * The user of the API should create an allocation method
 * for their version of the txreq.  Slabs, pre-allocated lists,
 * and dma pools can be used.  Once the user's overload of
 * the sdma_txreq has been allocated, the sdma_txreq member
 * must be initialized with sdma_txinit() or sdma_txinit_ahg().
 *
 * The user's txreq struct must be declared with the struct sdma_txreq
 * as its first member.
 *
 * The tx request, once initialized, is manipulated with calls to
 * sdma_txadd_daddr(), sdma_txadd_page(), or sdma_txadd_kvaddr()
 * for each disjoint memory location.  It is the user's responsibility
 * to understand the packet boundaries and page boundaries to do the
 * appropriate number of sdma_txadd_* calls.  The user
 * must be prepared to deal with failures from these routines due to
 * either memory allocation or dma_mapping failures.
 *
 * The mapping specifics for each memory location are recorded
 * in the tx. Memory locations added with sdma_txadd_page()
 * and sdma_txadd_kvaddr() are automatically mapped when added
 * to the tx and unmapped as part of the progress processing in the
 * SDMA interrupt handling.
 *
 * sdma_txadd_daddr() is used to add a dma_addr_t memory location to the
 * tx.   An example of a use case would be a pre-allocated
 * set of headers allocated via dma_pool_alloc() or
 * dma_alloc_coherent().  For these memory locations, it
 * is the responsibility of the user to handle that unmapping.
 * (This would usually be at an unload or job termination.)
 *
 * The routine sdma_send_txreq() is used to submit
 * a tx to the ring after the appropriate number of
 * sdma_txadd_* have been done.
 *
 * If it is desired to send a burst of sdma_txreqs, sdma_send_txlist()
 * can be used to submit a list of packets.
 *
 * The user is free to use the link overhead in the struct sdma_txreq as
 * long as the tx isn't in flight.
 *
 * The extreme degenerate case of the number of descriptors
 * exceeding the ring size is automatically handled as
 * memory locations are added.  An overflow of the descriptor
 * array that is part of the sdma_txreq is also automatically
 * handled.
 *
 */
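
/*
 * Minimal usage sketch of the API described above (illustrative only;
 * the wrapper struct, its allocation, and my_tx_complete() are
 * hypothetical, error handling is omitted, and "dd", "sde", "wait",
 * "page", and the lengths are assumed to exist in the caller):
 *
 *	struct my_txreq {
 *		struct sdma_txreq txreq;   // first member, per the note above
 *		struct list_head  node;
 *	};
 *
 *	tx = kmem_cache_alloc(my_txreq_cache, GFP_ATOMIC);
 *	ret = sdma_txinit(&tx->txreq, 0, pbc_len + hdr_len + data_len,
 *			  my_tx_complete);
 *	ret = sdma_txadd_kvaddr(dd, &tx->txreq, hdr, pbc_len + hdr_len);
 *	ret = sdma_txadd_page(dd, &tx->txreq, page, offset, data_len,
 *			      NULL, NULL, NULL);
 *	ret = sdma_send_txreq(sde, wait, &tx->txreq, true);
 */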

/**
 * DOC: Infrastructure calls
 *
 * sdma_init() is used to initialize data structures and
 * CSRs for the desired number of SDMA engines.
 *
 * sdma_start() is used to kick the SDMA engines initialized
 * with sdma_init().   Interrupts must be enabled at this
 * point since aspects of the state machine are interrupt
 * driven.
 *
 * sdma_engine_error() and sdma_engine_interrupt() are
 * entrances for interrupts.
 *
 * sdma_map_init() is for the management of the mapping
 * table when the number of vls is changed.
 *
 */
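
/*
 * Bring-up order implied by the above (sketch only; how the SDMA
 * interrupts are requested and enabled is device-specific and not
 * shown here):
 *
 *	ret = sdma_init(dd, port);
 *	// ... request/enable the SDMA interrupts ...
 *	sdma_start(dd);
 */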

/*
 * struct hw_sdma_desc - raw 128 bit SDMA descriptor
 *
 * This is the raw descriptor in the SDMA ring
 */
struct hw_sdma_desc {
	/* private:  don't use directly */
	__le64 qw[2];
};

/**
 * struct sdma_engine - Data pertaining to each SDMA engine.
 * @dd: a back-pointer to the device data
 * @ppd: per port back-pointer
 * @imask: mask for irq manipulation
 * @idle_mask: mask for determining if an interrupt is due to sdma_idle
 *
 * This structure has the state for each sdma_engine.
 *
 * Access to non-public fields is not supported
 * since the private members are subject to change.
 */
struct sdma_engine {
	/* read mostly */
	struct hfi1_devdata *dd;
	struct hfi1_pportdata *ppd;
	/* private: */
	void __iomem *tail_csr;
	u64 imask;			/* clear interrupt mask */
	u64 idle_mask;
	u64 progress_mask;
	u64 int_mask;
	/* private: */
	volatile __le64      *head_dma; /* DMA'ed by chip */
	/* private: */
	dma_addr_t            head_phys;
	/* private: */
	struct hw_sdma_desc *descq;
	/* private: */
	unsigned descq_full_count;
	struct sdma_txreq **tx_ring;
	/* private: */
	dma_addr_t            descq_phys;
	/* private: */
	u32 sdma_mask;
	/* private: */
	struct sdma_state state;
	/* private: */
	int cpu;
	/* private: */
	u8 sdma_shift;
	/* private: */
	u8 this_idx; /* zero relative engine */
	/* protect changes to senddmactrl shadow */
	spinlock_t senddmactrl_lock;
	/* private: */
	u64 p_senddmactrl;		/* shadow per-engine SendDmaCtrl */

	/* read/write using tail_lock */
	spinlock_t            tail_lock ____cacheline_aligned_in_smp;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	/* private: */
	u64                   tail_sn;
#endif
	/* private: */
	u32                   descq_tail;
	/* private: */
	unsigned long         ahg_bits;
	/* private: */
	u16                   desc_avail;
	/* private: */
	u16                   tx_tail;
	/* private: */
	u16 descq_cnt;

	/* read/write using head_lock */
	/* private: */
	seqlock_t            head_lock ____cacheline_aligned_in_smp;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	/* private: */
	u64                   head_sn;
#endif
	/* private: */
	u32                   descq_head;
	/* private: */
	u16                   tx_head;
	/* private: */
	u64                   last_status;
	/* private: */
	u64                     err_cnt;
	/* private: */
	u64                     sdma_int_cnt;
	u64                     idle_int_cnt;
	u64                     progress_int_cnt;

	/* private: */
	seqlock_t            waitlock;
	struct list_head      dmawait;

	/* CONFIG SDMA for now, just blindly duplicate */
	/* private: */
	struct tasklet_struct sdma_hw_clean_up_task
		____cacheline_aligned_in_smp;

	/* private: */
	struct tasklet_struct sdma_sw_clean_up_task
		____cacheline_aligned_in_smp;
	/* private: */
	struct work_struct err_halt_worker;
	/* private: */
	struct timer_list     err_progress_check_timer;
	u32                   progress_check_head;
	/* private: */
	struct work_struct flush_worker;
	/* protect flush list */
	spinlock_t flushlist_lock;
	/* private: */
	struct list_head flushlist;
	struct cpumask cpu_mask;
	struct kobject kobj;
	u32 msix_intr;
};

int sdma_init(struct hfi1_devdata *dd, u8 port);
void sdma_start(struct hfi1_devdata *dd);
void sdma_exit(struct hfi1_devdata *dd);
void sdma_clean(struct hfi1_devdata *dd, size_t num_engines);
void sdma_all_running(struct hfi1_devdata *dd);
void sdma_all_idle(struct hfi1_devdata *dd);
void sdma_freeze_notify(struct hfi1_devdata *dd, int go_idle);
void sdma_freeze(struct hfi1_devdata *dd);
void sdma_unfreeze(struct hfi1_devdata *dd);
void sdma_wait(struct hfi1_devdata *dd);

/**
 * sdma_empty() - idle engine test
 * @sde: sdma engine
 *
 * Currently used by verbs as a latency optimization.
 *
 * Return:
 * 1 - empty, 0 - non-empty
 */
static inline int sdma_empty(struct sdma_engine *sde)
{
	return sde->descq_tail == sde->descq_head;
}

static inline u16 sdma_descq_freecnt(struct sdma_engine *sde)
{
	return sde->descq_cnt -
		(sde->descq_tail -
		 READ_ONCE(sde->descq_head)) - 1;
}

static inline u16 sdma_descq_inprocess(struct sdma_engine *sde)
{
	return sde->descq_cnt - sdma_descq_freecnt(sde);
}
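
/*
 * Worked example of the ring accounting above (numbers are
 * illustrative): with descq_cnt = 1024, descq_tail = 10 and
 * descq_head = 5, five descriptors are outstanding, so
 * freecnt = 1024 - (10 - 5) - 1 = 1018.  One slot is always held
 * back so that descq_head == descq_tail unambiguously means empty.
 */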

/*
 * Either head_lock or tail_lock is required to see
 * a steady state.
 */
static inline int __sdma_running(struct sdma_engine *engine)
{
	return engine->state.current_state == sdma_state_s99_running;
}

/**
 * sdma_running() - state suitability test
 * @engine: sdma engine
 *
 * sdma_running probes the internal state to determine if it is suitable
 * for submitting packets.
 *
 * Return:
 * 1 - ok to submit, 0 - not ok to submit
 *
 */
static inline int sdma_running(struct sdma_engine *engine)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&engine->tail_lock, flags);
	ret = __sdma_running(engine);
	spin_unlock_irqrestore(&engine->tail_lock, flags);
	return ret;
}

void _sdma_txreq_ahgadd(
	struct sdma_txreq *tx,
	u8 num_ahg,
	u8 ahg_entry,
	u32 *ahg,
	u8 ahg_hlen);

/**
 * sdma_txinit_ahg() - initialize an sdma_txreq struct with AHG
 * @tx: tx request to initialize
 * @flags: flags to key last descriptor additions
 * @tlen: total packet length (pbc + headers + data)
 * @ahg_entry: ahg entry to use  (0 - 31)
 * @num_ahg: ahg descriptor for first descriptor (0 - 9)
 * @ahg: array of AHG descriptors (up to 9 entries)
 * @ahg_hlen: number of bytes from ASIC entry to use
 * @cb: callback
 *
 * The allocation of the sdma_txreq and its enclosing structure is user
 * dependent.  This routine must be called to initialize the user independent
 * fields.
 *
 * The currently supported flags are SDMA_TXREQ_F_URGENT,
 * SDMA_TXREQ_F_AHG_COPY, and SDMA_TXREQ_F_USE_AHG.
 *
 * SDMA_TXREQ_F_URGENT is used for latency sensitive situations where the
 * completion is desired as soon as possible.
 *
 * SDMA_TXREQ_F_AHG_COPY causes the header in the first descriptor to be
 * copied to the chip entry. SDMA_TXREQ_F_USE_AHG causes the code to add
 * the AHG descriptors into the first 1 to 3 descriptors.
 *
 * Completions of submitted requests can be gotten on selected
 * txreqs by giving a completion routine callback to sdma_txinit() or
 * sdma_txinit_ahg().  The environment in which the callback runs
 * can be from an ISR, a tasklet, or a thread, so no sleeping
 * kernel routines can be used.   Aspects of the sdma ring may
 * be locked so care should be taken with locking.
 *
 * The callback pointer can be NULL to avoid any callback for the packet
 * being submitted. The callback will be provided this tx, a status, and a flag.
 *
 * The status will be one of SDMA_TXREQ_S_OK, SDMA_TXREQ_S_SENDERROR,
 * SDMA_TXREQ_S_ABORTED, or SDMA_TXREQ_S_SHUTDOWN.
 *
 * The flag, if the iowait had been used, indicates that the iowait
 * sdma_busy count has reached zero.
 *
 * The user data portion of tlen should be precise.   The sdma_txadd_*
 * entrances will pad with a descriptor that references 1 - 3 bytes when
 * the number of bytes specified in tlen has been supplied to the sdma_txreq.
 *
 * ahg_hlen is used to determine the number of on-chip entry bytes to
 * use as the header.   This is for cases where the stored header is
 * larger than the header to be used in a packet.  This is typical
 * for verbs where an RDMA_WRITE_FIRST is larger than the packet in
 * an RDMA_WRITE_MIDDLE.
 *
 */
static inline int sdma_txinit_ahg(
	struct sdma_txreq *tx,
	u16 flags,
	u16 tlen,
	u8 ahg_entry,
	u8 num_ahg,
	u32 *ahg,
	u8 ahg_hlen,
	void (*cb)(struct sdma_txreq *, int))
{
	if (tlen == 0)
		return -ENODATA;
	if (tlen > MAX_SDMA_PKT_SIZE)
		return -EMSGSIZE;
	tx->desc_limit = ARRAY_SIZE(tx->descs);
	tx->descp = &tx->descs[0];
	INIT_LIST_HEAD(&tx->list);
	tx->num_desc = 0;
	tx->flags = flags;
	tx->complete = cb;
	tx->coalesce_buf = NULL;
	tx->wait = NULL;
	tx->packet_len = tlen;
	tx->tlen = tx->packet_len;
	tx->descs[0].qw[0] = SDMA_DESC0_FIRST_DESC_FLAG;
	tx->descs[0].qw[1] = 0;
	if (flags & SDMA_TXREQ_F_AHG_COPY)
		tx->descs[0].qw[1] |=
			(((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
				<< SDMA_DESC1_HEADER_INDEX_SHIFT) |
			(((u64)SDMA_AHG_COPY & SDMA_DESC1_HEADER_MODE_MASK)
				<< SDMA_DESC1_HEADER_MODE_SHIFT);
	else if (flags & SDMA_TXREQ_F_USE_AHG && num_ahg)
		_sdma_txreq_ahgadd(tx, num_ahg, ahg_entry, ahg, ahg_hlen);
	return 0;
}

/**
 * sdma_txinit() - initialize an sdma_txreq struct (no AHG)
 * @tx: tx request to initialize
 * @flags: flags to key last descriptor additions
 * @tlen: total packet length (pbc + headers + data)
 * @cb: callback pointer
 *
 * The allocation of the sdma_txreq and its enclosing structure is user
 * dependent.  This routine must be called to initialize the user
 * independent fields.
 *
 * The currently supported flag is SDMA_TXREQ_F_URGENT.
 *
 * SDMA_TXREQ_F_URGENT is used for latency sensitive situations where the
 * completion is desired as soon as possible.
 *
 * Completions of submitted requests can be gotten on selected
 * txreqs by giving a completion routine callback to sdma_txinit() or
 * sdma_txinit_ahg().  The environment in which the callback runs
 * can be from an ISR, a tasklet, or a thread, so no sleeping
 * kernel routines can be used.   Aspects of the sdma ring may
 * be locked so care should be taken with locking.
 *
 * The callback pointer can be NULL to avoid any callback for the packet
 * being submitted.
 *
 * The callback, if non-NULL, will be provided this tx and a status.  The
 * status will be one of SDMA_TXREQ_S_OK, SDMA_TXREQ_S_SENDERROR,
 * SDMA_TXREQ_S_ABORTED, or SDMA_TXREQ_S_SHUTDOWN.
 *
 */
static inline int sdma_txinit(
	struct sdma_txreq *tx,
	u16 flags,
	u16 tlen,
	void (*cb)(struct sdma_txreq *, int))
{
	return sdma_txinit_ahg(tx, flags, tlen, 0, 0, NULL, 0, cb);
}
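
/*
 * Sketch of a completion callback as described above (the wrapper type
 * and helper names are hypothetical).  The callback may run from an ISR
 * or tasklet, so it must not sleep:
 *
 *	static void my_tx_complete(struct sdma_txreq *txreq, int status)
 *	{
 *		struct my_txreq *tx =
 *			container_of(txreq, struct my_txreq, txreq);
 *
 *		if (status != SDMA_TXREQ_S_OK)
 *			my_count_error(tx);
 *		my_free_tx(tx);
 *	}
 */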

/* helpers - don't use */
static inline int sdma_mapping_type(struct sdma_desc *d)
{
	return (d->qw[1] & SDMA_DESC1_GENERATION_SMASK)
		>> SDMA_DESC1_GENERATION_SHIFT;
}

static inline size_t sdma_mapping_len(struct sdma_desc *d)
{
	return (d->qw[0] & SDMA_DESC0_BYTE_COUNT_SMASK)
		>> SDMA_DESC0_BYTE_COUNT_SHIFT;
}

static inline dma_addr_t sdma_mapping_addr(struct sdma_desc *d)
{
	return (d->qw[0] & SDMA_DESC0_PHY_ADDR_SMASK)
		>> SDMA_DESC0_PHY_ADDR_SHIFT;
}

static inline void make_tx_sdma_desc(
	struct sdma_txreq *tx,
	int type,
	dma_addr_t addr,
	size_t len,
	void *pinning_ctx,
	void (*ctx_get)(void *),
	void (*ctx_put)(void *))
{
	struct sdma_desc *desc = &tx->descp[tx->num_desc];

	if (!tx->num_desc) {
		/* qw[0] zero; qw[1] first, ahg mode already in from init */
		desc->qw[1] |= ((u64)type & SDMA_DESC1_GENERATION_MASK)
				<< SDMA_DESC1_GENERATION_SHIFT;
	} else {
		desc->qw[0] = 0;
		desc->qw[1] = ((u64)type & SDMA_DESC1_GENERATION_MASK)
				<< SDMA_DESC1_GENERATION_SHIFT;
	}
	desc->qw[0] |= (((u64)addr & SDMA_DESC0_PHY_ADDR_MASK)
				<< SDMA_DESC0_PHY_ADDR_SHIFT) |
			(((u64)len & SDMA_DESC0_BYTE_COUNT_MASK)
				<< SDMA_DESC0_BYTE_COUNT_SHIFT);

	desc->pinning_ctx = pinning_ctx;
	desc->ctx_put = ctx_put;
	if (pinning_ctx && ctx_get)
		ctx_get(pinning_ctx);
}

/* helper to extend txreq */
int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
			   int type, void *kvaddr, struct page *page,
			   unsigned long offset, u16 len);
int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *);
void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);

static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	if (tx->num_desc)
		__sdma_txclean(dd, tx);
}

/* helpers used by public routines */
static inline void _sdma_close_tx(struct hfi1_devdata *dd,
				  struct sdma_txreq *tx)
{
	u16 last_desc = tx->num_desc - 1;

	tx->descp[last_desc].qw[0] |= SDMA_DESC0_LAST_DESC_FLAG;
	tx->descp[last_desc].qw[1] |= dd->default_desc1;
	if (tx->flags & SDMA_TXREQ_F_URGENT)
		tx->descp[last_desc].qw[1] |= (SDMA_DESC1_HEAD_TO_HOST_FLAG |
					       SDMA_DESC1_INT_REQ_FLAG);
}

static inline int _sdma_txadd_daddr(
	struct hfi1_devdata *dd,
	int type,
	struct sdma_txreq *tx,
	dma_addr_t addr,
	u16 len,
	void *pinning_ctx,
	void (*ctx_get)(void *),
	void (*ctx_put)(void *))
{
	int rval = 0;

	make_tx_sdma_desc(
		tx,
		type,
		addr, len,
		pinning_ctx, ctx_get, ctx_put);
	WARN_ON(len > tx->tlen);
	tx->num_desc++;
	tx->tlen -= len;
	/* special cases for last */
	if (!tx->tlen) {
		if (tx->packet_len & (sizeof(u32) - 1)) {
			rval = _pad_sdma_tx_descs(dd, tx);
			if (rval)
				return rval;
		} else {
			_sdma_close_tx(dd, tx);
		}
	}
	return rval;
}

/**
 * sdma_txadd_page() - add a page to the sdma_txreq
 * @dd: the device to use for mapping
 * @tx: tx request to which the page is added
 * @page: page to map
 * @offset: offset within the page
 * @len: length in bytes
 * @pinning_ctx: context to be stored on struct sdma_desc .pinning_ctx. Not
 *               added if coalesce buffer is used. E.g. pointer to pinned-page
 *               cache entry for the sdma_desc.
 * @ctx_get: optional function to take reference to @pinning_ctx. Not called if
 *           @pinning_ctx is NULL.
 * @ctx_put: optional function to release reference to @pinning_ctx after
 *           sdma_desc completes. May be called in interrupt context so must
 *           not sleep. Not called if @pinning_ctx is NULL.
 *
 * This is used to add a page/offset/length descriptor.
 *
 * The mapping/unmapping of the page/offset/len is automatically handled.
 *
 * Return:
 * 0 - success, -ENOSPC - mapping fail, -ENOMEM - couldn't
 * extend/coalesce descriptor array
 */
static inline int sdma_txadd_page(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx,
	struct page *page,
	unsigned long offset,
	u16 len,
	void *pinning_ctx,
	void (*ctx_get)(void *),
	void (*ctx_put)(void *))
{
	dma_addr_t addr;
	int rval;

	if (unlikely(tx->num_desc == tx->desc_limit)) {
		rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_PAGE,
					      NULL, page, offset, len);
		if (rval <= 0)
			return rval;
	}

	addr = dma_map_page(
		       &dd->pcidev->dev,
		       page,
		       offset,
		       len,
		       DMA_TO_DEVICE);

	if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
		__sdma_txclean(dd, tx);
		return -ENOSPC;
	}

	return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, tx, addr, len,
				 pinning_ctx, ctx_get, ctx_put);
}

/**
 * sdma_txadd_daddr() - add a dma address to the sdma_txreq
 * @dd: the device to use for mapping
 * @tx: sdma_txreq to which the page is added
 * @addr: dma address mapped by caller
 * @len: length in bytes
 *
 * This is used to add a descriptor for memory that is already dma mapped.
 *
 * In this case, there is no unmapping as part of the progress processing for
 * this memory location.
 *
 * Return:
 * 0 - success, -ENOMEM - couldn't extend descriptor array
 */

static inline int sdma_txadd_daddr(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx,
	dma_addr_t addr,
	u16 len)
{
	int rval;

	if (unlikely(tx->num_desc == tx->desc_limit)) {
		rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_NONE,
					      NULL, NULL, 0, 0);
		if (rval <= 0)
			return rval;
	}

	return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len,
				 NULL, NULL, NULL);
}

/**
 * sdma_txadd_kvaddr() - add a kernel virtual address to sdma_txreq
 * @dd: the device to use for mapping
 * @tx: sdma_txreq to which the page is added
 * @kvaddr: the kernel virtual address
 * @len: length in bytes
 *
 * This is used to add a descriptor referenced by the indicated kvaddr and
 * len.
 *
 * The mapping/unmapping of the kvaddr and len is automatically handled.
 *
 * Return:
 * 0 - success, -ENOSPC - mapping fail, -ENOMEM - couldn't extend/coalesce
 * descriptor array
 */
static inline int sdma_txadd_kvaddr(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx,
	void *kvaddr,
	u16 len)
{
	dma_addr_t addr;
	int rval;

	if (unlikely(tx->num_desc == tx->desc_limit)) {
		rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_SINGLE,
					      kvaddr, NULL, 0, len);
		if (rval <= 0)
			return rval;
	}

	addr = dma_map_single(
		       &dd->pcidev->dev,
		       kvaddr,
		       len,
		       DMA_TO_DEVICE);

	if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
		__sdma_txclean(dd, tx);
		return -ENOSPC;
	}

	return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, addr, len,
				 NULL, NULL, NULL);
}

struct iowait_work;

int sdma_send_txreq(struct sdma_engine *sde,
		    struct iowait_work *wait,
		    struct sdma_txreq *tx,
		    bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde,
		     struct iowait_work *wait,
		     struct list_head *tx_list,
		     u16 *count_out);

int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);

/**
 * sdma_build_ahg_descriptor() - build ahg descriptor
 * @data: AHG value to write into the header field (16 bits)
 * @dwindex: index of the header dword to update
 * @startbit: starting bit position of the field within the dword
 * @bits: length of the field in bits
 *
 * Build and return a 32 bit descriptor.
 */
static inline u32 sdma_build_ahg_descriptor(
	u16 data,
	u8 dwindex,
	u8 startbit,
	u8 bits)
{
	return (u32)(1UL << SDMA_AHG_UPDATE_ENABLE_SHIFT |
		((startbit & SDMA_AHG_FIELD_START_MASK) <<
		SDMA_AHG_FIELD_START_SHIFT) |
		((bits & SDMA_AHG_FIELD_LEN_MASK) <<
		SDMA_AHG_FIELD_LEN_SHIFT) |
		((dwindex & SDMA_AHG_INDEX_MASK) <<
		SDMA_AHG_INDEX_SHIFT) |
		((data & SDMA_AHG_VALUE_MASK) <<
		SDMA_AHG_VALUE_SHIFT));
}
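
/*
 * Example (illustrative values only; the field placement is
 * hypothetical and not taken from a real header layout): build one AHG
 * update that writes the low 8 bits of "val" into bits 16..23 of header
 * dword 4, then hand it to sdma_txinit_ahg() using an entry obtained
 * from sdma_ahg_alloc():
 *
 *	u32 ahg[1];
 *
 *	ahg[0] = sdma_build_ahg_descriptor((u16)val, 4, 16, 8);
 *	ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_USE_AHG, tlen,
 *			      ahg_entry, 1, ahg, ahg_hlen, my_tx_complete);
 */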

/**
 * sdma_progress - use seq number to detect head progress
 * @sde: sdma_engine to check
 * @seq: base seq count
 * @tx: txreq for which we need to check descriptor availability
 *
 * This is used in the appropriate spot in the sleep routine
 * to check for potential ring progress.  This routine gets the
 * seqcount before queuing the iowait structure for progress.
 *
 * If the seqcount indicates that progress needs to be checked,
 * re-submission is detected by checking whether the descriptor
 * queue has enough descriptors for the txreq.
 */
static inline unsigned sdma_progress(struct sdma_engine *sde, unsigned seq,
				     struct sdma_txreq *tx)
{
	if (read_seqretry(&sde->head_lock, seq)) {
		sde->desc_avail = sdma_descq_freecnt(sde);
		if (tx->num_desc > sde->desc_avail)
			return 0;
		return 1;
	}
	return 0;
}
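
/*
 * Typical call pattern from a sleep/queuing routine (sketch only; the
 * surrounding queuing of the iowait is driver-specific and omitted):
 *
 *	seq = read_seqbegin(&sde->head_lock);
 *	// the ring looked full, so the tx is about to be queued ...
 *	if (sdma_progress(sde, seq, &tx->txreq))
 *		return -EAGAIN;		// head moved; retry the submit
 *	// otherwise queue the iowait and wait for progress
 */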

/**
 * sdma_iowait_schedule() - schedule the iowait for progress
 * @sde: sdma_engine to schedule
 * @wait: wait struct to schedule
 *
 * This function schedules the iowait structure embedded
 * in the QP or PQ on the port's workqueue.
 *
 */
static inline void sdma_iowait_schedule(
	struct sdma_engine *sde,
	struct iowait *wait)
{
	struct hfi1_pportdata *ppd = sde->dd->pport;

	iowait_schedule(wait, ppd->hfi1_wq, sde->cpu);
}

/* for use by interrupt handling */
void sdma_engine_error(struct sdma_engine *sde, u64 status);
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);

/*
 *
 * The diagram below details the relationship of the mapping structures
 *
 * Since the mapping now allows for non-uniform engines per vl, the
 * number of engines for a vl is either the vl_engines[vl] or
 * a computation based on num_sdma/num_vls:
 *
 * For example:
 * nactual = vl_engines ? vl_engines[vl] : num_sdma/num_vls
 *
 * n = roundup to next highest power of 2 using nactual
 *
 * In the case where num_sdma/num_vls doesn't divide
 * evenly, the extras are added from the last vl downward.
 *
 * For the case where n > nactual, the engines are assigned
 * in a round robin fashion wrapping back to the first engine
 * for a particular vl.
 *
 *               dd->sdma_map
 *                    |                                   sdma_map_elem[0]
 *                    |                                +--------------------+
 *                    v                                |       mask         |
 *               sdma_vl_map                           |--------------------|
 *      +--------------------------+                   | sde[0] -> eng 1    |
 *      |    list (RCU)            |                   |--------------------|
 *      |--------------------------|                 ->| sde[1] -> eng 2    |
 *      |    mask                  |              --/  |--------------------|
 *      |--------------------------|            -/     |        *           |
 *      |    actual_vls (max 8)    |          -/       |--------------------|
 *      |--------------------------|       --/         | sde[n-1] -> eng n  |
 *      |    vls (max 8)           |     -/            +--------------------+
 *      |--------------------------|  --/
 *      |    map[0]                |-/
 *      |--------------------------|                   +---------------------+
 *      |    map[1]                |---                |       mask          |
 *      |--------------------------|   \----           |---------------------|
 *      |           *              |        \--        | sde[0] -> eng 1+n   |
 *      |           *              |           \----   |---------------------|
 *      |           *              |                \->| sde[1] -> eng 2+n   |
 *      |--------------------------|                   |---------------------|
 *      |   map[vls - 1]           |-                  |         *           |
 *      +--------------------------+ \-                |---------------------|
 *                                     \-              | sde[m-1] -> eng m+n |
 *                                       \             +---------------------+
 *                                        \-
 *                                          \
 *                                           \-        +----------------------+
 *                                             \-      |       mask           |
 *                                               \     |----------------------|
 *                                                \-   | sde[0] -> eng 1+m+n  |
 *                                                  \- |----------------------|
 *                                                    >| sde[1] -> eng 2+m+n  |
 *                                                     |----------------------|
 *                                                     |         *            |
 *                                                     |----------------------|
 *                                                     | sde[o-1] -> eng o+m+n|
 *                                                     +----------------------+
 *
 */

/**
 * struct sdma_map_elem - mapping for a vl
 * @mask: selector mask
 * @sde: array of engines for this vl
 *
 * The mask is used to "mod" the selector
 * to produce an index into the trailing
 * array of sdes.
 */
struct sdma_map_elem {
	u32 mask;
	struct sdma_engine *sde[];
};

/**
 * struct sdma_vl_map - mapping from vl to engine list
 * @engine_to_vl: map of an engine to a vl
 * @list: rcu head for free callback
 * @mask: vl mask to "mod" the vl to produce an index to map array
 * @actual_vls: number of vls
 * @vls: number of vls rounded to next power of 2
 * @map: array of sdma_map_elem entries
 *
 * This is the parent mapping structure.  The trailing
 * members of the struct point to sdma_map_elem entries, which
 * in turn point to an array of sde's for that vl.
 */
struct sdma_vl_map {
	s8 engine_to_vl[TXE_NUM_SDMA_ENGINES];
	struct rcu_head list;
	u32 mask;
	u8 actual_vls;
	u8 vls;
	struct sdma_map_elem *map[];
};
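
/*
 * Lookup sketch based on the mapping above (illustrative only; this is
 * roughly what the sdma_select_engine_* helpers do internally, with
 * NULL/error checks omitted):
 *
 *	rcu_read_lock();
 *	m = rcu_dereference(dd->sdma_map);
 *	e = m->map[vl & m->mask];
 *	sde = e->sde[selector & e->mask];
 *	rcu_read_unlock();
 */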

int sdma_map_init(
	struct hfi1_devdata *dd,
	u8 port,
	u8 num_vls,
	u8 *vl_engines);

/* slow path */
void _sdma_engine_progress_schedule(struct sdma_engine *sde);

/**
 * sdma_engine_progress_schedule() - schedule progress on engine
 * @sde: sdma_engine to schedule progress
 *
 * This is the fast path.
 *
 */
static inline void sdma_engine_progress_schedule(
	struct sdma_engine *sde)
{
	if (!sde || sdma_descq_inprocess(sde) < (sde->descq_cnt / 8))
		return;
	_sdma_engine_progress_schedule(sde);
}

struct sdma_engine *sdma_select_engine_sc(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 sc5);

struct sdma_engine *sdma_select_engine_vl(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 vl);

struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
					    u32 selector, u8 vl);
ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
				size_t count);
int sdma_engine_get_vl(struct sdma_engine *sde);
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);
void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd,
				unsigned long cpuid);

#ifdef CONFIG_SDMA_VERBOSITY
void sdma_dumpstate(struct sdma_engine *);
#endif
static inline char *slashstrip(char *s)
{
	char *r = s;

	while (*s)
		if (*s++ == '/')
			r = s;
	return r;
}

u16 sdma_get_descq_cnt(void);

extern uint mod_num_sdma;

void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid);
#endif	/* _HFI1_SDMA_H */
