162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright(c) 2015, 2016 Intel Corporation.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include "hfi.h"
762306a36Sopenharmony_ci
/*
 * Additive distance between non-SOP and SOP space: the copy routines in
 * this file add it to a buffer address to reach the matching SOP=1
 * address, and subtract it again to drop back to non-SOP space.
 */
#define SOP_DISTANCE (TXE_PIO_SIZE / 2)
/*
 * Address bits that are non-zero when an address is not on a PIO block
 * boundary.  Presumes PIO_BLOCK_SIZE (defined outside this file) is a
 * power of two - TODO confirm.
 */
#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
/* number of QUADWORDs in a block */
#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci/**
1562306a36Sopenharmony_ci * pio_copy - copy data block to MMIO space
1662306a36Sopenharmony_ci * @dd: hfi1 dev data
1762306a36Sopenharmony_ci * @pbuf: a number of blocks allocated within a PIO send context
1862306a36Sopenharmony_ci * @pbc: PBC to send
1962306a36Sopenharmony_ci * @from: source, must be 8 byte aligned
2062306a36Sopenharmony_ci * @count: number of DWORD (32-bit) quantities to copy from source
2162306a36Sopenharmony_ci *
2262306a36Sopenharmony_ci * Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
2362306a36Sopenharmony_ci * Must always write full BLOCK_SIZE bytes blocks.  The first block must
2462306a36Sopenharmony_ci * be written to the corresponding SOP=1 address.
2562306a36Sopenharmony_ci *
2662306a36Sopenharmony_ci * Known:
2762306a36Sopenharmony_ci * o pbuf->start always starts on a block boundary
2862306a36Sopenharmony_ci * o pbuf can wrap only at a block boundary
2962306a36Sopenharmony_ci */
3062306a36Sopenharmony_civoid pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
3162306a36Sopenharmony_ci	      const void *from, size_t count)
3262306a36Sopenharmony_ci{
3362306a36Sopenharmony_ci	void __iomem *dest = pbuf->start + SOP_DISTANCE;
3462306a36Sopenharmony_ci	void __iomem *send = dest + PIO_BLOCK_SIZE;
3562306a36Sopenharmony_ci	void __iomem *dend;			/* 8-byte data end */
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci	/* write the PBC */
3862306a36Sopenharmony_ci	writeq(pbc, dest);
3962306a36Sopenharmony_ci	dest += sizeof(u64);
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	/* calculate where the QWORD data ends - in SOP=1 space */
4262306a36Sopenharmony_ci	dend = dest + ((count >> 1) * sizeof(u64));
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci	if (dend < send) {
4562306a36Sopenharmony_ci		/*
4662306a36Sopenharmony_ci		 * all QWORD data is within the SOP block, does *not*
4762306a36Sopenharmony_ci		 * reach the end of the SOP block
4862306a36Sopenharmony_ci		 */
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci		while (dest < dend) {
5162306a36Sopenharmony_ci			writeq(*(u64 *)from, dest);
5262306a36Sopenharmony_ci			from += sizeof(u64);
5362306a36Sopenharmony_ci			dest += sizeof(u64);
5462306a36Sopenharmony_ci		}
5562306a36Sopenharmony_ci		/*
5662306a36Sopenharmony_ci		 * No boundary checks are needed here:
5762306a36Sopenharmony_ci		 * 0. We're not on the SOP block boundary
5862306a36Sopenharmony_ci		 * 1. The possible DWORD dangle will still be within
5962306a36Sopenharmony_ci		 *    the SOP block
6062306a36Sopenharmony_ci		 * 2. We cannot wrap except on a block boundary.
6162306a36Sopenharmony_ci		 */
6262306a36Sopenharmony_ci	} else {
6362306a36Sopenharmony_ci		/* QWORD data extends _to_ or beyond the SOP block */
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci		/* write 8-byte SOP chunk data */
6662306a36Sopenharmony_ci		while (dest < send) {
6762306a36Sopenharmony_ci			writeq(*(u64 *)from, dest);
6862306a36Sopenharmony_ci			from += sizeof(u64);
6962306a36Sopenharmony_ci			dest += sizeof(u64);
7062306a36Sopenharmony_ci		}
7162306a36Sopenharmony_ci		/* drop out of the SOP range */
7262306a36Sopenharmony_ci		dest -= SOP_DISTANCE;
7362306a36Sopenharmony_ci		dend -= SOP_DISTANCE;
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci		/*
7662306a36Sopenharmony_ci		 * If the wrap comes before or matches the data end,
7762306a36Sopenharmony_ci		 * copy until until the wrap, then wrap.
7862306a36Sopenharmony_ci		 *
7962306a36Sopenharmony_ci		 * If the data ends at the end of the SOP above and
8062306a36Sopenharmony_ci		 * the buffer wraps, then pbuf->end == dend == dest
8162306a36Sopenharmony_ci		 * and nothing will get written, but we will wrap in
8262306a36Sopenharmony_ci		 * case there is a dangling DWORD.
8362306a36Sopenharmony_ci		 */
8462306a36Sopenharmony_ci		if (pbuf->end <= dend) {
8562306a36Sopenharmony_ci			while (dest < pbuf->end) {
8662306a36Sopenharmony_ci				writeq(*(u64 *)from, dest);
8762306a36Sopenharmony_ci				from += sizeof(u64);
8862306a36Sopenharmony_ci				dest += sizeof(u64);
8962306a36Sopenharmony_ci			}
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci			dest -= pbuf->sc->size;
9262306a36Sopenharmony_ci			dend -= pbuf->sc->size;
9362306a36Sopenharmony_ci		}
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci		/* write 8-byte non-SOP, non-wrap chunk data */
9662306a36Sopenharmony_ci		while (dest < dend) {
9762306a36Sopenharmony_ci			writeq(*(u64 *)from, dest);
9862306a36Sopenharmony_ci			from += sizeof(u64);
9962306a36Sopenharmony_ci			dest += sizeof(u64);
10062306a36Sopenharmony_ci		}
10162306a36Sopenharmony_ci	}
10262306a36Sopenharmony_ci	/* at this point we have wrapped if we are going to wrap */
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	/* write dangling u32, if any */
10562306a36Sopenharmony_ci	if (count & 1) {
10662306a36Sopenharmony_ci		union mix val;
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci		val.val64 = 0;
10962306a36Sopenharmony_ci		val.val32[0] = *(u32 *)from;
11062306a36Sopenharmony_ci		writeq(val.val64, dest);
11162306a36Sopenharmony_ci		dest += sizeof(u64);
11262306a36Sopenharmony_ci	}
11362306a36Sopenharmony_ci	/*
11462306a36Sopenharmony_ci	 * fill in rest of block, no need to check pbuf->end
11562306a36Sopenharmony_ci	 * as we only wrap on a block boundary
11662306a36Sopenharmony_ci	 */
11762306a36Sopenharmony_ci	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
11862306a36Sopenharmony_ci		writeq(0, dest);
11962306a36Sopenharmony_ci		dest += sizeof(u64);
12062306a36Sopenharmony_ci	}
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	/* finished with this buffer */
12362306a36Sopenharmony_ci	this_cpu_dec(*pbuf->sc->buffers_allocated);
12462306a36Sopenharmony_ci	preempt_enable();
12562306a36Sopenharmony_ci}
12662306a36Sopenharmony_ci
/*
 * Handle carry bytes using shifts and masks.
 *
 * NOTE: the value of the unused portion of carry is expected to always be
 * zero.
 */

/*
 * "zero" shift - bit shift used to zero out upper bytes.  Input is
 * the count of LSB bytes to preserve.
 *
 * NOTE: zshift(0) evaluates to 64.  Shifting a 64-bit value by 64 is
 * undefined in C, so x must be non-zero when the result is applied to a
 * u64, as merge_write8() does.
 */
#define zshift(x) (8 * (8 - (x)))

/*
 * "merge" shift - bit shift used to merge with carry bytes.  Input is
 * the LSB byte count to move beyond.
 */
#define mshift(x) (8 * (x))
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci/*
14662306a36Sopenharmony_ci * Jump copy - no-loop copy for < 8 bytes.
14762306a36Sopenharmony_ci */
14862306a36Sopenharmony_cistatic inline void jcopy(u8 *dest, const u8 *src, u32 n)
14962306a36Sopenharmony_ci{
15062306a36Sopenharmony_ci	switch (n) {
15162306a36Sopenharmony_ci	case 7:
15262306a36Sopenharmony_ci		*dest++ = *src++;
15362306a36Sopenharmony_ci		fallthrough;
15462306a36Sopenharmony_ci	case 6:
15562306a36Sopenharmony_ci		*dest++ = *src++;
15662306a36Sopenharmony_ci		fallthrough;
15762306a36Sopenharmony_ci	case 5:
15862306a36Sopenharmony_ci		*dest++ = *src++;
15962306a36Sopenharmony_ci		fallthrough;
16062306a36Sopenharmony_ci	case 4:
16162306a36Sopenharmony_ci		*dest++ = *src++;
16262306a36Sopenharmony_ci		fallthrough;
16362306a36Sopenharmony_ci	case 3:
16462306a36Sopenharmony_ci		*dest++ = *src++;
16562306a36Sopenharmony_ci		fallthrough;
16662306a36Sopenharmony_ci	case 2:
16762306a36Sopenharmony_ci		*dest++ = *src++;
16862306a36Sopenharmony_ci		fallthrough;
16962306a36Sopenharmony_ci	case 1:
17062306a36Sopenharmony_ci		*dest++ = *src++;
17162306a36Sopenharmony_ci	}
17262306a36Sopenharmony_ci}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci/*
17562306a36Sopenharmony_ci * Read nbytes from "from" and place them in the low bytes
17662306a36Sopenharmony_ci * of pbuf->carry.  Other bytes are left as-is.  Any previous
17762306a36Sopenharmony_ci * value in pbuf->carry is lost.
17862306a36Sopenharmony_ci *
17962306a36Sopenharmony_ci * NOTES:
18062306a36Sopenharmony_ci * o do not read from from if nbytes is zero
18162306a36Sopenharmony_ci * o from may _not_ be u64 aligned.
18262306a36Sopenharmony_ci */
18362306a36Sopenharmony_cistatic inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
18462306a36Sopenharmony_ci				  unsigned int nbytes)
18562306a36Sopenharmony_ci{
18662306a36Sopenharmony_ci	pbuf->carry.val64 = 0;
18762306a36Sopenharmony_ci	jcopy(&pbuf->carry.val8[0], from, nbytes);
18862306a36Sopenharmony_ci	pbuf->carry_bytes = nbytes;
18962306a36Sopenharmony_ci}
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci/*
19262306a36Sopenharmony_ci * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
19362306a36Sopenharmony_ci * It is expected that the extra read does not overfill carry.
19462306a36Sopenharmony_ci *
19562306a36Sopenharmony_ci * NOTES:
19662306a36Sopenharmony_ci * o from may _not_ be u64 aligned
19762306a36Sopenharmony_ci * o nbytes may span a QW boundary
19862306a36Sopenharmony_ci */
19962306a36Sopenharmony_cistatic inline void read_extra_bytes(struct pio_buf *pbuf,
20062306a36Sopenharmony_ci				    const void *from, unsigned int nbytes)
20162306a36Sopenharmony_ci{
20262306a36Sopenharmony_ci	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
20362306a36Sopenharmony_ci	pbuf->carry_bytes += nbytes;
20462306a36Sopenharmony_ci}
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci/*
20762306a36Sopenharmony_ci * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
20862306a36Sopenharmony_ci * Put the unused part of the next 8 bytes of src into the LSB bytes of
20962306a36Sopenharmony_ci * pbuf->carry with the upper bytes zeroed..
21062306a36Sopenharmony_ci *
21162306a36Sopenharmony_ci * NOTES:
21262306a36Sopenharmony_ci * o result must keep unused bytes zeroed
21362306a36Sopenharmony_ci * o src must be u64 aligned
21462306a36Sopenharmony_ci */
21562306a36Sopenharmony_cistatic inline void merge_write8(
21662306a36Sopenharmony_ci	struct pio_buf *pbuf,
21762306a36Sopenharmony_ci	void __iomem *dest,
21862306a36Sopenharmony_ci	const void *src)
21962306a36Sopenharmony_ci{
22062306a36Sopenharmony_ci	u64 new, temp;
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	new = *(u64 *)src;
22362306a36Sopenharmony_ci	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
22462306a36Sopenharmony_ci	writeq(temp, dest);
22562306a36Sopenharmony_ci	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
22662306a36Sopenharmony_ci}
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci/*
22962306a36Sopenharmony_ci * Write a quad word using all bytes of carry.
23062306a36Sopenharmony_ci */
23162306a36Sopenharmony_cistatic inline void carry8_write8(union mix carry, void __iomem *dest)
23262306a36Sopenharmony_ci{
23362306a36Sopenharmony_ci	writeq(carry.val64, dest);
23462306a36Sopenharmony_ci}
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci/*
23762306a36Sopenharmony_ci * Write a quad word using all the valid bytes of carry.  If carry
23862306a36Sopenharmony_ci * has zero valid bytes, nothing is written.
23962306a36Sopenharmony_ci * Returns 0 on nothing written, non-zero on quad word written.
24062306a36Sopenharmony_ci */
24162306a36Sopenharmony_cistatic inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
24262306a36Sopenharmony_ci{
24362306a36Sopenharmony_ci	if (pbuf->carry_bytes) {
24462306a36Sopenharmony_ci		/* unused bytes are always kept zeroed, so just write */
24562306a36Sopenharmony_ci		writeq(pbuf->carry.val64, dest);
24662306a36Sopenharmony_ci		return 1;
24762306a36Sopenharmony_ci	}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	return 0;
25062306a36Sopenharmony_ci}
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci/*
25362306a36Sopenharmony_ci * Segmented PIO Copy - start
25462306a36Sopenharmony_ci *
25562306a36Sopenharmony_ci * Start a PIO copy.
25662306a36Sopenharmony_ci *
25762306a36Sopenharmony_ci * @pbuf: destination buffer
25862306a36Sopenharmony_ci * @pbc: the PBC for the PIO buffer
25962306a36Sopenharmony_ci * @from: data source, QWORD aligned
26062306a36Sopenharmony_ci * @nbytes: bytes to copy
26162306a36Sopenharmony_ci */
26262306a36Sopenharmony_civoid seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
26362306a36Sopenharmony_ci			const void *from, size_t nbytes)
26462306a36Sopenharmony_ci{
26562306a36Sopenharmony_ci	void __iomem *dest = pbuf->start + SOP_DISTANCE;
26662306a36Sopenharmony_ci	void __iomem *send = dest + PIO_BLOCK_SIZE;
26762306a36Sopenharmony_ci	void __iomem *dend;			/* 8-byte data end */
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	writeq(pbc, dest);
27062306a36Sopenharmony_ci	dest += sizeof(u64);
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	/* calculate where the QWORD data ends - in SOP=1 space */
27362306a36Sopenharmony_ci	dend = dest + ((nbytes >> 3) * sizeof(u64));
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	if (dend < send) {
27662306a36Sopenharmony_ci		/*
27762306a36Sopenharmony_ci		 * all QWORD data is within the SOP block, does *not*
27862306a36Sopenharmony_ci		 * reach the end of the SOP block
27962306a36Sopenharmony_ci		 */
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci		while (dest < dend) {
28262306a36Sopenharmony_ci			writeq(*(u64 *)from, dest);
28362306a36Sopenharmony_ci			from += sizeof(u64);
28462306a36Sopenharmony_ci			dest += sizeof(u64);
28562306a36Sopenharmony_ci		}
28662306a36Sopenharmony_ci		/*
28762306a36Sopenharmony_ci		 * No boundary checks are needed here:
28862306a36Sopenharmony_ci		 * 0. We're not on the SOP block boundary
28962306a36Sopenharmony_ci		 * 1. The possible DWORD dangle will still be within
29062306a36Sopenharmony_ci		 *    the SOP block
29162306a36Sopenharmony_ci		 * 2. We cannot wrap except on a block boundary.
29262306a36Sopenharmony_ci		 */
29362306a36Sopenharmony_ci	} else {
29462306a36Sopenharmony_ci		/* QWORD data extends _to_ or beyond the SOP block */
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci		/* write 8-byte SOP chunk data */
29762306a36Sopenharmony_ci		while (dest < send) {
29862306a36Sopenharmony_ci			writeq(*(u64 *)from, dest);
29962306a36Sopenharmony_ci			from += sizeof(u64);
30062306a36Sopenharmony_ci			dest += sizeof(u64);
30162306a36Sopenharmony_ci		}
30262306a36Sopenharmony_ci		/* drop out of the SOP range */
30362306a36Sopenharmony_ci		dest -= SOP_DISTANCE;
30462306a36Sopenharmony_ci		dend -= SOP_DISTANCE;
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci		/*
30762306a36Sopenharmony_ci		 * If the wrap comes before or matches the data end,
30862306a36Sopenharmony_ci		 * copy until until the wrap, then wrap.
30962306a36Sopenharmony_ci		 *
31062306a36Sopenharmony_ci		 * If the data ends at the end of the SOP above and
31162306a36Sopenharmony_ci		 * the buffer wraps, then pbuf->end == dend == dest
31262306a36Sopenharmony_ci		 * and nothing will get written, but we will wrap in
31362306a36Sopenharmony_ci		 * case there is a dangling DWORD.
31462306a36Sopenharmony_ci		 */
31562306a36Sopenharmony_ci		if (pbuf->end <= dend) {
31662306a36Sopenharmony_ci			while (dest < pbuf->end) {
31762306a36Sopenharmony_ci				writeq(*(u64 *)from, dest);
31862306a36Sopenharmony_ci				from += sizeof(u64);
31962306a36Sopenharmony_ci				dest += sizeof(u64);
32062306a36Sopenharmony_ci			}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci			dest -= pbuf->sc->size;
32362306a36Sopenharmony_ci			dend -= pbuf->sc->size;
32462306a36Sopenharmony_ci		}
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci		/* write 8-byte non-SOP, non-wrap chunk data */
32762306a36Sopenharmony_ci		while (dest < dend) {
32862306a36Sopenharmony_ci			writeq(*(u64 *)from, dest);
32962306a36Sopenharmony_ci			from += sizeof(u64);
33062306a36Sopenharmony_ci			dest += sizeof(u64);
33162306a36Sopenharmony_ci		}
33262306a36Sopenharmony_ci	}
33362306a36Sopenharmony_ci	/* at this point we have wrapped if we are going to wrap */
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	/* ...but it doesn't matter as we're done writing */
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	/* save dangling bytes, if any */
33862306a36Sopenharmony_ci	read_low_bytes(pbuf, from, nbytes & 0x7);
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
34162306a36Sopenharmony_ci}
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci/*
34462306a36Sopenharmony_ci * Mid copy helper, "mixed case" - source is 64-bit aligned but carry
34562306a36Sopenharmony_ci * bytes are non-zero.
34662306a36Sopenharmony_ci *
34762306a36Sopenharmony_ci * Whole u64s must be written to the chip, so bytes must be manually merged.
34862306a36Sopenharmony_ci *
34962306a36Sopenharmony_ci * @pbuf: destination buffer
35062306a36Sopenharmony_ci * @from: data source, is QWORD aligned.
35162306a36Sopenharmony_ci * @nbytes: bytes to copy
35262306a36Sopenharmony_ci *
35362306a36Sopenharmony_ci * Must handle nbytes < 8.
35462306a36Sopenharmony_ci */
35562306a36Sopenharmony_cistatic void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
35662306a36Sopenharmony_ci{
35762306a36Sopenharmony_ci	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
35862306a36Sopenharmony_ci	void __iomem *dend;			/* 8-byte data end */
35962306a36Sopenharmony_ci	unsigned long qw_to_write = nbytes >> 3;
36062306a36Sopenharmony_ci	unsigned long bytes_left = nbytes & 0x7;
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	/* calculate 8-byte data end */
36362306a36Sopenharmony_ci	dend = dest + (qw_to_write * sizeof(u64));
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci	if (pbuf->qw_written < PIO_BLOCK_QWS) {
36662306a36Sopenharmony_ci		/*
36762306a36Sopenharmony_ci		 * Still within SOP block.  We don't need to check for
36862306a36Sopenharmony_ci		 * wrap because we are still in the first block and
36962306a36Sopenharmony_ci		 * can only wrap on block boundaries.
37062306a36Sopenharmony_ci		 */
37162306a36Sopenharmony_ci		void __iomem *send;		/* SOP end */
37262306a36Sopenharmony_ci		void __iomem *xend;
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci		/*
37562306a36Sopenharmony_ci		 * calculate the end of data or end of block, whichever
37662306a36Sopenharmony_ci		 * comes first
37762306a36Sopenharmony_ci		 */
37862306a36Sopenharmony_ci		send = pbuf->start + PIO_BLOCK_SIZE;
37962306a36Sopenharmony_ci		xend = min(send, dend);
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci		/* shift up to SOP=1 space */
38262306a36Sopenharmony_ci		dest += SOP_DISTANCE;
38362306a36Sopenharmony_ci		xend += SOP_DISTANCE;
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci		/* write 8-byte chunk data */
38662306a36Sopenharmony_ci		while (dest < xend) {
38762306a36Sopenharmony_ci			merge_write8(pbuf, dest, from);
38862306a36Sopenharmony_ci			from += sizeof(u64);
38962306a36Sopenharmony_ci			dest += sizeof(u64);
39062306a36Sopenharmony_ci		}
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci		/* shift down to SOP=0 space */
39362306a36Sopenharmony_ci		dest -= SOP_DISTANCE;
39462306a36Sopenharmony_ci	}
39562306a36Sopenharmony_ci	/*
39662306a36Sopenharmony_ci	 * At this point dest could be (either, both, or neither):
39762306a36Sopenharmony_ci	 * - at dend
39862306a36Sopenharmony_ci	 * - at the wrap
39962306a36Sopenharmony_ci	 */
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	/*
40262306a36Sopenharmony_ci	 * If the wrap comes before or matches the data end,
40362306a36Sopenharmony_ci	 * copy until until the wrap, then wrap.
40462306a36Sopenharmony_ci	 *
40562306a36Sopenharmony_ci	 * If dest is at the wrap, we will fall into the if,
40662306a36Sopenharmony_ci	 * not do the loop, when wrap.
40762306a36Sopenharmony_ci	 *
40862306a36Sopenharmony_ci	 * If the data ends at the end of the SOP above and
40962306a36Sopenharmony_ci	 * the buffer wraps, then pbuf->end == dend == dest
41062306a36Sopenharmony_ci	 * and nothing will get written.
41162306a36Sopenharmony_ci	 */
41262306a36Sopenharmony_ci	if (pbuf->end <= dend) {
41362306a36Sopenharmony_ci		while (dest < pbuf->end) {
41462306a36Sopenharmony_ci			merge_write8(pbuf, dest, from);
41562306a36Sopenharmony_ci			from += sizeof(u64);
41662306a36Sopenharmony_ci			dest += sizeof(u64);
41762306a36Sopenharmony_ci		}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ci		dest -= pbuf->sc->size;
42062306a36Sopenharmony_ci		dend -= pbuf->sc->size;
42162306a36Sopenharmony_ci	}
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci	/* write 8-byte non-SOP, non-wrap chunk data */
42462306a36Sopenharmony_ci	while (dest < dend) {
42562306a36Sopenharmony_ci		merge_write8(pbuf, dest, from);
42662306a36Sopenharmony_ci		from += sizeof(u64);
42762306a36Sopenharmony_ci		dest += sizeof(u64);
42862306a36Sopenharmony_ci	}
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci	pbuf->qw_written += qw_to_write;
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	/* handle carry and left-over bytes */
43362306a36Sopenharmony_ci	if (pbuf->carry_bytes + bytes_left >= 8) {
43462306a36Sopenharmony_ci		unsigned long nread;
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci		/* there is enough to fill another qw - fill carry */
43762306a36Sopenharmony_ci		nread = 8 - pbuf->carry_bytes;
43862306a36Sopenharmony_ci		read_extra_bytes(pbuf, from, nread);
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci		/*
44162306a36Sopenharmony_ci		 * One more write - but need to make sure dest is correct.
44262306a36Sopenharmony_ci		 * Check for wrap and the possibility the write
44362306a36Sopenharmony_ci		 * should be in SOP space.
44462306a36Sopenharmony_ci		 *
44562306a36Sopenharmony_ci		 * The two checks immediately below cannot both be true, hence
44662306a36Sopenharmony_ci		 * the else. If we have wrapped, we cannot still be within the
44762306a36Sopenharmony_ci		 * first block. Conversely, if we are still in the first block,
44862306a36Sopenharmony_ci		 * we cannot have wrapped. We do the wrap check first as that
44962306a36Sopenharmony_ci		 * is more likely.
45062306a36Sopenharmony_ci		 */
45162306a36Sopenharmony_ci		/* adjust if we have wrapped */
45262306a36Sopenharmony_ci		if (dest >= pbuf->end)
45362306a36Sopenharmony_ci			dest -= pbuf->sc->size;
45462306a36Sopenharmony_ci		/* jump to the SOP range if within the first block */
45562306a36Sopenharmony_ci		else if (pbuf->qw_written < PIO_BLOCK_QWS)
45662306a36Sopenharmony_ci			dest += SOP_DISTANCE;
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci		/* flush out full carry */
45962306a36Sopenharmony_ci		carry8_write8(pbuf->carry, dest);
46062306a36Sopenharmony_ci		pbuf->qw_written++;
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci		/* now adjust and read the rest of the bytes into carry */
46362306a36Sopenharmony_ci		bytes_left -= nread;
46462306a36Sopenharmony_ci		from += nread; /* from is now not aligned */
46562306a36Sopenharmony_ci		read_low_bytes(pbuf, from, bytes_left);
46662306a36Sopenharmony_ci	} else {
46762306a36Sopenharmony_ci		/* not enough to fill another qw, append the rest to carry */
46862306a36Sopenharmony_ci		read_extra_bytes(pbuf, from, bytes_left);
46962306a36Sopenharmony_ci	}
47062306a36Sopenharmony_ci}
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci/*
47362306a36Sopenharmony_ci * Mid copy helper, "straight case" - source pointer is 64-bit aligned
47462306a36Sopenharmony_ci * with no carry bytes.
47562306a36Sopenharmony_ci *
47662306a36Sopenharmony_ci * @pbuf: destination buffer
47762306a36Sopenharmony_ci * @from: data source, is QWORD aligned
47862306a36Sopenharmony_ci * @nbytes: bytes to copy
47962306a36Sopenharmony_ci *
48062306a36Sopenharmony_ci * Must handle nbytes < 8.
48162306a36Sopenharmony_ci */
48262306a36Sopenharmony_cistatic void mid_copy_straight(struct pio_buf *pbuf,
48362306a36Sopenharmony_ci			      const void *from, size_t nbytes)
48462306a36Sopenharmony_ci{
48562306a36Sopenharmony_ci	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
48662306a36Sopenharmony_ci	void __iomem *dend;			/* 8-byte data end */
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	/* calculate 8-byte data end */
48962306a36Sopenharmony_ci	dend = dest + ((nbytes >> 3) * sizeof(u64));
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci	if (pbuf->qw_written < PIO_BLOCK_QWS) {
49262306a36Sopenharmony_ci		/*
49362306a36Sopenharmony_ci		 * Still within SOP block.  We don't need to check for
49462306a36Sopenharmony_ci		 * wrap because we are still in the first block and
49562306a36Sopenharmony_ci		 * can only wrap on block boundaries.
49662306a36Sopenharmony_ci		 */
49762306a36Sopenharmony_ci		void __iomem *send;		/* SOP end */
49862306a36Sopenharmony_ci		void __iomem *xend;
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_ci		/*
50162306a36Sopenharmony_ci		 * calculate the end of data or end of block, whichever
50262306a36Sopenharmony_ci		 * comes first
50362306a36Sopenharmony_ci		 */
50462306a36Sopenharmony_ci		send = pbuf->start + PIO_BLOCK_SIZE;
50562306a36Sopenharmony_ci		xend = min(send, dend);
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci		/* shift up to SOP=1 space */
50862306a36Sopenharmony_ci		dest += SOP_DISTANCE;
50962306a36Sopenharmony_ci		xend += SOP_DISTANCE;
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci		/* write 8-byte chunk data */
51262306a36Sopenharmony_ci		while (dest < xend) {
51362306a36Sopenharmony_ci			writeq(*(u64 *)from, dest);
51462306a36Sopenharmony_ci			from += sizeof(u64);
51562306a36Sopenharmony_ci			dest += sizeof(u64);
51662306a36Sopenharmony_ci		}
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci		/* shift down to SOP=0 space */
51962306a36Sopenharmony_ci		dest -= SOP_DISTANCE;
52062306a36Sopenharmony_ci	}
52162306a36Sopenharmony_ci	/*
52262306a36Sopenharmony_ci	 * At this point dest could be (either, both, or neither):
52362306a36Sopenharmony_ci	 * - at dend
52462306a36Sopenharmony_ci	 * - at the wrap
52562306a36Sopenharmony_ci	 */
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	/*
52862306a36Sopenharmony_ci	 * If the wrap comes before or matches the data end,
52962306a36Sopenharmony_ci	 * copy until until the wrap, then wrap.
53062306a36Sopenharmony_ci	 *
53162306a36Sopenharmony_ci	 * If dest is at the wrap, we will fall into the if,
53262306a36Sopenharmony_ci	 * not do the loop, when wrap.
53362306a36Sopenharmony_ci	 *
53462306a36Sopenharmony_ci	 * If the data ends at the end of the SOP above and
53562306a36Sopenharmony_ci	 * the buffer wraps, then pbuf->end == dend == dest
53662306a36Sopenharmony_ci	 * and nothing will get written.
53762306a36Sopenharmony_ci	 */
53862306a36Sopenharmony_ci	if (pbuf->end <= dend) {
53962306a36Sopenharmony_ci		while (dest < pbuf->end) {
54062306a36Sopenharmony_ci			writeq(*(u64 *)from, dest);
54162306a36Sopenharmony_ci			from += sizeof(u64);
54262306a36Sopenharmony_ci			dest += sizeof(u64);
54362306a36Sopenharmony_ci		}
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci		dest -= pbuf->sc->size;
54662306a36Sopenharmony_ci		dend -= pbuf->sc->size;
54762306a36Sopenharmony_ci	}
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	/* write 8-byte non-SOP, non-wrap chunk data */
55062306a36Sopenharmony_ci	while (dest < dend) {
55162306a36Sopenharmony_ci		writeq(*(u64 *)from, dest);
55262306a36Sopenharmony_ci		from += sizeof(u64);
55362306a36Sopenharmony_ci		dest += sizeof(u64);
55462306a36Sopenharmony_ci	}
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	/* we know carry_bytes was zero on entry to this routine */
55762306a36Sopenharmony_ci	read_low_bytes(pbuf, from, nbytes & 0x7);
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci	pbuf->qw_written += nbytes >> 3;
56062306a36Sopenharmony_ci}
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci/*
56362306a36Sopenharmony_ci * Segmented PIO Copy - middle
56462306a36Sopenharmony_ci *
56562306a36Sopenharmony_ci * Must handle any aligned tail and any aligned source with any byte count.
56662306a36Sopenharmony_ci *
56762306a36Sopenharmony_ci * @pbuf: a number of blocks allocated within a PIO send context
56862306a36Sopenharmony_ci * @from: data source
56962306a36Sopenharmony_ci * @nbytes: number of bytes to copy
57062306a36Sopenharmony_ci */
57162306a36Sopenharmony_civoid seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
57262306a36Sopenharmony_ci{
57362306a36Sopenharmony_ci	unsigned long from_align = (unsigned long)from & 0x7;
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ci	if (pbuf->carry_bytes + nbytes < 8) {
57662306a36Sopenharmony_ci		/* not enough bytes to fill a QW */
57762306a36Sopenharmony_ci		read_extra_bytes(pbuf, from, nbytes);
57862306a36Sopenharmony_ci		return;
57962306a36Sopenharmony_ci	}
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	if (from_align) {
58262306a36Sopenharmony_ci		/* misaligned source pointer - align it */
58362306a36Sopenharmony_ci		unsigned long to_align;
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci		/* bytes to read to align "from" */
58662306a36Sopenharmony_ci		to_align = 8 - from_align;
58762306a36Sopenharmony_ci
58862306a36Sopenharmony_ci		/*
58962306a36Sopenharmony_ci		 * In the advance-to-alignment logic below, we do not need
59062306a36Sopenharmony_ci		 * to check if we are using more than nbytes.  This is because
59162306a36Sopenharmony_ci		 * if we are here, we already know that carry+nbytes will
59262306a36Sopenharmony_ci		 * fill at least one QW.
59362306a36Sopenharmony_ci		 */
59462306a36Sopenharmony_ci		if (pbuf->carry_bytes + to_align < 8) {
59562306a36Sopenharmony_ci			/* not enough align bytes to fill a QW */
59662306a36Sopenharmony_ci			read_extra_bytes(pbuf, from, to_align);
59762306a36Sopenharmony_ci			from += to_align;
59862306a36Sopenharmony_ci			nbytes -= to_align;
59962306a36Sopenharmony_ci		} else {
60062306a36Sopenharmony_ci			/* bytes to fill carry */
60162306a36Sopenharmony_ci			unsigned long to_fill = 8 - pbuf->carry_bytes;
60262306a36Sopenharmony_ci			/* bytes left over to be read */
60362306a36Sopenharmony_ci			unsigned long extra = to_align - to_fill;
60462306a36Sopenharmony_ci			void __iomem *dest;
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci			/* fill carry... */
60762306a36Sopenharmony_ci			read_extra_bytes(pbuf, from, to_fill);
60862306a36Sopenharmony_ci			from += to_fill;
60962306a36Sopenharmony_ci			nbytes -= to_fill;
61062306a36Sopenharmony_ci			/* may not be enough valid bytes left to align */
61162306a36Sopenharmony_ci			if (extra > nbytes)
61262306a36Sopenharmony_ci				extra = nbytes;
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ci			/* ...now write carry */
61562306a36Sopenharmony_ci			dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci			/*
61862306a36Sopenharmony_ci			 * The two checks immediately below cannot both be
61962306a36Sopenharmony_ci			 * true, hence the else.  If we have wrapped, we
62062306a36Sopenharmony_ci			 * cannot still be within the first block.
62162306a36Sopenharmony_ci			 * Conversely, if we are still in the first block, we
62262306a36Sopenharmony_ci			 * cannot have wrapped.  We do the wrap check first
62362306a36Sopenharmony_ci			 * as that is more likely.
62462306a36Sopenharmony_ci			 */
62562306a36Sopenharmony_ci			/* adjust if we've wrapped */
62662306a36Sopenharmony_ci			if (dest >= pbuf->end)
62762306a36Sopenharmony_ci				dest -= pbuf->sc->size;
62862306a36Sopenharmony_ci			/* jump to SOP range if within the first block */
62962306a36Sopenharmony_ci			else if (pbuf->qw_written < PIO_BLOCK_QWS)
63062306a36Sopenharmony_ci				dest += SOP_DISTANCE;
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci			carry8_write8(pbuf->carry, dest);
63362306a36Sopenharmony_ci			pbuf->qw_written++;
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci			/* read any extra bytes to do final alignment */
63662306a36Sopenharmony_ci			/* this will overwrite anything in pbuf->carry */
63762306a36Sopenharmony_ci			read_low_bytes(pbuf, from, extra);
63862306a36Sopenharmony_ci			from += extra;
63962306a36Sopenharmony_ci			nbytes -= extra;
64062306a36Sopenharmony_ci			/*
64162306a36Sopenharmony_ci			 * If no bytes are left, return early - we are done.
64262306a36Sopenharmony_ci			 * NOTE: This short-circuit is *required* because
64362306a36Sopenharmony_ci			 * "extra" may have been reduced in size and "from"
64462306a36Sopenharmony_ci			 * is not aligned, as required when leaving this
64562306a36Sopenharmony_ci			 * if block.
64662306a36Sopenharmony_ci			 */
64762306a36Sopenharmony_ci			if (nbytes == 0)
64862306a36Sopenharmony_ci				return;
64962306a36Sopenharmony_ci		}
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci		/* at this point, from is QW aligned */
65262306a36Sopenharmony_ci	}
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci	if (pbuf->carry_bytes)
65562306a36Sopenharmony_ci		mid_copy_mix(pbuf, from, nbytes);
65662306a36Sopenharmony_ci	else
65762306a36Sopenharmony_ci		mid_copy_straight(pbuf, from, nbytes);
65862306a36Sopenharmony_ci}
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci/*
66162306a36Sopenharmony_ci * Segmented PIO Copy - end
66262306a36Sopenharmony_ci *
66362306a36Sopenharmony_ci * Write any remainder (in pbuf->carry) and finish writing the whole block.
66462306a36Sopenharmony_ci *
66562306a36Sopenharmony_ci * @pbuf: a number of blocks allocated within a PIO send context
66662306a36Sopenharmony_ci */
66762306a36Sopenharmony_civoid seg_pio_copy_end(struct pio_buf *pbuf)
66862306a36Sopenharmony_ci{
66962306a36Sopenharmony_ci	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci	/*
67262306a36Sopenharmony_ci	 * The two checks immediately below cannot both be true, hence the
67362306a36Sopenharmony_ci	 * else.  If we have wrapped, we cannot still be within the first
67462306a36Sopenharmony_ci	 * block.  Conversely, if we are still in the first block, we
67562306a36Sopenharmony_ci	 * cannot have wrapped.  We do the wrap check first as that is
67662306a36Sopenharmony_ci	 * more likely.
67762306a36Sopenharmony_ci	 */
67862306a36Sopenharmony_ci	/* adjust if we have wrapped */
67962306a36Sopenharmony_ci	if (dest >= pbuf->end)
68062306a36Sopenharmony_ci		dest -= pbuf->sc->size;
68162306a36Sopenharmony_ci	/* jump to the SOP range if within the first block */
68262306a36Sopenharmony_ci	else if (pbuf->qw_written < PIO_BLOCK_QWS)
68362306a36Sopenharmony_ci		dest += SOP_DISTANCE;
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci	/* write final bytes, if any */
68662306a36Sopenharmony_ci	if (carry_write8(pbuf, dest)) {
68762306a36Sopenharmony_ci		dest += sizeof(u64);
68862306a36Sopenharmony_ci		/*
68962306a36Sopenharmony_ci		 * NOTE: We do not need to recalculate whether dest needs
69062306a36Sopenharmony_ci		 * SOP_DISTANCE or not.
69162306a36Sopenharmony_ci		 *
69262306a36Sopenharmony_ci		 * If we are in the first block and the dangle write
69362306a36Sopenharmony_ci		 * keeps us in the same block, dest will need
69462306a36Sopenharmony_ci		 * to retain SOP_DISTANCE in the loop below.
69562306a36Sopenharmony_ci		 *
69662306a36Sopenharmony_ci		 * If we are in the first block and the dangle write pushes
69762306a36Sopenharmony_ci		 * us to the next block, then loop below will not run
69862306a36Sopenharmony_ci		 * and dest is not used.  Hence we do not need to update
69962306a36Sopenharmony_ci		 * it.
70062306a36Sopenharmony_ci		 *
70162306a36Sopenharmony_ci		 * If we are past the first block, then SOP_DISTANCE
70262306a36Sopenharmony_ci		 * was never added, so there is nothing to do.
70362306a36Sopenharmony_ci		 */
70462306a36Sopenharmony_ci	}
70562306a36Sopenharmony_ci
70662306a36Sopenharmony_ci	/* fill in rest of block */
70762306a36Sopenharmony_ci	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
70862306a36Sopenharmony_ci		writeq(0, dest);
70962306a36Sopenharmony_ci		dest += sizeof(u64);
71062306a36Sopenharmony_ci	}
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_ci	/* finished with this buffer */
71362306a36Sopenharmony_ci	this_cpu_dec(*pbuf->sc->buffers_allocated);
71462306a36Sopenharmony_ci	preempt_enable();
71562306a36Sopenharmony_ci}
716