162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * This file is subject to the terms and conditions of the GNU General Public
362306a36Sopenharmony_ci * License.  See the file "COPYING" in the main directory of this archive
462306a36Sopenharmony_ci * for more details.
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Unified implementation of memcpy, memmove and the __copy_user backend.
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
962306a36Sopenharmony_ci * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
1062306a36Sopenharmony_ci * Copyright (C) 2002 Broadcom, Inc.
1162306a36Sopenharmony_ci *   memcpy/copy_user author: Mark Vandevoorde
1262306a36Sopenharmony_ci *
1362306a36Sopenharmony_ci * Mnemonic names for arguments to memcpy/__copy_user
1462306a36Sopenharmony_ci */
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include <linux/export.h>
1762306a36Sopenharmony_ci#include <asm/asm.h>
1862306a36Sopenharmony_ci#include <asm/asm-offsets.h>
1962306a36Sopenharmony_ci#include <asm/regdef.h>
2062306a36Sopenharmony_ci
/*
 * Symbolic names for the three argument registers, matching the
 * "a0=dst a1=src a2=len" annotation on the memcpy entry point.
 */
2162306a36Sopenharmony_ci#define dst a0
2262306a36Sopenharmony_ci#define src a1
2362306a36Sopenharmony_ci#define len a2
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci/*
2662306a36Sopenharmony_ci * Spec
2762306a36Sopenharmony_ci *
2862306a36Sopenharmony_ci * memcpy copies len bytes from src to dst and sets v0 to dst.
2962306a36Sopenharmony_ci * It assumes that
3062306a36Sopenharmony_ci *   - src and dst don't overlap
3162306a36Sopenharmony_ci *   - src is readable
3262306a36Sopenharmony_ci *   - dst is writable
3362306a36Sopenharmony_ci * memcpy uses the standard calling convention
3462306a36Sopenharmony_ci *
3562306a36Sopenharmony_ci * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
3662306a36Sopenharmony_ci * the number of uncopied bytes due to an exception caused by a read or write.
3762306a36Sopenharmony_ci * __copy_user assumes that src and dst don't overlap, and that the call is
3862306a36Sopenharmony_ci * implementing one of the following:
3962306a36Sopenharmony_ci *   copy_to_user
4062306a36Sopenharmony_ci *     - src is readable  (no exceptions when reading src)
4162306a36Sopenharmony_ci *   copy_from_user
4262306a36Sopenharmony_ci *     - dst is writable  (no exceptions when writing dst)
4362306a36Sopenharmony_ci * __copy_user uses a non-standard calling convention; see
4462306a36Sopenharmony_ci * arch/mips/include/asm/uaccess.h
4562306a36Sopenharmony_ci *
4662306a36Sopenharmony_ci * When an exception happens on a load, the handler must
4762306a36Sopenharmony_ci * ensure that all of the destination buffer is overwritten to prevent
4862306a36Sopenharmony_ci * leaking information to user mode programs.
4962306a36Sopenharmony_ci */
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci/*
5262306a36Sopenharmony_ci * Implementation
5362306a36Sopenharmony_ci */
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci/*
5662306a36Sopenharmony_ci * The exception handler for loads requires that:
5762306a36Sopenharmony_ci *  1- AT contain the address of the byte just past the end of the source
5862306a36Sopenharmony_ci *     of the copy,
5962306a36Sopenharmony_ci *  2- src_entry <= src < AT, and
6062306a36Sopenharmony_ci *  3- (dst - src) == (dst_entry - src_entry),
6162306a36Sopenharmony_ci * The _entry suffix denotes values when __copy_user was called.
6262306a36Sopenharmony_ci *
6362306a36Sopenharmony_ci * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
6462306a36Sopenharmony_ci * (2) is met by incrementing src by the number of bytes copied
6562306a36Sopenharmony_ci * (3) is met by not doing loads between a pair of increments of dst and src
6662306a36Sopenharmony_ci *
6762306a36Sopenharmony_ci * The exception handlers for stores adjust len (if necessary) and return.
6862306a36Sopenharmony_ci * These handlers do not need to overwrite any data.
6962306a36Sopenharmony_ci *
7062306a36Sopenharmony_ci * For __rmemcpy and memmove an exception is always a kernel bug, therefore
7162306a36Sopenharmony_ci * they're not protected.
7262306a36Sopenharmony_ci */
7362306a36Sopenharmony_ci
/*
 * EXC(inst_reg, addr, handler)
 *
 * Emits the instruction "inst_reg, addr" at local label 9 and adds the pair
 * (address of that instruction, handler) to the __ex_table section, then
 * switches back to the previous section.  Because the macro arguments are
 * split on commas, a use such as EXC(LOAD t0, UNIT(0)(src), l_exc) passes
 * "LOAD t0" as inst_reg and "UNIT(0)(src)" as addr.
 *
 * NOTE(review): presumably the kernel fault fixup code consults __ex_table to
 * resume at "handler" when the instruction faults -- confirm against the
 * exception-table lookup code.
 */
7462306a36Sopenharmony_ci#define EXC(inst_reg,addr,handler)		\
7562306a36Sopenharmony_ci9:	inst_reg, addr;				\
7662306a36Sopenharmony_ci	.section __ex_table,"a";		\
7762306a36Sopenharmony_ci	PTR_WD	9b, handler;			\
7862306a36Sopenharmony_ci	.previous
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci/*
8162306a36Sopenharmony_ci * Only on the 64-bit kernel can we make use of 64-bit registers.
 *
 * The aliases below select the doubleword instruction variants
 * unconditionally, so one copy unit is NBYTES = 8 bytes and LOG_NBYTES is
 * its base-2 logarithm.  Several of them (STOREL, STORER, SRA, SLL, SLLV,
 * SRLV) are not referenced by the code visible in this file.
8262306a36Sopenharmony_ci */
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci#define LOAD   ld
8562306a36Sopenharmony_ci#define LOADL  ldl
8662306a36Sopenharmony_ci#define LOADR  ldr
8762306a36Sopenharmony_ci#define STOREL sdl
8862306a36Sopenharmony_ci#define STORER sdr
8962306a36Sopenharmony_ci#define STORE  sd
9062306a36Sopenharmony_ci#define ADD    daddu
9162306a36Sopenharmony_ci#define SUB    dsubu
9262306a36Sopenharmony_ci#define SRL    dsrl
9362306a36Sopenharmony_ci#define SRA    dsra
9462306a36Sopenharmony_ci#define SLL    dsll
9562306a36Sopenharmony_ci#define SLLV   dsllv
9662306a36Sopenharmony_ci#define SRLV   dsrlv
9762306a36Sopenharmony_ci#define NBYTES 8
9862306a36Sopenharmony_ci#define LOG_NBYTES 3
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci/*
10162306a36Sopenharmony_ci * As we are sharing a code base with the mips32 tree (which uses the o32 ABI
10262306a36Sopenharmony_ci * register definitions), we need to redefine the register definitions from
10362306a36Sopenharmony_ci * the n64 ABI register naming to the o32 ABI register naming.
 *
 * After this block t0..t7 name $8..$15.  Only t0-t3 are #undef'd first
 * (presumably the included register definitions already provide those four
 * names -- TODO confirm against <asm/regdef.h>).
10462306a36Sopenharmony_ci */
10562306a36Sopenharmony_ci#undef t0
10662306a36Sopenharmony_ci#undef t1
10762306a36Sopenharmony_ci#undef t2
10862306a36Sopenharmony_ci#undef t3
10962306a36Sopenharmony_ci#define t0	$8
11062306a36Sopenharmony_ci#define t1	$9
11162306a36Sopenharmony_ci#define t2	$10
11262306a36Sopenharmony_ci#define t3	$11
11362306a36Sopenharmony_ci#define t4	$12
11462306a36Sopenharmony_ci#define t5	$13
11562306a36Sopenharmony_ci#define t6	$14
11662306a36Sopenharmony_ci#define t7	$15
11762306a36Sopenharmony_ci
/*
 * Helpers for accessing a unit through a misaligned pointer.  The copy code
 * reads such a unit with an LDFIRST at FIRST(n) followed by an LDREST at
 * REST(n); which of the left/right instructions plays which role depends on
 * the configured endianness:
 *   little-endian: LDFIRST = LOADR, LDREST = LOADL
 *   big-endian:    LDFIRST = LOADL, LDREST = LOADR
 * STFIRST, STREST and SHIFT_DISCARD are selected the same way but are not
 * referenced by the code visible in this file.
 */
11862306a36Sopenharmony_ci#ifdef CONFIG_CPU_LITTLE_ENDIAN
11962306a36Sopenharmony_ci#define LDFIRST LOADR
12062306a36Sopenharmony_ci#define LDREST	LOADL
12162306a36Sopenharmony_ci#define STFIRST STORER
12262306a36Sopenharmony_ci#define STREST	STOREL
12362306a36Sopenharmony_ci#define SHIFT_DISCARD SLLV
12462306a36Sopenharmony_ci#else
12562306a36Sopenharmony_ci#define LDFIRST LOADL
12662306a36Sopenharmony_ci#define LDREST	LOADR
12762306a36Sopenharmony_ci#define STFIRST STOREL
12862306a36Sopenharmony_ci#define STREST	STORER
12962306a36Sopenharmony_ci#define SHIFT_DISCARD SRLV
13062306a36Sopenharmony_ci#endif
13162306a36Sopenharmony_ci
/*
 * Byte offsets of copy unit number "unit" relative to a base pointer:
 *   FIRST(unit) - offset of the unit's first byte (unit * NBYTES)
 *   REST(unit)  - offset of the unit's last byte  (FIRST(unit) + NBYTES - 1)
 *   UNIT(unit)  - same as FIRST(unit); used with the aligned LOAD/STORE
 * ADDRMASK selects the low address bits; the entry code tests
 * "src & ADDRMASK" to detect a source that is not NBYTES-aligned.
 */
13262306a36Sopenharmony_ci#define FIRST(unit) ((unit)*NBYTES)
13362306a36Sopenharmony_ci#define REST(unit)  (FIRST(unit)+NBYTES-1)
13462306a36Sopenharmony_ci#define UNIT(unit)  FIRST(unit)
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci#define ADDRMASK (NBYTES-1)
13762306a36Sopenharmony_ci
/*
 * Assembler state for the rest of the file:
 *   noreorder - the assembler does not reorder instructions or fill branch
 *               delay slots; the code below places every delay-slot
 *               instruction by hand (marked with one extra leading space).
 *   noat      - the assembler must not use AT as an implicit temporary; AT
 *               carries the end-of-source address needed by the load
 *               exception handler (l_exc).
 */
13862306a36Sopenharmony_ci	.text
13962306a36Sopenharmony_ci	.set	noreorder
14062306a36Sopenharmony_ci	.set	noat
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci/*
14362306a36Sopenharmony_ci * A combined memcpy/__copy_user
14462306a36Sopenharmony_ci * __copy_user sets len to 0 for success; else to an upper bound of
14562306a36Sopenharmony_ci * the number of uncopied bytes.
14662306a36Sopenharmony_ci * memcpy sets v0 to dst.
14762306a36Sopenharmony_ci */
14862306a36Sopenharmony_ci	.align	5
14962306a36Sopenharmony_ciLEAF(memcpy)					/* a0=dst a1=src a2=len */
15062306a36Sopenharmony_ciEXPORT_SYMBOL(memcpy)
15162306a36Sopenharmony_ci	move	v0, dst				/* return value */
	/*
	 * __memcpy, __raw_copy_from_user and __raw_copy_to_user all label the
	 * same entry point below; only the memcpy entry sets v0 first.
	 */
15262306a36Sopenharmony_ci__memcpy:
15362306a36Sopenharmony_ciFEXPORT(__raw_copy_from_user)
15462306a36Sopenharmony_ciEXPORT_SYMBOL(__raw_copy_from_user)
15562306a36Sopenharmony_ciFEXPORT(__raw_copy_to_user)
15662306a36Sopenharmony_ciEXPORT_SYMBOL(__raw_copy_to_user)
15762306a36Sopenharmony_ci	/*
15862306a36Sopenharmony_ci	 * Note: dst & src may be unaligned, len may be 0
15962306a36Sopenharmony_ci	 * Temps
16062306a36Sopenharmony_ci	 */
16162306a36Sopenharmony_ci	#
16262306a36Sopenharmony_ci	# Octeon doesn't care if the destination is unaligned. The hardware
16362306a36Sopenharmony_ci	# can fix it faster than we can special case the assembly.
16462306a36Sopenharmony_ci	#
	/*
	 * Dispatch on len and on the alignment of src.  Each test is computed
	 * in the delay slot of the previous branch, so t0 always holds the
	 * condition for the branch that follows it.
	 */
16562306a36Sopenharmony_ci	pref	0, 0(src)
16662306a36Sopenharmony_ci	sltu	t0, len, NBYTES		# Check if < 1 word
16762306a36Sopenharmony_ci	bnez	t0, copy_bytes_checklen
16862306a36Sopenharmony_ci	 and	t0, src, ADDRMASK	# Check if src unaligned
16962306a36Sopenharmony_ci	bnez	t0, src_unaligned
17062306a36Sopenharmony_ci	 sltu	t0, len, 4*NBYTES	# Check if < 4 words
17162306a36Sopenharmony_ci	bnez	t0, less_than_4units
17262306a36Sopenharmony_ci	 sltu	t0, len, 8*NBYTES	# Check if < 8 words
17362306a36Sopenharmony_ci	bnez	t0, less_than_8units
17462306a36Sopenharmony_ci	 sltu	t0, len, 16*NBYTES	# Check if < 16 words
17562306a36Sopenharmony_ci	bnez	t0, cleanup_both_aligned
17662306a36Sopenharmony_ci	 sltu	t0, len, 128+1		# Check if len < 129
17762306a36Sopenharmony_ci	bnez	t0, 1f			# Skip prefetch if len is too short
17862306a36Sopenharmony_ci	 sltu	t0, len, 256+1		# Check if len < 257
17962306a36Sopenharmony_ci	bnez	t0, 1f			# Skip prefetch if len is too short
18062306a36Sopenharmony_ci	 pref	0, 128(src)		# We must not prefetch invalid addresses
18162306a36Sopenharmony_ci	#
18262306a36Sopenharmony_ci	# This is where we loop while more than 256 bytes are left
18362306a36Sopenharmony_ci2:	pref	0, 256(src)		# We must not prefetch invalid addresses
18462306a36Sopenharmony_ci	#
18562306a36Sopenharmony_ci	# This is where we loop if we can't prefetch anymore
18662306a36Sopenharmony_ci1:
	/*
	 * Main loop: 16 units (16*NBYTES bytes) per iteration, moved as groups
	 * of four loads followed by four stores.  len is decremented once, up
	 * front, so a faulting store uses the s_exc_p<n>u handler that adds
	 * back the n units not yet stored at that point.  src and dst are
	 * advanced half way through; the second half therefore uses negative
	 * UNIT() offsets, and its load faults go through l_exc_copy_rewind16,
	 * which undoes the advance before joining l_exc_copy.
	 */
18762306a36Sopenharmony_ciEXC(	LOAD	t0, UNIT(0)(src),	l_exc)
18862306a36Sopenharmony_ciEXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
18962306a36Sopenharmony_ciEXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
19062306a36Sopenharmony_ciEXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
19162306a36Sopenharmony_ci	SUB	len, len, 16*NBYTES
19262306a36Sopenharmony_ciEXC(	STORE	t0, UNIT(0)(dst),	s_exc_p16u)
19362306a36Sopenharmony_ciEXC(	STORE	t1, UNIT(1)(dst),	s_exc_p15u)
19462306a36Sopenharmony_ciEXC(	STORE	t2, UNIT(2)(dst),	s_exc_p14u)
19562306a36Sopenharmony_ciEXC(	STORE	t3, UNIT(3)(dst),	s_exc_p13u)
19662306a36Sopenharmony_ciEXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
19762306a36Sopenharmony_ciEXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
19862306a36Sopenharmony_ciEXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
19962306a36Sopenharmony_ciEXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
20062306a36Sopenharmony_ciEXC(	STORE	t0, UNIT(4)(dst),	s_exc_p12u)
20162306a36Sopenharmony_ciEXC(	STORE	t1, UNIT(5)(dst),	s_exc_p11u)
20262306a36Sopenharmony_ciEXC(	STORE	t2, UNIT(6)(dst),	s_exc_p10u)
20362306a36Sopenharmony_ci	ADD	src, src, 16*NBYTES
20462306a36Sopenharmony_ciEXC(	STORE	t3, UNIT(7)(dst),	s_exc_p9u)
20562306a36Sopenharmony_ci	ADD	dst, dst, 16*NBYTES
20662306a36Sopenharmony_ciEXC(	LOAD	t0, UNIT(-8)(src),	l_exc_copy_rewind16)
20762306a36Sopenharmony_ciEXC(	LOAD	t1, UNIT(-7)(src),	l_exc_copy_rewind16)
20862306a36Sopenharmony_ciEXC(	LOAD	t2, UNIT(-6)(src),	l_exc_copy_rewind16)
20962306a36Sopenharmony_ciEXC(	LOAD	t3, UNIT(-5)(src),	l_exc_copy_rewind16)
21062306a36Sopenharmony_ciEXC(	STORE	t0, UNIT(-8)(dst),	s_exc_p8u)
21162306a36Sopenharmony_ciEXC(	STORE	t1, UNIT(-7)(dst),	s_exc_p7u)
21262306a36Sopenharmony_ciEXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
21362306a36Sopenharmony_ciEXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
21462306a36Sopenharmony_ciEXC(	LOAD	t0, UNIT(-4)(src),	l_exc_copy_rewind16)
21562306a36Sopenharmony_ciEXC(	LOAD	t1, UNIT(-3)(src),	l_exc_copy_rewind16)
21662306a36Sopenharmony_ciEXC(	LOAD	t2, UNIT(-2)(src),	l_exc_copy_rewind16)
21762306a36Sopenharmony_ciEXC(	LOAD	t3, UNIT(-1)(src),	l_exc_copy_rewind16)
21862306a36Sopenharmony_ciEXC(	STORE	t0, UNIT(-4)(dst),	s_exc_p4u)
21962306a36Sopenharmony_ciEXC(	STORE	t1, UNIT(-3)(dst),	s_exc_p3u)
22062306a36Sopenharmony_ciEXC(	STORE	t2, UNIT(-2)(dst),	s_exc_p2u)
22162306a36Sopenharmony_ciEXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
22262306a36Sopenharmony_ci	sltu	t0, len, 256+1		# See if we can prefetch more
22362306a36Sopenharmony_ci	beqz	t0, 2b
22462306a36Sopenharmony_ci	 sltu	t0, len, 128		# See if we can loop one more time
22562306a36Sopenharmony_ci	beqz	t0, 1b
22662306a36Sopenharmony_ci	 nop
22762306a36Sopenharmony_ci	#
22862306a36Sopenharmony_ci	# Jump here if there are less than 16*NBYTES left.
22962306a36Sopenharmony_ci	#
23062306a36Sopenharmony_cicleanup_both_aligned:
23162306a36Sopenharmony_ci	beqz	len, done
23262306a36Sopenharmony_ci	 sltu	t0, len, 8*NBYTES
23362306a36Sopenharmony_ci	bnez	t0, less_than_8units
23462306a36Sopenharmony_ci	 nop
23562306a36Sopenharmony_ciEXC(	LOAD	t0, UNIT(0)(src),	l_exc)
23662306a36Sopenharmony_ciEXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
23762306a36Sopenharmony_ciEXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
23862306a36Sopenharmony_ciEXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
23962306a36Sopenharmony_ci	SUB	len, len, 8*NBYTES
24062306a36Sopenharmony_ciEXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
24162306a36Sopenharmony_ciEXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
24262306a36Sopenharmony_ciEXC(	STORE	t2, UNIT(2)(dst),	s_exc_p6u)
24362306a36Sopenharmony_ciEXC(	STORE	t3, UNIT(3)(dst),	s_exc_p5u)
24462306a36Sopenharmony_ciEXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
24562306a36Sopenharmony_ciEXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
24662306a36Sopenharmony_ciEXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
24762306a36Sopenharmony_ciEXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
24862306a36Sopenharmony_ciEXC(	STORE	t0, UNIT(4)(dst),	s_exc_p4u)
24962306a36Sopenharmony_ciEXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
25062306a36Sopenharmony_ciEXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
25162306a36Sopenharmony_ciEXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
25262306a36Sopenharmony_ci	ADD	src, src, 8*NBYTES
25362306a36Sopenharmony_ci	beqz	len, done
25462306a36Sopenharmony_ci	 ADD	dst, dst, 8*NBYTES
25562306a36Sopenharmony_ci	#
25662306a36Sopenharmony_ci	# Jump here if there are less than 8*NBYTES left.
25762306a36Sopenharmony_ci	#
25862306a36Sopenharmony_ciless_than_8units:
25962306a36Sopenharmony_ci	sltu	t0, len, 4*NBYTES
26062306a36Sopenharmony_ci	bnez	t0, less_than_4units
26162306a36Sopenharmony_ci	 nop
26262306a36Sopenharmony_ciEXC(	LOAD	t0, UNIT(0)(src),	l_exc)
26362306a36Sopenharmony_ciEXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
26462306a36Sopenharmony_ciEXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
26562306a36Sopenharmony_ciEXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
26662306a36Sopenharmony_ci	SUB	len, len, 4*NBYTES
26762306a36Sopenharmony_ciEXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
26862306a36Sopenharmony_ciEXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
26962306a36Sopenharmony_ciEXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
27062306a36Sopenharmony_ciEXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
27162306a36Sopenharmony_ci	ADD	src, src, 4*NBYTES
27262306a36Sopenharmony_ci	beqz	len, done
27362306a36Sopenharmony_ci	 ADD	dst, dst, 4*NBYTES
27462306a36Sopenharmony_ci	#
27562306a36Sopenharmony_ci	# Jump here if there are less than 4*NBYTES left. This means
27662306a36Sopenharmony_ci	# we may need to copy up to 3 NBYTES words.
27762306a36Sopenharmony_ci	#
27862306a36Sopenharmony_ciless_than_4units:
27962306a36Sopenharmony_ci	sltu	t0, len, 1*NBYTES
28062306a36Sopenharmony_ci	bnez	t0, copy_bytes_checklen
28162306a36Sopenharmony_ci	 nop
28262306a36Sopenharmony_ci	#
28362306a36Sopenharmony_ci	# 1) Copy NBYTES, then check length again
28462306a36Sopenharmony_ci	#
28562306a36Sopenharmony_ciEXC(	LOAD	t0, 0(src),		l_exc)
28662306a36Sopenharmony_ci	SUB	len, len, NBYTES
28762306a36Sopenharmony_ci	sltu	t1, len, 8
28862306a36Sopenharmony_ciEXC(	STORE	t0, 0(dst),		s_exc_p1u)
28962306a36Sopenharmony_ci	ADD	src, src, NBYTES
29062306a36Sopenharmony_ci	bnez	t1, copy_bytes_checklen
29162306a36Sopenharmony_ci	 ADD	dst, dst, NBYTES
29262306a36Sopenharmony_ci	#
29362306a36Sopenharmony_ci	# 2) Copy NBYTES, then check length again
29462306a36Sopenharmony_ci	#
29562306a36Sopenharmony_ciEXC(	LOAD	t0, 0(src),		l_exc)
29662306a36Sopenharmony_ci	SUB	len, len, NBYTES
29762306a36Sopenharmony_ci	sltu	t1, len, 8
29862306a36Sopenharmony_ciEXC(	STORE	t0, 0(dst),		s_exc_p1u)
29962306a36Sopenharmony_ci	ADD	src, src, NBYTES
30062306a36Sopenharmony_ci	bnez	t1, copy_bytes_checklen
30162306a36Sopenharmony_ci	 ADD	dst, dst, NBYTES
30262306a36Sopenharmony_ci	#
30362306a36Sopenharmony_ci	# 3) Copy NBYTES, then check length again
30462306a36Sopenharmony_ci	#
	/*
	 * Here dst is advanced before the store, so the store placed in the
	 * delay slot of the unconditional branch addresses the unit at
	 * -8(dst), i.e. the position dst had when the unit was loaded.
	 */
30562306a36Sopenharmony_ciEXC(	LOAD	t0, 0(src),		l_exc)
30662306a36Sopenharmony_ci	SUB	len, len, NBYTES
30762306a36Sopenharmony_ci	ADD	src, src, NBYTES
30862306a36Sopenharmony_ci	ADD	dst, dst, NBYTES
30962306a36Sopenharmony_ci	b copy_bytes_checklen
31062306a36Sopenharmony_ciEXC(	 STORE	t0, -8(dst),		s_exc_p1u)
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_cisrc_unaligned:
	/*
	 * src is not NBYTES-aligned: every unit is assembled from an
	 * LDFIRST/LDREST pair and written with an ordinary STORE.  The loop
	 * below moves 4 units per iteration and runs until len has dropped to
	 * rem (len modulo 4*NBYTES, computed in the delay slot of the beqz).
	 * It is skipped entirely when fewer than 4 units are requested.
	 */
31362306a36Sopenharmony_ci#define rem t8
31462306a36Sopenharmony_ci	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter
31562306a36Sopenharmony_ci	beqz	t0, cleanup_src_unaligned
31662306a36Sopenharmony_ci	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES
31762306a36Sopenharmony_ci1:
31862306a36Sopenharmony_ci/*
31962306a36Sopenharmony_ci * Avoid consecutive LD*'s to the same register since some mips
32062306a36Sopenharmony_ci * implementations can't issue them in the same cycle.
32162306a36Sopenharmony_ci * It's OK to load FIRST(N+1) before REST(N) because the two addresses
32262306a36Sopenharmony_ci * are to the same unit (unless src is aligned, but it's not).
32362306a36Sopenharmony_ci */
32462306a36Sopenharmony_ciEXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
32562306a36Sopenharmony_ciEXC(	LDFIRST t1, FIRST(1)(src),	l_exc_copy)
32662306a36Sopenharmony_ci	SUB	len, len, 4*NBYTES
32762306a36Sopenharmony_ciEXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
32862306a36Sopenharmony_ciEXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
32962306a36Sopenharmony_ciEXC(	LDFIRST t2, FIRST(2)(src),	l_exc_copy)
33062306a36Sopenharmony_ciEXC(	LDFIRST t3, FIRST(3)(src),	l_exc_copy)
33162306a36Sopenharmony_ciEXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
33262306a36Sopenharmony_ciEXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
33362306a36Sopenharmony_ci	ADD	src, src, 4*NBYTES
33462306a36Sopenharmony_ciEXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
33562306a36Sopenharmony_ciEXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
33662306a36Sopenharmony_ciEXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
33762306a36Sopenharmony_ciEXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
33862306a36Sopenharmony_ci	bne	len, rem, 1b
33962306a36Sopenharmony_ci	 ADD	dst, dst, 4*NBYTES
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_cicleanup_src_unaligned:
	/*
	 * Fewer than 4 units remain.  rem becomes len modulo NBYTES; when it
	 * equals len there is no whole unit left and the byte copy takes over
	 * directly.  Otherwise whole units are copied one at a time until only
	 * rem bytes are left, then control falls into copy_bytes_checklen.
	 */
34262306a36Sopenharmony_ci	beqz	len, done
34362306a36Sopenharmony_ci	 and	rem, len, NBYTES-1  # rem = len % NBYTES
34462306a36Sopenharmony_ci	beq	rem, len, copy_bytes
34562306a36Sopenharmony_ci	 nop
34662306a36Sopenharmony_ci1:
34762306a36Sopenharmony_ciEXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
34862306a36Sopenharmony_ciEXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
34962306a36Sopenharmony_ci	SUB	len, len, NBYTES
35062306a36Sopenharmony_ciEXC(	STORE	t0, 0(dst),		s_exc_p1u)
35162306a36Sopenharmony_ci	ADD	src, src, NBYTES
35262306a36Sopenharmony_ci	bne	len, rem, 1b
35362306a36Sopenharmony_ci	 ADD	dst, dst, NBYTES
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_cicopy_bytes_checklen:
35662306a36Sopenharmony_ci	beqz	len, done
35762306a36Sopenharmony_ci	 nop
35862306a36Sopenharmony_cicopy_bytes:
35962306a36Sopenharmony_ci	/* 0 < len < NBYTES  */
	/*
	 * COPY_BYTE(N) loads byte N, decrements len and stores the byte from
	 * the delay slot of "beqz len, done", so the store is issued whether
	 * or not that branch leaves the routine.  src and dst are not
	 * advanced here; the byte index is encoded in the offset N.
	 */
36062306a36Sopenharmony_ci#define COPY_BYTE(N)			\
36162306a36Sopenharmony_ciEXC(	lb	t0, N(src), l_exc);	\
36262306a36Sopenharmony_ci	SUB	len, len, 1;		\
36362306a36Sopenharmony_ci	beqz	len, done;		\
36462306a36Sopenharmony_ciEXC(	 sb	t0, N(dst), s_exc_p1)
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	COPY_BYTE(0)
36762306a36Sopenharmony_ci	COPY_BYTE(1)
36862306a36Sopenharmony_ci	COPY_BYTE(2)
36962306a36Sopenharmony_ci	COPY_BYTE(3)
37062306a36Sopenharmony_ci	COPY_BYTE(4)
37162306a36Sopenharmony_ci	COPY_BYTE(5)
	/*
	 * Last possible byte (offset NBYTES-2): with len < NBYTES nothing can
	 * follow it, so no length test is made and the store sits in the
	 * delay slot of the return.
	 */
37262306a36Sopenharmony_ciEXC(	lb	t0, NBYTES-2(src), l_exc)
37362306a36Sopenharmony_ci	SUB	len, len, 1
37462306a36Sopenharmony_ci	jr	ra
37562306a36Sopenharmony_ciEXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
37662306a36Sopenharmony_cidone:
37762306a36Sopenharmony_ci	jr	ra
37862306a36Sopenharmony_ci	 nop
37962306a36Sopenharmony_ci	END(memcpy)
38062306a36Sopenharmony_ci
/*
 * Load fault handlers (targets named in the EXC() entries of the loads).
 *
 *   l_exc_copy_rewind16 - used by loads issued after src/dst were already
 *                         advanced by 16*NBYTES; undoes that advance and
 *                         falls into l_exc_copy.
 *   l_exc_copy          - copies byte by byte from src to dst until src
 *                         reaches the address read from THREAD_BUADDR, then
 *                         falls into l_exc.
 *   l_exc               - sets len to AT minus the THREAD_BUADDR address and
 *                         returns to the caller.
 */
38162306a36Sopenharmony_cil_exc_copy_rewind16:
38262306a36Sopenharmony_ci	/* Rewind src and dst by 16*NBYTES for l_exc_copy */
38362306a36Sopenharmony_ci	SUB	src, src, 16*NBYTES
38462306a36Sopenharmony_ci	SUB	dst, dst, 16*NBYTES
38562306a36Sopenharmony_cil_exc_copy:
38662306a36Sopenharmony_ci	/*
38762306a36Sopenharmony_ci	 * Copy bytes from src until faulting load address (or until a
38862306a36Sopenharmony_ci	 * lb faults)
38962306a36Sopenharmony_ci	 *
39062306a36Sopenharmony_ci	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
39162306a36Sopenharmony_ci	 * may be more than a byte beyond the last address.
39262306a36Sopenharmony_ci	 * Hence, the lb below may get an exception.
39362306a36Sopenharmony_ci	 *
39462306a36Sopenharmony_ci	 * Assumes src < THREAD_BUADDR($28)
39562306a36Sopenharmony_ci	 */
39662306a36Sopenharmony_ci	LOAD	t0, TI_TASK($28)
39762306a36Sopenharmony_ci	LOAD	t0, THREAD_BUADDR(t0)
39862306a36Sopenharmony_ci1:
	/* Only the lb has an exception-table entry; dst is bumped in the delay slot. */
39962306a36Sopenharmony_ciEXC(	lb	t1, 0(src),	l_exc)
40062306a36Sopenharmony_ci	ADD	src, src, 1
40162306a36Sopenharmony_ci	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
40262306a36Sopenharmony_ci	bne	src, t0, 1b
40362306a36Sopenharmony_ci	 ADD	dst, dst, 1
40462306a36Sopenharmony_cil_exc:
	/* THREAD_BUADDR is re-read here because l_exc is also entered directly. */
40562306a36Sopenharmony_ci	LOAD	t0, TI_TASK($28)
40662306a36Sopenharmony_ci	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
40762306a36Sopenharmony_ci	SUB	len, AT, t0		# len number of uncopied bytes
40862306a36Sopenharmony_ci	jr	ra
40962306a36Sopenharmony_ci	 nop
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_ci
/*
 * Store fault handlers.
 *
 * SEXC(n) defines the label s_exc_p<n>u, which returns to the caller with
 * n*NBYTES added onto len; the ADD sits in the delay slot of the jr.  The
 * copy loops subtract a whole group from len before storing it, so each
 * store names the handler that adds back the units not yet written.
 *
 * s_exc_p1 does the same for a single byte, and s_exc returns with len
 * untouched (s_exc is not referenced by the code visible in this file).
 */
41262306a36Sopenharmony_ci#define SEXC(n)				\
41362306a36Sopenharmony_cis_exc_p ## n ## u:			\
41462306a36Sopenharmony_ci	jr	ra;			\
41562306a36Sopenharmony_ci	 ADD	len, len, n*NBYTES
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ciSEXC(16)
41862306a36Sopenharmony_ciSEXC(15)
41962306a36Sopenharmony_ciSEXC(14)
42062306a36Sopenharmony_ciSEXC(13)
42162306a36Sopenharmony_ciSEXC(12)
42262306a36Sopenharmony_ciSEXC(11)
42362306a36Sopenharmony_ciSEXC(10)
42462306a36Sopenharmony_ciSEXC(9)
42562306a36Sopenharmony_ciSEXC(8)
42662306a36Sopenharmony_ciSEXC(7)
42762306a36Sopenharmony_ciSEXC(6)
42862306a36Sopenharmony_ciSEXC(5)
42962306a36Sopenharmony_ciSEXC(4)
43062306a36Sopenharmony_ciSEXC(3)
43162306a36Sopenharmony_ciSEXC(2)
43262306a36Sopenharmony_ciSEXC(1)
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_cis_exc_p1:
43562306a36Sopenharmony_ci	jr	ra
43662306a36Sopenharmony_ci	 ADD	len, len, 1
43762306a36Sopenharmony_cis_exc:
43862306a36Sopenharmony_ci	jr	ra
43962306a36Sopenharmony_ci	 nop
44062306a36Sopenharmony_ci
/*
 * memmove: a0 = dst, a1 = src, a2 = len; v0 is set to dst.
 *
 * t0 = (src < dst + len) and t1 = (dst < src + len); the two regions overlap
 * only when both are set.  Without overlap the call is handed to __memcpy.
 * v0 is written in the delay slot of that branch, so it holds dst on either
 * path.  With overlap, control continues into __rmemcpy, which follows
 * immediately; the first instruction of __rmemcpy is the delay slot of the
 * final "beqz a2, r_out".
 */
44162306a36Sopenharmony_ci	.align	5
44262306a36Sopenharmony_ciLEAF(memmove)
44362306a36Sopenharmony_ciEXPORT_SYMBOL(memmove)
44462306a36Sopenharmony_ci	ADD	t0, a0, a2
44562306a36Sopenharmony_ci	ADD	t1, a1, a2
44662306a36Sopenharmony_ci	sltu	t0, a1, t0			# dst + len <= src -> memcpy
44762306a36Sopenharmony_ci	sltu	t1, a0, t1			# dst >= src + len -> memcpy
44862306a36Sopenharmony_ci	and	t0, t1
44962306a36Sopenharmony_ci	beqz	t0, __memcpy
45062306a36Sopenharmony_ci	 move	v0, a0				/* return value */
45162306a36Sopenharmony_ci	beqz	a2, r_out
45262306a36Sopenharmony_ci	END(memmove)
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ci	/* fall through to __rmemcpy */
/*
 * __rmemcpy: byte-at-a-time copy, entered by fall-through from memmove.
 *
 *   src <  dst : both pointers are moved to the end of their regions and
 *                bytes are copied from the last one down (r_end_bytes).
 *   src >= dst : bytes are copied from the first one up (r_end_bytes_up).
 *
 * Both paths return with a2 = 0, set in the delay slot of the jr.  None of
 * the accesses here has an exception-table entry.
 *
 * NOTE(review): both loops decrement a2 before testing it, so they appear
 * to rely on len != 0 at entry; memmove tests a2 just before falling
 * through -- confirm for any other way of reaching this code.
 */
45562306a36Sopenharmony_ciLEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
45662306a36Sopenharmony_ci	 sltu	t0, a1, a0			/* t0 = (src < dst) */
45762306a36Sopenharmony_ci	beqz	t0, r_end_bytes_up		# src >= dst
45862306a36Sopenharmony_ci	 nop
45962306a36Sopenharmony_ci	ADD	a0, a2				# dst = dst + len
46062306a36Sopenharmony_ci	ADD	a1, a2				# src = src + len
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_cir_end_bytes:
	/* Backward copy: a0/a1 point one past the next byte to move. */
46362306a36Sopenharmony_ci	lb	t0, -1(a1)
46462306a36Sopenharmony_ci	SUB	a2, a2, 0x1
46562306a36Sopenharmony_ci	sb	t0, -1(a0)
46662306a36Sopenharmony_ci	SUB	a1, a1, 0x1
46762306a36Sopenharmony_ci	bnez	a2, r_end_bytes
46862306a36Sopenharmony_ci	 SUB	a0, a0, 0x1
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_cir_out:
47162306a36Sopenharmony_ci	jr	ra
47262306a36Sopenharmony_ci	 move	a2, zero
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_cir_end_bytes_up:
	/* Forward copy: a0/a1 point at the next byte to move. */
47562306a36Sopenharmony_ci	lb	t0, (a1)
47662306a36Sopenharmony_ci	SUB	a2, a2, 0x1
47762306a36Sopenharmony_ci	sb	t0, (a0)
47862306a36Sopenharmony_ci	ADD	a1, a1, 0x1
47962306a36Sopenharmony_ci	bnez	a2, r_end_bytes_up
48062306a36Sopenharmony_ci	 ADD	a0, a0, 0x1
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci	jr	ra
48362306a36Sopenharmony_ci	 move	a2, zero
48462306a36Sopenharmony_ci	END(__rmemcpy)
485