18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * This file is subject to the terms and conditions of the GNU General Public
38c2ecf20Sopenharmony_ci * License.  See the file "COPYING" in the main directory of this archive
48c2ecf20Sopenharmony_ci * for more details.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Unified implementation of memcpy, memmove and the __copy_user backend.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
98c2ecf20Sopenharmony_ci * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
108c2ecf20Sopenharmony_ci * Copyright (C) 2002 Broadcom, Inc.
118c2ecf20Sopenharmony_ci *   memcpy/copy_user author: Mark Vandevoorde
128c2ecf20Sopenharmony_ci *
138c2ecf20Sopenharmony_ci * Mnemonic names for arguments to memcpy/__copy_user
148c2ecf20Sopenharmony_ci */
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_ci#include <asm/asm.h>
178c2ecf20Sopenharmony_ci#include <asm/asm-offsets.h>
188c2ecf20Sopenharmony_ci#include <asm/export.h>
198c2ecf20Sopenharmony_ci#include <asm/regdef.h>
208c2ecf20Sopenharmony_ci
/* Register aliases for the three arguments: a0 = dst, a1 = src, a2 = len. */
218c2ecf20Sopenharmony_ci#define dst a0
228c2ecf20Sopenharmony_ci#define src a1
238c2ecf20Sopenharmony_ci#define len a2
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci/*
268c2ecf20Sopenharmony_ci * Spec
278c2ecf20Sopenharmony_ci *
288c2ecf20Sopenharmony_ci * memcpy copies len bytes from src to dst and sets v0 to dst.
298c2ecf20Sopenharmony_ci * It assumes that
308c2ecf20Sopenharmony_ci *   - src and dst don't overlap
318c2ecf20Sopenharmony_ci *   - src is readable
328c2ecf20Sopenharmony_ci *   - dst is writable
338c2ecf20Sopenharmony_ci * memcpy uses the standard calling convention
348c2ecf20Sopenharmony_ci *
358c2ecf20Sopenharmony_ci * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
368c2ecf20Sopenharmony_ci * the number of uncopied bytes due to an exception caused by a read or write.
378c2ecf20Sopenharmony_ci * __copy_user assumes that src and dst don't overlap, and that the call is
388c2ecf20Sopenharmony_ci * implementing one of the following:
398c2ecf20Sopenharmony_ci *   copy_to_user
408c2ecf20Sopenharmony_ci *     - src is readable  (no exceptions when reading src)
418c2ecf20Sopenharmony_ci *   copy_from_user
428c2ecf20Sopenharmony_ci *     - dst is writable  (no exceptions when writing dst)
438c2ecf20Sopenharmony_ci * __copy_user uses a non-standard calling convention; see
448c2ecf20Sopenharmony_ci * arch/mips/include/asm/uaccess.h
458c2ecf20Sopenharmony_ci *
468c2ecf20Sopenharmony_ci * When an exception happens on a load, the handler must
478c2ecf20Sopenharmony_ci * ensure that all of the destination buffer is overwritten to prevent
488c2ecf20Sopenharmony_ci * leaking information to user mode programs.
498c2ecf20Sopenharmony_ci */
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci/*
528c2ecf20Sopenharmony_ci * Implementation
538c2ecf20Sopenharmony_ci */
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci/*
568c2ecf20Sopenharmony_ci * The exception handler for loads requires that:
578c2ecf20Sopenharmony_ci *  1- AT contain the address of the byte just past the end of the source
588c2ecf20Sopenharmony_ci *     of the copy,
598c2ecf20Sopenharmony_ci *  2- src_entry <= src < AT, and
608c2ecf20Sopenharmony_ci *  3- (dst - src) == (dst_entry - src_entry),
618c2ecf20Sopenharmony_ci * The _entry suffix denotes values when __copy_user was called.
628c2ecf20Sopenharmony_ci *
638c2ecf20Sopenharmony_ci * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
648c2ecf20Sopenharmony_ci * (2) is met by incrementing src by the number of bytes copied
658c2ecf20Sopenharmony_ci * (3) is met by not doing loads between a pair of increments of dst and src
668c2ecf20Sopenharmony_ci *
678c2ecf20Sopenharmony_ci * The exception handlers for stores adjust len (if necessary) and return.
688c2ecf20Sopenharmony_ci * These handlers do not need to overwrite any data.
698c2ecf20Sopenharmony_ci *
708c2ecf20Sopenharmony_ci * For __rmemcpy and memmove an exception is always a kernel bug, therefore
718c2ecf20Sopenharmony_ci * they're not protected.
728c2ecf20Sopenharmony_ci */
738c2ecf20Sopenharmony_ci
/*
 * EXC(inst_reg, addr, handler)
 *
 * Emits the instruction "inst_reg, addr" at local label 9 and records the
 * pair (address of that instruction, handler) in the __ex_table section,
 * then returns to the previously active section.
 * NOTE(review): presumably the fault fixup code looks a faulting PC up in
 * __ex_table and resumes at handler -- confirm against the table consumer.
 */
748c2ecf20Sopenharmony_ci#define EXC(inst_reg,addr,handler)		\
758c2ecf20Sopenharmony_ci9:	inst_reg, addr;				\
768c2ecf20Sopenharmony_ci	.section __ex_table,"a";		\
778c2ecf20Sopenharmony_ci	PTR	9b, handler;			\
788c2ecf20Sopenharmony_ci	.previous
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci/*
818c2ecf20Sopenharmony_ci * Only on the 64-bit kernel can we make use of 64-bit registers.
828c2ecf20Sopenharmony_ci */
838c2ecf20Sopenharmony_ci
/*
 * Doubleword (64-bit) forms of the load/store, add/subtract and shift
 * mnemonics used below. NBYTES is the size in bytes of one copy unit and
 * LOG_NBYTES is its base-2 logarithm (8 == 1 << 3).
 */
848c2ecf20Sopenharmony_ci#define LOAD   ld
858c2ecf20Sopenharmony_ci#define LOADL  ldl
868c2ecf20Sopenharmony_ci#define LOADR  ldr
878c2ecf20Sopenharmony_ci#define STOREL sdl
888c2ecf20Sopenharmony_ci#define STORER sdr
898c2ecf20Sopenharmony_ci#define STORE  sd
908c2ecf20Sopenharmony_ci#define ADD    daddu
918c2ecf20Sopenharmony_ci#define SUB    dsubu
928c2ecf20Sopenharmony_ci#define SRL    dsrl
938c2ecf20Sopenharmony_ci#define SRA    dsra
948c2ecf20Sopenharmony_ci#define SLL    dsll
958c2ecf20Sopenharmony_ci#define SLLV   dsllv
968c2ecf20Sopenharmony_ci#define SRLV   dsrlv
978c2ecf20Sopenharmony_ci#define NBYTES 8
988c2ecf20Sopenharmony_ci#define LOG_NBYTES 3
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci/*
1018c2ecf20Sopenharmony_ci * As we are sharing the code base with the mips32 tree (which uses the o32 ABI
1028c2ecf20Sopenharmony_ci * register definitions), we need to redefine the register definitions from
1038c2ecf20Sopenharmony_ci * the n64 ABI register naming to the o32 ABI register naming.
1048c2ecf20Sopenharmony_ci */
/*
 * Bind t0..t7 to the hardware registers $8..$15. Only t0..t3 are #undef'd
 * first; t4..t7 are defined without a preceding #undef.
 */
1058c2ecf20Sopenharmony_ci#undef t0
1068c2ecf20Sopenharmony_ci#undef t1
1078c2ecf20Sopenharmony_ci#undef t2
1088c2ecf20Sopenharmony_ci#undef t3
1098c2ecf20Sopenharmony_ci#define t0	$8
1108c2ecf20Sopenharmony_ci#define t1	$9
1118c2ecf20Sopenharmony_ci#define t2	$10
1128c2ecf20Sopenharmony_ci#define t3	$11
1138c2ecf20Sopenharmony_ci#define t4	$12
1148c2ecf20Sopenharmony_ci#define t5	$13
1158c2ecf20Sopenharmony_ci#define t6	$14
1168c2ecf20Sopenharmony_ci#define t7	$15
1178c2ecf20Sopenharmony_ci
/*
 * Endian-dependent choice of the left/right partial access instructions.
 * In the code below LDFIRST is applied at the FIRST() offset of a unit and
 * LDREST at its REST() offset; together they assemble one unit from an
 * unaligned source address.
 * STFIRST, STREST and SHIFT_DISCARD are defined here but are not referenced
 * anywhere in the lines visible in this file.
 */
1188c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_LITTLE_ENDIAN
1198c2ecf20Sopenharmony_ci#define LDFIRST LOADR
1208c2ecf20Sopenharmony_ci#define LDREST	LOADL
1218c2ecf20Sopenharmony_ci#define STFIRST STORER
1228c2ecf20Sopenharmony_ci#define STREST	STOREL
1238c2ecf20Sopenharmony_ci#define SHIFT_DISCARD SLLV
1248c2ecf20Sopenharmony_ci#else
1258c2ecf20Sopenharmony_ci#define LDFIRST LOADL
1268c2ecf20Sopenharmony_ci#define LDREST	LOADR
1278c2ecf20Sopenharmony_ci#define STFIRST STOREL
1288c2ecf20Sopenharmony_ci#define STREST	STORER
1298c2ecf20Sopenharmony_ci#define SHIFT_DISCARD SRLV
1308c2ecf20Sopenharmony_ci#endif
1318c2ecf20Sopenharmony_ci
/*
 * Offset helpers for copy units of NBYTES bytes:
 *   FIRST(unit) - byte offset of the first byte of unit number "unit"
 *   REST(unit)  - byte offset of the last byte of that unit
 *   UNIT(unit)  - alias of FIRST(unit), used for aligned accesses
 *   ADDRMASK    - mask of the address bits below the unit size
 */
1328c2ecf20Sopenharmony_ci#define FIRST(unit) ((unit)*NBYTES)
1338c2ecf20Sopenharmony_ci#define REST(unit)  (FIRST(unit)+NBYTES-1)
1348c2ecf20Sopenharmony_ci#define UNIT(unit)  FIRST(unit)
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci#define ADDRMASK (NBYTES-1)
1378c2ecf20Sopenharmony_ci
/*
 * Code goes into .text.
 * .set noreorder: the assembler keeps the instruction order as written, so
 *                 every branch below is followed by its hand-placed delay
 *                 slot instruction (indented by one extra space).
 * .set noat:      the assembler must not use AT implicitly; the load
 *                 exception handler relies on AT being left untouched.
 */
1388c2ecf20Sopenharmony_ci	.text
1398c2ecf20Sopenharmony_ci	.set	noreorder
1408c2ecf20Sopenharmony_ci	.set	noat
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci/*
1438c2ecf20Sopenharmony_ci * A combined memcpy/__copy_user
1448c2ecf20Sopenharmony_ci * __copy_user sets len to 0 for success; else to an upper bound of
1458c2ecf20Sopenharmony_ci * the number of uncopied bytes.
1468c2ecf20Sopenharmony_ci * memcpy sets v0 to dst.
1478c2ecf20Sopenharmony_ci */
/*
 * Entry: memcpy first saves dst in v0, then shares the body with
 * __memcpy / __copy_user (a0=dst, a1=src, a2=len).
 *
 * Dispatch, in order:
 *   len < NBYTES        -> copy_bytes_checklen
 *   src not unit-aligned -> src_unaligned
 *   len < 4 units       -> less_than_4units
 *   len < 8 units       -> less_than_8units
 *   len < 16 units      -> cleanup_both_aligned
 *   otherwise           -> 16-unit loop below (labels 2: and 1:)
 *
 * t0 is the scratch condition register; t0..t3 carry the data being copied.
 * len is decremented by 16*NBYTES at the start of each loop iteration, and
 * each store names the s_exc_p<N>u handler matching the number of units of
 * that iteration not yet stored.
 */
1488c2ecf20Sopenharmony_ci	.align	5
1498c2ecf20Sopenharmony_ciLEAF(memcpy)					/* a0=dst a1=src a2=len */
1508c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memcpy)
1518c2ecf20Sopenharmony_ci	move	v0, dst				/* return value */
1528c2ecf20Sopenharmony_ci__memcpy:
1538c2ecf20Sopenharmony_ciFEXPORT(__copy_user)
1548c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__copy_user)
1558c2ecf20Sopenharmony_ci	/*
1568c2ecf20Sopenharmony_ci	 * Note: dst & src may be unaligned, len may be 0
1578c2ecf20Sopenharmony_ci	 * Temps
1588c2ecf20Sopenharmony_ci	 */
1598c2ecf20Sopenharmony_ci	#
1608c2ecf20Sopenharmony_ci	# Octeon doesn't care if the destination is unaligned. The hardware
1618c2ecf20Sopenharmony_ci	# can fix it faster than we can special case the assembly.
1628c2ecf20Sopenharmony_ci	#
1638c2ecf20Sopenharmony_ci	pref	0, 0(src)
1648c2ecf20Sopenharmony_ci	sltu	t0, len, NBYTES		# Check if < 1 word
1658c2ecf20Sopenharmony_ci	bnez	t0, copy_bytes_checklen
1668c2ecf20Sopenharmony_ci	 and	t0, src, ADDRMASK	# Check if src unaligned
1678c2ecf20Sopenharmony_ci	bnez	t0, src_unaligned
1688c2ecf20Sopenharmony_ci	 sltu	t0, len, 4*NBYTES	# Check if < 4 words
1698c2ecf20Sopenharmony_ci	bnez	t0, less_than_4units
1708c2ecf20Sopenharmony_ci	 sltu	t0, len, 8*NBYTES	# Check if < 8 words
1718c2ecf20Sopenharmony_ci	bnez	t0, less_than_8units
1728c2ecf20Sopenharmony_ci	 sltu	t0, len, 16*NBYTES	# Check if < 16 words
1738c2ecf20Sopenharmony_ci	bnez	t0, cleanup_both_aligned
1748c2ecf20Sopenharmony_ci	 sltu	t0, len, 128+1		# Check if len < 129
1758c2ecf20Sopenharmony_ci	bnez	t0, 1f			# Skip prefetch if len is too short
1768c2ecf20Sopenharmony_ci	 sltu	t0, len, 256+1		# Check if len < 257
1778c2ecf20Sopenharmony_ci	bnez	t0, 1f			# Skip prefetch if len is too short
1788c2ecf20Sopenharmony_ci	 pref	0, 128(src)		# We must not prefetch invalid addresses
1798c2ecf20Sopenharmony_ci	#
1808c2ecf20Sopenharmony_ci	# This is where we loop if there is more than 128 bytes left
1818c2ecf20Sopenharmony_ci2:	pref	0, 256(src)		# We must not prefetch invalid addresses
1828c2ecf20Sopenharmony_ci	#
1838c2ecf20Sopenharmony_ci	# This is where we loop if we can't prefetch anymore
1848c2ecf20Sopenharmony_ci1:
1858c2ecf20Sopenharmony_ciEXC(	LOAD	t0, UNIT(0)(src),	l_exc)
1868c2ecf20Sopenharmony_ciEXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
1878c2ecf20Sopenharmony_ciEXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
1888c2ecf20Sopenharmony_ciEXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
1898c2ecf20Sopenharmony_ci	SUB	len, len, 16*NBYTES
1908c2ecf20Sopenharmony_ciEXC(	STORE	t0, UNIT(0)(dst),	s_exc_p16u)
1918c2ecf20Sopenharmony_ciEXC(	STORE	t1, UNIT(1)(dst),	s_exc_p15u)
1928c2ecf20Sopenharmony_ciEXC(	STORE	t2, UNIT(2)(dst),	s_exc_p14u)
1938c2ecf20Sopenharmony_ciEXC(	STORE	t3, UNIT(3)(dst),	s_exc_p13u)
1948c2ecf20Sopenharmony_ciEXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
1958c2ecf20Sopenharmony_ciEXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
1968c2ecf20Sopenharmony_ciEXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
1978c2ecf20Sopenharmony_ciEXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
1988c2ecf20Sopenharmony_ciEXC(	STORE	t0, UNIT(4)(dst),	s_exc_p12u)
1998c2ecf20Sopenharmony_ciEXC(	STORE	t1, UNIT(5)(dst),	s_exc_p11u)
2008c2ecf20Sopenharmony_ciEXC(	STORE	t2, UNIT(6)(dst),	s_exc_p10u)
/*
 * src, then dst, are advanced by the full 16*NBYTES in the middle of the
 * iteration. The remaining eight units are therefore addressed with
 * negative unit offsets, and load faults from here on use
 * l_exc_copy_rewind16, which subtracts the 16*NBYTES again before joining
 * l_exc_copy.
 */
2018c2ecf20Sopenharmony_ci	ADD	src, src, 16*NBYTES
2028c2ecf20Sopenharmony_ciEXC(	STORE	t3, UNIT(7)(dst),	s_exc_p9u)
2038c2ecf20Sopenharmony_ci	ADD	dst, dst, 16*NBYTES
2048c2ecf20Sopenharmony_ciEXC(	LOAD	t0, UNIT(-8)(src),	l_exc_copy_rewind16)
2058c2ecf20Sopenharmony_ciEXC(	LOAD	t1, UNIT(-7)(src),	l_exc_copy_rewind16)
2068c2ecf20Sopenharmony_ciEXC(	LOAD	t2, UNIT(-6)(src),	l_exc_copy_rewind16)
2078c2ecf20Sopenharmony_ciEXC(	LOAD	t3, UNIT(-5)(src),	l_exc_copy_rewind16)
2088c2ecf20Sopenharmony_ciEXC(	STORE	t0, UNIT(-8)(dst),	s_exc_p8u)
2098c2ecf20Sopenharmony_ciEXC(	STORE	t1, UNIT(-7)(dst),	s_exc_p7u)
2108c2ecf20Sopenharmony_ciEXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
2118c2ecf20Sopenharmony_ciEXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
2128c2ecf20Sopenharmony_ciEXC(	LOAD	t0, UNIT(-4)(src),	l_exc_copy_rewind16)
2138c2ecf20Sopenharmony_ciEXC(	LOAD	t1, UNIT(-3)(src),	l_exc_copy_rewind16)
2148c2ecf20Sopenharmony_ciEXC(	LOAD	t2, UNIT(-2)(src),	l_exc_copy_rewind16)
2158c2ecf20Sopenharmony_ciEXC(	LOAD	t3, UNIT(-1)(src),	l_exc_copy_rewind16)
2168c2ecf20Sopenharmony_ciEXC(	STORE	t0, UNIT(-4)(dst),	s_exc_p4u)
2178c2ecf20Sopenharmony_ciEXC(	STORE	t1, UNIT(-3)(dst),	s_exc_p3u)
2188c2ecf20Sopenharmony_ciEXC(	STORE	t2, UNIT(-2)(dst),	s_exc_p2u)
2198c2ecf20Sopenharmony_ciEXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
2208c2ecf20Sopenharmony_ci	sltu	t0, len, 256+1		# See if we can prefetch more
2218c2ecf20Sopenharmony_ci	beqz	t0, 2b
2228c2ecf20Sopenharmony_ci	 sltu	t0, len, 128		# See if we can loop more time
2238c2ecf20Sopenharmony_ci	beqz	t0, 1b
2248c2ecf20Sopenharmony_ci	 nop
2258c2ecf20Sopenharmony_ci	#
2268c2ecf20Sopenharmony_ci	# Jump here if there are less than 16*NBYTES left.
2278c2ecf20Sopenharmony_ci	#
/*
 * Aligned tail, fewer than 16 units left.
 * Returns via done when len is 0, hands off to less_than_8units when fewer
 * than 8 units remain, otherwise copies exactly 8 units. After the copy it
 * either returns (len == 0) or falls through into less_than_8units; dst is
 * advanced in the delay slot of that final branch, so it is advanced on
 * both paths.
 */
2288c2ecf20Sopenharmony_cicleanup_both_aligned:
2298c2ecf20Sopenharmony_ci	beqz	len, done
2308c2ecf20Sopenharmony_ci	 sltu	t0, len, 8*NBYTES
2318c2ecf20Sopenharmony_ci	bnez	t0, less_than_8units
2328c2ecf20Sopenharmony_ci	 nop
2338c2ecf20Sopenharmony_ciEXC(	LOAD	t0, UNIT(0)(src),	l_exc)
2348c2ecf20Sopenharmony_ciEXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
2358c2ecf20Sopenharmony_ciEXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
2368c2ecf20Sopenharmony_ciEXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
2378c2ecf20Sopenharmony_ci	SUB	len, len, 8*NBYTES
2388c2ecf20Sopenharmony_ciEXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
2398c2ecf20Sopenharmony_ciEXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
2408c2ecf20Sopenharmony_ciEXC(	STORE	t2, UNIT(2)(dst),	s_exc_p6u)
2418c2ecf20Sopenharmony_ciEXC(	STORE	t3, UNIT(3)(dst),	s_exc_p5u)
2428c2ecf20Sopenharmony_ciEXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
2438c2ecf20Sopenharmony_ciEXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
2448c2ecf20Sopenharmony_ciEXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
2458c2ecf20Sopenharmony_ciEXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
2468c2ecf20Sopenharmony_ciEXC(	STORE	t0, UNIT(4)(dst),	s_exc_p4u)
2478c2ecf20Sopenharmony_ciEXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
2488c2ecf20Sopenharmony_ciEXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
2498c2ecf20Sopenharmony_ciEXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
2508c2ecf20Sopenharmony_ci	ADD	src, src, 8*NBYTES
2518c2ecf20Sopenharmony_ci	beqz	len, done
2528c2ecf20Sopenharmony_ci	 ADD	dst, dst, 8*NBYTES
2538c2ecf20Sopenharmony_ci	#
2548c2ecf20Sopenharmony_ci	# Jump here if there are less than 8*NBYTES left.
2558c2ecf20Sopenharmony_ci	#
/*
 * Aligned tail, fewer than 8 units left.
 * Hands off to less_than_4units when fewer than 4 units remain, otherwise
 * copies exactly 4 units, then returns if len reached 0 or falls through
 * into less_than_4units. dst is advanced in the branch delay slot.
 */
2568c2ecf20Sopenharmony_ciless_than_8units:
2578c2ecf20Sopenharmony_ci	sltu	t0, len, 4*NBYTES
2588c2ecf20Sopenharmony_ci	bnez	t0, less_than_4units
2598c2ecf20Sopenharmony_ci	 nop
2608c2ecf20Sopenharmony_ciEXC(	LOAD	t0, UNIT(0)(src),	l_exc)
2618c2ecf20Sopenharmony_ciEXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
2628c2ecf20Sopenharmony_ciEXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
2638c2ecf20Sopenharmony_ciEXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
2648c2ecf20Sopenharmony_ci	SUB	len, len, 4*NBYTES
2658c2ecf20Sopenharmony_ciEXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
2668c2ecf20Sopenharmony_ciEXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
2678c2ecf20Sopenharmony_ciEXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
2688c2ecf20Sopenharmony_ciEXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
2698c2ecf20Sopenharmony_ci	ADD	src, src, 4*NBYTES
2708c2ecf20Sopenharmony_ci	beqz	len, done
2718c2ecf20Sopenharmony_ci	 ADD	dst, dst, 4*NBYTES
2728c2ecf20Sopenharmony_ci	#
2738c2ecf20Sopenharmony_ci	# Jump here if there are less than 4*NBYTES left. This means
2748c2ecf20Sopenharmony_ci	# we may need to copy up to 3 NBYTES words.
2758c2ecf20Sopenharmony_ci	#
/*
 * Aligned tail, fewer than 4 units left: up to three single-unit copies.
 * After the first and second copy, t1 = (len < 8) decides whether to leave
 * for copy_bytes_checklen; the literal 8 in those sltu tests equals NBYTES.
 * The third copy always leaves for copy_bytes_checklen. Its store sits in
 * the delay slot of the unconditional branch, after dst was already
 * advanced, hence the -8(dst) address.
 */
2768c2ecf20Sopenharmony_ciless_than_4units:
2778c2ecf20Sopenharmony_ci	sltu	t0, len, 1*NBYTES
2788c2ecf20Sopenharmony_ci	bnez	t0, copy_bytes_checklen
2798c2ecf20Sopenharmony_ci	 nop
2808c2ecf20Sopenharmony_ci	#
2818c2ecf20Sopenharmony_ci	# 1) Copy NBYTES, then check length again
2828c2ecf20Sopenharmony_ci	#
2838c2ecf20Sopenharmony_ciEXC(	LOAD	t0, 0(src),		l_exc)
2848c2ecf20Sopenharmony_ci	SUB	len, len, NBYTES
2858c2ecf20Sopenharmony_ci	sltu	t1, len, 8
2868c2ecf20Sopenharmony_ciEXC(	STORE	t0, 0(dst),		s_exc_p1u)
2878c2ecf20Sopenharmony_ci	ADD	src, src, NBYTES
2888c2ecf20Sopenharmony_ci	bnez	t1, copy_bytes_checklen
2898c2ecf20Sopenharmony_ci	 ADD	dst, dst, NBYTES
2908c2ecf20Sopenharmony_ci	#
2918c2ecf20Sopenharmony_ci	# 2) Copy NBYTES, then check length again
2928c2ecf20Sopenharmony_ci	#
2938c2ecf20Sopenharmony_ciEXC(	LOAD	t0, 0(src),		l_exc)
2948c2ecf20Sopenharmony_ci	SUB	len, len, NBYTES
2958c2ecf20Sopenharmony_ci	sltu	t1, len, 8
2968c2ecf20Sopenharmony_ciEXC(	STORE	t0, 0(dst),		s_exc_p1u)
2978c2ecf20Sopenharmony_ci	ADD	src, src, NBYTES
2988c2ecf20Sopenharmony_ci	bnez	t1, copy_bytes_checklen
2998c2ecf20Sopenharmony_ci	 ADD	dst, dst, NBYTES
3008c2ecf20Sopenharmony_ci	#
3018c2ecf20Sopenharmony_ci	# 3) Copy NBYTES, then check length again
3028c2ecf20Sopenharmony_ci	#
3038c2ecf20Sopenharmony_ciEXC(	LOAD	t0, 0(src),		l_exc)
3048c2ecf20Sopenharmony_ci	SUB	len, len, NBYTES
3058c2ecf20Sopenharmony_ci	ADD	src, src, NBYTES
3068c2ecf20Sopenharmony_ci	ADD	dst, dst, NBYTES
3078c2ecf20Sopenharmony_ci	b copy_bytes_checklen
3088c2ecf20Sopenharmony_ciEXC(	 STORE	t0, -8(dst),		s_exc_p1u)
3098c2ecf20Sopenharmony_ci
/*
 * Source is not unit-aligned (len >= NBYTES on entry from the dispatch).
 * t0 = len >> (LOG_NBYTES+2) is the number of whole 4-unit groups; when it
 * is zero control goes straight to cleanup_src_unaligned. rem (t8) is set
 * in the delay slot to len modulo 4*NBYTES, and the loop runs until len has
 * been reduced to rem. Each unit is assembled with an LDFIRST/LDREST pair
 * and written with a plain STORE.
 */
3108c2ecf20Sopenharmony_cisrc_unaligned:
3118c2ecf20Sopenharmony_ci#define rem t8
3128c2ecf20Sopenharmony_ci	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter
3138c2ecf20Sopenharmony_ci	beqz	t0, cleanup_src_unaligned
3148c2ecf20Sopenharmony_ci	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES
3158c2ecf20Sopenharmony_ci1:
3168c2ecf20Sopenharmony_ci/*
3178c2ecf20Sopenharmony_ci * Avoid consecutive LD*'s to the same register since some mips
3188c2ecf20Sopenharmony_ci * implementations can't issue them in the same cycle.
3198c2ecf20Sopenharmony_ci * It's OK to load FIRST(N+1) before REST(N) because the two addresses
3208c2ecf20Sopenharmony_ci * are to the same unit (unless src is aligned, but it's not).
3218c2ecf20Sopenharmony_ci */
3228c2ecf20Sopenharmony_ciEXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
3238c2ecf20Sopenharmony_ciEXC(	LDFIRST t1, FIRST(1)(src),	l_exc_copy)
3248c2ecf20Sopenharmony_ci	SUB	len, len, 4*NBYTES
3258c2ecf20Sopenharmony_ciEXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
3268c2ecf20Sopenharmony_ciEXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
3278c2ecf20Sopenharmony_ciEXC(	LDFIRST t2, FIRST(2)(src),	l_exc_copy)
3288c2ecf20Sopenharmony_ciEXC(	LDFIRST t3, FIRST(3)(src),	l_exc_copy)
3298c2ecf20Sopenharmony_ciEXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
3308c2ecf20Sopenharmony_ciEXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
3318c2ecf20Sopenharmony_ci	ADD	src, src, 4*NBYTES
3328c2ecf20Sopenharmony_ciEXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
3338c2ecf20Sopenharmony_ciEXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
3348c2ecf20Sopenharmony_ciEXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
3358c2ecf20Sopenharmony_ciEXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
3368c2ecf20Sopenharmony_ci	bne	len, rem, 1b
3378c2ecf20Sopenharmony_ci	 ADD	dst, dst, 4*NBYTES
3388c2ecf20Sopenharmony_ci
/*
 * Unaligned-source tail, fewer than 4 units left.
 * Returns via done when len is 0. rem is recomputed (in the delay slot) as
 * len modulo NBYTES; if that equals len there is no whole unit left and
 * control goes to copy_bytes. Otherwise whole units are copied one at a
 * time until len == rem, then execution falls through into
 * copy_bytes_checklen.
 */
3398c2ecf20Sopenharmony_cicleanup_src_unaligned:
3408c2ecf20Sopenharmony_ci	beqz	len, done
3418c2ecf20Sopenharmony_ci	 and	rem, len, NBYTES-1  # rem = len % NBYTES
3428c2ecf20Sopenharmony_ci	beq	rem, len, copy_bytes
3438c2ecf20Sopenharmony_ci	 nop
3448c2ecf20Sopenharmony_ci1:
3458c2ecf20Sopenharmony_ciEXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
3468c2ecf20Sopenharmony_ciEXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
3478c2ecf20Sopenharmony_ci	SUB	len, len, NBYTES
3488c2ecf20Sopenharmony_ciEXC(	STORE	t0, 0(dst),		s_exc_p1u)
3498c2ecf20Sopenharmony_ci	ADD	src, src, NBYTES
3508c2ecf20Sopenharmony_ci	bne	len, rem, 1b
3518c2ecf20Sopenharmony_ci	 ADD	dst, dst, NBYTES
3528c2ecf20Sopenharmony_ci
/*
 * Final byte-wise tail (fewer than NBYTES bytes).
 * copy_bytes_checklen returns immediately when len is 0; copy_bytes expects
 * len > 0. src and dst are not advanced here: byte N is addressed as N(src)
 * and N(dst).
 *
 * COPY_BYTE(N) loads byte N, decrements len and branches to done when len
 * reaches 0. The store is in that branch's delay slot, so it is executed
 * whether or not the branch is taken. Six expansions cover bytes 0..5; byte
 * NBYTES-2 is handled by the explicit sequence that follows, which returns
 * directly with its store in the jr delay slot.
 */
3538c2ecf20Sopenharmony_cicopy_bytes_checklen:
3548c2ecf20Sopenharmony_ci	beqz	len, done
3558c2ecf20Sopenharmony_ci	 nop
3568c2ecf20Sopenharmony_cicopy_bytes:
3578c2ecf20Sopenharmony_ci	/* 0 < len < NBYTES  */
3588c2ecf20Sopenharmony_ci#define COPY_BYTE(N)			\
3598c2ecf20Sopenharmony_ciEXC(	lb	t0, N(src), l_exc);	\
3608c2ecf20Sopenharmony_ci	SUB	len, len, 1;		\
3618c2ecf20Sopenharmony_ci	beqz	len, done;		\
3628c2ecf20Sopenharmony_ciEXC(	 sb	t0, N(dst), s_exc_p1)
3638c2ecf20Sopenharmony_ci
3648c2ecf20Sopenharmony_ci	COPY_BYTE(0)
3658c2ecf20Sopenharmony_ci	COPY_BYTE(1)
3668c2ecf20Sopenharmony_ci	COPY_BYTE(2)
3678c2ecf20Sopenharmony_ci	COPY_BYTE(3)
3688c2ecf20Sopenharmony_ci	COPY_BYTE(4)
3698c2ecf20Sopenharmony_ci	COPY_BYTE(5)
3708c2ecf20Sopenharmony_ciEXC(	lb	t0, NBYTES-2(src), l_exc)
3718c2ecf20Sopenharmony_ci	SUB	len, len, 1
3728c2ecf20Sopenharmony_ci	jr	ra
3738c2ecf20Sopenharmony_ciEXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
3748c2ecf20Sopenharmony_cidone:
3758c2ecf20Sopenharmony_ci	jr	ra
3768c2ecf20Sopenharmony_ci	 nop
3778c2ecf20Sopenharmony_ci	END(memcpy)
3788c2ecf20Sopenharmony_ci
/*
 * Fixup targets named by the EXC() entries on loads.
 *   l_exc_copy_rewind16: subtract 16*NBYTES from src and dst, then continue
 *                        as l_exc_copy.
 *   l_exc_copy:          read the task pointer at TI_TASK($28) and from it
 *                        the THREAD_BUADDR value into t0, then copy single
 *                        bytes from src to dst until src == t0 (a faulting
 *                        lb goes to l_exc); falls through into l_exc.
 *   l_exc:               reload the THREAD_BUADDR value and return with
 *                        len = AT - that value.
 * t0 and t1 are overwritten on these paths.
 */
3798c2ecf20Sopenharmony_cil_exc_copy_rewind16:
3808c2ecf20Sopenharmony_ci	/* Rewind src and dst by 16*NBYTES for l_exc_copy */
3818c2ecf20Sopenharmony_ci	SUB	src, src, 16*NBYTES
3828c2ecf20Sopenharmony_ci	SUB	dst, dst, 16*NBYTES
3838c2ecf20Sopenharmony_cil_exc_copy:
3848c2ecf20Sopenharmony_ci	/*
3858c2ecf20Sopenharmony_ci	 * Copy bytes from src until faulting load address (or until a
3868c2ecf20Sopenharmony_ci	 * lb faults)
3878c2ecf20Sopenharmony_ci	 *
3888c2ecf20Sopenharmony_ci	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
3898c2ecf20Sopenharmony_ci	 * may be more than a byte beyond the last address.
3908c2ecf20Sopenharmony_ci	 * Hence, the lb below may get an exception.
3918c2ecf20Sopenharmony_ci	 *
3928c2ecf20Sopenharmony_ci	 * Assumes src < THREAD_BUADDR($28)
3938c2ecf20Sopenharmony_ci	 */
3948c2ecf20Sopenharmony_ci	LOAD	t0, TI_TASK($28)
3958c2ecf20Sopenharmony_ci	LOAD	t0, THREAD_BUADDR(t0)
3968c2ecf20Sopenharmony_ci1:
3978c2ecf20Sopenharmony_ciEXC(	lb	t1, 0(src),	l_exc)
3988c2ecf20Sopenharmony_ci	ADD	src, src, 1
3998c2ecf20Sopenharmony_ci	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
4008c2ecf20Sopenharmony_ci	bne	src, t0, 1b
4018c2ecf20Sopenharmony_ci	 ADD	dst, dst, 1
4028c2ecf20Sopenharmony_cil_exc:
4038c2ecf20Sopenharmony_ci	LOAD	t0, TI_TASK($28)
4048c2ecf20Sopenharmony_ci	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
4058c2ecf20Sopenharmony_ci	SUB	len, AT, t0		# len number of uncopied bytes
4068c2ecf20Sopenharmony_ci	jr	ra
4078c2ecf20Sopenharmony_ci	 nop
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci
/*
 * Fixup targets named by the EXC() entries on stores.
 * SEXC(n) defines the label s_exc_p<n>u, which returns to the caller with
 * n*NBYTES added back to len (the ADD is in the jr delay slot). Handlers
 * are instantiated for n = 16 down to 1.
 * s_exc_p1 adds a single byte back and is used by the byte-wise stores.
 * s_exc returns with len unchanged; it is not referenced by any EXC() entry
 * in the lines visible in this file.
 */
4108c2ecf20Sopenharmony_ci#define SEXC(n)				\
4118c2ecf20Sopenharmony_cis_exc_p ## n ## u:			\
4128c2ecf20Sopenharmony_ci	jr	ra;			\
4138c2ecf20Sopenharmony_ci	 ADD	len, len, n*NBYTES
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_ciSEXC(16)
4168c2ecf20Sopenharmony_ciSEXC(15)
4178c2ecf20Sopenharmony_ciSEXC(14)
4188c2ecf20Sopenharmony_ciSEXC(13)
4198c2ecf20Sopenharmony_ciSEXC(12)
4208c2ecf20Sopenharmony_ciSEXC(11)
4218c2ecf20Sopenharmony_ciSEXC(10)
4228c2ecf20Sopenharmony_ciSEXC(9)
4238c2ecf20Sopenharmony_ciSEXC(8)
4248c2ecf20Sopenharmony_ciSEXC(7)
4258c2ecf20Sopenharmony_ciSEXC(6)
4268c2ecf20Sopenharmony_ciSEXC(5)
4278c2ecf20Sopenharmony_ciSEXC(4)
4288c2ecf20Sopenharmony_ciSEXC(3)
4298c2ecf20Sopenharmony_ciSEXC(2)
4308c2ecf20Sopenharmony_ciSEXC(1)
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_cis_exc_p1:
4338c2ecf20Sopenharmony_ci	jr	ra
4348c2ecf20Sopenharmony_ci	 ADD	len, len, 1
4358c2ecf20Sopenharmony_cis_exc:
4368c2ecf20Sopenharmony_ci	jr	ra
4378c2ecf20Sopenharmony_ci	 nop
4388c2ecf20Sopenharmony_ci
/*
 * memmove: a0=dst, a1=src, a2=len; v0 is set to the original a0.
 *
 * t0 = (src < dst + len) and t1 = (dst < src + len); the regions overlap
 * only when both are true. Without overlap control goes to __memcpy, with
 * v0 set in the branch delay slot. With overlap, a zero length leaves via
 * r_out; otherwise execution continues into __rmemcpy.
 *
 * The final beqz has no instruction after it inside memmove. Its delay slot
 * is the first instruction following END(memmove), i.e. the sltu that opens
 * __rmemcpy, so the two routines must stay adjacent.
 */
4398c2ecf20Sopenharmony_ci	.align	5
4408c2ecf20Sopenharmony_ciLEAF(memmove)
4418c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memmove)
4428c2ecf20Sopenharmony_ci	ADD	t0, a0, a2
4438c2ecf20Sopenharmony_ci	ADD	t1, a1, a2
4448c2ecf20Sopenharmony_ci	sltu	t0, a1, t0			# dst + len <= src -> memcpy
4458c2ecf20Sopenharmony_ci	sltu	t1, a0, t1			# dst >= src + len -> memcpy
4468c2ecf20Sopenharmony_ci	and	t0, t1
4478c2ecf20Sopenharmony_ci	beqz	t0, __memcpy
4488c2ecf20Sopenharmony_ci	 move	v0, a0				/* return value */
4498c2ecf20Sopenharmony_ci	beqz	a2, r_out
4508c2ecf20Sopenharmony_ci	END(memmove)
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci	/* fall through to __rmemcpy */
/*
 * __rmemcpy: byte-at-a-time copy for overlapping regions.
 *
 * The leading sltu (written with delay-slot indentation) computes
 * t0 = (src < dst). When src >= dst the copy runs upwards from the start
 * (r_end_bytes_up). Otherwise both pointers are first moved len bytes
 * forward and the copy runs downwards from the end (r_end_bytes). Both
 * paths return with a2 set to zero in the jr delay slot.
 *
 * NOTE(review): both loops copy one byte before testing a2, so a zero
 * length is not handled here. The only zero-length guard visible in this
 * file is the beqz in memmove, just before control reaches this routine.
 */
4538c2ecf20Sopenharmony_ciLEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
4548c2ecf20Sopenharmony_ci	 sltu	t0, a1, a0
4558c2ecf20Sopenharmony_ci	beqz	t0, r_end_bytes_up		# src >= dst
4568c2ecf20Sopenharmony_ci	 nop
4578c2ecf20Sopenharmony_ci	ADD	a0, a2				# dst = dst + len
4588c2ecf20Sopenharmony_ci	ADD	a1, a2				# src = src + len
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_cir_end_bytes:
4618c2ecf20Sopenharmony_ci	lb	t0, -1(a1)
4628c2ecf20Sopenharmony_ci	SUB	a2, a2, 0x1
4638c2ecf20Sopenharmony_ci	sb	t0, -1(a0)
4648c2ecf20Sopenharmony_ci	SUB	a1, a1, 0x1
4658c2ecf20Sopenharmony_ci	bnez	a2, r_end_bytes
4668c2ecf20Sopenharmony_ci	 SUB	a0, a0, 0x1
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_cir_out:
4698c2ecf20Sopenharmony_ci	jr	ra
4708c2ecf20Sopenharmony_ci	 move	a2, zero
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_cir_end_bytes_up:
4738c2ecf20Sopenharmony_ci	lb	t0, (a1)
4748c2ecf20Sopenharmony_ci	SUB	a2, a2, 0x1
4758c2ecf20Sopenharmony_ci	sb	t0, (a0)
4768c2ecf20Sopenharmony_ci	ADD	a1, a1, 0x1
4778c2ecf20Sopenharmony_ci	bnez	a2, r_end_bytes_up
4788c2ecf20Sopenharmony_ci	 ADD	a0, a0, 0x1
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci	jr	ra
4818c2ecf20Sopenharmony_ci	 move	a2, zero
4828c2ecf20Sopenharmony_ci	END(__rmemcpy)
483