/*
 *  arch/xtensa/lib/usercopy.S
 *
 *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 *  It needs to remain separate and distinct.  The hal files are part
 *  of the Xtensa link-time HAL, and those files may differ per
 *  processor configuration.  Patching the kernel for another
 *  processor configuration includes replacing the hal files, and we
 *  could lose the special functionality for accessing user-space
 *  memory during such a patch.  We sacrifice a little code space here
 *  in order to simplify code maintenance.
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */


/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The returned value is the number of bytes not copied.  Implies zero
 * is success.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If destination is aligned and source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment label).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ original length
 *	a12/ tmp (loop end address, only when !XCHAL_HAVE_LOOPS)
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * __xtensa_copy_user: copy a4 bytes from a3 (src) to a2 (dst).
 *
 * Returns 0 in a2 when every byte was copied.  Each load/store that
 * touches the buffers is wrapped in EX(10f); the macro is defined
 * outside this file and presumably registers local label 10 (in the
 * .fixup section) as the recovery address for a faulting access.
 * That handler returns a11 - (a5 - a2), so this routine must keep
 *   a2  = original dst   (not overwritten until a success return)
 *   a11 = original len
 *   a5  = current dst, advanced only after the stores of a group
 *         have been issued, so a fault never over-reports progress.
 */
	.text
ENTRY(__xtensa_copy_user)

	abi_entry_default
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	mov	a11, a4		# preserve original len for error case
.Lcommon:
	bbsi.l	a2, 0, .Ldst1mod2 # if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4 # if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		  # if source is also aligned,
	bnone	a3, a8, .Laligned # then use word copy
	__ssa8	a3		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	abi_ret_default

/*
 * Destination is unaligned
 */

.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
EX(10f)	l8ui	a6, a3, 0
	addi	a3, a3,  1
EX(10f)	s8i	a6, a5,  0
	addi	a5, a5,  1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
EX(10f)	l8ui	a6, a3, 0
EX(10f)	l8ui	a7, a3, 1
	addi	a3, a3,  2
EX(10f)	s8i	a6, a5,  0
EX(10f)	s8i	a7, a5,  1
	addi	a5, a5,  2
	addi	a4, a4, -2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
EX(10f)	l8ui	a6, a3, 0
	addi	a3, a3, 1
EX(10f)	s8i	a6, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bltu	a3, a7, .Lnextbyte	# addresses: compare unsigned
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	movi	a2, 0		# return success for len bytes copied
	abi_ret_default

/*
 * Destination and source are word-aligned.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(10f)	l32i	a6, a3,  0
EX(10f)	l32i	a7, a3,  4
EX(10f)	s32i	a6, a5,  0
EX(10f)	l32i	a6, a3,  8
EX(10f)	s32i	a7, a5,  4
EX(10f)	l32i	a7, a3, 12
EX(10f)	s32i	a6, a5,  8
	addi	a3, a3, 16
EX(10f)	s32i	a7, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bltu	a3, a8, .Loop1		# addresses: compare unsigned
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	/* The low four bits of len (a4) select the 8/4/2/1-byte tails. */
	bbci.l	a4, 3, .L2
	# copy 8 bytes
EX(10f)	l32i	a6, a3,  0
EX(10f)	l32i	a7, a3,  4
	addi	a3, a3,  8
EX(10f)	s32i	a6, a5,  0
EX(10f)	s32i	a7, a5,  4
	addi	a5, a5,  8
.L2:
	bbci.l	a4, 2, .L3
	# copy 4 bytes
EX(10f)	l32i	a6, a3,  0
	addi	a3, a3,  4
EX(10f)	s32i	a6, a5,  0
	addi	a5, a5,  4
.L3:
	bbci.l	a4, 1, .L4
	# copy 2 bytes
EX(10f)	l16ui	a6, a3,  0
	addi	a3, a3,  2
EX(10f)	s16i	a6, a5,  0
	addi	a5, a5,  2
.L4:
	bbci.l	a4, 0, .L5
	# copy 1 byte
EX(10f)	l8ui	a6, a3,  0
EX(10f)	s8i	a6, a5,  0
.L5:
	movi	a2, 0		# return success for len bytes copied
	abi_ret_default

/*
 * Destination is aligned, Source is unaligned
 */

	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	and	a10, a3, a8	# save unalignment offset for below
	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
EX(10f)	l32i	a6, a3, 0	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	/*
	 * NOTE(review): a12 is callee-saved under the Xtensa call0 ABI
	 * and is not saved or restored here -- confirm this path is
	 * only built for the windowed ABI.
	 */
	slli	a12, a7, 4
	add	a12, a12, a3	# a12 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
	/*
	 * Loop invariant: a6 holds the aligned source word at a3 + 0.
	 * Each output word is formed from two neighbouring aligned
	 * words with __src_b, using the shift set up by __ssa8.
	 */
EX(10f)	l32i	a7, a3,  4
EX(10f)	l32i	a8, a3,  8
	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5,  0
EX(10f)	l32i	a9, a3, 12
	__src_b	a7, a7, a8
EX(10f)	s32i	a7, a5,  4
EX(10f)	l32i	a6, a3, 16
	__src_b	a8, a8, a9
EX(10f)	s32i	a8, a5,  8
	addi	a3, a3, 16
	__src_b	a9, a9, a6
EX(10f)	s32i	a9, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bltu	a3, a12, .Loop2		# addresses: compare unsigned
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
EX(10f)	l32i	a7, a3,  4
EX(10f)	l32i	a8, a3,  8
	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5,  0
	addi	a3, a3,  8
	__src_b	a7, a7, a8
EX(10f)	s32i	a7, a5,  4
	addi	a5, a5,  8
	mov	a6, a8		# keep invariant: a6 = word at new a3 + 0
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
EX(10f)	l32i	a7, a3,  4
	addi	a3, a3,  4
	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5,  0
	addi	a5, a5,  4
	mov	a6, a7		# keep invariant: a6 = word at new a3 + 0
.L13:
	add	a3, a3, a10	# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14
	# copy 2 bytes
EX(10f)	l8ui	a6, a3,  0
EX(10f)	l8ui	a7, a3,  1
	addi	a3, a3,  2
EX(10f)	s8i	a6, a5,  0
EX(10f)	s8i	a7, a5,  1
	addi	a5, a5,  2
.L14:
	bbci.l	a4, 0, .L15
	# copy 1 byte
EX(10f)	l8ui	a6, a3,  0
EX(10f)	s8i	a6, a5,  0
.L15:
	movi	a2, 0		# return success for len bytes copied
	abi_ret_default

ENDPROC(__xtensa_copy_user)

2718c2ecf20Sopenharmony_ci	.section .fixup, "ax"
2728c2ecf20Sopenharmony_ci	.align	4
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci/* a2 = original dst; a5 = current dst; a11= original len
2758c2ecf20Sopenharmony_ci * bytes_copied = a5 - a2
2768c2ecf20Sopenharmony_ci * retval = bytes_not_copied = original len - bytes_copied
2778c2ecf20Sopenharmony_ci * retval = a11 - (a5 - a2)
2788c2ecf20Sopenharmony_ci */
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci10:
2828c2ecf20Sopenharmony_ci	sub	a2, a5, a2	/* a2 <-- bytes copied */
2838c2ecf20Sopenharmony_ci	sub	a2, a11, a2	/* a2 <-- bytes not copied */
2848c2ecf20Sopenharmony_ci	abi_ret_default
285