/*
 *  arch/xtensa/lib/strncpy_user.S
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Returns: -EFAULT if exception before terminator, N if the entire
 *  buffer filled, else strlen.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */
13 
#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
18 
/*
 * long __strncpy_user(char *dst, const char *src, size_t len)
 *
 * Note: despite historically being commented as returning char *, the
 * code below returns a count in a2: the number of bytes copied, not
 * including a stored NUL terminator (i.e. strlen, or len if no
 * terminator was found), or -EFAULT if a user access faulted.
 */
22 
/*
 * MASKn selects byte n, in memory order, of a 32-bit word loaded with
 * l32i, for either endianness.  Used below as "bnone a9, MASKn, label"
 * to branch when that byte of the loaded word is zero.
 */
#ifdef __XTENSA_EB__
# define MASK0 0xff000000
# define MASK1 0x00ff0000
# define MASK2 0x0000ff00
# define MASK3 0x000000ff
#else
# define MASK0 0x000000ff
# define MASK1 0x0000ff00
# define MASK2 0x00ff0000
# define MASK3 0xff000000
#endif
34 
# Register use
#   a0/ return address
#   a1/ stack pointer
#   a2/ return value
#   a3/ src
#   a4/ len
#   a5/ mask0
#   a6/ mask1
#   a7/ mask2
#   a8/ mask3
#   a9/ tmp
#   a10/ tmp
#   a11/ dst
48 
.text

/*
 * Entry:  a2 = dst, a3 = src (user), a4 = len (bytes available in dst)
 * Return: a2 = number of bytes copied, not counting a stored NUL
 *         terminator; the .fixup code (labels 10:/11:, reached via the
 *         EX() annotations) returns -EFAULT on a faulting access.
 */
ENTRY(__strncpy_user)

	abi_entry_default
	# a2/ dst, a3/ src, a4/ len
	mov	a11, a2		# leave dst in return value register
	beqz	a4, .Lret	# if len is zero
	movi	a5, MASK0	# mask for byte 0
	movi	a6, MASK1	# mask for byte 1
	movi	a7, MASK2	# mask for byte 2
	movi	a8, MASK3	# mask for byte 3
	bbsi.l	a3, 0, .Lsrc1mod2 # if only  8-bit aligned
	bbsi.l	a3, 1, .Lsrc2mod4 # if only 16-bit aligned
.Lsrcaligned:	# return here when src is word-aligned
	srli	a10, a4, 2	# number of loop iterations with 4B per loop
	movi	a9, 3
	bnone	a11, a9, .Laligned	# word loop only if dst is aligned too
	j	.Ldstunaligned

.Lsrc1mod2:	# src address is odd: copy one byte to reach 16-bit alignment
EX(11f)	l8ui	a9, a3, 0		# get byte 0
	addi	a3, a3, 1		# advance src pointer
EX(10f)	s8i	a9, a11, 0		# store byte 0
	beqz	a9, .Lret		# if byte 0 is zero
	addi	a11, a11, 1		# advance dst pointer
	addi	a4, a4, -1		# decrement len
	beqz	a4, .Lret		# if len is zero
	bbci.l	a3, 1, .Lsrcaligned	# if src is now word-aligned

.Lsrc2mod4:	# src address is 2 mod 4: copy two bytes to word-align src
EX(11f)	l8ui	a9, a3, 0		# get byte 0
	/* 1-cycle interlock */
EX(10f)	s8i	a9, a11, 0		# store byte 0
	beqz	a9, .Lret		# if byte 0 is zero
	addi	a11, a11, 1		# advance dst pointer
	addi	a4, a4, -1		# decrement len
	beqz	a4, .Lret		# if len is zero
EX(11f)	l8ui	a9, a3, 1		# get byte 1
	addi	a3, a3, 2		# advance src pointer
EX(10f)	s8i	a9, a11, 0		# store byte 1
	beqz	a9, .Lret		# if byte 1 is zero
	addi	a11, a11, 1		# advance dst pointer
	addi	a4, a4, -1		# decrement len
	bnez	a4, .Lsrcaligned	# if len is nonzero
.Lret:
	sub	a2, a11, a2		# compute strlen
	abi_ret_default

/*
 * dst is word-aligned, src is word-aligned
 */
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a10, .Loop1done
#else
	beqz	a10, .Loop1done
	slli	a10, a10, 2
	add	a10, a10, a11	# a10 = end of last 4B chunk
#endif
.Loop1:
EX(11f)	l32i	a9, a3, 0		# get word from src
	addi	a3, a3, 4		# advance src pointer
	bnone	a9, a5, .Lz0		# if byte 0 is zero
	bnone	a9, a6, .Lz1		# if byte 1 is zero
	bnone	a9, a7, .Lz2		# if byte 2 is zero
EX(10f)	s32i	a9, a11, 0		# store word to dst
	bnone	a9, a8, .Lz3		# if byte 3 is zero
	addi	a11, a11, 4		# advance dst pointer
#if !XCHAL_HAVE_LOOPS
	blt	a11, a10, .Loop1
#endif

.Loop1done:
	bbci.l	a4, 1, .L100
	# copy 2 bytes; l16ui zero-extends, so the pair sits in a9 bits 15:0
EX(11f)	l16ui	a9, a3, 0
	addi	a3, a3, 2		# advance src pointer
#ifdef __XTENSA_EB__
	bnone	a9, a7, .Lz0		# if first tail byte (bits 15:8) is zero
	bnone	a9, a8, .Lz1h		# if second tail byte (bits 7:0) is zero;
					# NOT .Lz1 - its extui expects the pair
					# in bits 31:16 and would store zeroes
#else
	bnone	a9, a5, .Lz0		# if first tail byte (bits 7:0) is zero
	bnone	a9, a6, .Lz1		# if second tail byte (bits 15:8) is zero
#endif
EX(10f)	s16i	a9, a11, 0
	addi	a11, a11, 2		# advance dst pointer
.L100:
	bbci.l	a4, 0, .Lret
EX(11f)	l8ui	a9, a3, 0
	/* slot */
EX(10f)	s8i	a9, a11, 0
	beqz	a9, .Lret		# if byte is zero
	addi	a11, a11, 1-3		# advance dst ptr 1, but also cancel
					# the effect of adding 3 in .Lz3 code
	/* fall thru to .Lz3 and "retw" */

.Lz3:	# byte 3 is zero
	addi	a11, a11, 3		# advance dst pointer
	sub	a2, a11, a2		# compute strlen
	abi_ret_default
.Lz0:	# byte 0 is zero
#ifdef __XTENSA_EB__
	movi	a9, 0			# big-endian: a9 bits 7:0 hold byte 3,
					# which need not be zero, so store 0
#endif /* __XTENSA_EB__ */
EX(10f)	s8i	a9, a11, 0
	sub	a2, a11, a2		# compute strlen
	abi_ret_default
.Lz1:	# byte 1 (of a full word in a9) is zero
#ifdef __XTENSA_EB__
	extui   a9, a9, 16, 16		# move bytes 0..1 down to bits 15:0
#endif /* __XTENSA_EB__ */
EX(10f)	s16i	a9, a11, 0		# store byte 0 and the NUL
	addi	a11, a11, 1		# advance dst pointer
	sub	a2, a11, a2		# compute strlen
	abi_ret_default
#ifdef __XTENSA_EB__
.Lz1h:	# big-endian 2-byte-tail case: the NUL is the second byte and the
	# pair already sits in a9 bits 15:0, so store it unshifted.
	# (Jumping to .Lz1 here would extui away the data and store 0x0000,
	# silently dropping the final character of the string.)
EX(10f)	s16i	a9, a11, 0		# store last char and the NUL
	addi	a11, a11, 1		# advance dst pointer
	sub	a2, a11, a2		# compute strlen
	abi_ret_default
#endif /* __XTENSA_EB__ */
.Lz2:	# byte 2 is zero
#ifdef __XTENSA_EB__
	extui   a9, a9, 16, 16		# move bytes 0..1 down to bits 15:0
#endif /* __XTENSA_EB__ */
EX(10f)	s16i	a9, a11, 0		# store bytes 0..1
	movi	a9, 0
EX(10f)	s8i	a9, a11, 2		# store the terminating zero
	addi	a11, a11, 2		# advance dst pointer
	sub	a2, a11, a2		# compute strlen
	abi_ret_default

	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Ldstunaligned:
/*
 * for now just use byte copy loop
 */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lunalignedend
#else
	beqz	a4, .Lunalignedend
	add	a10, a11, a4		# a10 = ending address
#endif /* XCHAL_HAVE_LOOPS */
.Lnextbyte:
EX(11f)	l8ui	a9, a3, 0
	addi	a3, a3, 1
EX(10f)	s8i	a9, a11, 0
	beqz	a9, .Lunalignedend
	addi	a11, a11, 1
#if !XCHAL_HAVE_LOOPS
	blt	a11, a10, .Lnextbyte
#endif

.Lunalignedend:
	sub	a2, a11, a2		# compute strlen
	abi_ret_default

ENDPROC(__strncpy_user)
EXPORT_SYMBOL(__strncpy_user)
205 
	.section .fixup, "ax"
	.align	4

	/* For now, just return -EFAULT.  Future implementations might
	 * like to clear remaining kernel space, like the fixup
	 * implementation in memset().  Thus, we differentiate between
	 * load/store fixups. */

10:	/* store fixup: target of the EX(10f) annotations on stores above */
11:	/* load fixup: target of the EX(11f) annotations on user loads above */
	movi	a2, -EFAULT
	abi_ret_default
218