1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (C) 2013 ARM Ltd.
4  * Copyright (C) 2013 Linaro.
5  *
6  * This code is based on the glibc cortex-strings work originally authored by
7  * Linaro; the original can be found at:
8  *
9  * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
10  * files/head:/src/aarch64/
11  */
12 
13 #include <linux/linkage.h>
14 #include <asm/assembler.h>
15 
16 /*
17  * Compare two NUL-terminated strings.
18  *
19  * Parameters:
20  *	x0 - const string 1 pointer (s1)
21  *	x1 - const string 2 pointer (s2)
22  * Returns:
23  *	x0 - an integer less than, equal to, or greater than zero if s1
24  *	     is found, respectively, to be less than, to match, or to be
25  *	     greater than s2.
26  */
27 
/*
 * Byte-replicated 64-bit constants: each one repeats a single byte value
 * (0x01, 0x7f or 0x80) in all eight byte lanes of a doubleword.
 * REP8_01 and REP8_7f are the operands of the word-at-a-time NUL test
 * (X - 0x01..01) & ~(X | 0x7f..7f) used by strcmp.  REP8_80 is not
 * referenced by the code visible in this file.
 */
28 #define REP8_01 0x0101010101010101
29 #define REP8_7f 0x7f7f7f7f7f7f7f7f
30 #define REP8_80 0x8080808080808080
31 
32 /* Parameters and result.  */
/* Note: src1 and result are both aliases of x0.  */
33 src1		.req	x0	/* Pointer into string 1; advanced as it is read.  */
34 src2		.req	x1	/* Pointer into string 2; advanced as it is read.  */
35 result		.req	x0	/* Return value; overwrites src1.  */
36 
37 /* Internal variables.  */
38 data1		.req	x2	/* Byte or doubleword loaded from src1.  */
39 data1w		.req	w2	/* 32-bit view of data1, target of the byte loads.  */
40 data2		.req	x3	/* Byte or doubleword loaded from src2.  */
41 data2w		.req	w3	/* 32-bit view of data2, target of the byte loads.  */
42 has_nul		.req	x4	/* Non-zero if data1 contains a NUL byte.  */
43 diff		.req	x5	/* data1 ^ data2: non-zero if the words differ.  */
44 syndrome	.req	x6	/* diff | has_nul: non-zero ends the word loops.  */
45 tmp1		.req	x7	/* Scratch.  */
46 tmp2		.req	x8	/* Scratch.  */
47 tmp3		.req	x9	/* Scratch; alignment delta on the misaligned path.  */
48 zeroones	.req	x10	/* Holds REP8_01 for the whole function.  */
49 pos		.req	x11	/* Byte countdown, negative load offset or CLZ shift.  */
50 
/*
 * strcmp: compare the NUL-terminated strings at x0 (src1) and x1 (src2)
 * and return the result in x0.
 *
 * The strategy is chosen from the low three bits of the two pointers:
 *   - both 8-byte aligned:  .Lloop_aligned compares one doubleword per
 *                           iteration;
 *   - same non-zero offset: .Lmutual_align rounds both pointers down,
 *                           neutralises the bytes before the start and
 *                           joins the aligned loop at .Lstart_realigned;
 *   - different offsets:    .Lmisaligned8 compares byte by byte until one
 *                           pointer is aligned, then .Lloopcmp_proc
 *                           compares doublewords.
 * Every doubleword path ends in .Lcal_cmpresult, which derives the return
 * value from the first differing or NUL byte.
 *
 * The instructions below write only x0-x11 and the condition flags and do
 * not reference the stack.  The SYM_FUNC_* / EXPORT_SYMBOL_* macros come
 * from the included headers and are not visible here.
 */
51 SYM_FUNC_START_WEAK_PI(strcmp)
52 	eor	tmp1, src1, src2
53 	mov	zeroones, #REP8_01
54 	tst	tmp1, #7	/* Do the low three address bits differ?  */
55 	b.ne	.Lmisaligned8
56 	ands	tmp1, src1, #7	/* tmp1 = byte offset inside the doubleword.  */
57 	b.ne	.Lmutual_align
58 
59 	/*
60 	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
61 	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
62 	* can be done in parallel across the entire word.
63 	*/
64 .Lloop_aligned:
	/* Post-indexed loads: both pointers advance by 8 bytes.  */
65 	ldr	data1, [src1], #8
66 	ldr	data2, [src2], #8
67 .Lstart_realigned:
68 	sub	tmp1, data1, zeroones
69 	orr	tmp2, data1, #REP8_7f
70 	eor	diff, data1, data2	/* Non-zero if differences found.  */
71 	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
72 	orr	syndrome, diff, has_nul
	/* Keep looping while the doublewords are equal and NUL-free.  */
73 	cbz	syndrome, .Lloop_aligned
74 	b	.Lcal_cmpresult
75 
76 .Lmutual_align:
77 	/*
78 	* Sources are mutually aligned, but are not currently at an
79 	* alignment boundary.  Round down the addresses and then mask off
80 	* the bytes that precede the start point.
81 	*/
82 	bic	src1, src1, #7
83 	bic	src2, src2, #7
84 	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
85 	ldr	data1, [src1], #8
86 	neg	tmp1, tmp1		/* Bits to alignment -64.  */
87 	ldr	data2, [src2], #8
88 	mov	tmp2, #~0		/* All ones; shifted into a mask below.  */
89 	/* Big-endian.  Early bytes are at MSB.  */
90 CPU_BE( lsl	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
91 	/* Little-endian.  Early bytes are at LSB.  */
92 CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
93 
	/*
	 * tmp2 now has all bits set in the byte lanes that precede the start
	 * of the strings.  ORing it into both words makes those lanes 0xff
	 * in each, so they compare equal and are never detected as NUL.
	 */
94 	orr	data1, data1, tmp2
95 	orr	data2, data2, tmp2
96 	b	.Lstart_realigned
97 
98 .Lmisaligned8:
99 	/*
100 	* Get the align offset length to compare per byte first.
101 	* After this process, one string's address will be aligned.
102 	*/
103 	and	tmp1, src1, #7
104 	neg	tmp1, tmp1
105 	add	tmp1, tmp1, #8	/* tmp1 = 8 - (src1 & 7).  */
106 	and	tmp2, src2, #7
107 	neg	tmp2, tmp2
108 	add	tmp2, tmp2, #8	/* tmp2 = 8 - (src2 & 7).  */
109 	subs	tmp3, tmp1, tmp2	/* tmp3 = tmp1 - tmp2, reused after the byte loop.  */
110 	csel	pos, tmp1, tmp2, hi /*Choose the maximum. */
	/*
	 * Byte loop.  It continues only while bytes remain (pos != 0), the
	 * byte from src1 is non-NUL and the two bytes are equal.  Each ccmp
	 * sets NZCV to 0b0000 when its condition fails, which clears C and Z
	 * and makes the b.eq fall through.
	 */
111 .Ltinycmp:
112 	ldrb	data1w, [src1], #1
113 	ldrb	data2w, [src2], #1
114 	subs	pos, pos, #1
115 	ccmp	data1w, #1, #0, ne  /* NZCV = 0b0000.  */
116 	ccmp	data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
117 	b.eq	.Ltinycmp
118 	cbnz	pos, 1f /* Bytes remained: stopped on a NUL or a mismatch. */
	/* pos reached zero: the final byte pair still has to be checked.  */
119 	cmp	data1w, #1
120 	ccmp	data1w, data2w, #0, cs
121 	b.eq	.Lstart_align /* The last bytes are non-NUL and equal. */
122 1:
	/* Both registers hold zero-extended bytes from the ldrb loads.  */
123 	sub	result, data1, data2
124 	ret
125 
126 .Lstart_align:
127 	ands	xzr, src1, #7	/* Is src1 already 8-byte aligned?  */
128 	b.eq	.Lrecal_offset
129 	/*process more leading bytes to make str1 aligned...*/
	/*
	 * src1 is not aligned here, so src2 is the pointer the byte loop
	 * aligned and tmp3 (= tmp1 - tmp2) is negative: both pointers step
	 * back, which brings src1 onto an 8-byte boundary.
	 */
130 	add	src1, src1, tmp3
131 	add	src2, src2, tmp3
132 	/*load 8 bytes from aligned str1 and non-aligned str2..*/
133 	ldr	data1, [src1], #8
134 	ldr	data2, [src2], #8
135 
136 	sub	tmp1, data1, zeroones
137 	orr	tmp2, data1, #REP8_7f
138 	bic	has_nul, tmp1, tmp2
139 	eor	diff, data1, data2 /* Non-zero if differences found.  */
140 	orr	syndrome, diff, has_nul
141 	cbnz	syndrome, .Lcal_cmpresult
142 	/*How far is the current str2 from the alignment boundary...*/
143 	and	tmp3, tmp3, #7
144 .Lrecal_offset:
	/* pos = -(distance of src2 past its alignment boundary).  */
145 	neg	pos, tmp3
146 .Lloopcmp_proc:
147 	/*
148 	* Each iteration is split into two parts. First, step back from src2
149 	* to its alignment boundary, load eight bytes from that boundary and
150 	* compare them with the corresponding bytes from src1.
151 	* If all 8 bytes are equal, start the second part's comparison.
152 	* Otherwise finish the comparison.
153 	* This special handling is intended to guarantee that all accesses
154 	* stay within the thread/task space and avoid out-of-range accesses.
155 	*/
	/* First part: register-offset loads, the pointers are not modified.  */
156 	ldr	data1, [src1,pos]
157 	ldr	data2, [src2,pos]
158 	sub	tmp1, data1, zeroones
159 	orr	tmp2, data1, #REP8_7f
160 	bic	has_nul, tmp1, tmp2
161 	eor	diff, data1, data2  /* Non-zero if differences found.  */
162 	orr	syndrome, diff, has_nul
163 	cbnz	syndrome, .Lcal_cmpresult
164 
165 	/*The second part process*/
	/* Post-indexed loads from the current pointers; both advance by 8.  */
166 	ldr	data1, [src1], #8
167 	ldr	data2, [src2], #8
168 	sub	tmp1, data1, zeroones
169 	orr	tmp2, data1, #REP8_7f
170 	bic	has_nul, tmp1, tmp2
171 	eor	diff, data1, data2  /* Non-zero if differences found.  */
172 	orr	syndrome, diff, has_nul
173 	cbz	syndrome, .Lloopcmp_proc
174 
	/*
	 * Entered with data1/data2 holding the doublewords that ended the
	 * comparison, and diff, has_nul and syndrome computed from them.
	 */
175 .Lcal_cmpresult:
176 	/*
177 	* On little-endian, reverse the byte order so that the earliest
178 	* string byte is most significant and CLZ finds the first mismatch.
179 	*/
180 CPU_LE( rev	syndrome, syndrome )
181 CPU_LE( rev	data1, data1 )
182 CPU_LE( rev	data2, data2 )
183 
184 	/*
185 	* For big-endian we cannot use the trick with the syndrome value
186 	* as carry-propagation can corrupt the upper bits if the trailing
187 	* bytes in the string contain 0x01.
188 	* However, if there is no NUL byte in the dword, we can generate
189 	* the result directly.  We cannot just subtract the bytes as the
190 	* MSB might be significant.
191 	*/
	/*
	 * Big-endian, no NUL: the words differ, so an unsigned doubleword
	 * compare yields +1, or -1 when data1 is lower than data2.
	 */
192 CPU_BE( cbnz	has_nul, 1f )
193 CPU_BE( cmp	data1, data2 )
194 CPU_BE( cset	result, ne )
195 CPU_BE( cneg	result, result, lo )
196 CPU_BE( ret )
197 CPU_BE( 1: )
198 	/*Re-compute the NUL-byte detection, using a byte-reversed value. */
199 CPU_BE(	rev	tmp3, data1 )
200 CPU_BE(	sub	tmp1, tmp3, zeroones )
201 CPU_BE(	orr	tmp2, tmp3, #REP8_7f )
202 CPU_BE(	bic	has_nul, tmp1, tmp2 )
203 CPU_BE(	rev	has_nul, has_nul )
204 CPU_BE(	orr	syndrome, diff, has_nul )
205 
206 	clz	pos, syndrome
207 	/*
208 	* The MS-non-zero bit of the syndrome marks either the first bit
209 	* that is different, or the top bit of the first zero byte.
210 	* Shifting left now will bring the critical information into the
211 	* top bits.
212 	*/
213 	lsl	data1, data1, pos
214 	lsl	data2, data2, pos
215 	/*
216 	* But we need to zero-extend (char is unsigned) the value and then
217 	* perform a signed 32-bit subtraction.
218 	*/
	/*
	 * NOTE(review): result, data1 and data2 are all x registers, so the
	 * subtraction below is a 64-bit operation on two values in 0..255.
	 */
219 	lsr	data1, data1, #56
220 	sub	result, data1, data2, lsr #56
221 	ret
/*
 * NOTE(review): presumably these close the symbol opened above and export
 * it; the macro definitions are outside this file, so confirm there.
 */
222 SYM_FUNC_END_PI(strcmp)
223 EXPORT_SYMBOL_NOKASAN(strcmp)
224