/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include "../asmdefs.h"

/* 64-bit constants with one byte value replicated in all eight byte lanes.
   REP8_01 and REP8_7f drive the word-at-a-time NUL detection below;
   REP8_80 is not referenced in this file.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  The result shares x0 with the first argument,
   so src1 is dead once the result has been written.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Internal variables.  The w-register names are the 32-bit views of the
   same registers and are used for the byte-at-a-time loads.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4	/* Non-zero if data1 contains a NUL byte.  */
#define diff		x5	/* data1 ^ data2.  */
#define syndrome	x6	/* diff | has_nul.  */
#define tmp1		x7
#define tmp2		x8
#define tmp3		x9
#define zeroones	x10	/* Holds REP8_01 for the whole routine.  */
#define pos		x11	/* Bit position from clz of the syndrome.  */

/* __strcmp_aarch64 - compare two NUL-terminated byte strings.

   In:   x0 = src1, x1 = src2.
   Out:  x0 = 0 if the strings are equal, negative if the first differing
	 byte of src1 is lower than that of src2, positive otherwise.
	 Bytes are compared as unsigned values.
   Uses: x2-x11 as scratch (see the register #defines above) and the
	 condition flags.  The stack is not touched.

   Three strategies are used, chosen from the low three address bits:
     - both pointers 8-byte aligned: compare one dword per iteration;
     - same misalignment: round both down to a dword boundary, force the
       bytes before the start to 0xff in both words, then join the
       aligned loop;
     - different misalignment: compare bytes until src1 is aligned, then
       compare dwords with an unaligned src2 load, dropping back to bytes
       whenever src2 is within the last dword of a 4K page.

   PTR_ARG comes from asmdefs.h; presumably it normalises pointer
   arguments for the target ABI -- confirm against that header.  */

	/* Start of performance-critical section  -- one 64B cache line.  */
ENTRY (__strcmp_aarch64)
	PTR_ARG (0)
	PTR_ARG (1)
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7		/* Do the low 3 address bits differ?  */
	b.ne	L(misaligned8)
	ands	tmp1, src1, #7		/* tmp1 = bytes past dword boundary.  */
	b.ne	L(mutual_align)
	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  */
L(loop_aligned):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned):
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, L(loop_aligned)
	/* End of performance-critical section  -- one 64B cache line.  */

	/* Here data1/data2 hold the dword containing the first difference
	   or NUL, and diff/has_nul/syndrome describe it.  */
L(end):
#ifndef	__AARCH64EB__
	/* Little-endian: byte-reverse so the earliest string byte becomes
	   the most significant one.  */
	rev	syndrome, syndrome
	rev	data1, data1
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/* For big-endian we cannot use the trick with the syndrome value
	   as carry-propagation can corrupt the upper bits if the trailing
	   bytes in the string contain 0x01.  */
	/* However, if there is no NUL byte in the dword, we can generate
	   the result directly.  We can't just subtract the bytes as the
	   MSB might be significant.  */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne		/* 1 if different, else 0.  */
	cneg	result, result, lo	/* Negate if data1 < data2 (unsigned).  */
	ret
1:
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  On entry tmp1 holds
	   src1 & 7 from the ands above.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	/* Set the bytes before the start to 0xff in both words so they
	   compare equal and cannot be mistaken for a NUL.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond page boundary in
	   SRC2.  */
	tst	src1, #7
	b.eq	L(loop_misaligned)
L(do_misaligned):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	/* Carry is set iff data1 is not NUL.  If it is NUL the ccmp forces
	   NZCV to 0 (so "ne" holds); otherwise it compares the two bytes.
	   Either way "ne" means we are finished.  */
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, #7
	b.ne	L(do_misaligned)

L(loop_misaligned):
	/* Test if we are within the last dword of the end of a 4K page.  If
	   yes then jump back to the misaligned loop to compare a byte at a
	   time.  */
	and	tmp1, src2, #0xff8
	eor	tmp1, tmp1, #0xff8	/* Zero iff bits 3-11 of src2 are all set.  */
	cbz	tmp1, L(do_misaligned)
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8

	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, L(loop_misaligned)
	b	L(end)

L(done):
	/* data1/data2 hold the zero-extended bytes from the byte loop.  */
	sub	result, data1, data2
	ret

END (__strcmp_aarch64)
