/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include "../asmdefs.h"

/* Byte-replicated masks for the word-at-a-time NUL test: REP8_01 has 0x01
   in each of its eight bytes and REP8_7f has 0x7f in each byte.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Register roles.  Some names share a register, so writing one overwrites
   the other: result/src1 (x0), diff/off1 (x5) and syndrome/tmp (x6).  */

/* Arguments and return value.  */
#define src1		x0
#define src2		x1
#define result		x0

/* data1/data2: data loaded from src1/src2 (w views for byte loads).
   has_nul:  NUL-detection mask of the word being tested.
   diff:     data1 ^ data2.
   off1:     offset from src1 to the aligned src2 word (unaligned loop).
   syndrome: diff | NUL mask, consumed at L(end).
   tmp:      scratch.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
/* data3:    aligned word read from src2 on the misaligned path.
   zeroones: holds REP8_01.
   shift:    variable shift amount.
   off2:     src2 - src1.  */
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  It expands to the shift
   mnemonic itself (lsl or lsr) and is used like an instruction.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, the data is byte-reversed before the NUL
   check on big-endian targets.  */


/* int __strcmp_aarch64_mte (const char *s1, const char *s2)

   In:    x0 = s1 (src1), x1 = s2 (src2).
   Out:   x0 = zero if the strings are equal, negative if s1 sorts before
	  s2 and positive if it sorts after, bytes compared as unsigned.
	  Only the bytewise exit at L(done) returns the exact byte
	  difference; L(end) returns a value with the correct sign.
   Uses:  x2-x10 and the condition flags; the stack is not touched.

   The low three bits of the two addresses select one of three paths:
   - both pointers 8-byte aligned: word loop at L(loop_aligned);
   - same misalignment: L(mutual_align) rounds the address down, forces
     the bytes before the start to 0xff in both words and joins the word
     loop;
   - different misalignment: L(misaligned8) advances bytewise until src1
     is aligned, then L(loop_unaligned) tests an aligned src2 word for
     NUL on every iteration alongside the unaligned src2 load that is
     compared with src1.  */
ENTRY (__strcmp_aarch64_mte)
	/* PTR_ARG is defined in asmdefs.h, which is not visible here;
	   presumably it normalises pointer arguments 0 and 1 for the
	   target ABI -- confirm against that header.  */
	PTR_ARG (0)
	PTR_ARG (1)
	/* off2 = src2 - src1, so [src1, off2] addresses the src2 byte that
	   corresponds to src1.  */
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* Misalignment of src1.  */
	tst	off2, 7
	b.ne	L(misaligned8)		/* Alignments differ.  */
	cbnz	tmp, L(mutual_align)	/* Same alignment, not on a boundary.  */

	.p2align 4

	/* Word loop: both strings are read with aligned 8-byte loads.  */
L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
	/* has_nul = (data1 - REP8_01) & ~(data1 | REP8_7f), computed on the
	   byte-reversed word on big-endian.  */
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* With no NUL (Z set) compare the words; otherwise NZCV = 0, which
	   reads as "ne" and leaves the loop.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
	/* Here data1 contains a NUL or the two words differ.  */
#ifdef __AARCH64EB__
	rev	has_nul, has_nul	/* Back to the byte order of data1.  */
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul

	/* Common exit.  Expects data1 and data2 as loaded from memory and
	   syndrome marking the first difference or NUL.  */
L(end):
#ifndef __AARCH64EB__
	/* Move the earliest byte to the most significant end so that clz
	   locates the first marked position.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	/* src1 has already been rounded and advanced, so the misalignment
	   is taken from src2, whose low three bits matched those of src1.  */
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	/* Set the preceding bytes to 0xff in both words so they compare
	   equal and are not seen as NUL.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
	/* Bytewise compare until src1 reaches an 8-byte boundary.  Both
	   pointers advance together, so off2 remains src2 - src1.  */
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)			/* NUL in src1 or bytes differ.  */
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* Read the aligned word that contains the current src2 byte and
	   test the bytes from src2 onwards for NUL.  data3 is handled in
	   little-endian byte order (reversed on big-endian).  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Make the bytes that precede src2 non-zero so that they cannot be
	   reported as NUL.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	/* src2 now points at the next aligned word, so [src1, off1] is the
	   aligned src2 word that holds the end of the unaligned 8 bytes at
	   [src1, off2].  */
	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]	/* Aligned src2 word, NUL-tested.  */
	ldr	data2, [src1, off2]	/* Unaligned src2 data, compared.  */
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	/* Continue while data3 has no NUL and data1 equals data2.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Keep only the NUL markers of the data3 bytes that are also part
	   of data2, moved to the matching byte positions.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
	/* data1 equals data2 and the NUL lies in the part of data3 beyond
	   data2: compare the next src1 word against the rest of data3.  */
L(tail):
	ldr	data1, [src1]
	neg	shift, shift
	/* Drop the data3 bytes that precede the current src2 position.  */
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

	/* Bytewise exit: data1 and data2 each hold one zero-extended byte.  */
L(done):
	sub	result, data1, data2
	ret

END (__strcmp_aarch64_mte)
