/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

/* Spell branch targets as assembler-local ".L" labels.  */
#define L(label) .L ## label

/* 0x01 and 0x7f replicated into every byte of a 64-bit word; both are
   used by the word-at-a-time NUL detection.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Arguments and return value.  result reuses x0, so src1 is dead once
   result has been written.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Scratch registers.  Note the deliberate sharing: diff/off1 both name
   x5 and syndrome/tmp both name x6, so the two names of a pair must
   never be live at the same time.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/*
 * __pi_strcmp - compare two NUL-terminated strings (aliased as strcmp).
 *
 * In:      x0 (src1), x1 (src2) - addresses of the two strings.
 * Out:     x0 (result) - zero when the strings are equal, otherwise a
 *          value whose sign gives the ordering of the first differing
 *          bytes, compared as unsigned chars.
 * Clobber: x1-x10 and the condition flags.  The stack is not used.
 *
 * One of three strategies is chosen on entry:
 *   - both pointers 8-byte aligned: compare a word per iteration
 *     (loop_aligned);
 *   - both pointers share the same non-zero offset within an 8-byte
 *     word: round down, neutralise the leading bytes, then join the
 *     aligned loop (mutual_align);
 *   - offsets differ: compare bytewise until src1 is aligned, then
 *     compare a word per iteration using aligned loads of src2 for the
 *     NUL check (misaligned8 / loop_unaligned / tail).
 */
SYM_FUNC_START(__pi_strcmp)
	sub	off2, src2, src1	/* Constant distance src2 - src1.  */
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	tst	off2, 7
	b.ne	L(misaligned8)		/* Different offsets within a word.  */
	cbnz	tmp, L(mutual_align)	/* Same offset, but not aligned.  */

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* Only compare the words if no NUL was seen; otherwise force the
	   flags to "not equal" so that the loop exits.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	/* Common exit: syndrome is non-zero and marks, in memory byte order,
	   the first difference and/or the first NUL byte.  */
L(end):
#ifndef __AARCH64EB__
	/* Put the earliest byte at the most-significant end.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	/* Force the bytes before the start to 0xff in both words, so they
	   compare equal and are not detected as NUL.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	/* A NUL in SRC1 forces "not equal", so both a NUL and a mismatch
	   leave through L(done).  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* SRC1 is now aligned.  Load the aligned word containing the
	   current SRC2 position and check its remaining bytes for NUL.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Set the low bit of every byte that precedes the SRC2 position, so
	   that those bytes cannot be mistaken for a NUL.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	/* Distance from SRC1 to the next aligned word of SRC2.  */
	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]	/* Aligned SRC2 word for the NUL check.  */
	ldr	data2, [src1, off2]	/* SRC2 bytes matching data1.  */
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Keep only the NUL markers of data3 bytes that overlap data2.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
	/* data1 and data2 matched without a NUL, so the NUL lies in the part
	   of data3 that has not been compared yet.  */
L(tail):
	ldr	data1, [src1]
	neg	shift, shift
	/* Move the not-yet-compared bytes of data3, and their NUL markers,
	   to the earliest byte positions so they line up with data1.  */
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Bytewise exit: both bytes were zero-extended by ldrb.  */
	sub	result, data1, data2
	ret
SYM_FUNC_END(__pi_strcmp)
SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)