/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

/* L(name) pastes to .Lname; the .L prefix keeps the label assembler-local.  */
#define L(label) .L ## label

/* Parameters and result.  result (w0) is the low half of src1 (x0), so it
   may only be written once src1 is no longer needed.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  data1/data2 hold the words loaded from src1/src2;
   the "w" names are their 32-bit views and the "h" names are separate
   registers for the second word of a 16-byte pair load.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
/* tmp1 holds the src1 misalignment; tmp2 is not referenced by the code
   visible in this file.  */
#define tmp1		x7
#define tmp2		x8

/*
 * __pi_memcmp - compare two byte ranges; memcmp is a weak alias of it.
 *
 * In:	x0 (src1)  = first buffer
 *	x1 (src2)  = second buffer
 *	x2 (limit) = number of bytes to compare
 * Out:	w0 (result) = 0 when every compared byte matches.  Otherwise the
 *	sign follows the first differing byte pair, taken as unsigned bytes:
 *	-1 or 1 on the L(return) path, the byte difference on the
 *	L(byte_loop) path.
 * Clobbers: x0-x7 and NZCV.  The stack is not touched and nothing is called.
 *
 * limit is reused as a biased remaining-byte counter; each section below
 * states the bias it expects.  Tail handling reloads the final 8 or 16
 * bytes with an unaligned access, so bytes already found equal may be
 * read a second time.
 */
SYM_FUNC_START(__pi_memcmp)
	/* Fewer than 8 bytes: small path, entered with limit in [-8..-1].  */
	subs	limit, limit, 8
	b.lo	L(less8)

	/* Compare the first 8 bytes; both pointers advance by 8.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* limit = count - 16.  More than 16 bytes continues at L(more16).
	   NOTE(review): b.gt is a signed test, so this assumes
	   count - 16 < 2^63 - confirm against callers.  */
	subs	limit, limit, 8
	b.gt	L(more16)

	/* 8..16 bytes: limit is in [-8..0], so these loads fetch the final
	   8 bytes.  L(return) performs the comparison itself.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	/* Compare bytes 8..15; 16 bytes are now consumed.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  limit becomes count - 32.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes
	   (limit > 96 with the bias of 32 applied above).  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  Both pointers
	   move back by the same amount and limit grows by it, so the bytes
	   stepped over are compared again.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* Low words are compared only while limit is still above zero,
	   high words only if the low words matched; in either skipped case
	   NZCV is forced to 0 (not equal) and the loop ends.  */
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* The loop exit does not say which condition fired, so re-check the
	   pair just loaded before moving on to the tail.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is zero or
	   negative here, so adding it backs the pointers up to 16 bytes
	   before the end of the buffers.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.
	   On little-endian builds the words are byte-reversed first so the
	   lowest-addressed byte becomes the most significant one and the
	   unsigned compare follows memory order.  */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Also entered from L(less4) with Z set, which yields 0.  */
L(ret_eq):
	cset	result, ne		/* 1 if the words differ, else 0.  */
	cneg	result, result, lo	/* Negate when data1 < data2.  */
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	/* Carry clear after the add means fewer than 4 bytes in total.  */
	adds	limit, limit, 4
	b.lo	L(less4)
	/* 4..7 bytes: compare the first 4.  The 32-bit loads clear the upper
	   halves of data1/data2, so the 64-bit L(return) path still works.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	/* limit becomes the number of bytes left (0..3); zero means equal.  */
	adds	limit, limit, 4
	b.eq	L(ret_eq)
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	/* Bytes are compared only while some remain; otherwise the flags
	   are forced to "not equal" so the loop stops.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	/* Difference of the last byte pair loaded; 0 if they matched.  */
	sub	result, data1w, data2w
	ret
SYM_FUNC_END(__pi_memcmp)
SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)