/* memcmp - compare memory
 *
 * Copyright (c) 2013-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include "../asmdefs.h"

/* Register aliases used by the routine below.  They are plain preprocessor
   substitutions, so every use assembles to the named register.  */

/* Parameters and result.  Note that result (w0) is the low half of src1
   (x0): writing the result destroys the first pointer.  */
#define src1		x0	/* First buffer; advanced as data is consumed.  */
#define src2		x1	/* Second buffer; advanced in step with src1.  */
#define limit		x2	/* Byte count, kept biased by the code (see uses).  */
#define result		w0	/* 32-bit return value.  */

/* Internal variables.  The 'w' names are the 32-bit views of the same
   registers and are used for the 4-byte and 1-byte loads.  */
#define data1		x3	/* Data loaded from src1 (low 8 bytes of a pair).  */
#define data1w		w3
#define data1h		x4	/* High 8 bytes of a 16-byte load from src1.  */
#define data2		x5	/* Data loaded from src2 (low 8 bytes of a pair).  */
#define data2w		w5
#define data2h		x6	/* High 8 bytes of a 16-byte load from src2.  */
#define tmp1		x7	/* Misalignment of src1 when aligning for the loop.  */
#define tmp2		x8	/* Not referenced by the routine visible here.  */

/* __memcmp_aarch64 - compare two memory regions.

   C-equivalent prototype (that of memcmp):
     int __memcmp_aarch64 (const void *s1, const void *s2, size_t n);

   In:   src1 (x0), src2 (x1) = buffers, limit (x2) = byte count n.
   Out:  result (w0) = 0 if the regions are equal; otherwise negative or
	 positive according to the first differing byte, compared as
	 unsigned.  The word paths return -1 or 1, the byte loop returns
	 the difference of the two bytes.
   Uses: x0-x7 and the condition flags.

   Strategy by size:
     0-7    optional 4-byte compare, then a byte loop.
     8-16   first 8 bytes, then the (overlapping) last 8 bytes.
     17-32  first 16 bytes, then the (overlapping) last 16 bytes.
     33+    first 16 bytes, then 16 bytes per loop iteration, then the
	    (overlapping) last 16 bytes.  src1 is aligned to 16 bytes
	    before the loop only when n is larger than 128.  */
ENTRY (__memcmp_aarch64)
	/* Argument macros from asmdefs.h (presumably pointer/size
	   sanitisation for ILP32 builds - confirm in that header).  */
	PTR_ARG (0)
	PTR_ARG (1)
	SIZE_ARG (2)
	subs	limit, limit, 8		/* limit = n - 8; borrows if n < 8.  */
	b.lo	L(less8)

	/* n >= 8: compare the first 8 bytes.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	subs	limit, limit, 8		/* limit = n - 16.  */
	b.gt	L(more16)

	/* 8 <= n <= 16: limit is in [-8..0] and the pointers are 8 bytes in,
	   so [src, limit] addresses the last 8 bytes of each buffer.  Bytes
	   that overlap the first load are already known to be equal.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

	/* n > 16: compare bytes 8-15.  */
L(more16):
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  From here on limit = (bytes left to compare) - 16.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.
	   (limit is n - 32 here, hence the comparison against 96.)  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  Both pointers
	   step back by the same amount (at most 15 bytes, all of which have
	   already compared equal) and limit grows to match.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* Chain of conditions: continue only if limit is still > 0 (hi) and
	   both halves are equal.  A failed condition forces NZCV = 0, which
	   reads as "not equal" and leaves the loop.  */
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* The loop exit does not say why it ended, so recheck the 16 bytes
	   just loaded, low half first to preserve byte order.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is in
	   [-15..0] here, so adding it points both buffers at their final
	   16 bytes.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	/* No compare needed before falling through: L(return) compares
	   data1 and data2 itself and nothing in between reads the flags.  */

	/* Compare data bytes and set return value to 0, -1 or 1.
	   On little-endian the first byte in memory is the least significant
	   byte of the register, so reverse the bytes to make an unsigned
	   register compare order the data like a byte-wise compare.  Data
	   from the 4-byte path is zero-extended, so the 64-bit operations
	   below give the same ordering for it.  */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Also entered from L(less4) with Z set, which yields 0.  */
L(ret_eq):
	cset	result, ne		/* 0 if equal, else 1.  */
	cneg	result, result, lo	/* Negate if data1 < data2 (unsigned).  */
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	adds	limit, limit, 4		/* limit = n - 4; no carry if n < 4.  */
	b.lo	L(less4)
	/* 4 <= n <= 7: compare the first 4 bytes.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4		/* Re-bias so that L(less4) can add 4.  */
L(less4):
	adds	limit, limit, 4		/* limit = bytes left, in [0..3].  */
	b.eq	L(ret_eq)		/* Nothing left: equal.  */
	/* Compare the remaining 1-3 bytes one at a time.  */
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	/* Compare only while bytes remain; at limit == 0 force "not equal"
	   so that the loop ends.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	/* Difference of the last pair of bytes examined: zero if the loop
	   ran out of bytes with everything equal.  */
	sub	result, data1w, data2w
	ret

END (__memcmp_aarch64)
