1 /*
2  * strlen - calculate the length of a string.
3  *
4  * Copyright (c) 2020, Arm Limited.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 /* Assumptions:
9  *
10  * ARMv8-a, AArch64, Advanced SIMD.
11  * MTE compatible.
12  */
13 
14 #include "../asmdefs.h"
15 
16 #define srcin		x0
17 #define result		x0
18 
19 #define src		x1
20 #define	synd		x2
21 #define tmp		x3
22 #define wtmp		w3
23 #define shift		x4
24 
25 #define data		q0
26 #define vdata		v0
27 #define vhas_nul	v1
28 #define vrepmask	v2
29 #define vend		v3
30 #define dend		d3
31 
/* Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte. For even bytes, bits 0-3 are set if the byte is NUL; bits 4-7
   must be zero. Bits 4-7 are set likewise for odd bytes so that adjacent
   bytes can be merged. Since the bits in the syndrome reflect the order in
   which things occur in the original string, counting trailing zeros
   identifies exactly the first NUL byte.  */
40 
41 ENTRY (__strlen_aarch64_mte)
42 	PTR_ARG (0)
43 	bic	src, srcin, 15
44 	mov	wtmp, 0xf00f
45 	ld1	{vdata.16b}, [src]
46 	dup	vrepmask.8h, wtmp
47 	cmeq	vhas_nul.16b, vdata.16b, 0
48 	lsl	shift, srcin, 2
49 	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
50 	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
51 	fmov	synd, dend
52 	lsr	synd, synd, shift
53 	cbz	synd, L(loop)
54 
55 	rbit	synd, synd
56 	clz	result, synd
57 	lsr	result, result, 2
58 	ret
59 
60 	.p2align 5
61 L(loop):
62 	ldr	data, [src, 16]!
63 	cmeq	vhas_nul.16b, vdata.16b, 0
64 	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
65 	fmov	synd, dend
66 	cbz	synd, L(loop)
67 
68 	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
69 	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
70 	sub	result, src, srcin
71 	fmov	synd, dend
72 #ifndef __AARCH64EB__
73 	rbit	synd, synd
74 #endif
75 	clz	tmp, synd
76 	add	result, result, tmp, lsr 2
77 	ret
78 
79 END (__strlen_aarch64_mte)
80 
81