1 /*
2  * strchr - find a character in a string
3  *
4  * Copyright (c) 2020, Arm Limited.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 /* Assumptions:
9  *
10  * ARMv8-a, AArch64, Advanced SIMD.
11  * MTE compatible.
12  */
13 
14 #include "../asmdefs.h"
15 
16 #define srcin		x0
17 #define chrin		w1
18 #define result		x0
19 
20 #define src		x2
21 #define tmp1		x1
22 #define wtmp2		w3
23 #define tmp3		x3
24 
25 #define vrepchr		v0
26 #define vdata		v1
27 #define qdata		q1
28 #define vhas_nul	v2
29 #define vhas_chr	v3
30 #define vrepmask	v4
31 #define vrepmask2	v5
32 #define vend		v6
33 #define dend		d6
34 
35 /* Core algorithm.
36 
37    For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
38    per byte. For even bytes, bits 0-1 are set if the relevant byte matched the
39    requested character, bits 2-3 are set if the byte is NUL (or matched), and
   bits 4-7 are not used and must be zero if none of bits 0-3 are set. Odd
41    bytes set bits 4-7 so that adjacent bytes can be merged. Since the bits
42    in the syndrome reflect the order in which things occur in the original
43    string, counting trailing zeros identifies exactly which byte matched.  */
44 
45 ENTRY (__strchr_aarch64_mte)
46 	PTR_ARG (0)
47 	bic	src, srcin, 15
48 	dup	vrepchr.16b, chrin
49 	ld1	{vdata.16b}, [src]
50 	mov	wtmp2, 0x3003
51 	dup	vrepmask.8h, wtmp2
52 	cmeq	vhas_nul.16b, vdata.16b, 0
53 	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
54 	mov	wtmp2, 0xf00f
55 	dup	vrepmask2.8h, wtmp2
56 
57 	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
58 	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
59 	lsl	tmp3, srcin, 2
60 	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
61 
62 	fmov	tmp1, dend
63 	lsr	tmp1, tmp1, tmp3
64 	cbz	tmp1, L(loop)
65 
66 	rbit	tmp1, tmp1
67 	clz	tmp1, tmp1
68 	/* Tmp1 is an even multiple of 2 if the target character was
69 	   found first. Otherwise we've found the end of string.  */
70 	tst	tmp1, 2
71 	add	result, srcin, tmp1, lsr 2
72 	csel	result, result, xzr, eq
73 	ret
74 
75 	.p2align 4
76 L(loop):
77 	ldr	qdata, [src, 16]!
78 	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
79 	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
80 	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
81 	fmov	tmp1, dend
82 	cbz	tmp1, L(loop)
83 
84 #ifdef __AARCH64EB__
85 	bif	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
86 	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
87 	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
88 	fmov	tmp1, dend
89 #else
90 	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
91 	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
92 	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
93 	fmov	tmp1, dend
94 	rbit	tmp1, tmp1
95 #endif
96 	clz	tmp1, tmp1
97 	/* Tmp1 is an even multiple of 2 if the target character was
98 	   found first. Otherwise we've found the end of string.  */
99 	tst	tmp1, 2
100 	add	result, src, tmp1, lsr 2
101 	csel	result, result, xzr, eq
102 	ret
103 
104 END (__strchr_aarch64_mte)
105 
106