/*
 * strchr - find a character in a string
 *
 * Copyright (c) 2014-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "../asmdefs.h"

/* Register aliases used by __strchr_aarch64.  All of them are plain
   preprocessor substitutions; nothing here emits code.  */

/* Arguments and results.  srcin and result share x0: the incoming
   pointer is consumed before the result is written.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* General-purpose temporaries.
   src   - cursor over the string, kept 32-byte aligned.
   tmp1  - misalignment of srcin, later the 64-bit syndrome.
   wtmp2 - holds the 0xc0300c03 lane-mask constant.
   tmp3  - all-ones value and raw syndrome while masking the padding.  */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

/* Vector registers.
   vrepchr       - the searched byte replicated into all 16 lanes.
   vdata1/2      - the two 16-byte halves of the current 32-byte hunk.
   vhas_nul1/2   - per-byte end-of-string compare results.
   vhas_chr1/2   - per-byte character-match compare results.
   vrepmask_c    - 0xc0300c03 replicated; keeps one bit pair per byte.
   vrepmask_0    - vrepmask_c shifted left by one bit per 32-bit lane.
   vend1/2       - masked compare results being folded into the syndrome.  */
#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7
#define vrepmask_c	v16
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (in the main
   loop bit 1 is also set when the byte matched the requested
   character; we trigger off bit 0 for the special case of looking
   for NUL).  Since the bits in the syndrome reflect exactly the
   order in which things occur in the original string a
   count_trailing_zeros() operation will identify exactly which byte
   is causing the termination, and why.  */

/* Locals and temporaries: see the register aliases defined above.  */

/* __strchr_aarch64 - locate the first occurrence of a byte in a
   NUL-terminated string.

   In:	srcin (x0)	pointer to the string.
	chrin (w1)	byte to look for; only the low 8 bits take part,
			because it is replicated with a byte-lane dup.
   Out:	result (x0)	address of the first matching byte, or 0 if the
			terminating NUL is reached first.  Looking for NUL
			itself returns the address of the terminator.

   Clobbers x2-x5, v0-v7, v16-v18 and the condition flags.  The code
   below touches no stack and makes no calls.

   Memory is always read as whole 32-byte aligned hunks, so bytes that
   precede the string or follow its terminator inside the same aligned
   hunk are loaded and then ignored.  */
ENTRY (__strchr_aarch64)
	/* PTR_ARG comes from asmdefs.h; presumably it normalises pointer
	   argument 0 for the target ABI - confirm against that header.  */
	PTR_ARG (0)
	/* Magic constant 0xc0300c03 to allow us to identify which lane
	   matches the requested byte.  Its four bytes are 0x03, 0x0c,
	   0x30 and 0xc0, so each of four neighbouring byte lanes keeps a
	   different bit pair and two pairwise adds can pack them into a
	   single syndrome byte.  In every pair the even bit is set if the
	   character matches.  The odd bit is set if the byte is NUL (in
	   the unaligned head) or if it is NUL or matches (in the main
	   loop).  */
	mov	wtmp2, 0x0c03
	movk	wtmp2, 0xc030, lsl 16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #31	/* tmp1 = misalignment, Z set if none.  */
	/* vrepmask_0 covers the odd bit of every pair kept by vrepmask_c,
	   i.e. the bits that BIF must leave as the NUL result.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Merge: odd bits keep the NUL result, all other bits take the
	   character-match result.  */
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	lsl	tmp1, tmp1, #1		/* tmp1 = -2 * misalignment.  */
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	/* A register-controlled shift uses the amount modulo 64, so this
	   is ~0 >> (64 - 2 * misalignment): ones in exactly the syndrome
	   bits that belong to the bytes before srcin.  */
	lsr	tmp1, tmp3, tmp1

	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, L(tail)
	/* Nothing found in the first hunk: fall through into the loop
	   with src already advanced to the next aligned hunk.  */

	.p2align 4
L(loop):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Unsigned has_chr >= data holds where the byte matched (0xff is
	   >= anything) or where the byte is NUL (0 >= 0), so a single
	   compare detects both termination conditions.  */
	cmhs	vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
	cmhs	vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
	/* Fold the 32 results into 64 bits just to test for any hit.  */
	orr	vend1.16b, vhas_nul1.16b, vhas_nul2.16b
	umaxp	vend1.16b, vend1.16b, vend1.16b
	mov	tmp1, vend1.d[0]
	cbz	tmp1, L(loop)

	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  */
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	mov	tmp1, vend1.d[0]
L(tail):
	/* On entry tmp1 is a non-zero syndrome and src points 32 bytes
	   past the hunk that produced it.  */
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source.  */
	sub	src, src, #32
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	/* Two syndrome bits per byte, so the byte index is tmp1 / 2.  This
	   add does not set flags, so the tst result survives to csel.  */
	add	result, src, tmp1, lsr #1
	csel	result, result, xzr, eq
	ret

END (__strchr_aarch64)