/*
 * strchr - find a character in a string
 *
 * Copyright (c) 2014-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "../asmdefs.h"

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Locals and temporaries.  */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7
#define vrepmask_c	v16
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */

/* __strchr_aarch64

   In:    srcin (x0) = address of a NUL-terminated string.
	  chrin (w1) = character to look for; only the low 8 bits are
		       used (they are replicated into every byte lane).
   Out:   result (x0) = address of the first byte equal to chrin, or
			0 if the NUL terminator is reached first.  When
			chrin is NUL the address of the terminator is
			returned.
   Clobb: x2-x5, v0-v7, v16-v18, condition flags.
   Stack: not used.

   Memory is only ever read with 32-byte loads from 32-byte aligned
   addresses, so bytes before srcin and after the terminator may be
   read, but only inside an aligned 32-byte hunk that also contains
   string bytes.  */

ENTRY (__strchr_aarch64)
	/* PTR_ARG comes from asmdefs.h; presumably it normalises pointer
	   argument 0 (x0) for ILP32 builds -- confirm against the header.  */
	PTR_ARG (0)
	/* Magic constant 0xc0300c03 to allow us to identify which lane
	   matches the requested byte.  Even bits are set if the character
	   matches, odd bits if either the char is NUL or matches.  */
	mov	wtmp2, 0x0c03
	movk	wtmp2, 0xc030, lsl 16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #31	/* tmp1 = number of padding bytes.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)			/* Already aligned: no padding.  */

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Keep the NUL result in the bits selected by vrepmask_0 and take
	   the character-match result everywhere else; the AND with
	   vrepmask_c then leaves exactly one match/NUL bit pair per byte.  */
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	lsl	tmp1, tmp1, #1		/* tmp1 = -2 * padding bytes.  */
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	/* The register shift amount is taken modulo 64, so this leaves the
	   low 2 * padding bits of tmp1 set.  */
	lsr	tmp1, tmp3, tmp1

	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, L(tail)

	.p2align 4
L(loop):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* has_chr >= data (unsigned) holds when the byte matched
	   (0xff >= data) or when the byte is NUL (0 >= 0), so a single
	   compare yields "match or NUL" for every lane.  */
	cmhs	vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
	cmhs	vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_nul2.16b
	/* Pairwise max folds the 16 lanes into the low 64 bits, so one
	   general-register test covers the whole 32-byte hunk.  */
	umaxp	vend1.16b, vend1.16b, vend1.16b
	mov	tmp1, vend1.d[0]
	cbz	tmp1, L(loop)

	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  */
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	mov	tmp1, vend1.d[0]
L(tail):
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source.  */
	sub	src, src, #32
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #1	/* Two syndrome bits per byte.  */
	csel	result, result, xzr, eq
	ret

END (__strchr_aarch64)