/*
 * strchr - find a character in a string
 *
 * Copyright (c) 2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "../asmdefs.h"

/* Interface of __strchr_aarch64_mte:
 *
 *   In:       x0 (srcin) = address of the NUL-terminated string
 *             w1 (chrin) = character to find; only the low byte is used,
 *                          since it is broadcast into byte lanes
 *   Out:      x0 (result) = address of the first matching byte, or 0 if
 *                           the terminating NUL is reached first
 *   Clobbers: x1, x2, x3, v0-v6, condition flags
 *
 * Every load is 16-byte aligned: src starts as srcin rounded down to 16
 * and only ever advances by 16.
 */

/* Arguments and return value.  */
#define srcin		x0
#define chrin		w1
#define result		x0

/* Scalar temporaries.  tmp1 reuses the register of chrin, which is dead once
   it has been broadcast into vrepchr.  wtmp2 and tmp3 are the 32-bit and
   64-bit views of the same register and are never live at the same time.  */
#define src		x2
#define tmp1		x1
#define wtmp2		w3
#define tmp3		x3

/* Vector registers.  qdata/vdata and dend/vend are different views of the
   same registers.  */
#define vrepchr		v0
#define vdata		v1
#define qdata		q1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask	v4
#define vrepmask2	v5
#define vend		v6
#define dend		d6

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-1 are set if the relevant byte matched the
   requested character, bits 2-3 are set if the byte is NUL (or matched), and
   bits 4-7 are not used and must be zero if none of bits 0-3 are set.  Odd
   bytes use the same layout shifted up into bits 4-7, so that each pair of
   adjacent bytes can be merged into a single byte with a pairwise add.  Since
   the bits in the syndrome reflect the order in which things occur in the
   original string, counting trailing zeros identifies exactly which byte
   matched.  */

ENTRY (__strchr_aarch64_mte)
	/* PTR_ARG comes from asmdefs.h; presumably it normalises the pointer
	   argument in x0 (e.g. for ILP32) - confirm against that header.  */
	PTR_ARG (0)

	/* Load the aligned 16-byte chunk that contains the first byte.  */
	bic	src, srcin, 15
	dup	vrepchr.16b, chrin
	ld1	{vdata.16b}, [src]

	/* 0x3003 per halfword: selects bits 0-1 of even bytes and bits 4-5 of
	   odd bytes, i.e. the "matched" bits of each nibble.  */
	mov	wtmp2, 0x3003
	dup	vrepmask.8h, wtmp2
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* 0xf00f per halfword: keeps the low nibble of even bytes and the
	   high nibble of odd bytes.  */
	mov	wtmp2, 0xf00f
	dup	vrepmask2.8h, wtmp2

	/* Insert the match bits into the NUL mask, trim every byte to its own
	   nibble, then fold byte pairs together to get the syndrome.  */
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	lsl	tmp3, srcin, 2
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* 128->64 */

	/* A register-specified shift uses the amount modulo 64, so this shifts
	   by 4 * (srcin & 15) and discards the nibbles belonging to bytes that
	   precede srcin in the chunk.  */
	fmov	tmp1, dend
	lsr	tmp1, tmp1, tmp3
	cbz	tmp1, L(loop)

	/* rbit + clz counts trailing zeros: the bit index of the first event,
	   measured from srcin at four bits per byte.  No endian-specific
	   handling is needed on this path because ld1 of .16b elements fills
	   the lanes in memory order.  */
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	/* Bit 1 of tmp1 is clear (tmp1 is a multiple of 4) if the target
	   character was found first.  Otherwise we've found the end of
	   string.  */
	tst	tmp1, 2
	add	result, srcin, tmp1, lsr 2
	csel	result, result, xzr, eq
	ret

	.p2align 4
L(loop):
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Unsigned vhas_chr >= vdata holds when the byte matched (0xff >= any
	   value) or when the data byte is zero, so a single compare yields
	   "matched or NUL".  */
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	/* Pairwise max reduces 128 bits to 64 so that one scalar test can tell
	   whether any byte in the chunk is of interest.  */
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)

	/* Something was found in this chunk: build the four-bit-per-byte
	   syndrome.  Only two steps depend on endianness.  On big-endian the
	   syndrome is scanned from the most significant end (clz with no
	   rbit), so bif is used instead of bit to place the match bits in the
	   upper half of each nibble rather than the lower half.  */
#ifdef __AARCH64EB__
	bif	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
#else
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
#endif
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* 128->64 */
	fmov	tmp1, dend
#ifndef __AARCH64EB__
	rbit	tmp1, tmp1
#endif
	clz	tmp1, tmp1
	/* Bit 1 of tmp1 is clear (tmp1 is a multiple of 4) if the target
	   character was found first.  Otherwise we've found the end of
	   string.  Here tmp1 / 4 is the byte offset from src, the base of the
	   current chunk.  */
	tst	tmp1, 2
	add	result, src, tmp1, lsr 2
	csel	result, result, xzr, eq
	ret

END (__strchr_aarch64_mte)