/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */


/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include "../asmdefs.h"

/* Byte-replicated constants used by the word-at-a-time NUL detection.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Register roles.  Note the deliberate aliasing: result shares x0 with
   src1, diff shares x5 with off1, and syndrome shares x6 with tmp.  Each
   pair is never live at the same time.  */
#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/* __strcmp_aarch64_mte

   In:    x0 = src1, x1 = src2 (addresses of the two strings).
   Out:   x0 = result.  Zero when the strings are equal up to and
	  including the NUL terminator; otherwise non-zero, with the sign
	  of (src1 byte - src2 byte) at the first differing position,
	  bytes being treated as unsigned.
   Uses:  x2-x10 as scratch and the NZCV flags.  The instructions written
	  here never reference sp.  ENTRY, PTR_ARG, L and END come from
	  asmdefs.h and are not visible here.

   Strategy:
     - src1 and src2 congruent modulo 8: compare one 8-byte word per
       iteration using aligned loads only (L(loop_aligned)), after
       masking any bytes that precede the start (L(mutual_align)).
     - otherwise: compare byte by byte until src1 is 8-byte aligned
       (L(do_misaligned)), then compare aligned src1 words against
       unaligned src2 words (L(loop_unaligned)), using an additional
       aligned load from src2 (data3) to look for the NUL terminator.  */

ENTRY (__strcmp_aarch64_mte)
	PTR_ARG (0)
	PTR_ARG (1)
	sub	off2, src2, src1	/* src2 is addressed as src1 + off2.  */
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* Misalignment of src1.  */
	tst	off2, 7
	b.ne	L(misaligned8)		/* Alignments differ modulo 8.  */
	cbnz	tmp, L(mutual_align)

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL was found (Z set), compare the two words; otherwise
	   force NZCV = 0 so that the b.eq below is not taken.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
	/* Common exit.  Expects data1 and data2 to hold the words being
	   compared and syndrome to have a bit set at the first difference
	   and/or in the first NUL byte.  */
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value before
	   subtracting.  Both operands are then in the range 0..255, so the
	   difference has the correct sign.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	/* Force the preceding bytes to 0xff in both words so that they
	   compare equal and are never seen as NUL.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	/* If data1 is not NUL compare the two bytes, otherwise force "ne"
	   so that the NUL terminator also leaves the loop.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* Only the low 6 bits of a register shift amount are used, so
	   shift acts as 64 - 8 * (src2 & 7).  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8	/* Aligned word holding the start of src2.  */
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Set bit 0 in the bytes of data3 that precede the start of src2 so
	   that they cannot be mistaken for a NUL terminator.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)			/* src2 ends within its first word.  */

	sub	off1, src2, src1	/* src1 + off1 = next aligned src2 word.  */

	.p2align 4

L(loop_unaligned):
	/* data3 is the aligned src2 word in which the unaligned 8-byte read
	   of data2 ends; it is the word checked for NUL.  */
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	/* Continue only while data3 has no NUL and data1 equals data2.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Move the NUL flags of the data3 bytes that are also part of data2
	   into the positions those bytes occupy in data2; flags for the
	   remaining data3 bytes are shifted out.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	/* The terminator lies in the part of data3 that has not been
	   compared yet.  Build data2 from those bytes with a shift instead
	   of reading src2 again.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Byte-loop exit: data1 and data2 each hold one zero-extended byte.  */
	sub	result, data1, data2
	ret

END (__strcmp_aarch64_mte)
