/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include "../asmdefs.h"

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Internal variables.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define syndrome	x6
#define tmp1		x7
#define tmp2		x8
#define tmp3		x9
#define zeroones	x10
#define pos		x11

/* __strcmp_aarch64 (src1, src2) - compare two NUL-terminated strings.

   In:    x0 = src1, x1 = src2.
   Out:   x0 = zero if the strings are equal, otherwise a negative or
	  positive value according to the first differing byte, with
	  bytes compared as unsigned values.
   Uses:  x2-x11 and the condition flags.  No stack accesses are made.

   Memory access notes:
   - When both pointers share the same offset within an 8-byte dword,
     whole aligned dwords are loaded.  The first load may therefore read
     bytes that precede the start of the strings (within the same
     aligned dword); those bytes are masked out before comparison.
   - When the offsets differ, SRC1 is brought to 8-byte alignment one
     byte at a time and SRC2 is then read with unaligned dword loads,
     except while SRC2 lies in the last 8 bytes of a 4K page, where the
     code falls back to byte loads.

   ENTRY, END, L and PTR_ARG are provided by asmdefs.h.  */

	/* Start of performance-critical section  -- one 64B cache line.  */
ENTRY (__strcmp_aarch64)
	PTR_ARG (0)
	PTR_ARG (1)
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7		/* Do the low 3 address bits differ?  */
	b.ne	L(misaligned8)
	ands	tmp1, src1, #7		/* tmp1 = offset within the dword.  */
	b.ne	L(mutual_align)
	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  */
L(loop_aligned):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned):
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, L(loop_aligned)
	/* End of performance-critical section  -- one 64B cache line.  */

L(end):
#ifndef	__AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/* For big-endian we cannot use the trick with the syndrome value
	   as carry-propagation can corrupt the upper bits if the trailing
	   bytes in the string contain 0x01.  */
	/* However, if there is no NUL byte in the dword, we can generate
	   the result directly.  We can't just subtract the bytes as the
	   MSB might be significant.  */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne
	cneg	result, result, lo
	ret
1:
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	/* Force the bytes before the start point to 0xff in both words so
	   that they can neither look like a NUL nor like a difference.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond page boundary in
	   SRC2.  */
	tst	src1, #7
	b.eq	L(loop_misaligned)
L(do_misaligned):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	/* The cmp sets C only when data1 is non-NUL.  In that case the ccmp
	   compares the two bytes; otherwise it forces NZCV = 0b0000, which
	   reads as "not equal".  The branch is therefore taken on a NUL in
	   SRC1 or on a mismatch.  */
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, #7
	b.ne	L(do_misaligned)

L(loop_misaligned):
	/* Test if we are within the last dword of the end of a 4K page.  If
	   yes then jump back to the misaligned loop to compare a byte at a
	   time.  tmp1 becomes zero iff bits [11:3] of SRC2 are all ones.  */
	and	tmp1, src2, #0xff8
	eor	tmp1, tmp1, #0xff8
	cbz	tmp1, L(do_misaligned)
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8

	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, L(loop_misaligned)
	b	L(end)

L(done):
	/* Byte path: both values were zero-extended by ldrb.  */
	sub	result, data1, data2
	ret

END (__strcmp_aarch64)
