/*
 * strncmp - compare two strings
 *
 * Copyright (c) 2013-2021, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include "../asmdefs.h"

/* Byte-replicated constants used by the parallel NUL-byte detection.
   REP8_80 is not referenced in this file.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		x0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data2		x4
#define data2w		w4
#define has_nul		x5
#define diff		x6
#define syndrome	x7
#define tmp1		x8
#define tmp2		x9
#define tmp3		x10
#define zeroones	x11
#define pos		x12
#define limit_wd	x13
#define mask		x14
#define endloop		x15
#define count		mask	/* Shares x14 with mask; never live together.  */

/* __strncmp_aarch64

   In:   x0 = src1, x1 = src2, x2 = limit (maximum number of bytes to
	 compare).
   Out:  x0 = 0 if the strings compare equal within the limit, otherwise
	 a negative or positive value according to the first differing
	 byte (bytes compared as unsigned).
   Uses: x0-x15 and the condition flags.  No stack is used.

   Three strategies are used:
     - src1 and src2 share the same offset within a dword: compare
       aligned dwords (L(loop_aligned), entered through L(mutual_align)
       when that shared offset is non-zero);
     - different offsets and limit < 16: compare byte by byte
       (L(byte_loop));
     - different offsets and limit >= 16: align src1 bytewise, then
       compare dwords with src2 unaligned, dropping back to bytes where
       src2 is within the last dword of a 4KiB block
       (L(loop_misaligned)).

   PTR_ARG, SIZE_ARG, ENTRY, END and L come from asmdefs.h.  */
ENTRY (__strncmp_aarch64)
	PTR_ARG (0)
	PTR_ARG (1)
	SIZE_ARG (2)
	cbz	limit, L(ret0)		/* Nothing to compare: equal.  */
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7		/* Same offset within a dword?  */
	and	count, src1, #7		/* Offset of src1 within its dword.  */
	b.ne	L(misaligned8)
	cbnz	count, L(mutual_align)
	/* Calculate the number of full and partial words -1.  */
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */

	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  */
	.p2align 4
L(loop_aligned):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned):
	subs	limit_wd, limit_wd, #1	/* Goes negative on the last Dword.  */
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.  */
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	/* Continue only if there is no NUL (eq from the bics) and endloop
	   is zero; a NUL forces NZCV to 0, i.e. "ne".  */
	ccmp	endloop, #0, #0, eq
	b.eq	L(loop_aligned)
	/* End of main loop */

	/* Not reached the limit, must have found the end or a diff.  */
	tbz	limit_wd, #63, L(not_limit)

	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	L(not_limit)

	lsl	limit, limit, #3	/* Bytes -> bits.  */
	mov	mask, #~0
	/* Build a mask covering the bytes that lie beyond the limit.  */
#ifdef __AARCH64EB__
	lsr	mask, mask, limit
#else
	lsl	mask, mask, limit
#endif
	/* Clear the out-of-range bytes so they compare equal.  */
	bic	data1, data1, mask
	bic	data2, data2, mask

	/* Make sure that the NUL byte is marked in the syndrome.  */
	orr	has_nul, has_nul, mask

L(not_limit):
	/* Shared exit: data1/data2 hold the final pair of Dwords (or bytes),
	   diff and has_nul describe them.  */
	orr	syndrome, diff, has_nul

#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/* For big-endian we cannot use the trick with the syndrome value
	   as carry-propagation can corrupt the upper bits if the trailing
	   bytes in the string contain 0x01.  */
	/* However, if there is no NUL byte in the dword, we can generate
	   the result directly.  We can't just subtract the bytes as the
	   MSB might be significant.  */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne		/* 0 if equal, else 1 ...  */
	cneg	result, result, lo	/* ... negated if data1 < data2.  */
	ret
1:
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.
	   We also need to adjust the limit calculations, but without
	   overflowing if the limit is near ULONG_MAX.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	ldr	data1, [src1], #8
	neg	tmp3, count, lsl #3	/* 64 - bits(bytes beyond align). */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
	/* Register shifts use the amount modulo 64, so shifting by tmp3
	   leaves ones only in the bytes that precede the start point.  */
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
#endif
	and	tmp3, limit_wd, #7
	lsr	limit_wd, limit_wd, #3
	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
	add	limit, limit, count
	add	tmp3, tmp3, count
	/* Force the leading bytes to 0xff in both inputs: they compare
	   equal and cannot look like a NUL.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	add	limit_wd, limit_wd, tmp3, lsr #3
	b	L(start_realigned)

	.p2align 4
	/* Don't bother with dwords for fewer than 16 bytes.  */
L(misaligned8):
	cmp	limit, #16
	b.hs	L(try_misaligned_words)

L(byte_loop):
	/* Perhaps we can do better than this.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	/* Keep looping only while bytes remain (hi), data1 is not NUL (cs
	   after comparing with 1) and the two bytes are equal (eq).  A
	   failed condition forces NZCV to 0, which reads as "ne".  */
	ccmp	data1w, #1, #0, hi	/* NZCV = 0b0000.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
L(done):
	/* ldrb zero-extends, so this is the unsigned byte difference.  */
	sub	result, data1, data2
	ret
	/* Align the SRC1 to a dword by doing a bytewise compare and then do
	   the dword loop.  */
L(try_misaligned_words):
	lsr	limit_wd, limit, #3
	cbz	count, L(do_misaligned)

	neg	count, count
	and	count, count, #7	/* Bytes until src1 is dword aligned.  */
	sub	limit, limit, count
	lsr	limit_wd, limit, #3

L(page_end_loop):
	/* Compare count bytes one at a time.  Used both for the initial
	   alignment of src1 and for the 8 bytes handled bytewise when
	   L(loop_misaligned) detects that src2 is near a page end.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	cmp	data1w, #1		/* cs iff data1 is not NUL.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.ne	L(done)
	subs	count, count, #1
	b.hi	L(page_end_loop)

L(do_misaligned):
	/* Prepare ourselves for the next page crossing.  Unlike the aligned
	   loop, we fetch 1 less dword because we risk crossing bounds on
	   SRC2.  */
	mov	count, #8
	subs	limit_wd, limit_wd, #1
	b.lo	L(done_loop)
L(loop_misaligned):
	/* If src2 lies within the last dword of a 4KiB block, compare the
	   next 8 bytes bytewise (count is 8 here) instead of loading a
	   dword from src2.  */
	and	tmp2, src2, #0xff8
	eor	tmp2, tmp2, #0xff8
	cbz	tmp2, L(page_end_loop)

	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	ccmp	diff, #0, #0, eq
	b.ne	L(not_limit)
	subs	limit_wd, limit_wd, #1
	b.pl	L(loop_misaligned)

L(done_loop):
	/* All full dwords compared equal and contained no NUL.  Handle the
	   remaining limit % 8 bytes, if any.  */
	and	limit, limit, #7
	cbz	limit, L(not_limit)
	/* Read the last word.  The load is rewound so that its final byte
	   is the last byte within the limit; the leading bytes overlap
	   data that has already been compared.
	   NOTE(review): unlike L(loop_misaligned) there is no page-end
	   check on src2 before this load, so it looks like it can read
	   across a page boundary when a NUL sits in the tail - confirm.  */
	sub	src1, src1, #8
	sub	src2, src2, #8
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	ccmp	diff, #0, #0, eq
	b.ne	L(not_limit)

L(ret0):
	mov	result, #0
	ret

END ( __strncmp_aarch64)
