/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#define L(label) .L ## label

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Arguments and return value.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Scratch registers.  Note that diff/off1 share x5 and syndrome/tmp
   share x6; each pair is never live at the same time.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/*
 * __pi_strcmp (weakly aliased as strcmp)
 *
 * Compare two NUL-terminated byte strings.
 *
 * In:     x0 = src1, x1 = src2.
 * Out:    x0 = (byte from src1) - (byte from src2) at the first position
 *         where the strings differ, both taken as unsigned values;
 *         0 when the strings are equal up to and including the NUL.
 * Uses:   x2-x10 as scratch and the condition flags.  The routine makes
 *         no calls and does not touch the stack.
 *
 * Three paths are taken depending on the pointers:
 *   - both 8-byte aligned:           L(loop_aligned)
 *   - same misalignment modulo 8:    L(mutual_align), then the aligned loop
 *   - different alignment modulo 8:  L(misaligned8)
 */
SYM_FUNC_START(__pi_strcmp)
	sub	off2, src2, src1	/* src2 is addressed as src1 + off2.  */
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* Offset of src1 within its 8-byte word.  */
	tst	off2, 7
	b.ne	L(misaligned8)		/* Alignments differ modulo 8.  */
	cbnz	tmp, L(mutual_align)	/* Same alignment, but not on a boundary.  */

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL was found (Z set) compare the two words, otherwise
	   force NZCV = 0 so that the loop exits.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	/* has_nul was computed on byte-reversed data; undo the reversal so
	   it lines up with data1/data2 again.  */
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
	/* Put the earliest byte at the most significant end so that clz
	   finds the first point of interest.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift		/* All-ones in the bytes before the start.  */
	/* Force the preceding bytes to 0xff in both words so they compare
	   equal and cannot be mistaken for a NUL.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	/* Byte-at-a-time compare until src1 reaches an 8-byte boundary.  */
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)			/* NUL in src1 or bytes differ.  */
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* src1 is now aligned, src2 is not.  Load the aligned word that
	   contains the current src2 position and check it for a NUL,
	   ignoring the bytes that precede src2.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp	/* Make the preceding bytes non-zero.  */
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	/* src2 now points at the next aligned word; address it as
	   src1 + off1 inside the loop.  */
	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	/* data3 is the aligned src2 word used only for NUL detection,
	   data2 is the unaligned src2 word that is compared with data1.  */
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq	/* Compare only if data3 has no NUL.  */
	b.eq	L(loop_unaligned)

	/* Move the NUL markers found in data3 to the byte positions they
	   occupy in data2.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	/* The remaining bytes of interest from src2 are all inside data3.
	   Shift them (and their NUL markers) down to the start of the word
	   and compare against the next word of src1.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* data1/data2 hold single zero-extended bytes loaded by ldrb.  */
	sub	result, data1, data2
	ret
SYM_FUNC_END(__pi_strcmp)
SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)