/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#define L(label) .L ## label

/* Parameters and result. */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables (tmp2 is defined but not referenced below). */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

/*
 * __pi_memcmp / memcmp
 *
 * In:    x0 = src1, x1 = src2, x2 = limit (number of bytes to compare)
 * Out:   w0 = 0 when all compared bytes are equal.
 *             On the word paths (via L(order)) a mismatch yields -1 or 1.
 *             On the byte loop a mismatch yields data1w - data2w for the
 *             first differing byte pair.
 * Clobb: x0-x7, NZCV.
 * Stack: none; the routine neither touches sp nor calls out.
 *
 * Throughout, "limit" is kept biased: it holds the remaining byte count
 * minus the size of the chunk about to be handled, so that a single
 * subs/adds both updates the count and produces the branch condition.
 */
SYM_FUNC_START(__pi_memcmp)
	/* limit = n - 8; fewer than 8 bytes go to the short path. */
	subs	limit, limit, #8
	b.lo	L(short8)

	/* Bytes 0..7. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	cmp	data1, data2
	b.ne	L(order)

	/* limit = n - 16; more than 16 bytes in total take the long path. */
	subs	limit, limit, #8
	b.gt	L(over16)

	/*
	 * 8..16 bytes: limit is in [-8..0], so src + limit addresses the
	 * final 8 bytes (possibly overlapping the ones already checked).
	 * L(order) performs the comparison.
	 */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(order)

L(over16):
	/* Bytes 8..15. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	cmp	data1, data2
	b.ne	L(order)

	/*
	 * limit = n - 32.  Jump directly to comparing the last 16 bytes for
	 * 32 byte (or less) strings.
	 */
	subs	limit, limit, #16
	b.ls	L(tail16)

	/*
	 * We overlap loads between 0-32 bytes at either side of SRC1 when we
	 * try to align, so limit it only to strings larger than 128 bytes.
	 */
	cmp	limit, #96
	b.ls	L(pair_loop)

	/*
	 * Align src1 down to 16 bytes and move src2 back by the same amount;
	 * the tmp1 bytes stepped back over are added to limit again.
	 */
	and	tmp1, src1, #15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/*
	 * 16 bytes per iteration.  limit is pre-decremented by 16 and must be
	 * larger than zero on entry.  The ccmp chain leaves "eq" only when
	 * more than 16 bytes remained before this step (hi) and both
	 * doubleword pairs matched; otherwise NZCV is forced to 0 ("ne") and
	 * the loop exits.
	 */
	.p2align 4
L(pair_loop):
	ldp	data1, data1h, [src1], #16
	ldp	data2, data2h, [src2], #16
	subs	limit, limit, #16
	ccmp	data1, data2, #0, hi
	ccmp	data1h, data2h, #0, eq
	b.eq	L(pair_loop)

	/* Loop left: report a mismatch in the low, then the high doubleword. */
	cmp	data1, data2
	b.ne	L(order)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(order)

	/*
	 * Compare last 1-16 bytes using unaligned access: src + limit points
	 * at the final 16 bytes of each buffer.
	 */
L(tail16):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(order)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/*
	 * Turn data1/data2 into a return value of 0, -1 or 1.  On
	 * little-endian builds the bytes are reversed first so that the byte
	 * at the lowest address becomes the most significant one and the
	 * unsigned compare orders the buffers by their first differing byte.
	 */
L(order):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Entered with flags already set: ne -> 1, then negated when lo. */
L(from_flags):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1]. */
L(short8):
	/* limit = n - 4; no carry means fewer than 4 bytes in total. */
	adds	limit, limit, #4
	b.lo	L(short4)
	/* Bytes 0..3; the 32-bit loads zero the upper halves of data1/data2. */
	ldr	data1w, [src1], #4
	ldr	data2w, [src2], #4
	cmp	data1w, data2w
	b.ne	L(order)
	sub	limit, limit, #4
L(short4):
	/* limit = bytes still to check (0..3); zero leaves Z set -> return 0. */
	adds	limit, limit, #4
	b.eq	L(from_flags)
	/*
	 * One byte per iteration.  The ccmp compares the bytes only while
	 * bytes remain after this one; once limit reaches zero it forces "ne"
	 * so the loop ends and the subtraction of the last pair is returned.
	 */
L(bytewise):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	ccmp	data1w, data2w, #0, ne	/* NZCV = 0b0000. */
	b.eq	L(bytewise)
	sub	result, data1w, data2w
	ret
SYM_FUNC_END(__pi_memcmp)
SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)