/* memcmp - compare memory
 *
 * Copyright (c) 2013-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include "../asmdefs.h"

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

/* __memcmp_aarch64 (src1, src2, limit)

   Compare the first LIMIT bytes at SRC1 and SRC2.

   In:   x0 = src1, x1 = src2, x2 = limit (byte count).
   Out:  w0 = 0 when every byte matches.  Otherwise the sign of w0 follows
	 the unsigned comparison of the first differing byte pair: the
	 word-sized paths return -1 or 1, the byte loop returns the byte
	 difference.
   Writes x0-x7 and the condition flags.  The routine makes no calls and
   never references sp.

   Several paths deliberately re-read bytes that were already found equal
   (overlapping tail loads, and the re-alignment of src1 for large sizes);
   this is harmless because equal bytes cannot change the outcome.  */
ENTRY (__memcmp_aarch64)
	/* PTR_ARG/SIZE_ARG are defined in asmdefs.h; presumably they
	   normalise pointer/size arguments for ILP32 builds - confirm
	   against that header.  */
	PTR_ARG (0)
	PTR_ARG (1)
	SIZE_ARG (2)

	/* limit = n - 8.  Fewer than 8 bytes: use the small-size path.  */
	subs	limit, limit, 8
	b.lo	L(less8)

	/* Compare bytes 0-7.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* limit = n - 16.  */
	subs	limit, limit, 8
	b.gt	L(more16)

	/* 8 <= n <= 16: limit is in [-8..0], so src + limit addresses the
	   last 8 bytes (possibly overlapping the 8 just compared).
	   L(return) performs the comparison.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	/* Compare bytes 8-15.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  limit becomes n - 32.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  Both pointers
	   step back by the same amount and limit grows to match, so the
	   bytes stepped over are simply compared a second time.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* The ccmp chain yields EQ only if limit was > 16 and both 8-byte
	   halves match; any failing condition forces NZCV = 0 (NE).  */
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* The loop exits on a mismatch or on running low; find out which.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is <= 0
	   here, so src + limit addresses the final 16 bytes of each buffer.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.  */
L(return):
#ifndef __AARCH64EB__
	/* Little-endian: byte-reverse so the lowest-addressed byte becomes
	   the most significant and decides the unsigned comparison.  */
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
L(ret_eq):
	/* result = (NE ? 1 : 0), negated when data1 < data2 (unsigned).  */
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	/* limit = n - 4; carry is clear (LO) when n < 4.  */
	adds	limit, limit, 4
	b.lo	L(less4)
	/* The 32-bit loads zero-extend into data1/data2, so the 64-bit
	   comparison at L(return) remains valid.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	/* limit = bytes still to compare (0..3).  With none left Z is set,
	   so L(ret_eq) returns 0.  */
	adds	limit, limit, 4
	b.eq	L(ret_eq)
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	/* Continue only while bytes remain and the current pair matches.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret

END (__memcmp_aarch64)