/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   respective one cycle / byte by forcing double source 1 alignment, unrolling
   by a factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup / finish,
   and strcmp might often terminate early.  */

#include <linux/linkage.h>

/*
 * strcmp - compare two NUL-terminated byte strings.
 *
 * In:       r0 = address of the first string  (s1)
 *           r1 = address of the second string (s2)
 * Out:      r0 = 0 if the strings are equal,
 *                a positive value if the first differing byte of s1 is
 *                greater (unsigned) than that of s2,
 *                a negative value (bit 31 set) if it is smaller.
 * Modifies: r1-r5, r12 and the status flags.
 * Returns through blink; every return uses a delayed jump (j_s.d), so the
 * instruction written after it still executes before control leaves.
 *
 * Two strategies are used:
 *  - If both pointers are 4-byte aligned, the strings are compared one
 *    32-bit word at a time.  A terminating NUL in the s1 word is detected
 *    with (w - 0x01010101) & ~w & 0x80808080, which is non-zero exactly
 *    when some byte of w is zero.  Note that this path loads whole aligned
 *    words and therefore reads up to three bytes beyond the terminator.
 *  - Otherwise the strings are compared one byte at a time.
 *
 * NOTE(review): the nop_s after each pair of loads presumably covers the
 * load-to-use latency of the ARC700 pipeline - confirm before removing.
 */
ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1		; keep bits 1:0 of (s1 | s2)
	brne	r2,0,.Lcharloop	; either pointer unaligned: byte loop
	mov_s	r12,0x01010101	; r12 = 0x01 in every byte
	ror	r5,r12		; r5  = 0x80808080 (r12 rotated right by 1)
.Lwordloop:
	ld.ab	r2,[r0,4]	; r2 = next word of s1, then s1 += 4
	ld.ab	r3,[r1,4]	; r3 = next word of s2, then s2 += 4
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5	; r4 != 0 iff the s1 word holds a zero byte
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
	/* The words differ and the s1 word holds no terminator.  */
#ifdef	__LITTLE_ENDIAN__
	/* The first string byte is the least significant one, so restrict
	   both words to the lowest differing byte before comparing.  */
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1		; result is +1 unless the next line fires
	j_s.d	[blink]
	bset.lo	r0,r0,31	; (delay slot) r2 < r3 unsigned: make r0 negative

	.balign	4
#ifdef __LITTLE_ENDIAN__
	/* The s1 word holds a terminator.  Compare only up to whichever
	   comes first: the lowest differing bit or the lowest zero-byte
	   marker in r4.  */
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3	; r0 = 0 when the masked words are equal
	mov.hi	r0,1		; r2 > r3 unsigned: force a positive result
	j_s.d	[blink]
	bset.lo	r0,r0,31	; (delay slot) r2 < r3 unsigned: make r0 negative
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31	; (delay slot) r2 < r3 unsigned: make r0 negative
#endif /* ENDIAN */

	/* Byte-at-a-time path, used when either pointer is not word aligned.  */
	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]	; r2 = next byte of s1, then s1 += 1
	ldb.ab	r3,[r1,1]	; r3 = next byte of s2, then s2 += 1
	nop_s
	breq	r2,0,.Lcmpend	; end of s1 reached
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3	; (delay slot) result = s1 byte - s2 byte
END_CFI(strcmp)