/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word or
   one cycle / byte, respectively, by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead for
   loop setup / finish, and strcmp might often terminate early.  */

#include <linux/linkage.h>

/*
 * strcmp - compare two NUL-terminated byte strings.
 *
 * In:    r0 = address of the first string  (s1)
 *        r1 = address of the second string (s2)
 * Out:   r0 = 0 if the strings are equal, a value with bit 31 clear and
 *             non-zero if s1 > s2, a value with bit 31 set if s1 < s2.
 *             The word path returns 1 / 0 / (bit 31 set); the byte path
 *             returns the difference of the first mismatching bytes.
 * Clobb: r1, r2, r3, r4, r5, r12, flags
 *
 * Strategy: if both pointers are 4-byte aligned, compare one word per
 * iteration and use the (x - 0x01010101) & ~x & 0x80808080 trick to spot
 * a NUL byte in the s1 word; otherwise fall back to a byte-by-byte loop.
 * The word path loads whole words, so up to three bytes beyond the
 * terminator may be read (but never used for the result).
 *
 * Register roles in the word path:
 *   r2  = current word of s1      r3 = current word of s2
 *   r4  = zero-byte indicator (0x80 in each byte detected as zero)
 *   r12 = 0x01010101              r5 = 0x80808080
 */
ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1		; keep the low two bits of (s1 | s2)
	brne	r2,0,.Lcharloop	; either pointer unaligned -> byte loop
	mov_s	r12,0x01010101
	ror	r5,r12		; rotate right by one: r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]	; r2 = *s1, then s1 += 4
	ld.ab	r3,[r1,4]	; r3 = *s2, then s2 += 4
	nop_s			; NOTE(review): presumably load-use padding
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5	; r4 = (r2 - 0x01010101) & ~r2 & 0x80808080
	brne	r4,0,.Lfound0	; s1 word (probably) contains a NUL
	breq	r2,r3,.Lwordloop	; no NUL and words equal: keep going
	/* Words differ and the s1 word holds no NUL.  */
#ifdef	__LITTLE_ENDIAN__
	/* The first character is the least significant byte, so isolate the
	   lowest differing bit and everything above it within that byte.  */
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0	; borrow ripples up to bit 7 of that byte
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	/* On big endian the first character is the most significant byte,
	   so a plain unsigned word compare already orders the strings.  */
	cmp_s	r2,r3
	mov_s	r0,1		; result for s1 > s2 (words are known to differ)
	j_s.d	[blink]
	bset.lo	r0,r0,31	; delay slot: unsigned lower -> make negative

	.balign	4
#ifdef __LITTLE_ENDIAN__
	/* A NUL was flagged in the s1 word.  Same masking as above, but the
	   zero indicator is merged in so that the comparison stops at the
	   first NUL or the first difference, whichever is lower.  Only the
	   least significant set bit is used, so a mis-detected 0x01 byte
	   above a real zero byte cannot influence the result.  */
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3	; r0 = 0 when the deciding bytes are equal
	mov.hi	r0,1		; unsigned higher -> positive result
	j_s.d	[blink]
	bset.lo	r0,r0,31	; delay slot: unsigned lower -> make negative
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry propagation from a less significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8		; move each zero flag onto the following byte
	lsr_s	r1,r2		; bit0 of each s1 byte lines up with its flag
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31	; delay slot: unsigned lower -> make negative
#endif /* ENDIAN */

	/* Byte-by-byte fallback, used when either pointer is unaligned.  */
	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]	; r2 = *s1++ (byte load)
	ldb.ab	r3,[r1,1]	; r3 = *s2++ (byte load)
	nop_s			; NOTE(review): presumably load-use padding
	breq	r2,0,.Lcmpend	; end of s1
	breq	r2,r3,.Lcharloop	; same non-NUL byte: keep going
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3	; delay slot: return byte difference
END_CFI(strcmp)