1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
4  */
5 
/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   (respectively one cycle / byte) by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead for
   loop setup / finish, and strcmp might often terminate early.  */
12 
13 #include <linux/linkage.h>
14 
ENTRY_CFI(strcmp)
	/* int strcmp(const char *s1, const char *s2)
	   In:   r0 = s1, r1 = s2
	   Out:  r0 = 0 if the strings are equal,
	              > 0 if s1 sorts after s2,
	              < 0 if s1 sorts before s2
	   Uses: r2/r3 = current words/bytes of s1/s2,
	         r4  = zero-byte detection mask,
	         r5  = 0x80808080, r12 = 0x01010101.  */
	or	r2,r0,r1		; merge low address bits of both pointers
	bmsk_s	r2,r2,1			; keep bits 1:0 -- nonzero if either is unaligned
	brne	r2,0,.Lcharloop		; not both word-aligned: fall back to bytewise loop
	/* Both inputs are word-aligned: compare one word at a time, using
	   the classic (x - 0x01010101) & ~x & 0x80808080 zero-byte test.  */
	mov_s	r12,0x01010101
	ror	r5,r12			; r5 = ror(0x01010101, 1) = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]		; r2 = next word of s1, s1 += 4
	ld.ab	r3,[r1,4]		; r3 = next word of s2, s2 += 4
	nop_s				; scheduling: covers load-use latency -- presumably ARC700-specific; confirm against pipeline docs
	sub	r4,r2,r12		; zero-byte test on s1's word ...
	bic	r4,r4,r2
	and	r4,r4,r5		; ... r4 has 0x80 set in each (candidate) zero byte
	brne	r4,0,.Lfound0		; s1's word may contain a NUL
	breq	r2,r3,.Lwordloop	; words equal, no NUL: keep scanning
	/* Words differ and s1's word has no NUL: decide the ordering.  */
#ifdef	__LITTLE_ENDIAN__
	/* On little endian, the first differing byte in memory is the least
	   significant differing byte in the register; isolate it so the
	   word comparison below yields the bytewise answer.  */
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0	; reduce both words to just that byte
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1		; assume s1 > s2
	j_s.d	[blink]		; return; delay slot executes the bset below
	bset.lo	r0,r0,31	; carry set (s1 < s2): make result negative (0x80000001)

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	/* s1's word contains a NUL candidate: isolate the first byte that
	   differs or is NUL, then compare there.  Returns 0 when the words
	   agree up to and including the terminator.  */
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3	; 0 if equal through the NUL
	mov.hi	r0,1		; s1 > s2 -> 1
	j_s.d	[blink]		; return; delay slot executes the bset below
	bset.lo	r0,r0,31	; s1 < s2 -> negative (0x80000001)
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8		; r0 = r4 >> 8: zero markers moved to the bytes below
	lsr_s	r1,r2		; r1 = r2 >> 1, for the bit-0 check described above
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0  (carry rotated in)
	cmp_s	r2,r3
	j_s.d	[blink]		; return; delay slot executes the bset below
	bset.lo	r0,r0,31	; r2 < r3 -> negative (0x80000001)
#endif /* ENDIAN */

	.balign	4
.Lcharloop:
	/* Bytewise comparison for unaligned inputs.  */
	ldb.ab	r2,[r0,1]	; r2 = *s1++
	ldb.ab	r3,[r1,1]	; r3 = *s2++
	nop_s			; scheduling: covers load-use latency -- presumably; confirm
	breq	r2,0,.Lcmpend	; s1 ended: result is decided by the final bytes
	breq	r2,r3,.Lcharloop ; bytes equal: keep scanning
.Lcmpend:
	j_s.d	[blink]		; return ...
	sub	r0,r2,r3	; ... with the byte difference (computed in delay slot)
END_CFI(strcmp)
94