xref: /kernel/linux/linux-6.6/arch/arc/lib/strcpy-700.S (revision 62306a36)
162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
762306a36Sopenharmony_ci   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
862306a36Sopenharmony_ci   it 8 byte aligned.  Thus, we can do a little read-ahead, without
962306a36Sopenharmony_ci   dereferencing a cache line that we should not touch.
1062306a36Sopenharmony_ci   Note that short and long instructions have been scheduled to avoid
1162306a36Sopenharmony_ci   branch stalls.
1262306a36Sopenharmony_ci   The bne_s to r3z could be made unaligned & long to avoid a stall
1362306a36Sopenharmony_ci   there, but it is not likely to be taken often, and it
1462306a36Sopenharmony_ci   would also be likely to cost an unaligned mispredict at the next call.  */
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include <linux/linkage.h>
1762306a36Sopenharmony_ci
/*
 * strcpy: copy the NUL-terminated string read through r1 into the buffer
 * written through a copy of r0, including the terminating zero byte.
 *
 * Register usage in this routine:
 *   r0      destination pointer; only read here, so it is unchanged on return
 *   r1      source pointer, advanced as data is read (reused as a byte
 *           scratch register in the r3z tail, where it is no longer needed)
 *   r10     running destination pointer (initialised from r0)
 *   r3, r4  data words being copied / current byte
 *   r8      0x01010101
 *   r12     r8 rotated right by one bit, i.e. 0x80808080
 *   r2      scratch for the alignment test and the zero-byte test
 *
 * Zero-byte test used on each word w:  (w - r8) & ~w & r12
 * The result is non-zero when w contains a zero byte.
 *
 * Instructions with a ".d" suffix have a delay slot: the instruction
 * that follows them is executed whether or not the branch is taken.
 */
1862306a36Sopenharmony_ciENTRY_CFI(strcpy)
/* r2 = low two bits of (dst | src): non-zero if either is not 4-byte aligned */
1962306a36Sopenharmony_ci	or	r2,r0,r1
2062306a36Sopenharmony_ci	bmsk_s	r2,r2,1
/* Not both word aligned -> byte-at-a-time copy */
2162306a36Sopenharmony_ci	brne.d	r2,0,charloop
/* Delay slot (runs on both paths): r10 = working destination pointer */
2262306a36Sopenharmony_ci	mov_s	r10,r0
/* Word path: fetch the first source word and build the test constants */
2362306a36Sopenharmony_ci	ld_s	r3,[r1,0]
2462306a36Sopenharmony_ci	mov	r8,0x01010101
/* Bit 2 of src clear -> src is already 8-byte aligned, enter the main loop */
2562306a36Sopenharmony_ci	bbit0.d	r1,2,loop_start
/* Delay slot (runs on both paths): r12 = 0x80808080 */
2662306a36Sopenharmony_ci	ror	r12,r8
/*
 * src is 4-byte but not 8-byte aligned: handle one word on its own first.
 * If it contains the terminator, finish byte-wise at r3z.
 */
2762306a36Sopenharmony_ci	sub	r2,r3,r8
2862306a36Sopenharmony_ci	bic_s	r2,r2,r3
2962306a36Sopenharmony_ci	tst_s	r2,r12
3062306a36Sopenharmony_ci	bne	r3z
/* No terminator: move the word to r4 so that the loop head below stores it */
3162306a36Sopenharmony_ci	mov_s	r4,r3
3262306a36Sopenharmony_ci	.balign 4
/*
 * Main loop, two words per iteration.
 * At "loop": r4 holds a checked word still to be stored.
 */
3362306a36Sopenharmony_ciloop:
/* r1 += 4, then r3 = [r1] (pre-increment with write-back) */
3462306a36Sopenharmony_ci	ld.a	r3,[r1,4]
/* [r10] = r4, then r10 += 4 (post-increment) */
3562306a36Sopenharmony_ci	st.ab	r4,[r10,4]
/* At "loop_start": r3 holds a loaded but not yet checked word */
3662306a36Sopenharmony_ciloop_start:
/* Read ahead: fetch the following word into r4 before r3 has been checked */
3762306a36Sopenharmony_ci	ld.a	r4,[r1,4]
/* Zero byte in r3? Then copy its bytes individually at r3z */
3862306a36Sopenharmony_ci	sub	r2,r3,r8
3962306a36Sopenharmony_ci	bic_s	r2,r2,r3
4062306a36Sopenharmony_ci	tst_s	r2,r12
4162306a36Sopenharmony_ci	bne_s	r3z
/* r3 has no terminator: store it whole */
4262306a36Sopenharmony_ci	st.ab	r3,[r10,4]
/* Zero byte in r4? If not, go round again (r4 is stored at the loop head) */
4362306a36Sopenharmony_ci	sub	r2,r4,r8
4462306a36Sopenharmony_ci	bic	r2,r2,r4
4562306a36Sopenharmony_ci	tst	r2,r12
4662306a36Sopenharmony_ci	beq	loop
/* r4 contains the terminator: make it the word handled by the r3z tail */
4762306a36Sopenharmony_ci	mov_s	r3,r4
/*
 * r3z: r3 is a word known to contain a zero byte.  Emit its bytes one at
 * a time in memory order, stopping after the zero byte has been written.
 * The ".f" instruction sets the flags from the extracted byte; the code
 * relies on those flags still being valid at the bne.d below.
 */
4862306a36Sopenharmony_ci#ifdef __LITTLE_ENDIAN__
/* Little endian: next byte is the low 8 bits; shift the rest down */
4962306a36Sopenharmony_cir3z:	bmsk.f	r1,r3,7
5062306a36Sopenharmony_ci	lsr_s	r3,r3,8
5162306a36Sopenharmony_ci#else
/* Big endian: next byte is the top 8 bits; shift the rest up */
5262306a36Sopenharmony_cir3z:	lsr.f	r1,r3,24
5362306a36Sopenharmony_ci	asl_s	r3,r3,8
5462306a36Sopenharmony_ci#endif
/* Repeat while the extracted byte was non-zero */
5562306a36Sopenharmony_ci	bne.d	r3z
/* Delay slot: store the byte (including the final zero), then r10 += 1 */
5662306a36Sopenharmony_ci	stb.ab	r1,[r10,1]
/* Return to the caller */
5762306a36Sopenharmony_ci	j_s	[blink]
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	.balign	4
/* Byte-wise copy used when dst or src is not 4-byte aligned */
6062306a36Sopenharmony_cicharloop:
/* r3 = byte at [r1], then r1 += 1 */
6162306a36Sopenharmony_ci	ldb.ab	r3,[r1,1]
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci
/* Loop until the byte just loaded is zero */
6462306a36Sopenharmony_ci	brne.d	r3,0,charloop
/* Delay slot: store the byte (so the terminator is written too), r10 += 1 */
6562306a36Sopenharmony_ci	stb.ab	r3,[r10,1]
/* Return to the caller */
6662306a36Sopenharmony_ci	j	[blink]
6762306a36Sopenharmony_ciEND_CFI(strcpy)
68