/* SPDX-License-Identifier: GPL-2.0 */
/*
 * "memset" implementation for SH4
 *
 * Copyright (C) 1999  Niibe Yutaka
 * Copyright (c) 2009  STMicroelectronics Limited
 * Author: Stuart Menefy <stuart.menefy:st.com>
 */

/*
 *            void *memset(void *s, int c, size_t n);
 */

#include <linux/linkage.h>

/*
 * memset - fill n bytes starting at s with the byte value c.
 *
 * Register usage, as read from the code below:
 *   r4  in:  s.  Advanced to s + n on entry and then used as a
 *            pre-decrement store pointer, so the buffer is filled from
 *            its end downwards.
 *   r5  in:  c.  Only the low byte is stored; once past the short-length
 *            check it is replicated into all four bytes (VVVV) so that
 *            longword stores can be used.
 *   r6  in:  n.  Holds the number of bytes still to be written.
 *   r0  out: s (r4 after it has walked back down to the start).
 *   r0-r3 are used as scratch and the T bit is modified.
 *
 * Strategy:
 *   n < 12                byte stores only.
 *   n >= 12               byte stores until the end pointer is 4-byte
 *                         aligned, then 8 bytes per iteration, then a
 *                         byte tail of at most 7 bytes.
 *   >= 64 bytes left      before the 8-byte loop, align down to a 32-byte
 *   after that alignment  boundary with longword stores and fill whole
 *                         32-byte blocks, each opened with movca.l.
 *
 * An instruction indented by one extra space sits in the delay slot of the
 * branch (or rts) directly above it and is executed whether or not the
 * branch is taken.
 *
 * The length tests use signed compares (cmp/gt, cmp/ge); a length with the
 * top bit set therefore satisfies the first test and takes the byte-only
 * path.
 */
ENTRY(memset)
	mov	#12,r0
	add	r6,r4		! r4 = s + n: one past the last byte to fill
	cmp/gt	r6,r0		! T = (12 > n)
	bt/s	40f		! if it's too small, set a byte at once
	 mov	r4,r0		! r0 = end pointer, for the alignment test below
	and	#3,r0		! r0 = misalignment of the end pointer (0..3)
	cmp/eq	#0,r0
	bt/s	2f		! It's aligned
	 sub	r0,r6		! account for the r0 bytes stored by loop 1
1:				! byte stores until r4 is 4-byte aligned
	dt	r0
	bf/s	1b
	 mov.b	r5,@-r4
2:				! make VVVV
	extu.b	r5,r5		! keep only the low byte of c
	swap.b	r5,r0		!   V0
	or	r0,r5		!   VV
	swap.w	r5,r0		! VV00
	or	r0,r5		! VVVV

	! Here r4 is 4-byte aligned and r6 >= 9 (n >= 12, at most 3 peeled).
	! Check if enough bytes need to be copied to be worth the big loop
	mov	#0x40, r0	! (MT)
	cmp/gt	r6,r0		! (MT)  64 > len => slow loop

	bt/s	22f
	 mov	r6,r0		! r0 = len, turned into a loop count at 22

	! align the dst to the cache block size if necessary
	mov	r4, r3
	mov	#~(0x1f), r1

	and	r3, r1		! r1 = r4 rounded down to a 32-byte boundary
	cmp/eq	r3, r1

	bt/s	11f		! dst is already aligned
	 sub	r1, r3		! r3-r1 -> r3
	shlr2	r3		! number of loops

	! One longword per pass until r4 reaches the 32-byte boundary.
10:	mov.l	r5,@-r4
	dt	r3
	bf/s	10b
	 add	#-4, r6

	! At most 28 bytes were used above, so r6 >= 36 and r2 below is >= 1.
11:	! dst is 32byte aligned
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r2		! number of loops

	add	#-32, r4	! r4 = start of the 32-byte block just below
	mov	r5, r0		! movca.l stores from r0
12:
	movca.l	r0,@r4		! first longword of the block
	mov.l	r5,@(4, r4)
	mov.l	r5,@(8, r4)
	mov.l	r5,@(12,r4)
	mov.l	r5,@(16,r4)
	mov.l	r5,@(20,r4)
	add	#-0x20, r6	! 32 bytes accounted for
	mov.l	r5,@(24,r4)
	dt	r2
	mov.l	r5,@(28,r4)
	bf/s	12b
	 add	#-32, r4	! step down to the next block

	add	#32, r4		! undo the extra step taken on the last pass
	mov	#8, r0
	cmp/ge	r0, r6		! T = (len >= 8)
	bf	40f		! fewer than 8 bytes left: byte loop only

	mov	r6,r0
	! Both ways into 22 have r0 = len >= 8, so the count below is >= 1.
22:
	shlr2	r0
	shlr	r0		! r0 = r6 >> 3
3:
	dt	r0
	mov.l	r5,@-r4		! set 8-byte at once
	bf/s	3b
	 mov.l	r5,@-r4
	!
	mov	#7,r0
	and	r0,r6		! r6 = len & 7: bytes left for the tail

	! fill bytes (length may be zero)
40:	tst	r6,r6
	bt	5f
4:
	dt	r6
	bf/s	4b
	 mov.b	r5,@-r4		! stores the low byte of r5
5:
	rts
	 mov	r4,r0		! return value: r4 has walked back down to s