/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 *  ASM optimised string functions
 */
962306a36Sopenharmony_ci#include <linux/linkage.h>
1062306a36Sopenharmony_ci#include <asm/assembler.h>
1162306a36Sopenharmony_ci#include <asm/unwind.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci	.text
1462306a36Sopenharmony_ci	.align	5
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ciENTRY(__memset)
1762306a36Sopenharmony_ciENTRY(mmioset)
1862306a36Sopenharmony_ciWEAK(memset)
1962306a36Sopenharmony_ciUNWIND( .fnstart         )
2062306a36Sopenharmony_ci	and	r1, r1, #255		@ cast to unsigned char
2162306a36Sopenharmony_ci	ands	r3, r0, #3		@ 1 unaligned?
2262306a36Sopenharmony_ci	mov	ip, r0			@ preserve r0 as return value
2362306a36Sopenharmony_ci	bne	6f			@ 1
2462306a36Sopenharmony_ci/*
2562306a36Sopenharmony_ci * we know that the pointer in ip is aligned to a word boundary.
2662306a36Sopenharmony_ci */
2762306a36Sopenharmony_ci1:	orr	r1, r1, r1, lsl #8
2862306a36Sopenharmony_ci	orr	r1, r1, r1, lsl #16
2962306a36Sopenharmony_ci	mov	r3, r1
3062306a36Sopenharmony_ci7:	cmp	r2, #16
3162306a36Sopenharmony_ci	blt	4f
3262306a36Sopenharmony_ciUNWIND( .fnend              )
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci#if ! CALGN(1)+0
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci/*
3762306a36Sopenharmony_ci * We need 2 extra registers for this loop - use r8 and the LR
3862306a36Sopenharmony_ci */
3962306a36Sopenharmony_ciUNWIND( .fnstart            )
4062306a36Sopenharmony_ciUNWIND( .save {r8, lr}      )
4162306a36Sopenharmony_ci	stmfd	sp!, {r8, lr}
4262306a36Sopenharmony_ci	mov	r8, r1
4362306a36Sopenharmony_ci	mov	lr, r3
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci2:	subs	r2, r2, #64
4662306a36Sopenharmony_ci	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
4762306a36Sopenharmony_ci	stmiage	ip!, {r1, r3, r8, lr}
4862306a36Sopenharmony_ci	stmiage	ip!, {r1, r3, r8, lr}
4962306a36Sopenharmony_ci	stmiage	ip!, {r1, r3, r8, lr}
5062306a36Sopenharmony_ci	bgt	2b
5162306a36Sopenharmony_ci	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
5262306a36Sopenharmony_ci/*
5362306a36Sopenharmony_ci * No need to correct the count; we're only testing bits from now on
5462306a36Sopenharmony_ci */
5562306a36Sopenharmony_ci	tst	r2, #32
5662306a36Sopenharmony_ci	stmiane	ip!, {r1, r3, r8, lr}
5762306a36Sopenharmony_ci	stmiane	ip!, {r1, r3, r8, lr}
5862306a36Sopenharmony_ci	tst	r2, #16
5962306a36Sopenharmony_ci	stmiane	ip!, {r1, r3, r8, lr}
6062306a36Sopenharmony_ci	ldmfd	sp!, {r8, lr}
6162306a36Sopenharmony_ciUNWIND( .fnend              )
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci#else
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci/*
6662306a36Sopenharmony_ci * This version aligns the destination pointer in order to write
6762306a36Sopenharmony_ci * whole cache lines at once.
6862306a36Sopenharmony_ci */
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ciUNWIND( .fnstart               )
7162306a36Sopenharmony_ciUNWIND( .save {r4-r8, lr}      )
7262306a36Sopenharmony_ci	stmfd	sp!, {r4-r8, lr}
7362306a36Sopenharmony_ci	mov	r4, r1
7462306a36Sopenharmony_ci	mov	r5, r3
7562306a36Sopenharmony_ci	mov	r6, r1
7662306a36Sopenharmony_ci	mov	r7, r3
7762306a36Sopenharmony_ci	mov	r8, r1
7862306a36Sopenharmony_ci	mov	lr, r3
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	cmp	r2, #96
8162306a36Sopenharmony_ci	tstgt	ip, #31
8262306a36Sopenharmony_ci	ble	3f
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	and	r8, ip, #31
8562306a36Sopenharmony_ci	rsb	r8, r8, #32
8662306a36Sopenharmony_ci	sub	r2, r2, r8
8762306a36Sopenharmony_ci	movs	r8, r8, lsl #(32 - 4)
8862306a36Sopenharmony_ci	stmiacs	ip!, {r4, r5, r6, r7}
8962306a36Sopenharmony_ci	stmiami	ip!, {r4, r5}
9062306a36Sopenharmony_ci	tst	r8, #(1 << 30)
9162306a36Sopenharmony_ci	mov	r8, r1
9262306a36Sopenharmony_ci	strne	r1, [ip], #4
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci3:	subs	r2, r2, #64
9562306a36Sopenharmony_ci	stmiage	ip!, {r1, r3-r8, lr}
9662306a36Sopenharmony_ci	stmiage	ip!, {r1, r3-r8, lr}
9762306a36Sopenharmony_ci	bgt	3b
9862306a36Sopenharmony_ci	ldmfdeq	sp!, {r4-r8, pc}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	tst	r2, #32
10162306a36Sopenharmony_ci	stmiane	ip!, {r1, r3-r8, lr}
10262306a36Sopenharmony_ci	tst	r2, #16
10362306a36Sopenharmony_ci	stmiane	ip!, {r4-r7}
10462306a36Sopenharmony_ci	ldmfd	sp!, {r4-r8, lr}
10562306a36Sopenharmony_ciUNWIND( .fnend                 )
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci#endif
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ciUNWIND( .fnstart            )
11062306a36Sopenharmony_ci4:	tst	r2, #8
11162306a36Sopenharmony_ci	stmiane	ip!, {r1, r3}
11262306a36Sopenharmony_ci	tst	r2, #4
11362306a36Sopenharmony_ci	strne	r1, [ip], #4
11462306a36Sopenharmony_ci/*
11562306a36Sopenharmony_ci * When we get here, we've got less than 4 bytes to set.  We
11662306a36Sopenharmony_ci * may have an unaligned pointer as well.
11762306a36Sopenharmony_ci */
11862306a36Sopenharmony_ci5:	tst	r2, #2
11962306a36Sopenharmony_ci	strbne	r1, [ip], #1
12062306a36Sopenharmony_ci	strbne	r1, [ip], #1
12162306a36Sopenharmony_ci	tst	r2, #1
12262306a36Sopenharmony_ci	strbne	r1, [ip], #1
12362306a36Sopenharmony_ci	ret	lr
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci6:	subs	r2, r2, #4		@ 1 do we have enough
12662306a36Sopenharmony_ci	blt	5b			@ 1 bytes to align with?
12762306a36Sopenharmony_ci	cmp	r3, #2			@ 1
12862306a36Sopenharmony_ci	strblt	r1, [ip], #1		@ 1
12962306a36Sopenharmony_ci	strble	r1, [ip], #1		@ 1
13062306a36Sopenharmony_ci	strb	r1, [ip], #1		@ 1
13162306a36Sopenharmony_ci	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
13262306a36Sopenharmony_ci	b	1b
13362306a36Sopenharmony_ciUNWIND( .fnend   )
13462306a36Sopenharmony_ciENDPROC(memset)
13562306a36Sopenharmony_ciENDPROC(mmioset)
13662306a36Sopenharmony_ciENDPROC(__memset)
13762306a36Sopenharmony_ci
/*
 * __memset32 - entry point that duplicates r1 into r3 and then runs
 * straight on into __memset64.
 *
 * There is deliberately no return or branch here: execution falls
 * through, so ENTRY(__memset64) must stay immediately after this block.
 *
 * NOTE(review): presumably r0 = destination, r1 = 32-bit fill value and
 * r2 = length in bytes, as consumed by the code __memset64 jumps to -
 * confirm against the C prototype.
 */
ENTRY(__memset32)
UNWIND( .fnstart         )
	mov	r3, r1			@ copy r1 to r3 and fall into memset64
UNWIND( .fnend   )
ENDPROC(__memset32)
/*
 * __memset64 - fill memory with an alternating pair of words.
 *
 * Copies r0 to ip so that r0 is left untouched, then branches back to
 * numeric label 7 inside the memset body earlier in this file.  That
 * code treats r2 as a byte count and stores r1 and r3 alternately,
 * r1 at the lower address.
 *
 * "7b" resolves to the nearest preceding definition of label 7, so no
 * other "7:" may be added between that label and this branch.
 *
 * NOTE(review): the code at 7 uses word and multi-word stores without
 * an alignment fix-up, so r0 is presumably required to be suitably
 * aligned by the caller - confirm.
 */
ENTRY(__memset64)
UNWIND( .fnstart         )
	mov	ip, r0			@ preserve r0 as return value
	b	7b			@ jump into the middle of memset
UNWIND( .fnend   )
ENDPROC(__memset64)
149