162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/export.h>
762306a36Sopenharmony_ci#include <asm/alternative-asm.h>
862306a36Sopenharmony_ci#include <asm/asm.h>
962306a36Sopenharmony_ci#include <asm/asmmacro.h>
1062306a36Sopenharmony_ci#include <asm/cpu.h>
1162306a36Sopenharmony_ci#include <asm/regdef.h>
1262306a36Sopenharmony_ci
/*
 * fill_to_64 reg
 *
 * Replicate the low byte of \reg into all eight bytes of \reg.
 * Every step copies the already-populated low part into the bits directly
 * above it, doubling the populated width: 8 -> 16 -> 32 -> 64 bits.
 * Only \reg is written; whatever its upper 56 bits held is overwritten.
 */
.macro fill_to_64 reg
	bstrins.d	\reg, \reg, 15, 8	/* bits [15:8]  <- bits [7:0]  */
	bstrins.d	\reg, \reg, 31, 16	/* bits [31:16] <- bits [15:0] */
	bstrins.d	\reg, \reg, 63, 32	/* bits [63:32] <- bits [31:0] */
.endm
1862306a36Sopenharmony_ci
/*
 * Everything from here on is emitted into .noinstr.text
 * ("ax": allocatable, executable).
 */
.section .noinstr.text, "ax"

/*
 * void *memset(void *s, int c, size_t n)
 *
 * Entry stub only. The single branch is chosen through ALTERNATIVE: the
 * first string, a branch to __memset_generic (byte stores only), is the
 * default; the second, a branch to __memset_fast, is the replacement tied
 * to CPU_FEATURE_UAL (hardware unaligned access).
 *
 * Both targets are reached with a plain "b", so a0-a2 and ra are passed
 * through untouched and the selected routine returns directly to the
 * caller of memset. __memset is an alias for the same code.
 */
SYM_FUNC_START(memset)
	ALTERNATIVE	"b __memset_generic", "b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

/* Both names are exported. */
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

/* Both names are registered with _ASM_NOKPROBE. */
_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)
3562306a36Sopenharmony_ci
/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * Byte-at-a-time fill. Only st.b is issued, so no alignment is required
 * of s.
 *
 * In:	a0 = s	destination
 *	a1 = c	fill value; st.b stores only the low 8 bits
 *	a2 = n	byte count, unsigned (size_t)
 * Out:	a0 = s
 * Clobbers: a2, a3
 */
SYM_FUNC_START(__memset_generic)
	move	a3, a0			/* keep s for the return value */
	beqz	a2, 2f			/* n == 0: nothing to store */

1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	/*
	 * n is a size_t, so loop until the count reaches zero. A signed
	 * "bgt a2, zero" would treat a count with bit 63 set as negative
	 * and leave the loop after a single byte.
	 */
	bnez	a2, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)
5662306a36Sopenharmony_ci
/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * Fill routine that uses st.d/st.w/st.h on addresses that need not be
 * naturally aligned; memset branches here only through its
 * CPU_FEATURE_UAL alternative.
 *
 * In:	a0 = s
 *	a1 = c	(the low byte is replicated across the whole register)
 *	a2 = n
 * Out:	a0 = s	(a0 is never written)
 * Clobbers: a1, a2, a3, a4, t0
 *
 * n <= 8:  computed jump into a table of 16-byte code slots, one per length.
 * n >= 9:  one doubleword at s, 8-byte aligned doublewords in chunks of
 *          64/32/16/8 bytes, and one final doubleword ending exactly at
 *          s + n. The head and tail stores may overlap the aligned ones.
 */
SYM_FUNC_START(__memset_fast)
	/* a1 = c repeated in all eight bytes */
	fill_to_64 a1

	/* n < 9 goes through the per-length slot table */
	sltui	t0, a2, 9
	bnez	t0, .Lsmall

	/* head doubleword at s, then a2 = s + n (one past the last byte) */
	st.d	a1, a0, 0
	add.d	a2, a0, a2

	/* a3 = first 8-byte boundary above s; everything below it is set */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0

	/* enter the 64-byte loop only if a3 < end - 64 */
	addi.d	a4, a2, -64
	bgeu	a3, a4, .Ltail64

	/* eight aligned doublewords per iteration */
.Lblock64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lblock64

	/* at most 64 bytes remain from a3: peel off 32 if a3 < end - 32 */
.Ltail64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Ltail32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

	/* peel off 16 if a3 < end - 16 */
.Ltail32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Ltail16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

	/* one more aligned doubleword if a3 < end - 8 */
.Ltail16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Ltail8
	st.d	a1, a3, 0

	/* closing doubleword covers [end - 8, end) */
.Ltail8:
	st.d	a1, a2, -8
	jr	ra

	/*
	 * Small sizes: jump to slot n of the table below.
	 *
	 * pcaddi must stay the first instruction here: it yields its own
	 * address plus 4 instructions (16 bytes), which is where slot 0
	 * starts because this sequence is exactly 4 instructions long and
	 * begins on a 16-byte boundary. Each slot is padded by .align 4 so
	 * that consecutive slots are 16 bytes apart, matching n << 4.
	 */
	.align	4
.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	/* n == 0 */
	.align	4
.Lslot0:
	jr	ra

	/* n == 1 */
	.align	4
.Lslot1:
	st.b	a1, a0, 0
	jr	ra

	/* n == 2 */
	.align	4
.Lslot2:
	st.h	a1, a0, 0
	jr	ra

	/* n == 3: halfword + byte */
	.align	4
.Lslot3:
	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	/* n == 4 */
	.align	4
.Lslot4:
	st.w	a1, a0, 0
	jr	ra

	/* n == 5: word + byte */
	.align	4
.Lslot5:
	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	/* n == 6: word + halfword */
	.align	4
.Lslot6:
	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	/* n == 7: two words overlapping at byte 3 */
	.align	4
.Lslot7:
	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	/* n == 8 */
	.align	4
.Lslot8:
	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)
169