/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/export.h>
762306a36Sopenharmony_ci#include <asm/alternative-asm.h>
862306a36Sopenharmony_ci#include <asm/asm.h>
962306a36Sopenharmony_ci#include <asm/asmmacro.h>
1062306a36Sopenharmony_ci#include <asm/cpu.h>
1162306a36Sopenharmony_ci#include <asm/regdef.h>
1262306a36Sopenharmony_ci
/* Everything below is emitted into the .noinstr.text section. */
.section .noinstr.text, "ax"

/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Entry stub only: the arguments are passed through untouched to one of
 * two implementations. CPUs flagged with CPU_FEATURE_UAL (hardware
 * unaligned access) branch to __memcpy_fast; the default instruction
 * branches to the byte-wise __memcpy_generic.
 *
 * __memcpy is an alias for the same entry point.
 */
SYM_FUNC_START(memcpy)
	ALTERNATIVE	"b __memcpy_generic", "b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
SYM_FUNC_ALIAS(__memcpy, memcpy)

/* Export and kprobe annotations, grouped per symbol name. */
EXPORT_SYMBOL(memcpy)
_ASM_NOKPROBE(memcpy)

EXPORT_SYMBOL(__memcpy)
_ASM_NOKPROBE(__memcpy)
2962306a36Sopenharmony_ci
/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * Copies n bytes from src to dst one byte at a time, so it places no
 * alignment requirement on either pointer.
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Returns: a0 = the original dst
 * Clobbers: a1, a2, a3, t0
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0			/* keep dst for the return value */
	beqz	a2, 2f			/* n == 0: nothing to copy */

1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	/*
	 * n is an unsigned size_t: loop until the count reaches zero
	 * instead of using a signed "greater than zero" comparison.
	 */
	bnez	a2, 1b

2:	move	a0, a3			/* return the original dst */
	jr	ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)
5262306a36Sopenharmony_ci
/*
 * __memcpy_small - copy 0..8 bytes via a computed jump
 *
 * a0: dst (never modified, so it is also the return value)
 * a1: src
 * a2: n, expected to be in 0..8 (__memcpy_fast branches here only
 *     when n < 9)
 *
 * Clobbers: a2, t0, t1
 *
 * Layout contract: the function entry and every handler below start on
 * a 32-byte boundary (.align 5), and the handlers appear in order of n.
 * "pcaddi t0, 8" is the first instruction, so t0 = entry + (8 << 2),
 * i.e. the address of the n == 0 handler; adding n * 32 then selects
 * the handler for n. Each handler must therefore fit in 32 bytes.
 *
 * The handlers use halfword/word/doubleword accesses at arbitrary
 * addresses, so they rely on hardware unaligned access support.
 */
	.align	5
SYM_FUNC_START_NOALIGN(__memcpy_small)
	pcaddi	t0, 8			/* t0 = address of .Lsmall_0 */
	slli.d	a2, a2, 5		/* a2 = n * 32 = offset of handler n */
	add.d	t0, t0, a2
	jr	t0			/* jump to handler n */

	.align	5
.Lsmall_0:				/* n == 0: nothing to do */
	jr	ra

	.align	5
.Lsmall_1:				/* n == 1: one byte */
	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	jr	ra

	.align	5
.Lsmall_2:				/* n == 2: one halfword */
	ld.h	t0, a1, 0
	st.h	t0, a0, 0
	jr	ra

	.align	5
.Lsmall_3:				/* n == 3: halfword + byte */
	ld.h	t0, a1, 0
	ld.b	t1, a1, 2
	st.h	t0, a0, 0
	st.b	t1, a0, 2
	jr	ra

	.align	5
.Lsmall_4:				/* n == 4: one word */
	ld.w	t0, a1, 0
	st.w	t0, a0, 0
	jr	ra

	.align	5
.Lsmall_5:				/* n == 5: word + byte */
	ld.w	t0, a1, 0
	ld.b	t1, a1, 4
	st.w	t0, a0, 0
	st.b	t1, a0, 4
	jr	ra

	.align	5
.Lsmall_6:				/* n == 6: word + halfword */
	ld.w	t0, a1, 0
	ld.h	t1, a1, 4
	st.w	t0, a0, 0
	st.h	t1, a0, 4
	jr	ra

	.align	5
.Lsmall_7:				/* n == 7: two words overlapping at byte 3 */
	ld.w	t0, a1, 0
	ld.w	t1, a1, 3
	st.w	t0, a0, 0
	st.w	t1, a0, 3
	jr	ra

	.align	5
.Lsmall_8:				/* n == 8: one doubleword */
	ld.d	t0, a1, 0
	st.d	t0, a0, 0
	jr	ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)
11262306a36Sopenharmony_ci
/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * Doubleword copy for CPUs with hardware unaligned access.
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Returns: a0 = dst (a0 is never written)
 * Clobbers: a1-a7, t0-t7
 *
 * Strategy for n >= 9:
 *   - the first and last 8 source bytes are loaded up front (a6, a7)
 *     and stored last, at dst and dst + n - 8;
 *   - the bulk copy starts at the first 8-byte boundary above dst and
 *     proceeds in 64/32/16/8-byte steps while more than that many
 *     source bytes remain;
 *   - whatever the bulk copy leaves over (at most 8 bytes at either
 *     end) is covered by the head/tail stores.
 */
SYM_FUNC_START(__memcpy_fast)
	/* n < 9 is handled by the jump-table copy */
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2		/* a3 = src + n (source end) */
	ld.d	a6, a1, 0		/* a6 = first 8 source bytes */
	ld.d	a7, a3, -8		/* a7 = last 8 source bytes */
	add.d	a2, a0, a2		/* a2 = dst + n (destination end) */

	/*
	 * t0 = 8 - (dst & 7), in 1..8: the step that brings the
	 * destination cursor (a5) to an 8-byte boundary. The source
	 * cursor (a1) advances by the same amount.
	 */
	andi	t1, a0, 7
	addi.d	t0, zero, 8
	sub.d	t0, t0, t1
	add.d	a5, a0, t0
	add.d	a1, a1, t0

	/* skip the main loop unless more than 64 source bytes remain */
	addi.d	a4, a3, -64
	bgeu	a1, a4, .Lunder64

	/* main loop: 64 bytes per iteration, all loads before all stores */
.Lblock64:
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	st.d	t4, a5, 32
	st.d	t5, a5, 40
	st.d	t6, a5, 48
	st.d	t7, a5, 56
	addi.d	a1, a1, 64
	addi.d	a5, a5, 64
	bltu	a1, a4, .Lblock64

	/* at most 64 bytes left: copy 32 if more than 32 remain */
.Lunder64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Lunder32
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	addi.d	a1, a1, 32
	addi.d	a5, a5, 32

	/* at most 32 bytes left: copy 16 if more than 16 remain */
.Lunder32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Lunder16
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	addi.d	a1, a1, 16
	addi.d	a5, a5, 16

	/* at most 16 bytes left: copy 8 if more than 8 remain */
.Lunder16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Lhead_tail
	ld.d	t0, a1, 0
	st.d	t0, a5, 0

	/*
	 * Finish with the saved head and tail doublewords: the head
	 * covers the bytes below the aligned start, the tail covers the
	 * final (at most 8) bytes not reached above.
	 */
.Lhead_tail:
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* a0 still holds dst */
	jr	ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)
200