/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

/* Everything below is emitted into .noinstr.text (allocatable, executable). */
.section .noinstr.text, "ax"

/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Picks the copy direction from the relative order of the two pointers
 * and tail-branches to the matching routine with a0-a2 untouched:
 *   dst < src  -> __memcpy   (defined outside this file)
 *   dst > src  -> __rmemcpy  (copies from the end of the buffers down)
 *   dst == src -> nothing to do; a0 still holds dst as the return value
 *
 * Addresses are unsigned quantities, so the ordering test uses bltu,
 * matching the bgeu/bltu pointer comparisons used in __rmemcpy_fast.
 */
SYM_FUNC_START(memmove)
	bltu	a0, a1, __memcpy	/* dst < src, memcpy */
	bltu	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
	jr	ra			/* dst == src, return */
SYM_FUNC_END(memmove)
SYM_FUNC_ALIAS(__memmove, memmove)

EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL(__memmove)

_ASM_NOKPROBE(memmove)
_ASM_NOKPROBE(__memmove)

/*
 * void *__rmemcpy(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Entry point for the backward (high-to-low address) copy.  It consists
 * of a single branch selected through ALTERNATIVE: the default is the
 * byte-wise __rmemcpy_generic; the CPU_FEATURE_UAL variant is
 * __rmemcpy_fast, which issues unaligned 8-byte accesses.
 */
SYM_FUNC_START(__rmemcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __rmemcpy_generic", \
			"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__rmemcpy)
_ASM_NOKPROBE(__rmemcpy)

/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Copies n bytes one at a time, starting with the last byte and working
 * towards the first.  Returns the original dst in a0.
 *
 * Register use:
 *   a3: saved dst (return value)
 *   a0: one past the next destination byte to write
 *   a1: one past the next source byte to read
 *   a2: bytes still to copy
 *   t0: byte in flight
 */
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0			/* keep dst for the return value */
	beqz	a2, 2f			/* n == 0: nothing to copy */

	/* point both cursors one past the end of their buffers */
	add.d	a0, a0, a2
	add.d	a1, a1, a2

1:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	/*
	 * n is an unsigned size_t that counts down to exactly zero, so
	 * test for non-zero rather than for "signed greater than zero".
	 */
	bnez	a2, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)
_ASM_NOKPROBE(__rmemcpy_generic)

/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Backward copy using 8-byte loads and stores.  Source accesses (and the
 * first/last destination stores) may be unaligned.  a0 is never written,
 * so it still holds dst on return.
 *
 * Register use after the small-size check:
 *   a1: src start (unchanged; used as the lower bound)
 *   a2: dst end   (dst + n)
 *   a3: src cursor, starts at src end minus the dst-end misalignment
 *   a4: src + {64,32,16,8}, the threshold for the current step
 *   a5: dst cursor, starts at dst end rounded down to 8 bytes
 *   a6: first 8 bytes of src, a7: last 8 bytes of src
 *   t0-t7: data in flight
 *
 * a6/a7 are loaded before any store is issued and written back last;
 * they cover the unaligned tail and whatever (at most 8 bytes) is left
 * at the head once the stepped copies are done.
 */
SYM_FUNC_START(__rmemcpy_fast)
	/*
	 * n < 9: hand over to __memcpy_small (defined outside this file).
	 * NOTE(review): this path is reached with overlapping buffers, so
	 * it relies on __memcpy_small being overlap-safe - confirm there.
	 */
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2		/* a3 = src end */
	add.d	a2, a0, a2		/* a2 = dst end */
	ld.d	a6, a1, 0		/* head: first 8 source bytes */
	ld.d	a7, a3, -8		/* tail: last 8 source bytes */

	/* round the destination end down to an 8-byte boundary */
	andi	t1, a2, 7
	sub.d	a3, a3, t1		/* move the src cursor by the same amount */
	sub.d	a5, a2, t1

	addi.d	a4, a1, 64
	bgeu	a4, a3, .Llt64		/* 64 bytes or fewer left above src */

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	ld.d	t4, a3, -40
	ld.d	t5, a3, -48
	ld.d	t6, a3, -56
	ld.d	t7, a3, -64
	addi.d	a3, a3, -64
	/* all eight loads are issued before the first store of the group */
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	st.d	t4, a5, -40
	st.d	t5, a5, -48
	st.d	t6, a5, -56
	st.d	t7, a5, -64
	addi.d	a5, a5, -64
	bltu	a4, a3, .Lloop64	/* loop while more than 64 bytes remain */

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Llt32
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	addi.d	a3, a3, -32
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	addi.d	a5, a5, -32

.Llt32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Llt16
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	addi.d	a3, a3, -16
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	addi.d	a5, a5, -16

.Llt16:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Llt8
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

.Llt8:
	/* write the head and tail captured on entry */
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)