/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

.section .noinstr.text, "ax"

/*
 * memcpy / __memcpy entry stub.
 *
 * The body is a single branch, so a0 (dst), a1 (src) and a2 (n) reach the
 * selected implementation untouched and that implementation returns
 * directly to the original caller through ra.
 *
 * NOTE(review): ALTERNATIVE comes from <asm/alternative-asm.h>, which is not
 * visible here. Presumably the first string is the default instruction and
 * the second one replaces it when CPU_FEATURE_UAL is present - confirm
 * against that header.
 */
SYM_FUNC_START(memcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memcpy_generic", \
			"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
SYM_FUNC_ALIAS(__memcpy, memcpy)

EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

_ASM_NOKPROBE(memcpy)
_ASM_NOKPROBE(__memcpy)

/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Byte-at-a-time copy; only ld.b/st.b are used, so no access is wider than
 * one byte. Returns the original dst in a0.
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0			/* keep dst for the return value */
	beqz	a2, 2f			/* n == 0: nothing to copy */

1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b		/* loop while bytes remain */

2:	move	a0, a3			/* return dst */
	jr	ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)

/*
 * __memcpy_small: copy 0..8 bytes through a computed jump.
 *
 * a0: dst
 * a1: src
 * a2: n, must be in 0..8 (the only caller in this file, __memcpy_fast,
 *     branches here only when n < 9)
 *
 * Layout contract: the function starts on a 32-byte boundary and each
 * handler 0..8 starts on its own `.align 5` boundary, so handler k lives at
 * (address of handler 0) + k * 32. The dispatch below relies on this:
 * pcaddi adds 8 instructions (32 bytes) to the PC of the first instruction,
 * which is the address of handler 0, and n is scaled by 32 (slli.d by 5).
 * Do not let the dispatch sequence or any handler grow past 32 bytes.
 *
 * Handlers for odd sizes use accesses that are not naturally aligned
 * relative to each other (e.g. two overlapping ld.w at offsets 0 and 3 for
 * n == 7). a0 is never written, so dst is returned unchanged.
 */
	.align	5
SYM_FUNC_START_NOALIGN(__memcpy_small)
	pcaddi	t0, 8			/* t0 = address of handler 0 */
	slli.d	a2, a2, 5		/* a2 = n * 32 (slot size) */
	add.d	t0, t0, a2
	jr	t0			/* jump to handler n */

	.align	5
0:	jr	ra

	.align	5
1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	jr	ra

	.align	5
2:	ld.h	t0, a1, 0
	st.h	t0, a0, 0
	jr	ra

	.align	5
3:	ld.h	t0, a1, 0
	ld.b	t1, a1, 2
	st.h	t0, a0, 0
	st.b	t1, a0, 2
	jr	ra

	.align	5
4:	ld.w	t0, a1, 0
	st.w	t0, a0, 0
	jr	ra

	.align	5
5:	ld.w	t0, a1, 0
	ld.b	t1, a1, 4
	st.w	t0, a0, 0
	st.b	t1, a0, 4
	jr	ra

	.align	5
6:	ld.w	t0, a1, 0
	ld.h	t1, a1, 4
	st.w	t0, a0, 0
	st.h	t1, a0, 4
	jr	ra

	.align	5
7:	ld.w	t0, a1, 0		/* bytes 0..3 */
	ld.w	t1, a1, 3		/* bytes 3..6, overlaps byte 3 */
	st.w	t0, a0, 0
	st.w	t1, a0, 3
	jr	ra

	.align	5
8:	ld.d	t0, a1, 0
	st.d	t0, a0, 0
	jr	ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)

/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Strategy for n >= 9:
 *   - Load the first and the last 8 source bytes into a6/a7 before any
 *     store is issued.
 *   - Advance both cursors by 8 - (dst & 7) bytes (1..8), so the destination
 *     cursor a5 is 8-byte aligned; the source cursor a1 may stay unaligned.
 *   - Copy 64-byte chunks, then at most one 32-, one 16- and one 8-byte
 *     chunk, each time only while more than that many bytes remain before
 *     the end of the source.
 *   - Finally store a6 at dst and a7 at dst + n - 8; these two stores cover
 *     the skipped head and whatever (1..8 bytes) is left at the tail.
 *
 * a0 is never written, so dst is returned unchanged.
 */
SYM_FUNC_START(__memcpy_fast)
	sltui	t0, a2, 9		/* t0 = (n < 9), unsigned */
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2		/* a3 = src + n (end of source) */
	add.d	a2, a0, a2		/* a2 = dst + n (end of destination) */
	ld.d	a6, a1, 0		/* first 8 source bytes */
	ld.d	a7, a3, -8		/* last 8 source bytes */

	/* align up destination address */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8		/* t0 = 8 - (dst & 7), in 1..8 */
	add.d	a1, a1, t0
	add.d	a5, a0, t0		/* a5 = 8-byte aligned dst cursor */

	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64		/* 64 or fewer bytes left: skip loop */

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	addi.d	a1, a1, 64
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	st.d	t4, a5, 32
	st.d	t5, a5, 40
	st.d	t6, a5, 48
	st.d	t7, a5, 56
	addi.d	a5, a5, 64
	bltu	a1, a4, .Lloop64

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32		/* 32 or fewer bytes left */
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	addi.d	a1, a1, 32
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	addi.d	a5, a5, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16		/* 16 or fewer bytes left */
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	addi.d	a1, a1, 16
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	addi.d	a5, a5, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8		/* 8 or fewer bytes left */
	ld.d	t0, a1, 0
	st.d	t0, a5, 0

.Llt8:
	/* head and tail, from the values loaded at function entry */
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)