/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2021 Loongson Technology Corporation Limited
 */

#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/export.h>
#include <asm/regdef.h>

/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Picks the copy direction from the relative position of the two
 * pointers so that overlapping regions are handled:
 *   dst < src  -> forward copy  (__memcpy, defined elsewhere)
 *   dst > src  -> backward copy (__rmemcpy, below)
 *   dst == src -> nothing to do; a0 still holds dst, so it is returned as is.
 *
 * Both targets are reached with a plain branch (tail call), so they return
 * directly to memmove's caller.
 */
SYM_FUNC_START_WEAK(memmove)
SYM_FUNC_START_ALIAS(__memmove)
	blt	a0, a1, __memcpy	/* dst < src, memcpy */
	blt	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
	jr	ra			/* dst == src, return */
SYM_FUNC_END(memmove)
SYM_FUNC_END_ALIAS(__memmove)

EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL(__memmove)

/*
 * void *__rmemcpy(void *dst, const void *src, size_t n)
 *
 * Backward (high address to low address) copy entry point. It only
 * selects an implementation and branches to it.
 */
SYM_FUNC_START(__rmemcpy)
	/*
	 * Some CPUs support hardware unaligned access: default to the
	 * byte-wise generic routine and use the word-wise fast routine
	 * when CPU_FEATURE_UAL is available.
	 */
	ALTERNATIVE	"b __rmemcpy_generic", \
			"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__rmemcpy)

/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Copies n bytes one at a time, starting from the last byte and moving
 * towards the first. Returns dst in a0.
 */
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0			/* keep dst for the return value */
	beqz	a2, 2f			/* n == 0: nothing to copy */

	/* point a0/a1 one past the end of dst/src */
	add.d	a0, a0, a2
	add.d	a1, a1, a2

1:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b		/* loop while bytes remain */

2:	move	a0, a3			/* return dst */
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)

/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Backward copy using 8-byte loads and stores at addresses that are not
 * necessarily aligned. Sizes below 9 bytes are handed to __memcpy_small
 * (defined elsewhere). a0 is never modified on this path, so dst is
 * returned unchanged.
 *
 * Register use after the prologue:
 *   a1: src start (loop bound base)
 *   a2: dst end   (dst + n)
 *   a3: src cursor, moves downwards
 *   a4: src + chunk size, lower bound for the current chunk size
 *   a5: dst cursor, moves downwards
 *   a6: first 8 bytes of src, a7: last 8 bytes of src
 */
SYM_FUNC_START(__rmemcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small	/* n < 9 */

	add.d	a3, a1, a2		/* a3 = src end */
	add.d	a2, a0, a2		/* a2 = dst end */
	/*
	 * Read the first and last 8 source bytes before any store is
	 * issued; they are written back at .Llt8 to cover the head and
	 * tail that the 8-byte chunk copy below does not reach.
	 */
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/*
	 * Round the destination end down to an 8-byte boundary and move
	 * the source cursor back by the same amount, so every store in
	 * the chunk copy lands on an 8-byte aligned destination address.
	 */
	andi	t1, a2, 7
	sub.d	a3, a3, t1
	sub.d	a5, a2, t1

	addi.d	a4, a1, 64
	bgeu	a4, a3, .Llt64		/* 64 bytes or fewer left above src */

	/* copy 64 bytes at a time; all loads precede the stores */
.Lloop64:
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	ld.d	t4, a3, -40
	ld.d	t5, a3, -48
	ld.d	t6, a3, -56
	ld.d	t7, a3, -64
	addi.d	a3, a3, -64
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	st.d	t4, a5, -40
	st.d	t5, a5, -48
	st.d	t6, a5, -56
	st.d	t7, a5, -64
	addi.d	a5, a5, -64
	bltu	a4, a3, .Lloop64

	/* copy the remaining bytes in 32-, 16- and 8-byte steps */
.Llt64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Llt32
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	addi.d	a3, a3, -32
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	addi.d	a5, a5, -32

.Llt32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Llt16
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	addi.d	a3, a3, -16
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	addi.d	a5, a5, -16

.Llt16:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Llt8
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

	/* write the saved head and tail words: dst[0..8) and dst[n-8..n) */
.Llt8:
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)