/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2021 Loongson Technology Corporation Limited
 */

#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/export.h>
#include <asm/regdef.h>

/*
 * memcpy / __memcpy
 *
 * Dispatcher only: a single branch that hands a0/a1/a2 through untouched
 * to one of the two implementations below. The default instruction
 * branches to __memcpy_generic; the ALTERNATIVE replacement branches to
 * __memcpy_fast on CPUs that have CPU_FEATURE_UAL.
 */
SYM_FUNC_START_WEAK(memcpy)
SYM_FUNC_START_ALIAS(__memcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memcpy_generic", \
			"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)

EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Byte-at-a-time copy that only issues ld.b/st.b, so it never performs a
 * multi-byte (potentially unaligned) access. Returns dst in a0.
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0			/* keep dst for the return value */
	beqz	a2, 2f			/* n == 0: nothing to copy */

1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	/* signed compare: loop while the remaining count is > 0 */
	bgt	a2, zero, 1b

2:	move	a0, a3			/* return the original dst */
	jr	ra
SYM_FUNC_END(__memcpy_generic)

/*
 * __memcpy_small: copy n bytes for 0 <= n <= 8 through a jump table.
 *
 * a0: dst
 * a1: src
 * a2: n (must be <= 8; __memcpy_fast only branches here when n < 9)
 *
 * Every case below starts on a 32-byte boundary (.align 5), so the code
 * for a given n sits at (case 0) + n * 32. The function itself is
 * 32-byte aligned and "pcaddi t0, 8" yields PC + (8 << 2) = PC + 32,
 * which is the address of case 0. Adding n << 5 selects the case.
 *
 * The multi-byte loads/stores may be unaligned. a0 is never written, so
 * dst is returned unchanged.
 */
	.align	5
SYM_FUNC_START_NOALIGN(__memcpy_small)
	pcaddi	t0, 8			/* t0 = address of case 0 */
	slli.d	a2, a2, 5		/* a2 = n * 32 (one table slot per n) */
	add.d	t0, t0, a2
	jr	t0

	/* n == 0 */
	.align	5
0:	jr	ra

	/* n == 1 */
	.align	5
1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	jr	ra

	/* n == 2 */
	.align	5
2:	ld.h	t0, a1, 0
	st.h	t0, a0, 0
	jr	ra

	/* n == 3: 2 bytes + 1 byte */
	.align	5
3:	ld.h	t0, a1, 0
	ld.b	t1, a1, 2
	st.h	t0, a0, 0
	st.b	t1, a0, 2
	jr	ra

	/* n == 4 */
	.align	5
4:	ld.w	t0, a1, 0
	st.w	t0, a0, 0
	jr	ra

	/* n == 5: 4 bytes + 1 byte */
	.align	5
5:	ld.w	t0, a1, 0
	ld.b	t1, a1, 4
	st.w	t0, a0, 0
	st.b	t1, a0, 4
	jr	ra

	/* n == 6: 4 bytes + 2 bytes */
	.align	5
6:	ld.w	t0, a1, 0
	ld.h	t1, a1, 4
	st.w	t0, a0, 0
	st.h	t1, a0, 4
	jr	ra

	/* n == 7: two 4-byte words at offsets 0 and 3, overlapping by one byte */
	.align	5
7:	ld.w	t0, a1, 0
	ld.w	t1, a1, 3
	st.w	t0, a0, 0
	st.w	t1, a0, 3
	jr	ra

	/* n == 8 */
	.align	5
8:	ld.d	t0, a1, 0
	st.d	t0, a0, 0
	jr	ra
SYM_FUNC_END(__memcpy_small)

/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Copy using 8-byte accesses; the source side (and the head/tail stores)
 * may be unaligned. Lengths below 9 are handed to __memcpy_small.
 *
 * Register use once n >= 9:
 *   a3: src + n (end of source)
 *   a2: dst + n (end of destination)
 *   a6: first 8 bytes of the source, stored to dst at the very end
 *   a7: last 8 bytes of the source, stored to dst + n - 8 at the very end
 *   a1: source cursor
 *   a5: destination cursor, 8-byte aligned
 *   a4: scratch limit (a3 minus the chunk size being tested)
 *
 * The head and tail are loaded before any store and written last. They
 * cover the 1..8 bytes skipped to align the destination and whatever
 * (at most 8 bytes) the chunked copies leave at the end, overlapping
 * bytes that were already copied where necessary.
 *
 * a0 is never written, so dst is returned unchanged.
 */
SYM_FUNC_START(__memcpy_fast)
	sltui	t0, a2, 9		/* t0 = (n < 9) */
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2		/* a3 = src + n */
	add.d	a2, a0, a2		/* a2 = dst + n */
	ld.d	a6, a1, 0		/* head: first 8 source bytes */
	ld.d	a7, a3, -8		/* tail: last 8 source bytes */

	/* align up destination address */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8		/* t0 = 8 - (dst & 7), in 1..8 */
	add.d	a1, a1, t0		/* advance source by the same amount */
	add.d	a5, a0, t0		/* a5 = next 8-byte boundary above dst */

	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64		/* 64 bytes or fewer remain: skip loop */

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	addi.d	a1, a1, 64
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	st.d	t4, a5, 32
	st.d	t5, a5, 40
	st.d	t6, a5, 48
	st.d	t7, a5, 56
	addi.d	a5, a5, 64
	bltu	a1, a4, .Lloop64	/* loop while more than 64 bytes remain */

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32		/* 32 bytes or fewer remain */
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	addi.d	a1, a1, 32
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	addi.d	a5, a5, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16		/* 16 bytes or fewer remain */
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	addi.d	a1, a1, 16
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	addi.d	a5, a5, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8		/* 8 bytes or fewer remain: tail covers them */
	ld.d	t0, a1, 0
	st.d	t0, a5, 0

.Llt8:
	/* write the saved head and tail last; they may overlap copied data */
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__memcpy_fast)