/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2021 Loongson Technology Corporation Limited
 */

#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/export.h>
#include <asm/regdef.h>

SYM_FUNC_START_WEAK(memmove)
SYM_FUNC_START_ALIAS(__memmove)
	blt	a0, a1, __memcpy	/* dst < src, memcpy */
	blt	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
	jr	ra			/* dst == src, return */
SYM_FUNC_END(memmove)
SYM_FUNC_END_ALIAS(__memmove)

EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL(__memmove)

SYM_FUNC_START(__rmemcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __rmemcpy_generic", \
			"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__rmemcpy)

/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0
	beqz	a2, 2f

	/* start from the end of both buffers */
	add.d	a0, a0, a2
	add.d	a1, a1, a2

	/* copy backwards, one byte at a time */
1:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)

/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__rmemcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	/* a3/a2 now point one past the end of src/dst */
	add.d	a3, a1, a2
	add.d	a2, a0, a2

	/*
	 * Save the first and last 8 bytes of src; storing them last
	 * covers the unaligned head and tail.
	 */
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/* round the destination end address down to an 8-byte boundary */
	andi	t1, a2, 7
	sub.d	a3, a3, t1
	sub.d	a5, a2, t1

	addi.d	a4, a1, 64
	bgeu	a4, a3, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	ld.d	t4, a3, -40
	ld.d	t5, a3, -48
	ld.d	t6, a3, -56
	ld.d	t7, a3, -64
	addi.d	a3, a3, -64
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	st.d	t4, a5, -40
	st.d	t5, a5, -48
	st.d	t6, a5, -56
	st.d	t7, a5, -64
	addi.d	a5, a5, -64
	bltu	a4, a3, .Lloop64

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Llt32
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	addi.d	a3, a3, -32
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	addi.d	a5, a5, -32

.Llt32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Llt16
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	addi.d	a3, a3, -16
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	addi.d	a5, a5, -16

.Llt16:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Llt8
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

.Llt8:
	/* store the saved first/last 8 bytes (handles the unaligned head and tail) */
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)