/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/linkage.h>

/*
 * GET_FRONT_BITS rx y
 * Shift \rx by \y bits: logical right on little-endian (__cskyLE__),
 * logical left otherwise.  Used on the previously loaded source word to
 * keep the bytes that belong at the start of the next destination word.
 */
.macro	GET_FRONT_BITS rx y
#ifdef	__cskyLE__
	lsri	\rx, \y
#else
	lsli	\rx, \y
#endif
.endm

/*
 * GET_AFTER_BITS rx y
 * Shift \rx by \y bits in the opposite direction to GET_FRONT_BITS.
 * Used on the newly loaded source word to keep the bytes that complete
 * the destination word.
 */
.macro	GET_AFTER_BITS rx y
#ifdef	__cskyLE__
	lsli	\rx, \y
#else
	lsri	\rx, \y
#endif
.endm

/*
 * void *memcpy(void *dest, const void *src, size_t n);
 *
 * Register usage as seen in this routine:
 *   r2  dest on entry; never written, so it still holds dest at rts
 *   r3  src cursor
 *   r4  bytes remaining
 *   r7  dest cursor (copy of r2)
 *   r1, r5, r6  scratch
 *   r8-r11      used by the 16-byte loops; saved on the stack before use
 *               and restored afterwards
 *
 * Strategy:
 *   - n < 4, or |dest - src| < n: plain forward byte copy.
 *   - dest unaligned: copy bytes until dest is word aligned.
 *   - dest and src both word aligned: word copy, 16 bytes at a time when
 *     at least 16 bytes remain.
 *   - dest aligned, src misaligned by 1, 2 or 3: read aligned words from
 *     src and merge neighbouring words with shifts so that every store to
 *     dest is an aligned word.  In these paths r3 runs ahead of the real
 *     source position and must be rewound before falling back to the
 *     byte loop.
 */
ENTRY(memcpy)
	mov	r7, r2
	cmplti	r4, 4
	bt	.L_copy_by_byte
	mov	r6, r2
	andi	r6, 3
	cmpnei	r6, 0
	jbt	.L_dest_not_aligned
	mov	r6, r3
	andi	r6, 3
	cmpnei	r6, 0
	jbt	.L_dest_aligned_but_src_not_aligned
.L0:
	cmplti	r4, 16
	jbt	.L_aligned_and_len_less_16bytes
	subi	sp, 8
	stw	r8, (sp, 0)
.L_aligned_and_len_larger_16bytes:
	ldw	r1, (r3, 0)
	ldw	r5, (r3, 4)
	ldw	r8, (r3, 8)
	stw	r1, (r7, 0)
	ldw	r1, (r3, 12)
	stw	r5, (r7, 4)
	stw	r8, (r7, 8)
	stw	r1, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L_aligned_and_len_larger_16bytes
	ldw	r8, (sp, 0)
	addi	sp, 8
	cmpnei	r4, 0
	jbf	.L_return

.L_aligned_and_len_less_16bytes:
	cmplti	r4, 4
	bt	.L_copy_by_byte
.L1:
	ldw	r1, (r3, 0)
	stw	r1, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	jbf	.L1
	br	.L_copy_by_byte

.L_return:
	rts

.L_copy_by_byte:			/* byte-at-a-time tail / fallback */
	cmpnei	r4, 0
	jbf	.L_return
.L4:
	ldb	r1, (r3, 0)
	stb	r1, (r7, 0)
	addi	r3, 1
	addi	r7, 1
	decne	r4
	jbt	.L4
	rts

/*
 * If dest is not aligned, copy single bytes until it is.
 * After that, check whether src is aligned.
 */
.L_dest_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4			/* |dest - src| < n: byte copy */
	bt	.L_copy_by_byte
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	bf	.L_copy_by_byte
	mov	r5, r6			/* r5 = dest & 3 */
.L5:
	ldb	r1, (r3, 0)		/* copy bytes until dest is aligned */
	stb	r1, (r7, 0)
	addi	r5, 1
	subi	r4, 1
	addi	r3, 1
	addi	r7, 1
	cmpnei	r5, 4
	jbt	.L5
	cmplti	r4, 4
	jbt	.L_copy_by_byte
	mov	r6, r3			/* check whether src is aligned */
	andi	r6, 3
	cmpnei	r6, 0
	jbf	.L0

/* Dispatch on the src misalignment held in r6: 1, 2 or 3 bytes. */
.L_dest_aligned_but_src_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4			/* |dest - src| < n: byte copy */
	bt	.L_copy_by_byte
	bclri	r3, 0			/* round src down to a word boundary */
	bclri	r3, 1
	ldw	r1, (r3, 0)		/* r1 = word containing the first src bytes */
	addi	r3, 4			/* r3 now runs ahead of the real src position */
	cmpnei	r6, 2
	bf	.L_dest_aligned_but_src_not_aligned_2bytes
	cmpnei	r6, 3
	bf	.L_dest_aligned_but_src_not_aligned_3bytes

.L_dest_aligned_but_src_not_aligned_1byte:
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	/*
	 * r3 is already 3 bytes past the real source position here, so the
	 * byte-copy fallback must go through the rewind below rather than
	 * straight to .L_copy_by_byte.
	 */
	bf	.L_1byte_rewind_src
	cmplti	r4, 16
	bf	.L11
.L10:					/* fewer than 16 bytes remain */
	GET_FRONT_BITS r1 8
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6
	GET_AFTER_BITS r6 24
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L10
.L_1byte_rewind_src:
	subi	r3, 3			/* back to the real source position */
	br	.L_copy_by_byte
.L11:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L12:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 8		/* shift direction depends on endianness */
	mov	r10, r5
	GET_AFTER_BITS r5 24
	or	r5, r1

	GET_FRONT_BITS r10 8
	mov	r1, r11
	GET_AFTER_BITS r11 24
	or	r11, r10

	GET_FRONT_BITS r1 8
	mov	r10, r8
	GET_AFTER_BITS r8 24
	or	r8, r1

	GET_FRONT_BITS r10 8
	mov	r1, r9
	GET_AFTER_BITS r9 24
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L12
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L10
	subi	r3, 3			/* back to the real source position */
	br	.L_copy_by_byte

.L_dest_aligned_but_src_not_aligned_2bytes:
	cmplti	r4, 16
	bf	.L21
.L20:					/* fewer than 16 bytes remain */
	GET_FRONT_BITS r1 16
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6
	GET_AFTER_BITS r6 16
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2			/* back to the real source position */
	br	.L_copy_by_byte

.L21:					/* at least 16 bytes remain */
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)

.L22:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 16
	mov	r10, r5
	GET_AFTER_BITS r5 16
	or	r5, r1

	GET_FRONT_BITS r10 16
	mov	r1, r11
	GET_AFTER_BITS r11 16
	or	r11, r10

	GET_FRONT_BITS r1 16
	mov	r10, r8
	GET_AFTER_BITS r8 16
	or	r8, r1

	GET_FRONT_BITS r10 16
	mov	r1, r9
	GET_AFTER_BITS r9 16
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L22
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2			/* back to the real source position */
	br	.L_copy_by_byte


.L_dest_aligned_but_src_not_aligned_3bytes:
	cmplti	r4, 16
	bf	.L31
.L30:					/* fewer than 16 bytes remain */
	GET_FRONT_BITS r1 24
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6
	GET_AFTER_BITS r6 8
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1			/* back to the real source position */
	br	.L_copy_by_byte
.L31:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L32:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 24
	mov	r10, r5
	GET_AFTER_BITS r5 8
	or	r5, r1

	GET_FRONT_BITS r10 24
	mov	r1, r11
	GET_AFTER_BITS r11 8
	or	r11, r10

	GET_FRONT_BITS r1 24
	mov	r10, r8
	GET_AFTER_BITS r8 8
	or	r8, r1

	GET_FRONT_BITS r10 24
	mov	r1, r9
	GET_AFTER_BITS r9 8
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L32
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1			/* back to the real source position */
	br	.L_copy_by_byte