/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE	0
#endif

/*
 * Endian-dependent helpers for the unaligned VMX path: big-endian builds
 * use lvsl and pass the two source vectors to vperm in order; otherwise
 * lvsr is used and the two source vectors are swapped.
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

/*
 * memcpy_power7
 *
 * In:	r3 = destination pointer
 *	r4 = source pointer
 *	r5 = length in bytes
 * Out:	r3 = the destination pointer that was passed in. It is parked in
 *	the STK_REG(R31) slot on entry and reloaded before returning. On
 *	the VMX paths the return is a tail call to exit_vmx_ops, which is
 *	defined elsewhere (presumably it hands r3 back unchanged - confirm).
 *
 * Path selection:
 *	length < 16			-> .Lshort_copy
 *	length > 4096 and the
 *	CPU_FTR_ALTIVEC section is live	-> .Lvmx_copy
 *	everything else			-> .Lnonvmx_copy
 *
 * Convention used throughout: "mtocrf 0x01,rX" places the low four bits
 * of rX in cr7, after which
 *	cr7*4+0 tests the 8s bit, cr7*4+1 the 4s bit,
 *	cr7*4+2 tests the 2s bit, cr7*4+3 the 1s bit,
 * so each "bf cr7*4+n,label" skips one power-of-two sized chunk.
 *
 * Numeric local labels (1:, 2:, ...) are reused in each section; a
 * reference such as 1f/8b always binds to the nearest definition.
 */
_GLOBAL(memcpy_power7)
	cmpldi	r5,16
	cmpldi	cr1,r5,4096
	/* Park the original destination below the stack pointer for the return value */
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	blt	.Lshort_copy

#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
	bgt	cr1, .Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif

.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to align the source */

	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	addi	r4,r4,1
	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	/*
	 * At least one full 128B line remains: open a stack frame and free
	 * up non-volatile registers for the unrolled loop.
	 * NOTE(review): r22 is saved and restored here but is not otherwise
	 * referenced in the code visible in this file.
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	/* LR goes to offset 16 of the caller's frame; this path never reloads it */
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7			/* number of whole 128B lines */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
	ld	r0,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ld	r8,24(r4)
	ld	r9,32(r4)
	ld	r10,40(r4)
	ld	r11,48(r4)
	ld	r12,56(r4)
	ld	r14,64(r4)
	ld	r15,72(r4)
	ld	r16,80(r4)
	ld	r17,88(r4)
	ld	r18,96(r4)
	ld	r19,104(r4)
	ld	r20,112(r4)
	ld	r21,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r6,8(r3)
	std	r7,16(r3)
	std	r8,24(r3)
	std	r9,32(r3)
	std	r10,40(r3)
	std	r11,48(r3)
	std	r12,56(r3)
	std	r14,64(r3)
	std	r15,72(r3)
	std	r16,80(r3)
	std	r17,88(r3)
	std	r18,96(r3)
	std	r19,104(r3)
	std	r20,112(r3)
	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* keep only the sub-128B remainder */

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4			/* cr7 now selects 64B/32B/16B chunks */
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
	ld	r0,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ld	r8,24(r4)
	ld	r9,32(r4)
	ld	r10,40(r4)
	ld	r11,48(r4)
	ld	r12,56(r4)
	addi	r4,r4,64
	std	r0,0(r3)
	std	r6,8(r3)
	std	r7,16(r3)
	std	r8,24(r3)
	std	r9,32(r3)
	std	r10,40(r3)
	std	r11,48(r3)
	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
	ld	r0,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ld	r8,24(r4)
	addi	r4,r4,32
	std	r0,0(r3)
	std	r6,8(r3)
	std	r7,16(r3)
	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
	ld	r0,0(r4)
	ld	r6,8(r4)
	addi	r4,r4,16
	std	r0,0(r3)
	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
	lwz	r6,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
	lbz	r0,0(r4)
	stb	r0,0(r3)

	/* Reload the destination saved at entry as the return value */
15:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	blr

	/*
	 * Reached from .Lvmx_copy when enter_vmx_ops returned 0: drop the
	 * frame that was opened for the call and use the integer copy.
	 */
.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
	/*
	 * Save src/len next to the saved dest (all below the current stack
	 * pointer), store LR in the caller's frame, then open a frame so the
	 * slots become STK_REG(Rnn)(r1) across the call.
	 */
	mflr	r0
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_ops
	cmpwi	cr1,r3,0		/* result is consumed by the beq below */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

	dcbt	0,r6,0b01000
	dcbt	0,r7,0b01010
	dcbtst	0,r9,0b01000
	dcbtst	0,r10,0b01010
	eieio
	dcbt	0,r8,0b01010	/* GO */

	/* The prefetch streams above are started on both outcomes of this branch */
	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	addi	r4,r4,1
	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
	ld	r0,0(r4)
	addi	r4,r4,8
	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	/* r9-r11 (and r12/r14-r16 below) are constant index offsets for lvx/stvx */
	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
	lvx	v1,0,r4
	addi	r4,r4,16
	stvx	v1,0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
	lvx	v1,0,r4
	lvx	v0,r4,r9
	addi	r4,r4,32
	stvx	v1,0,r3
	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
	lvx	v3,0,r4
	lvx	v2,r4,r9
	lvx	v1,r4,r10
	lvx	v0,r4,r11
	addi	r4,r4,64
	stvx	v3,0,r3
	stvx	v2,r3,r9
	stvx	v1,r3,r10
	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	/* r14-r16 are about to be used as offsets; save them in the open frame */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
	lvx	v7,0,r4
	lvx	v6,r4,r9
	lvx	v5,r4,r10
	lvx	v4,r4,r11
	lvx	v3,r4,r12
	lvx	v2,r4,r14
	lvx	v1,r4,r15
	lvx	v0,r4,r16
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r9
	stvx	v5,r3,r10
	stvx	v4,r3,r11
	stvx	v3,r3,r12
	stvx	v2,r3,r14
	stvx	v1,r3,r15
	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
	lvx	v3,0,r4
	lvx	v2,r4,r9
	lvx	v1,r4,r10
	lvx	v0,r4,r11
	addi	r4,r4,64
	stvx	v3,0,r3
	stvx	v2,r3,r9
	stvx	v1,r3,r10
	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
	lvx	v1,0,r4
	lvx	v0,r4,r9
	addi	r4,r4,32
	stvx	v1,0,r3
	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
	lvx	v1,0,r4
	addi	r4,r4,16
	stvx	v1,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
	ld	r0,0(r4)
	addi	r4,r4,8
	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
	lbz	r0,0(r4)
	stb	r0,0(r3)

	/* Pop the frame, reload the saved destination into r3, then leave via exit_vmx_ops */
15:	addi	r1,r1,STACKFRAMESIZE
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	b	exit_vmx_ops		/* tail call optimise */

	/*
	 * Source and destination differ in their low four address bits.
	 * The destination is aligned as before; the source is read with
	 * aligned lvx loads and each output quadword is assembled with
	 * VPERM from two neighbouring source quadwords. v0 always carries
	 * the most recently loaded source quadword into the next step, and
	 * v16 holds the permute control vector.
	 */
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	addi	r4,r4,1
	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
	lwz	r7,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	LVS(v16,0,r4)		/* Setup permute control vector */
	/* Prime v0 with the first quadword; r4 now runs 16B ahead of the copy */
	lvx	v0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1	/* v0 = v1: carry the last load forward */

5:	bf	cr7*4+2,6f
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
	stvx	v8,0,r3
	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
	stvx	v8,0,r3
	stvx	v9,r3,r9
	stvx	v10,r3,r10
	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	/* r14-r16 are about to be used as offsets; save them in the open frame */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
	stvx	v8,0,r3
	stvx	v9,r3,r9
	stvx	v10,r3,r10
	stvx	v11,r3,r11
	stvx	v12,r3,r12
	stvx	v13,r3,r14
	stvx	v14,r3,r15
	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
	stvx	v8,0,r3
	stvx	v9,r3,r9
	stvx	v10,r3,r10
	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
	stvx	v8,0,r3
	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
	stvx	v8,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
	lwz	r6,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
	lbz	r0,0(r4)
	stb	r0,0(r3)

	/* Pop the frame, reload the saved destination into r3, then leave via exit_vmx_ops */
15:	addi	r1,r1,STACKFRAMESIZE
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	b	exit_vmx_ops		/* tail call optimise */
#endif /* CONFIG_ALTIVEC */