/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 *
 * Usage: write "lex;" or "stex;" immediately in front of the load or
 * store that may fault.  The macro emits only a local label and an
 * exception table entry, so the label lands on the instruction that
 * follows it on the same line.
 */
	.macro	lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro	stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm

/*
 * __copy_tofrom_user / __copy_tofrom_user_base
 *
 * In:	r3 = destination pointer
 *	r4 = source pointer
 *	r5 = number of bytes to copy
 * Out:	r3 = 0 if everything was copied, otherwise the number of bytes
 *	     that were not copied (computed by the handlers further down).
 *
 * The three arguments are saved just below the stack pointer, at
 * -24(r1), -16(r1) and -8(r1), so that the fault handlers can work out
 * how far the copy got.  No stack frame is created; the 4kB page path
 * additionally parks r20-r31 at -32(r1) .. -120(r1).
 *
 * Condition register usage in the byte-granular paths: cr7 holds the
 * low four bits of the remaining length, so that
 *	cr7*4+0 -> an 8-byte piece is pending
 *	cr7*4+1 -> a 4-byte piece is pending
 *	cr7*4+2 -> a 2-byte piece is pending
 *	cr7*4+3 -> a 1-byte piece is pending
 */
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a 4kB copy on a 4kB boundary */
	cmpldi	cr1,r5,16	/* cr1: length compared with 16 */
	cmpdi	cr6,r5,4096	/* cr6.eq: length is exactly 4096 */
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095	/* cr0.eq: both pointers 4kB aligned */
	std	r3,-24(r1)	/* save original destination */
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* cr0.eq = aligned && len == 4096 */
	std	r4,-16(r1)	/* save original source */
	std	r5,-8(r1)	/* save original length */
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7		/* cr0.eq: destination already 8-byte aligned */
	PPC_MTOCRF(0x01,r5)	/* low bits of the length into cr7 */
	blt	cr1,.Lshort_copy	/* fewer than 16 bytes */
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)

/*
 * Main loop.  r3 is biased by -16 so that the stores can use positive
 * displacements; the loads run ahead of the stores.  CTR = length / 32.
 * Bit 0x10 of the length says whether the number of 16-byte chunks is
 * odd (enter at 21, or go straight to 72 when CTR is 0) or even (enter
 * at 22).
 */
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7		/* r0 = source misalignment within 8 bytes */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0	/* cr1.eq: no full 32-byte iteration */
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf	/* anything left below 16 bytes? */
	beq+	3f
	addi	r4,r4,16

	/* Copy the last 0..15 bytes, steered by the bits in cr7. */
.Ldo_tail:
	addi	r3,r3,16	/* undo the -16 bias */
r3_offset = 0
	bf	cr7*4+0,246f
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0
	blr

/*
 * Destination is 8-byte aligned but the source is not.  On entry r0 is
 * the source misalignment in bytes (from the andi. at .Ldst_aligned)
 * and r3 still carries the -16 bias.  The source pointer is rounded
 * down, aligned doublewords are loaded, and each output doubleword is
 * assembled from two neighbouring inputs:
 *	r10 = 8 * misalignment	(shift count for sLd)
 *	r11 = 64 - r10		(shift count for sHd)
 * cr0 from "andi. r5,r5,7" is not disturbed again before the "bne 6f"
 * at label 5, where it decides whether a sub-doubleword tail remains.
 */
.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3		/* r6 = number of whole doublewords */
	addi	r5,r5,-16
	subf	r4,r0,r4	/* round the source down to 8 bytes */
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7		/* cr0.eq: length is a multiple of 8 */
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0	/* tail bytes + misalignment, tested at 6: */
	bt	cr7*4+0,28f	/* odd number of doublewords */

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f		/* cr0 still from "andi. r5,r5,7" above */
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10
	ble	cr1,7f		/* tail fits in what is already in r9 */
lex;	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	/*
	 * Store the remaining 4/2/1 bytes out of r9.  The rotates bring
	 * the next piece into the low-order bits that stw/sth/stb write:
	 * before the store on big-endian, after it on little-endian.
	 */
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

/*
 * Destination is not 8-byte aligned: copy r6 (1..7) leading bytes so
 * that it becomes aligned, then rejoin .Ldst_aligned.  r3 and r4 are
 * left untouched while r7 counts the bytes done so far, which is why
 * these accesses use the _r7 handler variants (they add r7 to r3).
 */
.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5	/* length left after the alignment bytes */
	li	r7,0
	cmpldi	cr1,r5,16	/* refresh cr1 for .Ldst_aligned */
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)	/* cr7 now describes the new length */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

	/* Fewer than 16 bytes in total: no alignment work, just cr7. */
.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)
stex;	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * None of the handlers below clears the uncopied part of the
 * destination; they only work out, and return in r3, how many bytes
 * were left uncopied.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */

.Lld_exc_r7:
	add	r3,r3,r7
	b	.Lld_exc

	/*
	 * Do not insert, remove or reorder anything between here and
	 * .Lld_exc: entering 24, 16 or 8 bytes before the label must
	 * execute exactly three, two or one of the addi instructions.
	 */
	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
.Lld_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	subf	r6,r6,r3	/* r6 = #bytes already copied */
	add	r4,r4,r6	/* source address of the next byte */
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3
	blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
.Lst_exc_r7:
	add	r3,r3,r7
	b	.Lst_exc

	/*
	 * Same layout rule as in front of .Lld_exc, with one extra entry
	 * point: the last slot is split into two addi 4 instructions so
	 * that entering 4 bytes before .Lst_exc adjusts r3 by 4.
	 */
	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5	/* r7 = one past the end of the destination */
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 *
 * r20-r31 are saved below the stack pointer and restored on both the
 * normal exit (label 9) and the fault exit (.Labort).  r5 counts
 * 32-byte units still to be copied.  Each pass of the outer loop at
 * label 0 moves 768 bytes as six interleaved streams 128 bytes apart;
 * the loop at label 3 and the code at label 4 finish the rest 32 bytes
 * at a time.  r3 is biased by -8 throughout.
 */
	.macro	exc
100:	EX_TABLE(100b, .Labort)
	.endm
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24		/* cr0 is consumed by "bge 0b" below */
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 *
 * The page is then copied again from the start by the loop at
 * .Ldst_aligned, whose lex/stex handlers can report an exact count.
 */
.Labort:
	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	li	r5,4096		/* this path is only taken for len == 4096 */
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)