/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

/*
 * Copy 16 bytes from 4(r4) to 4(r6) using r7-r10 as scratch.
 * The update forms on the last load and the last store advance
 * r4 and r6 by 16, so both pointers stay 4 bytes below the next
 * byte to be copied.
 */
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

/*
 * Same 16-byte copy as COPY_16_BYTES, but every instruction gets its own
 * numeric label 8<n>0 .. 8<n>7 (loads are 0-3, stores are 4-7) so that
 * COPY_16_BYTES_EXCODE(n) can refer to each one individually.
 */
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

/*
 * Fixup code for COPY_16_BYTES_WITHEX(n).
 * 9<n>0 is the target for the four loads, 9<n>1 for the four stores.
 * Both subtract 16 * n from r5 (the 16-byte groups that precede group n
 * in the current cacheline) and then branch to the common read-fault
 * (104) or write-fault (105) handler.
 * EX_TABLE is defined outside this file; presumably its arguments are
 * (faulting instruction, fixup address) -- confirm against its definition.
 */
#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"copy_32.S",N_SO,0,0,0f
0:

/* Cacheline geometry, taken from the L1_CACHE_* constants. */
CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * memset16: store the low 16 bits of r4 into r5 consecutive halfwords
 * starting at r3.  r3 is not modified.  Only built when CONFIG_KASAN
 * is not defined.
 *
 * r0 = scratch / word count, r6 = store pointer (kept 4 below the next
 * word to write), ctr = loop counter.
 */
#ifndef CONFIG_KASAN
_GLOBAL(memset16)
	rlwinm.	r0 ,r5, 31, 1, 31	/* r0 = r5 >> 1 = number of full words */
	addi	r6, r3, -4
	beq-	2f			/* fewer than two halfwords: no word loop */
	rlwimi	r4 ,r4 ,16 ,0 ,15	/* copy low halfword of r4 into its high half */
	mtctr	r0
1:	stwu	r4, 4(r6)
	bdnz	1b
2:	andi.	r0, r5, 1		/* odd count: one halfword left over */
	beqlr
	sth	r4, 4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore skip the optimised block that uses dcbz. This jump is
 * replaced by a nop once cache is active. This is done in machine_init()
 * NOTE(review): the comment at the patch site below says the branch is
 * replaced by 'bne', not by a nop -- confirm against machine_init().
 *
 * Register use, as read from the code below:
 *   r3 = start of the area (never modified)
 *   r4 = fill value; its low byte is replicated into all four bytes
 *   r5 = number of bytes
 *   r6 = store pointer, r0/r7/r8/r9 = scratch, ctr = loop counter
 */
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f			/* fewer than 4 bytes: plain byte loop */

	rlwimi	r4,r4,8,16,23		/* byte 0 -> byte 1 */
	rlwimi	r4,r4,16,0,15		/* low halfword -> high halfword */

	stw	r4,0(r3)		/* first word, at the start address as given */
	beqlr				/* cr0 is still from the cmplwi: exactly 4 bytes */
	andi.	r0,r3,3			/* r0 = offset of r3 within its word */
	add	r5,r0,r5		/* count is now measured from the word boundary */
	subf	r6,r0,r3		/* r6 = r3 rounded down to a word boundary */
	cmplwi	0,r4,0			/* cr0.eq set when the fill pattern is zero */
	/*
	 * Skip optimised block until cache is enabled. Will be replaced
	 * by 'bne' during boot to use normal procedure if r4 is not zero
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES	/* r7 = offset of r6 in its cacheline */
	add	r8,r7,r5		/* r8 = byte count measured from the line start */
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	/*
	 * r7 has its two low bits clear, so the xor below yields
	 * CACHELINE_BYTES - 4 - r7: the bytes from 4(r6) up to the next
	 * cacheline boundary.
	 */
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2			/* ... as a word count */
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4			/* r6 is 4 below the line start, hence offset 4 */
10:	dcbz	r7,r6			/* zero one whole cacheline */
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES	/* bytes left after the last full line */
	addi	r5,r5,4			/* +4 offsets the bdz pre-decrement below */

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f			/* ctr is decremented first: one word fewer than r5 >> 2 */
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3			/* trailing bytes */
	beqlr
	mtctr	r5
	addi	r6,r6,3			/* stbu below pre-increments by 1 */
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

	/* Short case: fewer than 4 bytes, stored one at a time. */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
 * replaced by a nop once cache is active. This is done in machine_init()
 *
 * Register use, as read from the code below:
 *   r3 = destination (not modified on any path in memmove/memcpy)
 *   r4 = source, r5 = byte count
 *   r6 = destination pointer, r0/r7-r11 = scratch, ctr = loop counter
 * r4 and r6 are kept 4 bytes below the next byte to copy so that the
 * update-form loads and stores can be used.
 */
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy	/* dest above source (unsigned): copy from the end */
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

	/*
	 * The cacheline loop zeroes each destination line with dcbz before
	 * loading the source, so overlapping areas must go to generic_memcpy.
	 */
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	generic_memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5		/* r5 = bytes left once dest is line-aligned */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)		/* r6 was already advanced, so 3(r6) is the old 4(r6) */
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES	/* r5 = bytes left after those lines */
	li	r11,4			/* dcbz offset: r6 is 4 below the line start */
	mtctr	r0
	beq	63f
	/* One destination cacheline per iteration, 16 bytes per COPY_16_BYTES. */
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

	/* Remaining whole words. */
63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

	/* Remaining 0-3 bytes. */
64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3			/* switch to the 1-below convention of lbzu/stbu */
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

/*
 * generic_memcpy: forward copy of r5 bytes from r4 to r3 without dcbz.
 * Reached from memcpy while the patch-site branch is still in place,
 * and when the overlap test in memcpy is true.
 * r3 is not modified.  r6 = destination pointer, r0/r7/r8 = scratch.
 * The main loop moves 8 bytes per iteration once the destination is
 * word aligned.
 */
generic_memcpy:
	srwi.	r7,r5,3			/* r7 = number of 8-byte groups */
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7			/* bytes left after the 8-byte groups */
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)		/* one more whole word */
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3			/* switch to the 1-below convention of lbzu/stbu */
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
	/* Destination not word aligned: copy 4 - (dest & 3) bytes first. */
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3, recomputed for the main loop */
	beq	2b
	mtctr	r7
	b	1b

/*
 * backwards_memcpy: copy r5 bytes from r4 to r3 starting at the end of
 * both areas and working down.  memmove branches here when r3 > r4.
 * r3 is not modified.  r6 and r4 are set just past the end of the
 * destination and source and are pre-decremented by the update forms.
 */
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f			/* fewer than 8 bytes */
	andi.	r0,r6,3			/* r0 = bytes by which the dest end is past a word boundary */
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7			/* bytes left after the 8-byte groups */
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)		/* one more whole word */
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
	/* Destination end not word aligned: copy r0 bytes to align it. */
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3, recomputed for the main loop */
	beq	2b
	mtctr	r7
	b	1b

/*
 * __copy_tofrom_user: copy r5 bytes from r4 to r3, with the loads and
 * stores listed in the exception table so that a fault is redirected
 * to the fixup code further down.
 *
 * Result in r3: 0 when everything was copied, otherwise the number of
 * bytes not copied (see the comment above label 99).
 *
 * Register use, as read from the code below:
 *   r4/r6 = source/destination pointers, kept 4 below the next byte
 *   r5    = bytes not yet accounted for by the current loop
 *   r0    = scratch; during the cacheline loop, the number of lines
 *           deferred to the second pass
 *   r3    = dcbt offset during the cacheline loop, shift amount and
 *           result in the fixup code
 *   r7-r11 = scratch, r9 = 0 for a read fault / 1 for a write fault
 */
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5		/* r5 = bytes left once dest is line-aligned */
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES	/* r5 = bytes left after those lines */
	li	r11,4			/* dcbz offset: r6 is 4 below the line start */
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4			/* r3 = dcbt offset from r4 */
	cmpwi	r0,1
	li	r7,0			/* r7 = number of lines prefetched ahead */
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

	/*
	 * First pass copies (lines - r7) lines while prefetching r7 lines
	 * ahead; r0 keeps the r7 lines that remain for the second pass.
	 */
114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	/* Second pass: r7 = 0, so 114 loads the deferred count into ctr and clears r0. */
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

	/* Remaining whole words. */
63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

	/* Remaining 0-3 bytes. */
64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0			/* everything copied */
	blr

/*
 * Fixup code.  Each entry records the fault type in r9 and sets r5 and
 * the shift amount r3 so that label 99/106 can compute the uncopied count.
 */
/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5		/* r8 = initial byte-loop count; r5 not yet reduced here */
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2			/* ctr counts words */
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES	/* counts are in cachelines */
	mfctr	r8
	add	r0,r0,r8		/* lines left = deferred lines (r0) + ctr */
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3			/* only the trailing bytes remain besides ctr words */
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
/*
 * Label 111 is also defined in the prefetch loop above; the
 * EX_TABLE(41b,111b) at the end of the file refers to this later one.
 */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f			/* write fault: return the count as computed */
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
/*
 * NOTE(review): nothing is cleared below; the code only reloads r3 from
 * ctr (bytes still uncopied) and returns.  The comment above looks
 * stale -- confirm whether the caller is expected to do the clearing.
 */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)	/* read fault during the byte retry: r3 = ctr */
	EX_TABLE(131b,120b)	/* write fault during the byte retry: r3 is left as computed at 106 */

EXPORT_SYMBOL(__copy_tofrom_user)