/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/copy_template.s
 *
 *  Code template for optimized memory copy functions
 *
 *  Author:	Nicolas Pitre
 *  Created:	Sep 28, 2005
 *  Copyright:	MontaVista Software, Inc.
 */

/*
 * Theory of operation
 * -------------------
 *
 * This file provides the core code for a forward memory copy used in
 * the implementation of memcpy(), copy_to_user() and copy_from_user().
 *
 * The including file must define the following accessor macros
 * according to the need of the given function:
 *
 * ldr1w ptr reg abort
 *
 *	This loads one word from 'ptr', stores it in 'reg' and increments
 *	'ptr' to the next word. The 'abort' argument is used for fixup tables.
 *
 * ldr4w ptr reg1 reg2 reg3 reg4 abort
 * ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
 *
 *	This loads four or eight words starting from 'ptr', stores them
 *	in provided registers and increments 'ptr' past those words.
 *	The 'abort' argument is used for fixup tables.
 *
 * ldr1b ptr reg cond abort
 *
 *	Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
 *	It also must apply the condition code if provided, otherwise the
 *	"al" condition is assumed by default.
 *
 * str1w ptr reg abort
 * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
 * str1b ptr reg cond abort
 *
 *	Same as their ldr* counterparts, but data is stored to 'ptr' location
 *	rather than being loaded.
 *
 * enter reg1 reg2
 *
 *	Preserve the provided registers on the stack plus any additional
 *	data as needed by the implementation including this code. Called
 *	upon code entry.
 *
 * usave reg1 reg2
 *
 *	Unwind annotation macro corresponding to the 'enter' macro.
 *	It tells the unwinder that a prior 'enter' macro preserved the
 *	provided registers, plus any additional data, on the stack.
 *
 * exit reg1 reg2
 *
 *	Restore registers with the values previously saved with the
 *	'enter' macro. Called upon code termination.
638c2ecf20Sopenharmony_ci * 648c2ecf20Sopenharmony_ci * LDR1W_SHIFT 658c2ecf20Sopenharmony_ci * STR1W_SHIFT 668c2ecf20Sopenharmony_ci * 678c2ecf20Sopenharmony_ci * Correction to be applied to the "ip" register when branching into 688c2ecf20Sopenharmony_ci * the ldr1w or str1w instructions (some of these macros may expand to 698c2ecf20Sopenharmony_ci * than one 32bit instruction in Thumb-2) 708c2ecf20Sopenharmony_ci */ 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci UNWIND( .fnstart ) 748c2ecf20Sopenharmony_ci enter r4, lr 758c2ecf20Sopenharmony_ci UNWIND( .fnend ) 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci UNWIND( .fnstart ) 788c2ecf20Sopenharmony_ci usave r4, lr @ in first stmdb block 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci subs r2, r2, #4 818c2ecf20Sopenharmony_ci blt 8f 828c2ecf20Sopenharmony_ci ands ip, r0, #3 838c2ecf20Sopenharmony_ci PLD( pld [r1, #0] ) 848c2ecf20Sopenharmony_ci bne 9f 858c2ecf20Sopenharmony_ci ands ip, r1, #3 868c2ecf20Sopenharmony_ci bne 10f 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci1: subs r2, r2, #(28) 898c2ecf20Sopenharmony_ci stmfd sp!, {r5 - r8} 908c2ecf20Sopenharmony_ci UNWIND( .fnend ) 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci UNWIND( .fnstart ) 938c2ecf20Sopenharmony_ci usave r4, lr 948c2ecf20Sopenharmony_ci UNWIND( .save {r5 - r8} ) @ in second stmfd block 958c2ecf20Sopenharmony_ci blt 5f 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci CALGN( ands ip, r0, #31 ) 988c2ecf20Sopenharmony_ci CALGN( rsb r3, ip, #32 ) 998c2ecf20Sopenharmony_ci CALGN( sbcsne r4, r3, r2 ) @ C is always set here 1008c2ecf20Sopenharmony_ci CALGN( bcs 2f ) 1018c2ecf20Sopenharmony_ci CALGN( adr r4, 6f ) 1028c2ecf20Sopenharmony_ci CALGN( subs r2, r2, r3 ) @ C gets set 1038c2ecf20Sopenharmony_ci CALGN( add pc, r4, ip ) 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci PLD( pld [r1, #0] ) 1068c2ecf20Sopenharmony_ci2: PLD( subs r2, r2, #96 ) 1078c2ecf20Sopenharmony_ci PLD( pld [r1, #28] ) 
1088c2ecf20Sopenharmony_ci PLD( blt 4f ) 1098c2ecf20Sopenharmony_ci PLD( pld [r1, #60] ) 1108c2ecf20Sopenharmony_ci PLD( pld [r1, #92] ) 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci3: PLD( pld [r1, #124] ) 1138c2ecf20Sopenharmony_ci4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f 1148c2ecf20Sopenharmony_ci subs r2, r2, #32 1158c2ecf20Sopenharmony_ci str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f 1168c2ecf20Sopenharmony_ci bge 3b 1178c2ecf20Sopenharmony_ci PLD( cmn r2, #96 ) 1188c2ecf20Sopenharmony_ci PLD( bge 4b ) 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci5: ands ip, r2, #28 1218c2ecf20Sopenharmony_ci rsb ip, ip, #32 1228c2ecf20Sopenharmony_ci#if LDR1W_SHIFT > 0 1238c2ecf20Sopenharmony_ci lsl ip, ip, #LDR1W_SHIFT 1248c2ecf20Sopenharmony_ci#endif 1258c2ecf20Sopenharmony_ci addne pc, pc, ip @ C is always clear here 1268c2ecf20Sopenharmony_ci b 7f 1278c2ecf20Sopenharmony_ci6: 1288c2ecf20Sopenharmony_ci .rept (1 << LDR1W_SHIFT) 1298c2ecf20Sopenharmony_ci W(nop) 1308c2ecf20Sopenharmony_ci .endr 1318c2ecf20Sopenharmony_ci ldr1w r1, r3, abort=20f 1328c2ecf20Sopenharmony_ci ldr1w r1, r4, abort=20f 1338c2ecf20Sopenharmony_ci ldr1w r1, r5, abort=20f 1348c2ecf20Sopenharmony_ci ldr1w r1, r6, abort=20f 1358c2ecf20Sopenharmony_ci ldr1w r1, r7, abort=20f 1368c2ecf20Sopenharmony_ci ldr1w r1, r8, abort=20f 1378c2ecf20Sopenharmony_ci ldr1w r1, lr, abort=20f 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci#if LDR1W_SHIFT < STR1W_SHIFT 1408c2ecf20Sopenharmony_ci lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT 1418c2ecf20Sopenharmony_ci#elif LDR1W_SHIFT > STR1W_SHIFT 1428c2ecf20Sopenharmony_ci lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT 1438c2ecf20Sopenharmony_ci#endif 1448c2ecf20Sopenharmony_ci add pc, pc, ip 1458c2ecf20Sopenharmony_ci nop 1468c2ecf20Sopenharmony_ci .rept (1 << STR1W_SHIFT) 1478c2ecf20Sopenharmony_ci W(nop) 1488c2ecf20Sopenharmony_ci .endr 1498c2ecf20Sopenharmony_ci str1w r0, r3, abort=20f 1508c2ecf20Sopenharmony_ci str1w r0, r4, abort=20f 
1518c2ecf20Sopenharmony_ci str1w r0, r5, abort=20f 1528c2ecf20Sopenharmony_ci str1w r0, r6, abort=20f 1538c2ecf20Sopenharmony_ci str1w r0, r7, abort=20f 1548c2ecf20Sopenharmony_ci str1w r0, r8, abort=20f 1558c2ecf20Sopenharmony_ci str1w r0, lr, abort=20f 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci CALGN( bcs 2b ) 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci7: ldmfd sp!, {r5 - r8} 1608c2ecf20Sopenharmony_ci UNWIND( .fnend ) @ end of second stmfd block 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci UNWIND( .fnstart ) 1638c2ecf20Sopenharmony_ci usave r4, lr @ still in first stmdb block 1648c2ecf20Sopenharmony_ci8: movs r2, r2, lsl #31 1658c2ecf20Sopenharmony_ci ldr1b r1, r3, ne, abort=21f 1668c2ecf20Sopenharmony_ci ldr1b r1, r4, cs, abort=21f 1678c2ecf20Sopenharmony_ci ldr1b r1, ip, cs, abort=21f 1688c2ecf20Sopenharmony_ci str1b r0, r3, ne, abort=21f 1698c2ecf20Sopenharmony_ci str1b r0, r4, cs, abort=21f 1708c2ecf20Sopenharmony_ci str1b r0, ip, cs, abort=21f 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci exit r4, pc 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci9: rsb ip, ip, #4 1758c2ecf20Sopenharmony_ci cmp ip, #2 1768c2ecf20Sopenharmony_ci ldr1b r1, r3, gt, abort=21f 1778c2ecf20Sopenharmony_ci ldr1b r1, r4, ge, abort=21f 1788c2ecf20Sopenharmony_ci ldr1b r1, lr, abort=21f 1798c2ecf20Sopenharmony_ci str1b r0, r3, gt, abort=21f 1808c2ecf20Sopenharmony_ci str1b r0, r4, ge, abort=21f 1818c2ecf20Sopenharmony_ci subs r2, r2, ip 1828c2ecf20Sopenharmony_ci str1b r0, lr, abort=21f 1838c2ecf20Sopenharmony_ci blt 8b 1848c2ecf20Sopenharmony_ci ands ip, r1, #3 1858c2ecf20Sopenharmony_ci beq 1b 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci10: bic r1, r1, #3 1888c2ecf20Sopenharmony_ci cmp ip, #2 1898c2ecf20Sopenharmony_ci ldr1w r1, lr, abort=21f 1908c2ecf20Sopenharmony_ci beq 17f 1918c2ecf20Sopenharmony_ci bgt 18f 1928c2ecf20Sopenharmony_ci UNWIND( .fnend ) 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci 
/*
 * forward_copy_shift pull push
 *
 * Copy path for a word-aligned store pointer (r0) and a load pointer that
 * was not word aligned.  On entry r1 has already been rounded down to a
 * word boundary and lr holds the word most recently loaded through it
 * (see label 10).  Every output word is assembled from two consecutive
 * input words:
 *
 *	out = (previous lspull #pull) | (next lspush #push)
 *
 * In the three expansions below pull + push is always 32.  lspull and
 * lspush are not defined in this file; presumably they are shift
 * mnemonics selected by endianness - confirm in the includer's headers.
 *
 * Local labels inside the macro:
 *
 *	11:	push r5 - r9 and enter the bulk loop
 *	12:, 13: eight output words (32 bytes) per iteration
 *	14:	ip = bytes left in whole words (0, 4, ..., 28)
 *	15:	one output word per iteration, ip counting down by 4
 *	16:	move r1 back by push / 8 bytes, i.e. by the bytes of lr that
 *		have not been consumed yet, so that it points at the next
 *		unread source byte, then finish in the byte tail at 8:
 *
 * Loads and stores made while r5 - r9 are on the stack abort to 19:, the
 * others to 21:.
 */
		.macro	forward_copy_shift pull push

	UNWIND(	.fnstart			)
		usave	r4, lr			  @ still in first stmdb block
		/* Fewer than 32 bytes left: skip the bulk loop. */
		subs	r2, r2, #28
		blt	14f

		/*
		 * CALGN variant: ip = bytes up to the next 32-byte boundary
		 * of the store pointer.  When the carry test selects it,
		 * those bytes are taken off the count and copied word by word
		 * at 15: before the bulk loop is entered via "bge 11b".
		 */
	CALGN(	ands	ip, r0, #31		)
	CALGN(	rsb	ip, ip, #32		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	subcc	r2, r2, ip		)
	CALGN(	bcc	15f			)

11:		stmfd	sp!, {r5 - r9}
	UNWIND(	.fnend				)

	UNWIND(	.fnstart			)
		usave	r4, lr
	UNWIND(	.save	{r5 - r9}		) @ in new second stmfd block
	PLD(	pld	[r1, #0]		)
	PLD(	subs	r2, r2, #96		)
	PLD(	pld	[r1, #28]		)
	PLD(	blt	13f			)
	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)

		/*
		 * Load eight new words into r4 - r9, ip, lr; combine each
		 * with its predecessor (starting from the old lr) into
		 * r3 - r9, ip; store those eight.  The new lr is carried over
		 * to the next iteration.  None of the mov/orr instructions
		 * sets the flags, so "bge 12b" tests the subs.
		 */
12:	PLD(	pld	[r1, #124]		)
13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
		mov	r3, lr, lspull #\pull
		subs	r2, r2, #32
		ldr4w	r1, r8, r9, ip, lr, abort=19f
		orr	r3, r3, r4, lspush #\push
		mov	r4, r4, lspull #\pull
		orr	r4, r4, r5, lspush #\push
		mov	r5, r5, lspull #\pull
		orr	r5, r5, r6, lspush #\push
		mov	r6, r6, lspull #\pull
		orr	r6, r6, r7, lspush #\push
		mov	r7, r7, lspull #\pull
		orr	r7, r7, r8, lspush #\push
		mov	r8, r8, lspull #\pull
		orr	r8, r8, r9, lspush #\push
		mov	r9, r9, lspull #\pull
		orr	r9, r9, ip, lspush #\push
		mov	ip, ip, lspull #\pull
		orr	ip, ip, lr, lspush #\push
		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f
		bge	12b
	PLD(	cmn	r2, #96			)
	PLD(	bge	13b			)

		ldmfd	sp!, {r5 - r9}
	UNWIND(	.fnend				) @ end of the second stmfd block

	UNWIND(	.fnstart			)
		usave	r4, lr			  @ still in first stmdb block
14:		ands	ip, r2, #28
		beq	16f

		/* One output word per pass until ip reaches zero. */
15:		mov	r3, lr, lspull #\pull
		ldr1w	r1, lr, abort=21f
		subs	ip, ip, #4
		orr	r3, r3, lr, lspush #\push
		str1w	r0, r3, abort=21f
		bgt	15b
	CALGN(	cmp	r2, #0			)
	CALGN(	bge	11b			)

16:		sub	r1, r1, #(\push / 8)
		b	8b
	UNWIND(	.fnend				)

		.endm


		/*
		 * One expansion per load-pointer misalignment.  Label 10
		 * falls through into the first one for a misalignment of 1
		 * and branches to 17: for 2 and to 18: for 3.
		 */
		forward_copy_shift	pull=8	push=24

17:		forward_copy_shift	pull=16	push=16

18:		forward_copy_shift	pull=24	push=8


/*
 * Abort preamble and completion macros.
2798c2ecf20Sopenharmony_ci * If a fixup handler is required then those macros must surround it. 2808c2ecf20Sopenharmony_ci * It is assumed that the fixup code will handle the private part of 2818c2ecf20Sopenharmony_ci * the exit macro. 2828c2ecf20Sopenharmony_ci */ 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci .macro copy_abort_preamble 2858c2ecf20Sopenharmony_ci19: ldmfd sp!, {r5 - r9} 2868c2ecf20Sopenharmony_ci b 21f 2878c2ecf20Sopenharmony_ci20: ldmfd sp!, {r5 - r8} 2888c2ecf20Sopenharmony_ci21: 2898c2ecf20Sopenharmony_ci .endm 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci .macro copy_abort_end 2928c2ecf20Sopenharmony_ci ldmfd sp!, {r4, pc} 2938c2ecf20Sopenharmony_ci .endm 2948c2ecf20Sopenharmony_ci 295