/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/kasan.h>

#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

/*
 * memcpy: copy r5 bytes from the buffer at r4 to the buffer at r3.
 *
 * In:   r3 = destination, r4 = source, r5 = byte count
 * Out:  r3 = the destination pointer that was passed in
 * Uses: big-endian path:    r0, r4-r12, ctr, cr0, cr1, cr6, cr7, plus one
 *                           doubleword at -STACKFRAMESIZE+STK_REG(R31)(r1)
 *                           that holds the return value during the copy
 *       little-endian path: r4, r9, r10, ctr, cr7
 *
 * Throughout the big-endian path cr7 holds the low four bits of the
 * number of bytes still to be handled: cr7*4+0 is the "8" bit, +1 the
 * "4" bit, +2 the "2" bit and +3 the "1" bit.
 *
 * NOTE(review): the BEGIN_FTR_SECTION/..._END macros come from
 * <asm/feature-fixups.h>; presumably they select one of the two
 * alternatives per CPU feature at boot - confirm against that header.
 */
	.align	7
_GLOBAL_TOC_KASAN(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
	b	memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	/* Bias both pointers by -1 so the update-form lbzu/stbu can be used;
	   r3 itself is left untouched and doubles as the return value. */
	addi r9,r3,-1
	addi r4,r4,-1
	beqlr cr7		/* zero length: nothing to copy */
	mtctr r5
1:	lbzu r10,1(r4)
	stbu r10,1(r9)
	bdnz 1b
	blr
#else
	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of the length */
	cmpldi	cr1,r5,16
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7		/* cr0.eq set when dest is already 8-byte aligned */
	dcbt	0,r4
	blt	cr1,.Lshort_copy	/* fewer than 16 bytes */
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	/* Bias dest by -16 to match the 8(r3)/16(r3) store offsets below. */
	addi	r3,r3,-16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7		/* r0 = source misalignment in bytes */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4		/* r7 = number of 16-byte loop iterations */
	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7		/* r5 = trailing bytes (< 8); cr0.eq if none */
	bf	cr7*4+0,2f	/* even number of doublewords: enter loop at 2 */
	/* Odd number of doublewords: the extra one goes through r8. */
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f		/* only that single doubleword to copy */
	/* 16 bytes per iteration; loads run one doubleword ahead of stores. */
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)
	beq	3f		/* no trailing bytes: return */
	addi	r3,r3,16
.Ldo_tail:
	/* r4 still points at the last doubleword loaded, so the remaining
	   source bytes are read at offset 8 from it. */
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination is 8-byte aligned, source is not (r0 = source & 7).
 * Only aligned doublewords are loaded; each destination doubleword is
 * assembled from two neighbouring source doublewords as
 * (sN << r10) | (sN+1 >> r11).
 */
.Lsrc_unaligned:
	srdi	r6,r5,3		/* r6 = number of whole doublewords */
	addi	r5,r5,-16
	subf	r4,r0,r4	/* round source down to an 8-byte boundary */
	srdi	r7,r5,4		/* loop count = (len - 16) / 16 */
	sldi	r10,r0,3	/* r10 = left shift in bits (8 * misalignment) */
	cmpdi	cr6,r6,3
	andi.	r5,r5,7		/* trailing bytes; cr0.eq if none */
	mtctr	r7
	subfic	r11,r10,64	/* r11 = right shift in bits (64 - r10) */
	add	r5,r5,r0	/* r5 = trailing bytes + source misalignment */

	bt	cr7*4+0,0f	/* odd number of doublewords */

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	/* No trailing bytes: return.  "4f" is the next label 4 further down,
	   i.e. the return sequence at the end of .Lshort_copy. */
	beq	4f
	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	/* If trailing bytes + misalignment <= 8, r9 already holds the whole
	   tail; otherwise merge in one more source doubleword. */
	ble	cr1,6f
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	/* The tail sits in the high-order bytes of r9: rotate each piece
	   down into the low-order end and store it. */
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination is not 8-byte aligned: copy the r6 (1..7) leading bytes
 * needed to reach the boundary, then rejoin the aligned path with the
 * length, cr1 and cr7 recomputed for what is left.
 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5
	li	r7,0			/* r7 = running offset into both buffers */
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)		/* cr7 = low 4 bits of the remaining length */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/*
 * Fewer than 16 bytes: copy 8 (as two words), 4, 2 and 1 bytes as
 * selected by the length bits held in cr7.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL_KASAN(memcpy)