/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/kasan.h>

#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

/*
 * memcpy: copy r5 bytes from the buffer at r4 to the buffer at r3.
 *
 * Register roles, as used by the code below:
 *   r3  destination pointer on entry.  The big-endian build saves it at
 *       -STACKFRAMESIZE+STK_REG(R31)(r1) and reloads it before every blr,
 *       so the original destination is what ends up in r3 on return.
 *       The little-endian build never modifies r3 (it works on a copy
 *       in r9).
 *   r4  source pointer
 *   r5  byte count
 *
 * Scratch: r0, r6-r12, ctr, cr0, cr1, cr6, cr7.  r1 is only used as a
 * base address and is never modified, so no stack frame is created here.
 *
 * Big-endian strategy:
 *   - fewer than 16 bytes: .Lshort_copy moves 8/4/2/1 bytes as selected
 *     by the low four bits of the length (held in cr7).
 *   - otherwise the destination is first brought to an 8-byte boundary
 *     (.Ldst_unaligned), then doublewords are copied 16 bytes per loop
 *     iteration; if the source is not 8-byte aligned, .Lsrc_unaligned
 *     builds each destination doubleword from two aligned source
 *     doublewords with shifts.
 *
 * NOTE(review): BEGIN_FTR_SECTION / FTR_SECTION_ELSE / *_FTR_SECTION_END*
 * come from asm/feature-fixups.h, which is not visible in this file;
 * presumably they choose between the alternative instruction sequences
 * according to the CPU feature bits named in the END macro.  Confirm
 * there.  memcpy_power7 is likewise defined elsewhere.
 */
	.align	7
_GLOBAL_TOC_KASAN(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
	b	memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	/* Byte loop; both pointers are biased by -1 for the update forms. */
	addi	r9,r3,-1
	addi	r4,r4,-1
	beqlr	cr7			/* length == 0 (cmpdi above): return */
	mtctr	r5
1:	lbzu	r10,1(r4)
	stbu	r10,1(r9)
	bdnz	1b
	blr
#else
	PPC_MTOCRF(0x01,r5)		/* cr7 = low 4 bits of the length */
	cmpldi	cr1,r5,16
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7
	dcbt	0,r4
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned		/* cr0 from "andi. r6,r6,7" above */
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16		/* bias dest for the 8/16(r3) stores below */
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7			/* r0 = source offset within a doubleword */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4			/* r7 = number of 16-byte chunks */
	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7			/* cr0.eq <=> no tail of fewer than 8 bytes */
	bf	cr7*4+0,2f		/* 8s bit of length clear: enter loop at 2: */
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	/*
	 * cr1 is "length < 16" only when we arrived via .Ldst_unaligned with
	 * fewer than 16 bytes left; ctr is 0 then, so the loop is skipped.
	 */
	blt	cr1,3f
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)
	beq	3f			/* cr0 from "andi. r5,r5,7": no tail, return */
	addi	r3,r3,16
.Ldo_tail:
	/* Remaining 4/2/1 bytes per cr7; r4 lags the data by 8, hence 8(r4). */
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination is 8-byte aligned, source is not.
 *   r0  = source offset within its doubleword (1..7)
 *   r4  is rounded down to the containing doubleword
 *   r10 = 8 * r0  (left-shift count), r11 = 64 - r10 (right-shift count)
 * Each destination doubleword is (sN << r10) | (sN+1 >> r11).
 */
.Lsrc_unaligned:
	srdi	r6,r5,3			/* r6 = number of whole doublewords */
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpdi	cr6,r6,3
	andi.	r5,r5,7			/* cr0.eq <=> no tail of fewer than 8 bytes */
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0		/* r5 = tail bytes + source offset */

	bt	cr7*4+0,0f

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beq	4f			/* cr0 from "andi. r5,r5,7": no tail, return */
	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	/* If tail + source offset <= 8, r9 already holds every tail byte. */
	ble	cr1,6f
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	/* Tail bytes sit at the top of r9; rotate them down before each store. */
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination is not 8-byte aligned: copy r6 (1..7) leading bytes as
 * 1 + 2 + 4 according to the bits of r6, using r7 as the running offset,
 * then advance both pointers by r6 and rejoin the aligned path with the
 * reduced length in r5 (cr7 and cr1 are recomputed for it).
 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/*
 * Fewer than 16 bytes: copy 8, 4, 2 and 1 bytes as selected by the
 * corresponding bits of the length in cr7.  The 8-byte step uses two
 * word accesses.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL_KASAN(memcpy)