/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

/*
 * SELFTEST_CASE is assigned to the assembler symbol test_feature inside
 * memcpy_power7. A build may predefine it; otherwise it defaults to 0.
 */
#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE	0
#endif

/*
 * Endian-dependent helpers used by the unaligned VMX copy path:
 *
 *   LVS(VRT,RA,RB)         - build the permute control vector for the address
 *                            RA+RB (lvsl on big endian, lvsr on little endian).
 *   VPERM(VRT,VRA,VRB,VRC) - vperm of two source vectors under control VRC;
 *                            the little-endian variant swaps VRA and VRB.
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

/*
 * memcpy_power7 - entry point and size-based dispatch.
 *
 * In:   r3 = destination, r4 = source, r5 = length in bytes
 *       (the code below stores through r3, loads through r4 and counts
 *       down r5).
 *
 * The destination pointer is stashed below the stack pointer here and is
 * reloaded into r3 on every exit path, so the copy code is free to advance
 * r3 as it goes.
 *
 * Dispatch:
 *   length <  16                 -> .Lshort_copy
 *   length > 4096 (AltiVec only) -> .Lvmx_copy
 *   otherwise                    -> fall through into .Lnonvmx_copy
 */
_GLOBAL(memcpy_power7)
	cmpldi	r5,16			/* cr0: length vs 16 */
	cmpldi	cr1,r5,4096		/* cr1: length vs 4096 */
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save dest for return */
	blt	.Lshort_copy

#ifdef CONFIG_ALTIVEC
	/*
	 * The VMX branch lives inside a CPU_FTR_ALTIVEC feature section.
	 * NOTE(review): BEGIN_FTR_SECTION/END_FTR_SECTION_IFSET and the use of
	 * test_feature are defined outside this file; presumably the branch is
	 * only live when the CPU has AltiVec - confirm against those macros.
	 */
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
	bgt	cr1, .Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif

/*
 * .Lnonvmx_copy - copy using integer loads and stores only.
 *
 * Reached by fall-through from the size checks in memcpy_power7 (so
 * r5 >= 16) or from .Lunwind_stack_nonvmx_copy.
 *
 * In:   r3 = dest cursor, r4 = source cursor, r5 = bytes remaining
 * Out:  falls through into .Lshort_copy with r5 < 16
 *
 * Throughout, "mtocrf 0x01,rX" copies the low four bits of rX into cr7, so
 * cr7*4+3 tests the 1s bit, cr7*4+2 the 2s bit, cr7*4+1 the 4s bit and
 * cr7*4+0 the 8s bit.
 */
.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* cr7 = low bits of -source */
	clrldi	r6,r6,(64-3)		/* r6 = bytes to the next 8B boundary */

	bf	cr7*4+3,1f		/* 1 byte */
	lbz	r0,0(r4)
	addi	r4,r4,1
	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f		/* 2 bytes */
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f		/* 4 bytes */
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6		/* account for the alignment bytes */
	cmpldi	r5,128
	blt	5f			/* less than one 128B block left */

	/*
	 * The 128B loop needs more registers than the scratch set, so push a
	 * frame and save r14-r22 (the loop itself uses r14-r21). LR is also
	 * written to 16(old r1); nothing on this path reloads or changes LR.
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7			/* number of whole 128B blocks */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
	ld	r0,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ld	r8,24(r4)
	ld	r9,32(r4)
	ld	r10,40(r4)
	ld	r11,48(r4)
	ld	r12,56(r4)
	ld	r14,64(r4)
	ld	r15,72(r4)
	ld	r16,80(r4)
	ld	r17,88(r4)
	ld	r18,96(r4)
	ld	r19,104(r4)
	ld	r20,112(r4)
	ld	r21,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r6,8(r3)
	std	r7,16(r3)
	std	r8,24(r3)
	std	r9,32(r3)
	std	r10,40(r3)
	std	r11,48(r3)
	std	r12,56(r3)
	std	r14,64(r3)
	std	r15,72(r3)
	std	r16,80(r3)
	std	r17,88(r3)
	std	r18,96(r3)
	std	r19,104(r3)
	std	r20,112(r3)
	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 = length mod 128 */

	/* Restore the saved registers and pop the frame */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 = 64/32/16-byte bits of r5 */

6:	bf	cr7*4+1,7f		/* 64 bytes */
	ld	r0,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ld	r8,24(r4)
	ld	r9,32(r4)
	ld	r10,40(r4)
	ld	r11,48(r4)
	ld	r12,56(r4)
	addi	r4,r4,64
	std	r0,0(r3)
	std	r6,8(r3)
	std	r7,16(r3)
	std	r8,24(r3)
	std	r9,32(r3)
	std	r10,40(r3)
	std	r11,48(r3)
	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f		/* 32 bytes */
	ld	r0,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ld	r8,24(r4)
	addi	r4,r4,32
	std	r0,0(r3)
	std	r6,8(r3)
	std	r7,16(r3)
	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16 bytes */
	ld	r0,0(r4)
	ld	r6,8(r4)
	addi	r4,r4,16
	std	r0,0(r3)
	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)		/* r5 = length mod 16 */

1808c2ecf20Sopenharmony_ci	/* Up to 15B to go */
1818c2ecf20Sopenharmony_ci.Lshort_copy:
1828c2ecf20Sopenharmony_ci	mtocrf	0x01,r5
1838c2ecf20Sopenharmony_ci	bf	cr7*4+0,12f
1848c2ecf20Sopenharmony_ci	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
1858c2ecf20Sopenharmony_ci	lwz	r6,4(r4)
1868c2ecf20Sopenharmony_ci	addi	r4,r4,8
1878c2ecf20Sopenharmony_ci	stw	r0,0(r3)
1888c2ecf20Sopenharmony_ci	stw	r6,4(r3)
1898c2ecf20Sopenharmony_ci	addi	r3,r3,8
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci12:	bf	cr7*4+1,13f
1928c2ecf20Sopenharmony_ci	lwz	r0,0(r4)
1938c2ecf20Sopenharmony_ci	addi	r4,r4,4
1948c2ecf20Sopenharmony_ci	stw	r0,0(r3)
1958c2ecf20Sopenharmony_ci	addi	r3,r3,4
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci13:	bf	cr7*4+2,14f
1988c2ecf20Sopenharmony_ci	lhz	r0,0(r4)
1998c2ecf20Sopenharmony_ci	addi	r4,r4,2
2008c2ecf20Sopenharmony_ci	sth	r0,0(r3)
2018c2ecf20Sopenharmony_ci	addi	r3,r3,2
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci14:	bf	cr7*4+3,15f
2048c2ecf20Sopenharmony_ci	lbz	r0,0(r4)
2058c2ecf20Sopenharmony_ci	stb	r0,0(r3)
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci15:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
2088c2ecf20Sopenharmony_ci	blr
2098c2ecf20Sopenharmony_ci
/*
 * .Lunwind_stack_nonvmx_copy - fall back from the VMX path.
 *
 * Branched to from .Lvmx_copy when enter_vmx_ops returned 0. Pops the
 * frame that .Lvmx_copy pushed and continues with the integer copy; r3, r4
 * and r5 have already been reloaded with the original arguments there.
 */
.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

/*
 * .Lvmx_copy - copy using VMX (AltiVec) 16-byte loads and stores.
 *
 * Branched to from memcpy_power7 when the length exceeds 4096 bytes.
 *
 * In:   r3 = dest, r4 = source, r5 = length; dest was already saved by
 *       memcpy_power7 at -STACKFRAMESIZE+STK_REG(R31)(r1).
 *
 * Steps:
 *   1. Save r4/r5/LR, push a frame and call enter_vmx_ops.
 *   2. Start hardware prefetch streams for source and destination.
 *   3. If enter_vmx_ops returned 0, unwind and use .Lnonvmx_copy.
 *   4. If source and dest differ in their low 4 address bits, go to
 *      .Lvmx_unaligned_copy; otherwise do the aligned copy below.
 *   5. Pop the frame and tail-call exit_vmx_ops.
 *
 * "mtocrf 0x01,rX" copies the low four bits of rX into cr7; cr7*4+3 is the
 * least significant of those bits and cr7*4+0 the most significant.
 */
.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
	mflr	r0
	/* These slots sit below r1 now and inside the frame after the stdu */
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_ops
	cmpwi	cr1,r3,0		/* cr1.eq set if it returned 0; tested below */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)	/* original dest */
	ld	r4,STK_REG(R30)(r1)	/* original source */
	ld	r5,STK_REG(R29)(r1)	/* original length */
	mtlr	r0			/* LR = our caller again */

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7			/* source rounded down to 128B */
	clrrdi	r9,r3,7			/* dest rounded down to 128B */
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32		/* drop the sign extension from lis */

	dcbt	0,r6,0b01000
	dcbt	0,r7,0b01010
	dcbtst	0,r9,0b01000
	dcbtst	0,r10,0b01010
	eieio
	dcbt	0,r8,0b01010	/* GO */

	/* enter_vmx_ops returned 0: fall back to the integer copy */
	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)		/* low 4 bits of (source ^ dest) */
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)		/* r6 = bytes to the next 16B boundary */

	bf	cr7*4+3,1f		/* 1 byte */
	lbz	r0,0(r4)
	addi	r4,r4,1
	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f		/* 2 bytes */
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f		/* 4 bytes */
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f		/* 8 bytes */
	ld	r0,0(r4)
	addi	r4,r4,8
	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7			/* cr7 = 16/32/64-byte bits of -dest */
	clrldi	r6,r6,(64-7)		/* r6 = bytes to the next 128B boundary */

	/* Index registers for the indexed vector loads/stores */
	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f		/* 16 bytes */
	lvx	v1,0,r4
	addi	r4,r4,16
	stvx	v1,0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f		/* 32 bytes */
	lvx	v1,0,r4
	lvx	v0,r4,r9
	addi	r4,r4,32
	stvx	v1,0,r3
	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f		/* 64 bytes */
	lvx	v3,0,r4
	lvx	v2,r4,r9
	lvx	v1,r4,r10
	lvx	v0,r4,r11
	addi	r4,r4,64
	stvx	v3,0,r3
	stvx	v2,r3,r9
	stvx	v1,r3,r10
	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7			/* number of whole 128B blocks */

	/* r14-r16 become index registers; save them in the frame first */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
	lvx	v7,0,r4
	lvx	v6,r4,r9
	lvx	v5,r4,r10
	lvx	v4,r4,r11
	lvx	v3,r4,r12
	lvx	v2,r4,r14
	lvx	v1,r4,r15
	lvx	v0,r4,r16
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r9
	stvx	v5,r3,r10
	stvx	v4,r3,r11
	stvx	v3,r3,r12
	stvx	v2,r3,r14
	stvx	v1,r3,r15
	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 = 64/32/16-byte bits of r5 */

	bf	cr7*4+1,9f		/* 64 bytes */
	lvx	v3,0,r4
	lvx	v2,r4,r9
	lvx	v1,r4,r10
	lvx	v0,r4,r11
	addi	r4,r4,64
	stvx	v3,0,r3
	stvx	v2,r3,r9
	stvx	v1,r3,r10
	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f		/* 32 bytes */
	lvx	v1,0,r4
	lvx	v0,r4,r9
	addi	r4,r4,32
	stvx	v1,0,r3
	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f		/* 16 bytes */
	lvx	v1,0,r4
	addi	r4,r4,16
	stvx	v1,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f		/* 8 bytes */
	ld	r0,0(r4)
	addi	r4,r4,8
	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f		/* 4 bytes */
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f		/* 2 bytes */
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f		/* 1 byte */
	lbz	r0,0(r4)
	stb	r0,0(r3)

	/*
	 * Pop the frame and reload the original dest into r3. LR already
	 * holds our caller's address (mtlr above), so exit_vmx_ops returns
	 * straight to the caller.
	 * NOTE(review): exit_vmx_ops is defined elsewhere; presumably it
	 * passes r3 through as the return value - confirm.
	 */
15:	addi	r1,r1,STACKFRAMESIZE
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	b	exit_vmx_ops		/* tail call optimise */

/*
 * .Lvmx_unaligned_copy - VMX copy when source and dest are not relatively
 * 16B aligned.
 *
 * Branched to from .Lvmx_copy when the low 4 bits of (source ^ dest) are
 * non-zero. The frame pushed by .Lvmx_copy is still active.
 *
 * In:   r3 = dest cursor, r4 = source cursor, r5 = bytes remaining
 *
 * The destination is brought to 16B and then 128B alignment so that all
 * vector stores are aligned. Source data is read as aligned quadwords
 * (lvx ignores the low 4 address bits), and each output quadword is built
 * by VPERM from two consecutive input quadwords under the control vector
 * v16. v0 always carries the most recently loaded source quadword into the
 * next step, and r4 runs 16 bytes ahead of the true source position until
 * label 11 undoes that.
 *
 * "mtocrf 0x01,rX" copies the low four bits of rX into cr7; cr7*4+3 is the
 * least significant of those bits and cr7*4+0 the most significant.
 */
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)		/* r6 = bytes to the next 16B boundary */

	bf	cr7*4+3,1f		/* 1 byte */
	lbz	r0,0(r4)
	addi	r4,r4,1
	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f		/* 2 bytes */
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f		/* 4 bytes */
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f		/* 8 bytes */
	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
	lwz	r7,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7			/* cr7 = 16/32/64-byte bits of -dest */
	clrldi	r6,r6,(64-7)		/* r6 = bytes to the next 128B boundary */

	/* Index registers for the indexed vector loads/stores */
	li	r9,16
	li	r10,32
	li	r11,48

	LVS(v16,0,r4)		/* Setup permute control vector */
	lvx	v0,0,r4			/* prime v0 with the first source quadword */
	addi	r4,r4,16		/* r4 now runs 16 bytes ahead */

	bf	cr7*4+3,5f		/* 16 bytes */
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1		/* v0 = last quadword loaded */

5:	bf	cr7*4+2,6f		/* 32 bytes */
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
	stvx	v8,0,r3
	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f		/* 64 bytes */
	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
	stvx	v8,0,r3
	stvx	v9,r3,r9
	stvx	v10,r3,r10
	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7			/* number of whole 128B blocks */

	/* r14-r16 become index registers; save them in the frame first */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
	stvx	v8,0,r3
	stvx	v9,r3,r9
	stvx	v10,r3,r10
	stvx	v11,r3,r11
	stvx	v12,r3,r12
	stvx	v13,r3,r14
	stvx	v14,r3,r15
	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 = 64/32/16-byte bits of r5 */

	bf	cr7*4+1,9f		/* 64 bytes */
	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
	stvx	v8,0,r3
	stvx	v9,r3,r9
	stvx	v10,r3,r10
	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f		/* 32 bytes */
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
	stvx	v8,0,r3
	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f		/* 16 bytes */
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
	stvx	v8,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f		/* 8 bytes */
	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
	lwz	r6,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f		/* 4 bytes */
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f		/* 2 bytes */
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f		/* 1 byte */
	lbz	r0,0(r4)
	stb	r0,0(r3)

	/*
	 * Pop the frame and reload the original dest into r3, then leave via
	 * exit_vmx_ops.
	 * NOTE(review): exit_vmx_ops is defined elsewhere; presumably it
	 * passes r3 through as the return value - confirm.
	 */
15:	addi	r1,r1,STACKFRAMESIZE
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	b	exit_vmx_ops		/* tail call optimise */
#endif /* CONFIG_ALTIVEC */