18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Copyright (C) IBM Corporation, 2011
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Author: Anton Blanchard <anton@au.ibm.com>
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci#include <asm/ppc_asm.h>
98c2ecf20Sopenharmony_ci
/*
 * SELFTEST_CASE may be predefined by whoever builds this file; when it is
 * not, it defaults to 0 here. It is assigned to the test_feature symbol at
 * the entry point below.
 */
108c2ecf20Sopenharmony_ci#ifndef SELFTEST_CASE
118c2ecf20Sopenharmony_ci/* 0 == don't use VMX, 1 == use VMX */
128c2ecf20Sopenharmony_ci#define SELFTEST_CASE	0
138c2ecf20Sopenharmony_ci#endif
148c2ecf20Sopenharmony_ci
/*
 * Endian-dependent wrappers used by the unaligned VMX copy path.
 * With __BIG_ENDIAN__ defined, LVS expands to lvsl and VPERM passes its two
 * source vectors in the order given. Otherwise LVS expands to lvsr and
 * VPERM swaps the two source vectors (VRA/VRB) before handing them to vperm.
 */
158c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN__
168c2ecf20Sopenharmony_ci#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
178c2ecf20Sopenharmony_ci#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
188c2ecf20Sopenharmony_ci#else
198c2ecf20Sopenharmony_ci#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
208c2ecf20Sopenharmony_ci#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
218c2ecf20Sopenharmony_ci#endif
228c2ecf20Sopenharmony_ci
/*
 * Fault-annotation macros. Each one drops a numeric local label and emits an
 * EX_TABLE entry pairing that label with a fixup label. They are written as
 * a prefix on the same line as a memory access ("err1; lbz r0,0(r4)"), so
 * the label is the address of that access.
 * NOTE(review): EX_TABLE is defined outside this file; presumably it adds an
 * exception-table entry that redirects a fault at the first address to the
 * second one - confirm against its definition.
 *
 * err1: used where no stack frame has been pushed; fixup is .Ldo_err1.
 */
238c2ecf20Sopenharmony_ci	.macro err1
248c2ecf20Sopenharmony_ci100:
258c2ecf20Sopenharmony_ci	EX_TABLE(100b,.Ldo_err1)
268c2ecf20Sopenharmony_ci	.endm
278c2ecf20Sopenharmony_ci
/*
 * err2: used inside the non-VMX 128B loop, where a stack frame is live and
 * r14-r22 have been saved in it; fixup is .Ldo_err2.
 */
288c2ecf20Sopenharmony_ci	.macro err2
298c2ecf20Sopenharmony_ci200:
308c2ecf20Sopenharmony_ci	EX_TABLE(200b,.Ldo_err2)
318c2ecf20Sopenharmony_ci	.endm
328c2ecf20Sopenharmony_ci
/*
 * VMX-path fault annotations and their fixups; only assembled when
 * CONFIG_ALTIVEC is defined.
 *
 * err3: used on the VMX path while its stack frame is live and r14-r16
 *       still hold the caller's values; fixup is .Ldo_err3.
 * err4: used inside the VMX 128B loops, after r14-r16 have been saved to
 *       the frame and reused as index registers; fixup is .Ldo_err4.
 */
338c2ecf20Sopenharmony_ci#ifdef CONFIG_ALTIVEC
348c2ecf20Sopenharmony_ci	.macro err3
358c2ecf20Sopenharmony_ci300:
368c2ecf20Sopenharmony_ci	EX_TABLE(300b,.Ldo_err3)
378c2ecf20Sopenharmony_ci	.endm
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci	.macro err4
408c2ecf20Sopenharmony_ci400:
418c2ecf20Sopenharmony_ci	EX_TABLE(400b,.Ldo_err4)
428c2ecf20Sopenharmony_ci	.endm
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci
/* Fixup for err4: reload r14-r16 from the frame, then continue as for err3. */
458c2ecf20Sopenharmony_ci.Ldo_err4:
468c2ecf20Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
478c2ecf20Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
488c2ecf20Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
/*
 * Fixup for err3: call exit_vmx_usercopy, then reload LR (overwritten by the
 * bl) from the slot .Lvmx_copy stored it in before pushing its frame, and
 * join .Lexit, which pops the frame.
 */
498c2ecf20Sopenharmony_ci.Ldo_err3:
508c2ecf20Sopenharmony_ci	bl	exit_vmx_usercopy
518c2ecf20Sopenharmony_ci	ld	r0,STACKFRAMESIZE+16(r1)
528c2ecf20Sopenharmony_ci	mtlr	r0
538c2ecf20Sopenharmony_ci	b	.Lexit
548c2ecf20Sopenharmony_ci#endif /* CONFIG_ALTIVEC */
558c2ecf20Sopenharmony_ci
/*
 * Fixup for err2: reload r14-r22 from the frame pushed by the non-VMX 128B
 * loop, then fall through to pop that frame.
 */
568c2ecf20Sopenharmony_ci.Ldo_err2:
578c2ecf20Sopenharmony_ci	ld	r22,STK_REG(R22)(r1)
588c2ecf20Sopenharmony_ci	ld	r21,STK_REG(R21)(r1)
598c2ecf20Sopenharmony_ci	ld	r20,STK_REG(R20)(r1)
608c2ecf20Sopenharmony_ci	ld	r19,STK_REG(R19)(r1)
618c2ecf20Sopenharmony_ci	ld	r18,STK_REG(R18)(r1)
628c2ecf20Sopenharmony_ci	ld	r17,STK_REG(R17)(r1)
638c2ecf20Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
648c2ecf20Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
658c2ecf20Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
/* Shared by the err2 and err3/err4 fixups: pop the stack frame. */
668c2ecf20Sopenharmony_ci.Lexit:
678c2ecf20Sopenharmony_ci	addi	r1,r1,STACKFRAMESIZE
/*
 * Fixup for err1 and common tail of every other fixup: reload the original
 * r3/r4/r5 that the entry point stored below the stack pointer and branch
 * to __copy_tofrom_user_base with them.
 * NOTE(review): __copy_tofrom_user_base is defined elsewhere; presumably it
 * redoes the copy and produces the final return value - confirm.
 */
688c2ecf20Sopenharmony_ci.Ldo_err1:
698c2ecf20Sopenharmony_ci	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
708c2ecf20Sopenharmony_ci	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
718c2ecf20Sopenharmony_ci	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
728c2ecf20Sopenharmony_ci	b	__copy_tofrom_user_base
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci
/*
 * __copy_tofrom_user_power7
 *
 * Register use as seen in this file:
 *   r3 = destination address (stored to)
 *   r4 = source address (loaded from)
 *   r5 = number of bytes to copy
 * The non-VMX path returns with r3 = 0; the VMX paths finish by branching
 * to exit_vmx_usercopy. A fault on an annotated access ends up branching to
 * __copy_tofrom_user_base with the original r3/r4/r5.
 *
 * Dispatch done here:
 *   r5 < 16                               -> .Lshort_copy
 *   r5 > 3328 (inside the ALTIVEC section) -> .Lvmx_copy
 *   otherwise                             -> fall through to .Lnonvmx_copy
 */
758c2ecf20Sopenharmony_ci_GLOBAL(__copy_tofrom_user_power7)
	/* cr0 = length vs 16, cr1 = length vs the VMX threshold (3328). */
768c2ecf20Sopenharmony_ci	cmpldi	r5,16
778c2ecf20Sopenharmony_ci	cmpldi	cr1,r5,3328
788c2ecf20Sopenharmony_ci
	/*
	 * Stash the original arguments below the stack pointer (no frame is
	 * pushed here). The fault fixups reload them from these slots, and
	 * .Lvmx_copy reloads them after its call to enter_vmx_usercopy.
	 */
798c2ecf20Sopenharmony_ci	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
808c2ecf20Sopenharmony_ci	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
818c2ecf20Sopenharmony_ci	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci	blt	.Lshort_copy
848c2ecf20Sopenharmony_ci
	/*
	 * The branch to the VMX path sits in a feature section keyed on
	 * CPU_FTR_ALTIVEC.
	 * NOTE(review): test_feature is not referenced anywhere else in the
	 * visible source; presumably the feature-section macros consume it in
	 * selftest builds, and the section is patched out on CPUs without
	 * the feature - confirm against the macro definitions.
	 */
858c2ecf20Sopenharmony_ci#ifdef CONFIG_ALTIVEC
868c2ecf20Sopenharmony_citest_feature = SELFTEST_CASE
878c2ecf20Sopenharmony_ciBEGIN_FTR_SECTION
888c2ecf20Sopenharmony_ci	bgt	cr1,.Lvmx_copy
898c2ecf20Sopenharmony_ciEND_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
908c2ecf20Sopenharmony_ci#endif
918c2ecf20Sopenharmony_ci
/*
 * Integer (non-VMX) copy path. Reached by falling through from the entry
 * point, or from .Lunwind_stack_nonvmx_copy when the VMX path backs out.
 * Steps: align the source to 8 bytes, copy whole 128B blocks in a counted
 * loop, copy the 64/32/16-byte remainders, then fall through into
 * .Lshort_copy for the last 0-15 bytes.
 */
928c2ecf20Sopenharmony_ci.Lnonvmx_copy:
938c2ecf20Sopenharmony_ci	/* Get the source 8B aligned */
	/*
	 * r6 = -src. Its low 4 bits go into cr7 so that the bf tests below
	 * can pick out the 1-, 2- and 4-byte steps; r6 is then reduced to
	 * its low 3 bits, the number of bytes needed to reach alignment.
	 */
948c2ecf20Sopenharmony_ci	neg	r6,r4
958c2ecf20Sopenharmony_ci	mtocrf	0x01,r6
968c2ecf20Sopenharmony_ci	clrldi	r6,r6,(64-3)
978c2ecf20Sopenharmony_ci
	/* 1 byte */
988c2ecf20Sopenharmony_ci	bf	cr7*4+3,1f
998c2ecf20Sopenharmony_cierr1;	lbz	r0,0(r4)
1008c2ecf20Sopenharmony_ci	addi	r4,r4,1
1018c2ecf20Sopenharmony_cierr1;	stb	r0,0(r3)
1028c2ecf20Sopenharmony_ci	addi	r3,r3,1
1038c2ecf20Sopenharmony_ci
	/* 2 bytes */
1048c2ecf20Sopenharmony_ci1:	bf	cr7*4+2,2f
1058c2ecf20Sopenharmony_cierr1;	lhz	r0,0(r4)
1068c2ecf20Sopenharmony_ci	addi	r4,r4,2
1078c2ecf20Sopenharmony_cierr1;	sth	r0,0(r3)
1088c2ecf20Sopenharmony_ci	addi	r3,r3,2
1098c2ecf20Sopenharmony_ci
	/* 4 bytes */
1108c2ecf20Sopenharmony_ci2:	bf	cr7*4+1,3f
1118c2ecf20Sopenharmony_cierr1;	lwz	r0,0(r4)
1128c2ecf20Sopenharmony_ci	addi	r4,r4,4
1138c2ecf20Sopenharmony_cierr1;	stw	r0,0(r3)
1148c2ecf20Sopenharmony_ci	addi	r3,r3,4
1158c2ecf20Sopenharmony_ci
	/* Account for the alignment bytes; skip the 128B loop if < 128 left. */
1168c2ecf20Sopenharmony_ci3:	sub	r5,r5,r6
1178c2ecf20Sopenharmony_ci	cmpldi	r5,128
1188c2ecf20Sopenharmony_ci	blt	5f
1198c2ecf20Sopenharmony_ci
	/*
	 * Push a frame and save r14-r22 in it; the loop below uses r14-r21
	 * as additional data registers (r22 is saved and restored but is not
	 * otherwise touched by this path). LR is stored at offset 16 of the
	 * caller's frame, i.e. STACKFRAMESIZE+16 from the new r1.
	 * From here until the frame is popped, faults must use err2.
	 */
1208c2ecf20Sopenharmony_ci	mflr	r0
1218c2ecf20Sopenharmony_ci	stdu	r1,-STACKFRAMESIZE(r1)
1228c2ecf20Sopenharmony_ci	std	r14,STK_REG(R14)(r1)
1238c2ecf20Sopenharmony_ci	std	r15,STK_REG(R15)(r1)
1248c2ecf20Sopenharmony_ci	std	r16,STK_REG(R16)(r1)
1258c2ecf20Sopenharmony_ci	std	r17,STK_REG(R17)(r1)
1268c2ecf20Sopenharmony_ci	std	r18,STK_REG(R18)(r1)
1278c2ecf20Sopenharmony_ci	std	r19,STK_REG(R19)(r1)
1288c2ecf20Sopenharmony_ci	std	r20,STK_REG(R20)(r1)
1298c2ecf20Sopenharmony_ci	std	r21,STK_REG(R21)(r1)
1308c2ecf20Sopenharmony_ci	std	r22,STK_REG(R22)(r1)
1318c2ecf20Sopenharmony_ci	std	r0,STACKFRAMESIZE+16(r1)
1328c2ecf20Sopenharmony_ci
	/* CTR = number of whole 128B blocks (at least 1 here). */
1338c2ecf20Sopenharmony_ci	srdi	r6,r5,7
1348c2ecf20Sopenharmony_ci	mtctr	r6
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	/* Now do cacheline (128B) sized loads and stores. */
1378c2ecf20Sopenharmony_ci	.align	5
1388c2ecf20Sopenharmony_ci4:
	/* Load sixteen doublewords from the source ... */
1398c2ecf20Sopenharmony_cierr2;	ld	r0,0(r4)
1408c2ecf20Sopenharmony_cierr2;	ld	r6,8(r4)
1418c2ecf20Sopenharmony_cierr2;	ld	r7,16(r4)
1428c2ecf20Sopenharmony_cierr2;	ld	r8,24(r4)
1438c2ecf20Sopenharmony_cierr2;	ld	r9,32(r4)
1448c2ecf20Sopenharmony_cierr2;	ld	r10,40(r4)
1458c2ecf20Sopenharmony_cierr2;	ld	r11,48(r4)
1468c2ecf20Sopenharmony_cierr2;	ld	r12,56(r4)
1478c2ecf20Sopenharmony_cierr2;	ld	r14,64(r4)
1488c2ecf20Sopenharmony_cierr2;	ld	r15,72(r4)
1498c2ecf20Sopenharmony_cierr2;	ld	r16,80(r4)
1508c2ecf20Sopenharmony_cierr2;	ld	r17,88(r4)
1518c2ecf20Sopenharmony_cierr2;	ld	r18,96(r4)
1528c2ecf20Sopenharmony_cierr2;	ld	r19,104(r4)
1538c2ecf20Sopenharmony_cierr2;	ld	r20,112(r4)
1548c2ecf20Sopenharmony_cierr2;	ld	r21,120(r4)
1558c2ecf20Sopenharmony_ci	addi	r4,r4,128
	/* ... and store them to the destination. */
1568c2ecf20Sopenharmony_cierr2;	std	r0,0(r3)
1578c2ecf20Sopenharmony_cierr2;	std	r6,8(r3)
1588c2ecf20Sopenharmony_cierr2;	std	r7,16(r3)
1598c2ecf20Sopenharmony_cierr2;	std	r8,24(r3)
1608c2ecf20Sopenharmony_cierr2;	std	r9,32(r3)
1618c2ecf20Sopenharmony_cierr2;	std	r10,40(r3)
1628c2ecf20Sopenharmony_cierr2;	std	r11,48(r3)
1638c2ecf20Sopenharmony_cierr2;	std	r12,56(r3)
1648c2ecf20Sopenharmony_cierr2;	std	r14,64(r3)
1658c2ecf20Sopenharmony_cierr2;	std	r15,72(r3)
1668c2ecf20Sopenharmony_cierr2;	std	r16,80(r3)
1678c2ecf20Sopenharmony_cierr2;	std	r17,88(r3)
1688c2ecf20Sopenharmony_cierr2;	std	r18,96(r3)
1698c2ecf20Sopenharmony_cierr2;	std	r19,104(r3)
1708c2ecf20Sopenharmony_cierr2;	std	r20,112(r3)
1718c2ecf20Sopenharmony_cierr2;	std	r21,120(r3)
1728c2ecf20Sopenharmony_ci	addi	r3,r3,128
1738c2ecf20Sopenharmony_ci	bdnz	4b
1748c2ecf20Sopenharmony_ci
	/* r5 = bytes left after the 128B blocks (low 7 bits of the length). */
1758c2ecf20Sopenharmony_ci	clrldi	r5,r5,(64-7)
1768c2ecf20Sopenharmony_ci
	/* Restore the saved registers and pop the frame; err1 applies again. */
1778c2ecf20Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
1788c2ecf20Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
1798c2ecf20Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
1808c2ecf20Sopenharmony_ci	ld	r17,STK_REG(R17)(r1)
1818c2ecf20Sopenharmony_ci	ld	r18,STK_REG(R18)(r1)
1828c2ecf20Sopenharmony_ci	ld	r19,STK_REG(R19)(r1)
1838c2ecf20Sopenharmony_ci	ld	r20,STK_REG(R20)(r1)
1848c2ecf20Sopenharmony_ci	ld	r21,STK_REG(R21)(r1)
1858c2ecf20Sopenharmony_ci	ld	r22,STK_REG(R22)(r1)
1868c2ecf20Sopenharmony_ci	addi	r1,r1,STACKFRAMESIZE
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	/* Up to 127B to go */
	/*
	 * cr7 = low 4 bits of (r5 >> 4): the bf tests below pick out the
	 * 64-, 32- and 16-byte steps of the remaining length.
	 */
1898c2ecf20Sopenharmony_ci5:	srdi	r6,r5,4
1908c2ecf20Sopenharmony_ci	mtocrf	0x01,r6
1918c2ecf20Sopenharmony_ci
	/* 64 bytes */
1928c2ecf20Sopenharmony_ci6:	bf	cr7*4+1,7f
1938c2ecf20Sopenharmony_cierr1;	ld	r0,0(r4)
1948c2ecf20Sopenharmony_cierr1;	ld	r6,8(r4)
1958c2ecf20Sopenharmony_cierr1;	ld	r7,16(r4)
1968c2ecf20Sopenharmony_cierr1;	ld	r8,24(r4)
1978c2ecf20Sopenharmony_cierr1;	ld	r9,32(r4)
1988c2ecf20Sopenharmony_cierr1;	ld	r10,40(r4)
1998c2ecf20Sopenharmony_cierr1;	ld	r11,48(r4)
2008c2ecf20Sopenharmony_cierr1;	ld	r12,56(r4)
2018c2ecf20Sopenharmony_ci	addi	r4,r4,64
2028c2ecf20Sopenharmony_cierr1;	std	r0,0(r3)
2038c2ecf20Sopenharmony_cierr1;	std	r6,8(r3)
2048c2ecf20Sopenharmony_cierr1;	std	r7,16(r3)
2058c2ecf20Sopenharmony_cierr1;	std	r8,24(r3)
2068c2ecf20Sopenharmony_cierr1;	std	r9,32(r3)
2078c2ecf20Sopenharmony_cierr1;	std	r10,40(r3)
2088c2ecf20Sopenharmony_cierr1;	std	r11,48(r3)
2098c2ecf20Sopenharmony_cierr1;	std	r12,56(r3)
2108c2ecf20Sopenharmony_ci	addi	r3,r3,64
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci	/* Up to 63B to go */
	/* 32 bytes */
2138c2ecf20Sopenharmony_ci7:	bf	cr7*4+2,8f
2148c2ecf20Sopenharmony_cierr1;	ld	r0,0(r4)
2158c2ecf20Sopenharmony_cierr1;	ld	r6,8(r4)
2168c2ecf20Sopenharmony_cierr1;	ld	r7,16(r4)
2178c2ecf20Sopenharmony_cierr1;	ld	r8,24(r4)
2188c2ecf20Sopenharmony_ci	addi	r4,r4,32
2198c2ecf20Sopenharmony_cierr1;	std	r0,0(r3)
2208c2ecf20Sopenharmony_cierr1;	std	r6,8(r3)
2218c2ecf20Sopenharmony_cierr1;	std	r7,16(r3)
2228c2ecf20Sopenharmony_cierr1;	std	r8,24(r3)
2238c2ecf20Sopenharmony_ci	addi	r3,r3,32
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	/* Up to 31B to go */
	/* 16 bytes */
2268c2ecf20Sopenharmony_ci8:	bf	cr7*4+3,9f
2278c2ecf20Sopenharmony_cierr1;	ld	r0,0(r4)
2288c2ecf20Sopenharmony_cierr1;	ld	r6,8(r4)
2298c2ecf20Sopenharmony_ci	addi	r4,r4,16
2308c2ecf20Sopenharmony_cierr1;	std	r0,0(r3)
2318c2ecf20Sopenharmony_cierr1;	std	r6,8(r3)
2328c2ecf20Sopenharmony_ci	addi	r3,r3,16
2338c2ecf20Sopenharmony_ci
	/* r5 = final 0-15 bytes; execution continues into .Lshort_copy. */
2348c2ecf20Sopenharmony_ci9:	clrldi	r5,r5,(64-4)
2358c2ecf20Sopenharmony_ci
/*
 * Copy the last 0-15 bytes and return 0 in r3. Entered directly from the
 * entry point when the whole length is below 16, or by falling through from
 * the end of .Lnonvmx_copy. No stack frame is live here, so faults use err1.
 */
2368c2ecf20Sopenharmony_ci	/* Up to 15B to go */
2378c2ecf20Sopenharmony_ci.Lshort_copy:
	/* cr7 = low 4 bits of r5: one bit each for the 8/4/2/1-byte steps. */
2388c2ecf20Sopenharmony_ci	mtocrf	0x01,r5
	/* 8 bytes, moved as two words */
2398c2ecf20Sopenharmony_ci	bf	cr7*4+0,12f
2408c2ecf20Sopenharmony_cierr1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
2418c2ecf20Sopenharmony_cierr1;	lwz	r6,4(r4)
2428c2ecf20Sopenharmony_ci	addi	r4,r4,8
2438c2ecf20Sopenharmony_cierr1;	stw	r0,0(r3)
2448c2ecf20Sopenharmony_cierr1;	stw	r6,4(r3)
2458c2ecf20Sopenharmony_ci	addi	r3,r3,8
2468c2ecf20Sopenharmony_ci
	/* 4 bytes */
2478c2ecf20Sopenharmony_ci12:	bf	cr7*4+1,13f
2488c2ecf20Sopenharmony_cierr1;	lwz	r0,0(r4)
2498c2ecf20Sopenharmony_ci	addi	r4,r4,4
2508c2ecf20Sopenharmony_cierr1;	stw	r0,0(r3)
2518c2ecf20Sopenharmony_ci	addi	r3,r3,4
2528c2ecf20Sopenharmony_ci
	/* 2 bytes */
2538c2ecf20Sopenharmony_ci13:	bf	cr7*4+2,14f
2548c2ecf20Sopenharmony_cierr1;	lhz	r0,0(r4)
2558c2ecf20Sopenharmony_ci	addi	r4,r4,2
2568c2ecf20Sopenharmony_cierr1;	sth	r0,0(r3)
2578c2ecf20Sopenharmony_ci	addi	r3,r3,2
2588c2ecf20Sopenharmony_ci
	/* 1 byte; the pointers are not advanced because nothing follows */
2598c2ecf20Sopenharmony_ci14:	bf	cr7*4+3,15f
2608c2ecf20Sopenharmony_cierr1;	lbz	r0,0(r4)
2618c2ecf20Sopenharmony_cierr1;	stb	r0,0(r3)
2628c2ecf20Sopenharmony_ci
	/* Everything was copied without a fault: return 0. */
2638c2ecf20Sopenharmony_ci15:	li	r3,0
2648c2ecf20Sopenharmony_ci	blr
2658c2ecf20Sopenharmony_ci
/*
 * Back-out from the VMX path: .Lvmx_copy branches here when
 * enter_vmx_usercopy returned 0. Pop the frame .Lvmx_copy pushed and redo
 * the copy on the integer path; r3/r4/r5 were already reloaded with the
 * original arguments by .Lvmx_copy.
 */
2668c2ecf20Sopenharmony_ci.Lunwind_stack_nonvmx_copy:
2678c2ecf20Sopenharmony_ci	addi	r1,r1,STACKFRAMESIZE
2688c2ecf20Sopenharmony_ci	b	.Lnonvmx_copy
2698c2ecf20Sopenharmony_ci
/*
 * VMX copy path, entered from the entry point when r5 > 3328.
 * Pushes a frame and calls enter_vmx_usercopy; when that returns 0 the copy
 * is handed back to the integer path via .Lunwind_stack_nonvmx_copy.
 * Otherwise, if source and destination share the same offset within a 16B
 * quadword, the copy is done here with lvx/stvx; if not, control moves to
 * .Lvmx_unaligned_copy.
 * The label itself sits outside the CONFIG_ALTIVEC conditional; the body is
 * only assembled when CONFIG_ALTIVEC is defined.
 */
2708c2ecf20Sopenharmony_ci.Lvmx_copy:
2718c2ecf20Sopenharmony_ci#ifdef CONFIG_ALTIVEC
	/* Save LR in the caller's frame, push our frame, make the call. */
2728c2ecf20Sopenharmony_ci	mflr	r0
2738c2ecf20Sopenharmony_ci	std	r0,16(r1)
2748c2ecf20Sopenharmony_ci	stdu	r1,-STACKFRAMESIZE(r1)
2758c2ecf20Sopenharmony_ci	bl	enter_vmx_usercopy
	/*
	 * cr1 records whether the call returned 0; it is only tested after
	 * the prefetch setup below. LR and the original r3/r4/r5 are then
	 * reloaded: the slots written at entry relative to the old r1 are
	 * at STK_REG(...) relative to the new r1.
	 */
2768c2ecf20Sopenharmony_ci	cmpwi	cr1,r3,0
2778c2ecf20Sopenharmony_ci	ld	r0,STACKFRAMESIZE+16(r1)
2788c2ecf20Sopenharmony_ci	ld	r3,STK_REG(R31)(r1)
2798c2ecf20Sopenharmony_ci	ld	r4,STK_REG(R30)(r1)
2808c2ecf20Sopenharmony_ci	ld	r5,STK_REG(R29)(r1)
2818c2ecf20Sopenharmony_ci	mtlr	r0
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	/*
2848c2ecf20Sopenharmony_ci	 * We prefetch both the source and destination using enhanced touch
2858c2ecf20Sopenharmony_ci	 * instructions. We use a stream ID of 0 for the load side and
2868c2ecf20Sopenharmony_ci	 * 1 for the store side.
2878c2ecf20Sopenharmony_ci	 */
	/* r6 / r9 = source / destination rounded down to a 128B boundary. */
2888c2ecf20Sopenharmony_ci	clrrdi	r6,r4,7
2898c2ecf20Sopenharmony_ci	clrrdi	r9,r3,7
2908c2ecf20Sopenharmony_ci	ori	r9,r9,1		/* stream=1 */
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
2938c2ecf20Sopenharmony_ci	cmpldi	r7,0x3FF
2948c2ecf20Sopenharmony_ci	ble	1f
2958c2ecf20Sopenharmony_ci	li	r7,0x3FF
2968c2ecf20Sopenharmony_ci1:	lis	r0,0x0E00	/* depth=7 */
2978c2ecf20Sopenharmony_ci	sldi	r7,r7,7
2988c2ecf20Sopenharmony_ci	or	r7,r7,r0
2998c2ecf20Sopenharmony_ci	ori	r10,r7,1	/* stream=1 */
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_ci	lis	r8,0x8000	/* GO=1 */
3028c2ecf20Sopenharmony_ci	clrldi	r8,r8,32
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	/* setup read stream 0 */
3058c2ecf20Sopenharmony_ci	dcbt	0,r6,0b01000   /* addr from */
3068c2ecf20Sopenharmony_ci	dcbt	0,r7,0b01010   /* length and depth from */
3078c2ecf20Sopenharmony_ci	/* setup write stream 1 */
3088c2ecf20Sopenharmony_ci	dcbtst	0,r9,0b01000   /* addr to */
3098c2ecf20Sopenharmony_ci	dcbtst	0,r10,0b01010  /* length and depth to */
3108c2ecf20Sopenharmony_ci	eieio
3118c2ecf20Sopenharmony_ci	dcbt	0,r8,0b01010	/* all streams GO */
3128c2ecf20Sopenharmony_ci
	/* enter_vmx_usercopy returned 0: back out to the integer path. */
3138c2ecf20Sopenharmony_ci	beq	cr1,.Lunwind_stack_nonvmx_copy
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_ci	/*
3168c2ecf20Sopenharmony_ci	 * If source and destination are not relatively aligned we use a
3178c2ecf20Sopenharmony_ci	 * slower permute loop.
3188c2ecf20Sopenharmony_ci	 */
	/* r6 = low 4 bits of (src ^ dst); non-zero means they differ mod 16. */
3198c2ecf20Sopenharmony_ci	xor	r6,r4,r3
3208c2ecf20Sopenharmony_ci	rldicl.	r6,r6,0,(64-4)
3218c2ecf20Sopenharmony_ci	bne	.Lvmx_unaligned_copy
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci	/* Get the destination 16B aligned */
	/*
	 * cr7 = low 4 bits of -dst, selecting the 1/2/4/8-byte steps below;
	 * r6 = number of bytes those steps copy. The frame is live, so
	 * faults use err3.
	 */
3248c2ecf20Sopenharmony_ci	neg	r6,r3
3258c2ecf20Sopenharmony_ci	mtocrf	0x01,r6
3268c2ecf20Sopenharmony_ci	clrldi	r6,r6,(64-4)
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci	bf	cr7*4+3,1f
3298c2ecf20Sopenharmony_cierr3;	lbz	r0,0(r4)
3308c2ecf20Sopenharmony_ci	addi	r4,r4,1
3318c2ecf20Sopenharmony_cierr3;	stb	r0,0(r3)
3328c2ecf20Sopenharmony_ci	addi	r3,r3,1
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci1:	bf	cr7*4+2,2f
3358c2ecf20Sopenharmony_cierr3;	lhz	r0,0(r4)
3368c2ecf20Sopenharmony_ci	addi	r4,r4,2
3378c2ecf20Sopenharmony_cierr3;	sth	r0,0(r3)
3388c2ecf20Sopenharmony_ci	addi	r3,r3,2
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci2:	bf	cr7*4+1,3f
3418c2ecf20Sopenharmony_cierr3;	lwz	r0,0(r4)
3428c2ecf20Sopenharmony_ci	addi	r4,r4,4
3438c2ecf20Sopenharmony_cierr3;	stw	r0,0(r3)
3448c2ecf20Sopenharmony_ci	addi	r3,r3,4
3458c2ecf20Sopenharmony_ci
3468c2ecf20Sopenharmony_ci3:	bf	cr7*4+0,4f
3478c2ecf20Sopenharmony_cierr3;	ld	r0,0(r4)
3488c2ecf20Sopenharmony_ci	addi	r4,r4,8
3498c2ecf20Sopenharmony_cierr3;	std	r0,0(r3)
3508c2ecf20Sopenharmony_ci	addi	r3,r3,8
3518c2ecf20Sopenharmony_ci
	/* Account for the alignment bytes just copied. */
3528c2ecf20Sopenharmony_ci4:	sub	r5,r5,r6
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	/* Get the destination 128B aligned */
	/*
	 * cr7 = low 4 bits of (-dst >> 4), selecting the 16/32/64-byte
	 * vector steps below; r6 = bytes to the next 128B boundary.
	 */
3558c2ecf20Sopenharmony_ci	neg	r6,r3
3568c2ecf20Sopenharmony_ci	srdi	r7,r6,4
3578c2ecf20Sopenharmony_ci	mtocrf	0x01,r7
3588c2ecf20Sopenharmony_ci	clrldi	r6,r6,(64-7)
3598c2ecf20Sopenharmony_ci
	/* Index registers for the second to fourth quadword of a group. */
3608c2ecf20Sopenharmony_ci	li	r9,16
3618c2ecf20Sopenharmony_ci	li	r10,32
3628c2ecf20Sopenharmony_ci	li	r11,48
3638c2ecf20Sopenharmony_ci
	/* 16 bytes */
3648c2ecf20Sopenharmony_ci	bf	cr7*4+3,5f
3658c2ecf20Sopenharmony_cierr3;	lvx	v1,0,r4
3668c2ecf20Sopenharmony_ci	addi	r4,r4,16
3678c2ecf20Sopenharmony_cierr3;	stvx	v1,0,r3
3688c2ecf20Sopenharmony_ci	addi	r3,r3,16
3698c2ecf20Sopenharmony_ci
	/* 32 bytes */
3708c2ecf20Sopenharmony_ci5:	bf	cr7*4+2,6f
3718c2ecf20Sopenharmony_cierr3;	lvx	v1,0,r4
3728c2ecf20Sopenharmony_cierr3;	lvx	v0,r4,r9
3738c2ecf20Sopenharmony_ci	addi	r4,r4,32
3748c2ecf20Sopenharmony_cierr3;	stvx	v1,0,r3
3758c2ecf20Sopenharmony_cierr3;	stvx	v0,r3,r9
3768c2ecf20Sopenharmony_ci	addi	r3,r3,32
3778c2ecf20Sopenharmony_ci
	/* 64 bytes */
3788c2ecf20Sopenharmony_ci6:	bf	cr7*4+1,7f
3798c2ecf20Sopenharmony_cierr3;	lvx	v3,0,r4
3808c2ecf20Sopenharmony_cierr3;	lvx	v2,r4,r9
3818c2ecf20Sopenharmony_cierr3;	lvx	v1,r4,r10
3828c2ecf20Sopenharmony_cierr3;	lvx	v0,r4,r11
3838c2ecf20Sopenharmony_ci	addi	r4,r4,64
3848c2ecf20Sopenharmony_cierr3;	stvx	v3,0,r3
3858c2ecf20Sopenharmony_cierr3;	stvx	v2,r3,r9
3868c2ecf20Sopenharmony_cierr3;	stvx	v1,r3,r10
3878c2ecf20Sopenharmony_cierr3;	stvx	v0,r3,r11
3888c2ecf20Sopenharmony_ci	addi	r3,r3,64
3898c2ecf20Sopenharmony_ci
	/* r5 -= bytes copied above; r6 = number of whole 128B blocks. */
3908c2ecf20Sopenharmony_ci7:	sub	r5,r5,r6
3918c2ecf20Sopenharmony_ci	srdi	r6,r5,7
3928c2ecf20Sopenharmony_ci
	/*
	 * r14-r16 are needed as further index registers, so save them in
	 * the frame first. Until they are reloaded, faults must use err4.
	 */
3938c2ecf20Sopenharmony_ci	std	r14,STK_REG(R14)(r1)
3948c2ecf20Sopenharmony_ci	std	r15,STK_REG(R15)(r1)
3958c2ecf20Sopenharmony_ci	std	r16,STK_REG(R16)(r1)
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	li	r12,64
3988c2ecf20Sopenharmony_ci	li	r14,80
3998c2ecf20Sopenharmony_ci	li	r15,96
4008c2ecf20Sopenharmony_ci	li	r16,112
4018c2ecf20Sopenharmony_ci
4028c2ecf20Sopenharmony_ci	mtctr	r6
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci	/*
4058c2ecf20Sopenharmony_ci	 * Now do cacheline sized loads and stores. By this stage the
4068c2ecf20Sopenharmony_ci	 * cacheline stores are also cacheline aligned.
4078c2ecf20Sopenharmony_ci	 */
4088c2ecf20Sopenharmony_ci	.align	5
4098c2ecf20Sopenharmony_ci8:
4108c2ecf20Sopenharmony_cierr4;	lvx	v7,0,r4
4118c2ecf20Sopenharmony_cierr4;	lvx	v6,r4,r9
4128c2ecf20Sopenharmony_cierr4;	lvx	v5,r4,r10
4138c2ecf20Sopenharmony_cierr4;	lvx	v4,r4,r11
4148c2ecf20Sopenharmony_cierr4;	lvx	v3,r4,r12
4158c2ecf20Sopenharmony_cierr4;	lvx	v2,r4,r14
4168c2ecf20Sopenharmony_cierr4;	lvx	v1,r4,r15
4178c2ecf20Sopenharmony_cierr4;	lvx	v0,r4,r16
4188c2ecf20Sopenharmony_ci	addi	r4,r4,128
4198c2ecf20Sopenharmony_cierr4;	stvx	v7,0,r3
4208c2ecf20Sopenharmony_cierr4;	stvx	v6,r3,r9
4218c2ecf20Sopenharmony_cierr4;	stvx	v5,r3,r10
4228c2ecf20Sopenharmony_cierr4;	stvx	v4,r3,r11
4238c2ecf20Sopenharmony_cierr4;	stvx	v3,r3,r12
4248c2ecf20Sopenharmony_cierr4;	stvx	v2,r3,r14
4258c2ecf20Sopenharmony_cierr4;	stvx	v1,r3,r15
4268c2ecf20Sopenharmony_cierr4;	stvx	v0,r3,r16
4278c2ecf20Sopenharmony_ci	addi	r3,r3,128
4288c2ecf20Sopenharmony_ci	bdnz	8b
4298c2ecf20Sopenharmony_ci
	/* Index registers no longer needed: reload r14-r16; err3 again. */
4308c2ecf20Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
4318c2ecf20Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
4328c2ecf20Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_ci	/* Up to 127B to go */
	/* cr7 = low 4 bits of (r5 >> 4): the 64/32/16-byte steps. */
4358c2ecf20Sopenharmony_ci	clrldi	r5,r5,(64-7)
4368c2ecf20Sopenharmony_ci	srdi	r6,r5,4
4378c2ecf20Sopenharmony_ci	mtocrf	0x01,r6
4388c2ecf20Sopenharmony_ci
	/* 64 bytes */
4398c2ecf20Sopenharmony_ci	bf	cr7*4+1,9f
4408c2ecf20Sopenharmony_cierr3;	lvx	v3,0,r4
4418c2ecf20Sopenharmony_cierr3;	lvx	v2,r4,r9
4428c2ecf20Sopenharmony_cierr3;	lvx	v1,r4,r10
4438c2ecf20Sopenharmony_cierr3;	lvx	v0,r4,r11
4448c2ecf20Sopenharmony_ci	addi	r4,r4,64
4458c2ecf20Sopenharmony_cierr3;	stvx	v3,0,r3
4468c2ecf20Sopenharmony_cierr3;	stvx	v2,r3,r9
4478c2ecf20Sopenharmony_cierr3;	stvx	v1,r3,r10
4488c2ecf20Sopenharmony_cierr3;	stvx	v0,r3,r11
4498c2ecf20Sopenharmony_ci	addi	r3,r3,64
4508c2ecf20Sopenharmony_ci
	/* 32 bytes */
4518c2ecf20Sopenharmony_ci9:	bf	cr7*4+2,10f
4528c2ecf20Sopenharmony_cierr3;	lvx	v1,0,r4
4538c2ecf20Sopenharmony_cierr3;	lvx	v0,r4,r9
4548c2ecf20Sopenharmony_ci	addi	r4,r4,32
4558c2ecf20Sopenharmony_cierr3;	stvx	v1,0,r3
4568c2ecf20Sopenharmony_cierr3;	stvx	v0,r3,r9
4578c2ecf20Sopenharmony_ci	addi	r3,r3,32
4588c2ecf20Sopenharmony_ci
	/* 16 bytes */
4598c2ecf20Sopenharmony_ci10:	bf	cr7*4+3,11f
4608c2ecf20Sopenharmony_cierr3;	lvx	v1,0,r4
4618c2ecf20Sopenharmony_ci	addi	r4,r4,16
4628c2ecf20Sopenharmony_cierr3;	stvx	v1,0,r3
4638c2ecf20Sopenharmony_ci	addi	r3,r3,16
4648c2ecf20Sopenharmony_ci
4658c2ecf20Sopenharmony_ci	/* Up to 15B to go */
	/* cr7 = low 4 bits of r5: the 8/4/2/1-byte steps. */
4668c2ecf20Sopenharmony_ci11:	clrldi	r5,r5,(64-4)
4678c2ecf20Sopenharmony_ci	mtocrf	0x01,r5
4688c2ecf20Sopenharmony_ci	bf	cr7*4+0,12f
4698c2ecf20Sopenharmony_cierr3;	ld	r0,0(r4)
4708c2ecf20Sopenharmony_ci	addi	r4,r4,8
4718c2ecf20Sopenharmony_cierr3;	std	r0,0(r3)
4728c2ecf20Sopenharmony_ci	addi	r3,r3,8
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci12:	bf	cr7*4+1,13f
4758c2ecf20Sopenharmony_cierr3;	lwz	r0,0(r4)
4768c2ecf20Sopenharmony_ci	addi	r4,r4,4
4778c2ecf20Sopenharmony_cierr3;	stw	r0,0(r3)
4788c2ecf20Sopenharmony_ci	addi	r3,r3,4
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci13:	bf	cr7*4+2,14f
4818c2ecf20Sopenharmony_cierr3;	lhz	r0,0(r4)
4828c2ecf20Sopenharmony_ci	addi	r4,r4,2
4838c2ecf20Sopenharmony_cierr3;	sth	r0,0(r3)
4848c2ecf20Sopenharmony_ci	addi	r3,r3,2
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_ci14:	bf	cr7*4+3,15f
4878c2ecf20Sopenharmony_cierr3;	lbz	r0,0(r4)
4888c2ecf20Sopenharmony_cierr3;	stb	r0,0(r3)
4898c2ecf20Sopenharmony_ci
	/*
	 * Pop the frame and finish in exit_vmx_usercopy. LR was restored
	 * right after the enter_vmx_usercopy call, so that function returns
	 * straight to our caller.
	 * NOTE(review): the value left in r3 is whatever exit_vmx_usercopy
	 * returns; presumably 0 - confirm against its definition.
	 */
4908c2ecf20Sopenharmony_ci15:	addi	r1,r1,STACKFRAMESIZE
4918c2ecf20Sopenharmony_ci	b	exit_vmx_usercopy	/* tail call optimise */
4928c2ecf20Sopenharmony_ci
/*
 * VMX copy for the case where source and destination differ modulo 16.
 * Entered from .Lvmx_copy with its stack frame still live, LR already
 * restored and r3/r4/r5 holding the original arguments.
 * The destination is aligned exactly as in the aligned path. Each stored
 * quadword is then built with VPERM from two consecutive source quadwords,
 * using the control vector in v16; v0 always carries the most recently
 * loaded source quadword into the next step.
 */
4938c2ecf20Sopenharmony_ci.Lvmx_unaligned_copy:
4948c2ecf20Sopenharmony_ci	/* Get the destination 16B aligned */
	/*
	 * cr7 = low 4 bits of -dst, selecting the 1/2/4/8-byte steps below;
	 * r6 = number of bytes those steps copy.
	 */
4958c2ecf20Sopenharmony_ci	neg	r6,r3
4968c2ecf20Sopenharmony_ci	mtocrf	0x01,r6
4978c2ecf20Sopenharmony_ci	clrldi	r6,r6,(64-4)
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ci	bf	cr7*4+3,1f
5008c2ecf20Sopenharmony_cierr3;	lbz	r0,0(r4)
5018c2ecf20Sopenharmony_ci	addi	r4,r4,1
5028c2ecf20Sopenharmony_cierr3;	stb	r0,0(r3)
5038c2ecf20Sopenharmony_ci	addi	r3,r3,1
5048c2ecf20Sopenharmony_ci
5058c2ecf20Sopenharmony_ci1:	bf	cr7*4+2,2f
5068c2ecf20Sopenharmony_cierr3;	lhz	r0,0(r4)
5078c2ecf20Sopenharmony_ci	addi	r4,r4,2
5088c2ecf20Sopenharmony_cierr3;	sth	r0,0(r3)
5098c2ecf20Sopenharmony_ci	addi	r3,r3,2
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ci2:	bf	cr7*4+1,3f
5128c2ecf20Sopenharmony_cierr3;	lwz	r0,0(r4)
5138c2ecf20Sopenharmony_ci	addi	r4,r4,4
5148c2ecf20Sopenharmony_cierr3;	stw	r0,0(r3)
5158c2ecf20Sopenharmony_ci	addi	r3,r3,4
5168c2ecf20Sopenharmony_ci
	/* 8 bytes, moved as two words */
5178c2ecf20Sopenharmony_ci3:	bf	cr7*4+0,4f
5188c2ecf20Sopenharmony_cierr3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
5198c2ecf20Sopenharmony_cierr3;	lwz	r7,4(r4)
5208c2ecf20Sopenharmony_ci	addi	r4,r4,8
5218c2ecf20Sopenharmony_cierr3;	stw	r0,0(r3)
5228c2ecf20Sopenharmony_cierr3;	stw	r7,4(r3)
5238c2ecf20Sopenharmony_ci	addi	r3,r3,8
5248c2ecf20Sopenharmony_ci
	/* Account for the alignment bytes just copied. */
5258c2ecf20Sopenharmony_ci4:	sub	r5,r5,r6
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	/* Get the destination 128B aligned */
	/*
	 * cr7 = low 4 bits of (-dst >> 4), selecting the 16/32/64-byte
	 * vector steps below; r6 = bytes to the next 128B boundary.
	 */
5288c2ecf20Sopenharmony_ci	neg	r6,r3
5298c2ecf20Sopenharmony_ci	srdi	r7,r6,4
5308c2ecf20Sopenharmony_ci	mtocrf	0x01,r7
5318c2ecf20Sopenharmony_ci	clrldi	r6,r6,(64-7)
5328c2ecf20Sopenharmony_ci
	/* Index registers for the second to fourth quadword of a group. */
5338c2ecf20Sopenharmony_ci	li	r9,16
5348c2ecf20Sopenharmony_ci	li	r10,32
5358c2ecf20Sopenharmony_ci	li	r11,48
5368c2ecf20Sopenharmony_ci
	/*
	 * Prime the pipeline: v16 = permute control derived from the source
	 * address, v0 = the quadword containing the first source byte (lvx
	 * ignores the low four address bits). r4 is then advanced by 16, so
	 * from here on it runs 16 bytes ahead of the data being stored; the
	 * offset is removed again at label 11 below.
	 */
5378c2ecf20Sopenharmony_ci	LVS(v16,0,r4)		/* Setup permute control vector */
5388c2ecf20Sopenharmony_cierr3;	lvx	v0,0,r4
5398c2ecf20Sopenharmony_ci	addi	r4,r4,16
5408c2ecf20Sopenharmony_ci
	/* 16 bytes; vor copies v1 into v0 for the next step */
5418c2ecf20Sopenharmony_ci	bf	cr7*4+3,5f
5428c2ecf20Sopenharmony_cierr3;	lvx	v1,0,r4
5438c2ecf20Sopenharmony_ci	VPERM(v8,v0,v1,v16)
5448c2ecf20Sopenharmony_ci	addi	r4,r4,16
5458c2ecf20Sopenharmony_cierr3;	stvx	v8,0,r3
5468c2ecf20Sopenharmony_ci	addi	r3,r3,16
5478c2ecf20Sopenharmony_ci	vor	v0,v1,v1
5488c2ecf20Sopenharmony_ci
	/* 32 bytes; the last load lands in v0 directly */
5498c2ecf20Sopenharmony_ci5:	bf	cr7*4+2,6f
5508c2ecf20Sopenharmony_cierr3;	lvx	v1,0,r4
5518c2ecf20Sopenharmony_ci	VPERM(v8,v0,v1,v16)
5528c2ecf20Sopenharmony_cierr3;	lvx	v0,r4,r9
5538c2ecf20Sopenharmony_ci	VPERM(v9,v1,v0,v16)
5548c2ecf20Sopenharmony_ci	addi	r4,r4,32
5558c2ecf20Sopenharmony_cierr3;	stvx	v8,0,r3
5568c2ecf20Sopenharmony_cierr3;	stvx	v9,r3,r9
5578c2ecf20Sopenharmony_ci	addi	r3,r3,32
5588c2ecf20Sopenharmony_ci
	/* 64 bytes; the last load lands in v0 directly */
5598c2ecf20Sopenharmony_ci6:	bf	cr7*4+1,7f
5608c2ecf20Sopenharmony_cierr3;	lvx	v3,0,r4
5618c2ecf20Sopenharmony_ci	VPERM(v8,v0,v3,v16)
5628c2ecf20Sopenharmony_cierr3;	lvx	v2,r4,r9
5638c2ecf20Sopenharmony_ci	VPERM(v9,v3,v2,v16)
5648c2ecf20Sopenharmony_cierr3;	lvx	v1,r4,r10
5658c2ecf20Sopenharmony_ci	VPERM(v10,v2,v1,v16)
5668c2ecf20Sopenharmony_cierr3;	lvx	v0,r4,r11
5678c2ecf20Sopenharmony_ci	VPERM(v11,v1,v0,v16)
5688c2ecf20Sopenharmony_ci	addi	r4,r4,64
5698c2ecf20Sopenharmony_cierr3;	stvx	v8,0,r3
5708c2ecf20Sopenharmony_cierr3;	stvx	v9,r3,r9
5718c2ecf20Sopenharmony_cierr3;	stvx	v10,r3,r10
5728c2ecf20Sopenharmony_cierr3;	stvx	v11,r3,r11
5738c2ecf20Sopenharmony_ci	addi	r3,r3,64
5748c2ecf20Sopenharmony_ci
	/* r5 -= bytes copied above; r6 = number of whole 128B blocks. */
5758c2ecf20Sopenharmony_ci7:	sub	r5,r5,r6
5768c2ecf20Sopenharmony_ci	srdi	r6,r5,7
5778c2ecf20Sopenharmony_ci
	/*
	 * r14-r16 are needed as further index registers, so save them in
	 * the frame first. Until they are reloaded, faults must use err4.
	 */
5788c2ecf20Sopenharmony_ci	std	r14,STK_REG(R14)(r1)
5798c2ecf20Sopenharmony_ci	std	r15,STK_REG(R15)(r1)
5808c2ecf20Sopenharmony_ci	std	r16,STK_REG(R16)(r1)
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci	li	r12,64
5838c2ecf20Sopenharmony_ci	li	r14,80
5848c2ecf20Sopenharmony_ci	li	r15,96
5858c2ecf20Sopenharmony_ci	li	r16,112
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_ci	mtctr	r6
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	/*
5908c2ecf20Sopenharmony_ci	 * Now do cacheline sized loads and stores. By this stage the
5918c2ecf20Sopenharmony_ci	 * cacheline stores are also cacheline aligned.
5928c2ecf20Sopenharmony_ci	 */
5938c2ecf20Sopenharmony_ci	.align	5
5948c2ecf20Sopenharmony_ci8:
	/* Eight loads, each followed by the VPERM that consumes it. */
5958c2ecf20Sopenharmony_cierr4;	lvx	v7,0,r4
5968c2ecf20Sopenharmony_ci	VPERM(v8,v0,v7,v16)
5978c2ecf20Sopenharmony_cierr4;	lvx	v6,r4,r9
5988c2ecf20Sopenharmony_ci	VPERM(v9,v7,v6,v16)
5998c2ecf20Sopenharmony_cierr4;	lvx	v5,r4,r10
6008c2ecf20Sopenharmony_ci	VPERM(v10,v6,v5,v16)
6018c2ecf20Sopenharmony_cierr4;	lvx	v4,r4,r11
6028c2ecf20Sopenharmony_ci	VPERM(v11,v5,v4,v16)
6038c2ecf20Sopenharmony_cierr4;	lvx	v3,r4,r12
6048c2ecf20Sopenharmony_ci	VPERM(v12,v4,v3,v16)
6058c2ecf20Sopenharmony_cierr4;	lvx	v2,r4,r14
6068c2ecf20Sopenharmony_ci	VPERM(v13,v3,v2,v16)
6078c2ecf20Sopenharmony_cierr4;	lvx	v1,r4,r15
6088c2ecf20Sopenharmony_ci	VPERM(v14,v2,v1,v16)
6098c2ecf20Sopenharmony_cierr4;	lvx	v0,r4,r16
6108c2ecf20Sopenharmony_ci	VPERM(v15,v1,v0,v16)
6118c2ecf20Sopenharmony_ci	addi	r4,r4,128
6128c2ecf20Sopenharmony_cierr4;	stvx	v8,0,r3
6138c2ecf20Sopenharmony_cierr4;	stvx	v9,r3,r9
6148c2ecf20Sopenharmony_cierr4;	stvx	v10,r3,r10
6158c2ecf20Sopenharmony_cierr4;	stvx	v11,r3,r11
6168c2ecf20Sopenharmony_cierr4;	stvx	v12,r3,r12
6178c2ecf20Sopenharmony_cierr4;	stvx	v13,r3,r14
6188c2ecf20Sopenharmony_cierr4;	stvx	v14,r3,r15
6198c2ecf20Sopenharmony_cierr4;	stvx	v15,r3,r16
6208c2ecf20Sopenharmony_ci	addi	r3,r3,128
6218c2ecf20Sopenharmony_ci	bdnz	8b
6228c2ecf20Sopenharmony_ci
	/* Index registers no longer needed: reload r14-r16; err3 again. */
6238c2ecf20Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
6248c2ecf20Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
6258c2ecf20Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci	/* Up to 127B to go */
	/* cr7 = low 4 bits of (r5 >> 4): the 64/32/16-byte steps. */
6288c2ecf20Sopenharmony_ci	clrldi	r5,r5,(64-7)
6298c2ecf20Sopenharmony_ci	srdi	r6,r5,4
6308c2ecf20Sopenharmony_ci	mtocrf	0x01,r6
6318c2ecf20Sopenharmony_ci
	/* 64 bytes */
6328c2ecf20Sopenharmony_ci	bf	cr7*4+1,9f
6338c2ecf20Sopenharmony_cierr3;	lvx	v3,0,r4
6348c2ecf20Sopenharmony_ci	VPERM(v8,v0,v3,v16)
6358c2ecf20Sopenharmony_cierr3;	lvx	v2,r4,r9
6368c2ecf20Sopenharmony_ci	VPERM(v9,v3,v2,v16)
6378c2ecf20Sopenharmony_cierr3;	lvx	v1,r4,r10
6388c2ecf20Sopenharmony_ci	VPERM(v10,v2,v1,v16)
6398c2ecf20Sopenharmony_cierr3;	lvx	v0,r4,r11
6408c2ecf20Sopenharmony_ci	VPERM(v11,v1,v0,v16)
6418c2ecf20Sopenharmony_ci	addi	r4,r4,64
6428c2ecf20Sopenharmony_cierr3;	stvx	v8,0,r3
6438c2ecf20Sopenharmony_cierr3;	stvx	v9,r3,r9
6448c2ecf20Sopenharmony_cierr3;	stvx	v10,r3,r10
6458c2ecf20Sopenharmony_cierr3;	stvx	v11,r3,r11
6468c2ecf20Sopenharmony_ci	addi	r3,r3,64
6478c2ecf20Sopenharmony_ci
	/* 32 bytes */
6488c2ecf20Sopenharmony_ci9:	bf	cr7*4+2,10f
6498c2ecf20Sopenharmony_cierr3;	lvx	v1,0,r4
6508c2ecf20Sopenharmony_ci	VPERM(v8,v0,v1,v16)
6518c2ecf20Sopenharmony_cierr3;	lvx	v0,r4,r9
6528c2ecf20Sopenharmony_ci	VPERM(v9,v1,v0,v16)
6538c2ecf20Sopenharmony_ci	addi	r4,r4,32
6548c2ecf20Sopenharmony_cierr3;	stvx	v8,0,r3
6558c2ecf20Sopenharmony_cierr3;	stvx	v9,r3,r9
6568c2ecf20Sopenharmony_ci	addi	r3,r3,32
6578c2ecf20Sopenharmony_ci
	/* 16 bytes; v0 is not refreshed because no vector step follows */
6588c2ecf20Sopenharmony_ci10:	bf	cr7*4+3,11f
6598c2ecf20Sopenharmony_cierr3;	lvx	v1,0,r4
6608c2ecf20Sopenharmony_ci	VPERM(v8,v0,v1,v16)
6618c2ecf20Sopenharmony_ci	addi	r4,r4,16
6628c2ecf20Sopenharmony_cierr3;	stvx	v8,0,r3
6638c2ecf20Sopenharmony_ci	addi	r3,r3,16
6648c2ecf20Sopenharmony_ci
6658c2ecf20Sopenharmony_ci	/* Up to 15B to go */
	/* cr7 = low 4 bits of r5: the 8/4/2/1-byte steps. */
6668c2ecf20Sopenharmony_ci11:	clrldi	r5,r5,(64-4)
6678c2ecf20Sopenharmony_ci	addi	r4,r4,-16	/* Unwind the +16 load offset */
6688c2ecf20Sopenharmony_ci	mtocrf	0x01,r5
6698c2ecf20Sopenharmony_ci	bf	cr7*4+0,12f
6708c2ecf20Sopenharmony_cierr3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
6718c2ecf20Sopenharmony_cierr3;	lwz	r6,4(r4)
6728c2ecf20Sopenharmony_ci	addi	r4,r4,8
6738c2ecf20Sopenharmony_cierr3;	stw	r0,0(r3)
6748c2ecf20Sopenharmony_cierr3;	stw	r6,4(r3)
6758c2ecf20Sopenharmony_ci	addi	r3,r3,8
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci12:	bf	cr7*4+1,13f
6788c2ecf20Sopenharmony_cierr3;	lwz	r0,0(r4)
6798c2ecf20Sopenharmony_ci	addi	r4,r4,4
6808c2ecf20Sopenharmony_cierr3;	stw	r0,0(r3)
6818c2ecf20Sopenharmony_ci	addi	r3,r3,4
6828c2ecf20Sopenharmony_ci
6838c2ecf20Sopenharmony_ci13:	bf	cr7*4+2,14f
6848c2ecf20Sopenharmony_cierr3;	lhz	r0,0(r4)
6858c2ecf20Sopenharmony_ci	addi	r4,r4,2
6868c2ecf20Sopenharmony_cierr3;	sth	r0,0(r3)
6878c2ecf20Sopenharmony_ci	addi	r3,r3,2
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci14:	bf	cr7*4+3,15f
6908c2ecf20Sopenharmony_cierr3;	lbz	r0,0(r4)
6918c2ecf20Sopenharmony_cierr3;	stb	r0,0(r3)
6928c2ecf20Sopenharmony_ci
	/*
	 * Pop the frame and finish in exit_vmx_usercopy. LR was restored in
	 * .Lvmx_copy, so that function returns straight to our caller.
	 * NOTE(review): the value left in r3 is whatever exit_vmx_usercopy
	 * returns; presumably 0 - confirm against its definition.
	 */
6938c2ecf20Sopenharmony_ci15:	addi	r1,r1,STACKFRAMESIZE
6948c2ecf20Sopenharmony_ci	b	exit_vmx_usercopy	/* tail call optimise */
6958c2ecf20Sopenharmony_ci#endif /* CONFIG_ALTIVEC */
696