162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci * Copyright (C) IBM Corporation, 2011
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Author: Anton Blanchard <anton@au.ibm.com>
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci#include <asm/ppc_asm.h>
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#ifndef SELFTEST_CASE
1162306a36Sopenharmony_ci/* 0 == don't use VMX, 1 == use VMX */
1262306a36Sopenharmony_ci#define SELFTEST_CASE	0
1362306a36Sopenharmony_ci#endif
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci#ifdef __BIG_ENDIAN__
1662306a36Sopenharmony_ci#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
1762306a36Sopenharmony_ci#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
1862306a36Sopenharmony_ci#else
1962306a36Sopenharmony_ci#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
2062306a36Sopenharmony_ci#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
2162306a36Sopenharmony_ci#endif
2262306a36Sopenharmony_ci
/*
 * err1: written in front of a memory access ("err1; lbz r0,0(r4)").
 * Drops local label 100 at the current location and hands it, together
 * with the fixup label .Ldo_err1, to EX_TABLE.
 * NOTE(review): EX_TABLE is defined outside this file; presumably it emits
 * an exception-table entry so a fault at 100 resumes at .Ldo_err1 - confirm.
 * .Ldo_err1 addresses the saved arguments relative to an r1 that has no
 * frame pushed, so err1 is for code running without a stack frame.
 */
2362306a36Sopenharmony_ci	.macro err1
2462306a36Sopenharmony_ci100:
2562306a36Sopenharmony_ci	EX_TABLE(100b,.Ldo_err1)
2662306a36Sopenharmony_ci	.endm
2762306a36Sopenharmony_ci
/*
 * err2: same shape as err1 but with fixup label .Ldo_err2 (local label 200).
 * .Ldo_err2 reloads r14-r22 from the stack frame and pops the frame, so
 * err2 is for accesses made while that frame is active (the 128B loop of
 * the non-VMX path).
 * NOTE(review): EX_TABLE is defined outside this file - confirm semantics.
 */
2862306a36Sopenharmony_ci	.macro err2
2962306a36Sopenharmony_ci200:
3062306a36Sopenharmony_ci	EX_TABLE(200b,.Ldo_err2)
3162306a36Sopenharmony_ci	.endm
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#ifdef CONFIG_ALTIVEC
/*
 * err3: same shape as err1 but with fixup label .Ldo_err3 (local label 300).
 * .Ldo_err3 calls exit_vmx_usercopy, reloads LR and pops the frame, so err3
 * is for accesses in the VMX paths while r14-r16 still hold (or have been
 * restored to) their original values.
 * NOTE(review): EX_TABLE is defined outside this file - confirm semantics.
 */
3462306a36Sopenharmony_ci	.macro err3
3562306a36Sopenharmony_ci300:
3662306a36Sopenharmony_ci	EX_TABLE(300b,.Ldo_err3)
3762306a36Sopenharmony_ci	.endm
3862306a36Sopenharmony_ci
/*
 * err4: same shape as err1 but with fixup label .Ldo_err4 (local label 400).
 * .Ldo_err4 reloads r14-r16 from the stack frame before doing the err3
 * cleanup, so err4 is for the 128B VMX loops, which use r14-r16 as index
 * registers.
 * NOTE(review): EX_TABLE is defined outside this file - confirm semantics.
 */
3962306a36Sopenharmony_ci	.macro err4
4062306a36Sopenharmony_ci400:
4162306a36Sopenharmony_ci	EX_TABLE(400b,.Ldo_err4)
4262306a36Sopenharmony_ci	.endm
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci
/*
 * Fault fixups for the VMX paths (targets named by the err4 / err3 macros).
 * .Ldo_err4 puts back r14-r16, which the 128B VMX loops overwrite, and then
 * falls through into .Ldo_err3. .Ldo_err3 calls exit_vmx_usercopy, reloads
 * the LR value that .Lvmx_copy stored at 16(r1) before pushing its frame,
 * and joins the common exit at .Lexit.
 */
4562306a36Sopenharmony_ci.Ldo_err4:
4662306a36Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
4762306a36Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
4862306a36Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
4962306a36Sopenharmony_ci.Ldo_err3:
5062306a36Sopenharmony_ci	bl	CFUNC(exit_vmx_usercopy)
5162306a36Sopenharmony_ci	ld	r0,STACKFRAMESIZE+16(r1)	/* LR saved before the frame was pushed */
5262306a36Sopenharmony_ci	mtlr	r0
5362306a36Sopenharmony_ci	b	.Lexit
5462306a36Sopenharmony_ci#endif /* CONFIG_ALTIVEC */
5562306a36Sopenharmony_ci
/*
 * Common fault fixups.
 *   .Ldo_err2: reload r14-r22, saved by the non-VMX 128B loop setup.
 *   .Lexit:    pop the stack frame (also entered from .Ldo_err3).
 *   .Ldo_err1: reload the original r3/r4/r5 that the function entry stored
 *              below the stack pointer, then branch to
 *              __copy_tofrom_user_base with those original arguments.
 * NOTE(review): __copy_tofrom_user_base is defined outside this file;
 * presumably it redoes the copy and produces the final return value -
 * confirm.
 */
5662306a36Sopenharmony_ci.Ldo_err2:
5762306a36Sopenharmony_ci	ld	r22,STK_REG(R22)(r1)
5862306a36Sopenharmony_ci	ld	r21,STK_REG(R21)(r1)
5962306a36Sopenharmony_ci	ld	r20,STK_REG(R20)(r1)
6062306a36Sopenharmony_ci	ld	r19,STK_REG(R19)(r1)
6162306a36Sopenharmony_ci	ld	r18,STK_REG(R18)(r1)
6262306a36Sopenharmony_ci	ld	r17,STK_REG(R17)(r1)
6362306a36Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
6462306a36Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
6562306a36Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
6662306a36Sopenharmony_ci.Lexit:
6762306a36Sopenharmony_ci	addi	r1,r1,STACKFRAMESIZE
6862306a36Sopenharmony_ci.Ldo_err1:
6962306a36Sopenharmony_ci	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* original r3 */
7062306a36Sopenharmony_ci	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)	/* original r4 */
7162306a36Sopenharmony_ci	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)	/* original r5 */
7262306a36Sopenharmony_ci	b	__copy_tofrom_user_base
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci
/*
 * __copy_tofrom_user_power7 - entry point and path selection.
 *
 * Register use as seen in this file: r3 = destination (stored to),
 * r4 = source (loaded from), r5 = byte count.
 *
 * The original r3/r4/r5 are stashed below the stack pointer so that the
 * fault fixups (.Ldo_err1) can reload them. Then:
 *   count < 16                          -> .Lshort_copy
 *   count > 3328 and ALTIVEC feature set -> .Lvmx_copy
 *   otherwise                           -> falls through to .Lnonvmx_copy
 *
 * The non-VMX path ends with r3 = 0; the VMX paths end by tail-calling
 * exit_vmx_usercopy.
 */
7562306a36Sopenharmony_ci_GLOBAL(__copy_tofrom_user_power7)
7662306a36Sopenharmony_ci	cmpldi	r5,16			/* cr0: count vs 16 */
7762306a36Sopenharmony_ci	cmpldi	cr1,r5,3328		/* cr1: count vs 3328 */
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
8062306a36Sopenharmony_ci	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
8162306a36Sopenharmony_ci	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	blt	.Lshort_copy		/* fewer than 16 bytes */
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci#ifdef CONFIG_ALTIVEC
/*
 * NOTE(review): test_feature is presumably consumed by the feature-section
 * macros when built as a selftest (see SELFTEST_CASE above) - confirm.
 */
8662306a36Sopenharmony_citest_feature = SELFTEST_CASE
8762306a36Sopenharmony_ciBEGIN_FTR_SECTION
8862306a36Sopenharmony_ci	bgt	cr1,.Lvmx_copy
8962306a36Sopenharmony_ciEND_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
9062306a36Sopenharmony_ci#endif
9162306a36Sopenharmony_ci
/*
 * Non-VMX copy using integer loads and stores.
 *
 * Steps:
 *   1. Copy 1/2/4 bytes as needed so the source (r4) becomes 8B aligned.
 *   2. If at least 128 bytes remain, push a frame, save r14-r22 and copy
 *      128 bytes per iteration with CTR as the loop counter.
 *   3. Copy the remaining 64/32/16 byte pieces, then fall through to
 *      .Lshort_copy with r5 = count & 15.
 *
 * Accesses made without a frame are tagged err1; accesses inside the
 * framed 128B loop are tagged err2.
 */
9262306a36Sopenharmony_ci.Lnonvmx_copy:
9362306a36Sopenharmony_ci	/* Get the source 8B aligned */
9462306a36Sopenharmony_ci	neg	r6,r4
9562306a36Sopenharmony_ci	mtocrf	0x01,r6			/* low bits of -src into cr7 for the bf tests */
9662306a36Sopenharmony_ci	clrldi	r6,r6,(64-3)		/* r6 = (-src) & 7 = bytes the steps below copy */
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	bf	cr7*4+3,1f		/* 1 byte? */
9962306a36Sopenharmony_cierr1;	lbz	r0,0(r4)
10062306a36Sopenharmony_ci	addi	r4,r4,1
10162306a36Sopenharmony_cierr1;	stb	r0,0(r3)
10262306a36Sopenharmony_ci	addi	r3,r3,1
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci1:	bf	cr7*4+2,2f		/* 2 bytes? */
10562306a36Sopenharmony_cierr1;	lhz	r0,0(r4)
10662306a36Sopenharmony_ci	addi	r4,r4,2
10762306a36Sopenharmony_cierr1;	sth	r0,0(r3)
10862306a36Sopenharmony_ci	addi	r3,r3,2
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci2:	bf	cr7*4+1,3f		/* 4 bytes? */
11162306a36Sopenharmony_cierr1;	lwz	r0,0(r4)
11262306a36Sopenharmony_ci	addi	r4,r4,4
11362306a36Sopenharmony_cierr1;	stw	r0,0(r3)
11462306a36Sopenharmony_ci	addi	r3,r3,4
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci3:	sub	r5,r5,r6		/* count -= alignment bytes */
11762306a36Sopenharmony_ci	cmpldi	r5,128
11862306a36Sopenharmony_ci	blt	5f			/* less than one 128B block left */
11962306a36Sopenharmony_ci
	/*
	 * Push a frame and save r14-r22 plus LR. The loop below uses
	 * r14-r21 as data registers; r22 is saved and restored but not
	 * otherwise used in this path.
	 */
12062306a36Sopenharmony_ci	mflr	r0
12162306a36Sopenharmony_ci	stdu	r1,-STACKFRAMESIZE(r1)
12262306a36Sopenharmony_ci	std	r14,STK_REG(R14)(r1)
12362306a36Sopenharmony_ci	std	r15,STK_REG(R15)(r1)
12462306a36Sopenharmony_ci	std	r16,STK_REG(R16)(r1)
12562306a36Sopenharmony_ci	std	r17,STK_REG(R17)(r1)
12662306a36Sopenharmony_ci	std	r18,STK_REG(R18)(r1)
12762306a36Sopenharmony_ci	std	r19,STK_REG(R19)(r1)
12862306a36Sopenharmony_ci	std	r20,STK_REG(R20)(r1)
12962306a36Sopenharmony_ci	std	r21,STK_REG(R21)(r1)
13062306a36Sopenharmony_ci	std	r22,STK_REG(R22)(r1)
13162306a36Sopenharmony_ci	std	r0,STACKFRAMESIZE+16(r1)
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	srdi	r6,r5,7			/* number of 128B blocks */
13462306a36Sopenharmony_ci	mtctr	r6
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	/* Now do cacheline (128B) sized loads and stores. */
13762306a36Sopenharmony_ci	.align	5
13862306a36Sopenharmony_ci4:
13962306a36Sopenharmony_cierr2;	ld	r0,0(r4)
14062306a36Sopenharmony_cierr2;	ld	r6,8(r4)
14162306a36Sopenharmony_cierr2;	ld	r7,16(r4)
14262306a36Sopenharmony_cierr2;	ld	r8,24(r4)
14362306a36Sopenharmony_cierr2;	ld	r9,32(r4)
14462306a36Sopenharmony_cierr2;	ld	r10,40(r4)
14562306a36Sopenharmony_cierr2;	ld	r11,48(r4)
14662306a36Sopenharmony_cierr2;	ld	r12,56(r4)
14762306a36Sopenharmony_cierr2;	ld	r14,64(r4)
14862306a36Sopenharmony_cierr2;	ld	r15,72(r4)
14962306a36Sopenharmony_cierr2;	ld	r16,80(r4)
15062306a36Sopenharmony_cierr2;	ld	r17,88(r4)
15162306a36Sopenharmony_cierr2;	ld	r18,96(r4)
15262306a36Sopenharmony_cierr2;	ld	r19,104(r4)
15362306a36Sopenharmony_cierr2;	ld	r20,112(r4)
15462306a36Sopenharmony_cierr2;	ld	r21,120(r4)
15562306a36Sopenharmony_ci	addi	r4,r4,128
15662306a36Sopenharmony_cierr2;	std	r0,0(r3)
15762306a36Sopenharmony_cierr2;	std	r6,8(r3)
15862306a36Sopenharmony_cierr2;	std	r7,16(r3)
15962306a36Sopenharmony_cierr2;	std	r8,24(r3)
16062306a36Sopenharmony_cierr2;	std	r9,32(r3)
16162306a36Sopenharmony_cierr2;	std	r10,40(r3)
16262306a36Sopenharmony_cierr2;	std	r11,48(r3)
16362306a36Sopenharmony_cierr2;	std	r12,56(r3)
16462306a36Sopenharmony_cierr2;	std	r14,64(r3)
16562306a36Sopenharmony_cierr2;	std	r15,72(r3)
16662306a36Sopenharmony_cierr2;	std	r16,80(r3)
16762306a36Sopenharmony_cierr2;	std	r17,88(r3)
16862306a36Sopenharmony_cierr2;	std	r18,96(r3)
16962306a36Sopenharmony_cierr2;	std	r19,104(r3)
17062306a36Sopenharmony_cierr2;	std	r20,112(r3)
17162306a36Sopenharmony_cierr2;	std	r21,120(r3)
17262306a36Sopenharmony_ci	addi	r3,r3,128
17362306a36Sopenharmony_ci	bdnz	4b
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	clrldi	r5,r5,(64-7)		/* count &= 127 */
17662306a36Sopenharmony_ci
	/* Restore the saved registers and pop the frame. */
17762306a36Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
17862306a36Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
17962306a36Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
18062306a36Sopenharmony_ci	ld	r17,STK_REG(R17)(r1)
18162306a36Sopenharmony_ci	ld	r18,STK_REG(R18)(r1)
18262306a36Sopenharmony_ci	ld	r19,STK_REG(R19)(r1)
18362306a36Sopenharmony_ci	ld	r20,STK_REG(R20)(r1)
18462306a36Sopenharmony_ci	ld	r21,STK_REG(R21)(r1)
18562306a36Sopenharmony_ci	ld	r22,STK_REG(R22)(r1)
18662306a36Sopenharmony_ci	addi	r1,r1,STACKFRAMESIZE
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	/* Up to 127B to go */
18962306a36Sopenharmony_ci5:	srdi	r6,r5,4
19062306a36Sopenharmony_ci	mtocrf	0x01,r6			/* cr7 bits now select the 64/32/16 byte steps */
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci6:	bf	cr7*4+1,7f		/* 64 bytes? */
19362306a36Sopenharmony_cierr1;	ld	r0,0(r4)
19462306a36Sopenharmony_cierr1;	ld	r6,8(r4)
19562306a36Sopenharmony_cierr1;	ld	r7,16(r4)
19662306a36Sopenharmony_cierr1;	ld	r8,24(r4)
19762306a36Sopenharmony_cierr1;	ld	r9,32(r4)
19862306a36Sopenharmony_cierr1;	ld	r10,40(r4)
19962306a36Sopenharmony_cierr1;	ld	r11,48(r4)
20062306a36Sopenharmony_cierr1;	ld	r12,56(r4)
20162306a36Sopenharmony_ci	addi	r4,r4,64
20262306a36Sopenharmony_cierr1;	std	r0,0(r3)
20362306a36Sopenharmony_cierr1;	std	r6,8(r3)
20462306a36Sopenharmony_cierr1;	std	r7,16(r3)
20562306a36Sopenharmony_cierr1;	std	r8,24(r3)
20662306a36Sopenharmony_cierr1;	std	r9,32(r3)
20762306a36Sopenharmony_cierr1;	std	r10,40(r3)
20862306a36Sopenharmony_cierr1;	std	r11,48(r3)
20962306a36Sopenharmony_cierr1;	std	r12,56(r3)
21062306a36Sopenharmony_ci	addi	r3,r3,64
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	/* Up to 63B to go */
21362306a36Sopenharmony_ci7:	bf	cr7*4+2,8f		/* 32 bytes? */
21462306a36Sopenharmony_cierr1;	ld	r0,0(r4)
21562306a36Sopenharmony_cierr1;	ld	r6,8(r4)
21662306a36Sopenharmony_cierr1;	ld	r7,16(r4)
21762306a36Sopenharmony_cierr1;	ld	r8,24(r4)
21862306a36Sopenharmony_ci	addi	r4,r4,32
21962306a36Sopenharmony_cierr1;	std	r0,0(r3)
22062306a36Sopenharmony_cierr1;	std	r6,8(r3)
22162306a36Sopenharmony_cierr1;	std	r7,16(r3)
22262306a36Sopenharmony_cierr1;	std	r8,24(r3)
22362306a36Sopenharmony_ci	addi	r3,r3,32
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	/* Up to 31B to go */
22662306a36Sopenharmony_ci8:	bf	cr7*4+3,9f		/* 16 bytes? */
22762306a36Sopenharmony_cierr1;	ld	r0,0(r4)
22862306a36Sopenharmony_cierr1;	ld	r6,8(r4)
22962306a36Sopenharmony_ci	addi	r4,r4,16
23062306a36Sopenharmony_cierr1;	std	r0,0(r3)
23162306a36Sopenharmony_cierr1;	std	r6,8(r3)
23262306a36Sopenharmony_ci	addi	r3,r3,16
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci9:	clrldi	r5,r5,(64-4)		/* count &= 15, then fall into the short copy */
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	/* Up to 15B to go */
/*
 * Tail copy for fewer than 16 bytes. Entered directly from the function
 * entry when count < 16, and by fall-through from the non-VMX path with
 * r5 = count & 15. The low four bits of r5 go into cr7 and select 8, 4,
 * 2 and 1 byte steps in turn. Returns 0 in r3.
 */
23762306a36Sopenharmony_ci.Lshort_copy:
23862306a36Sopenharmony_ci	mtocrf	0x01,r5
23962306a36Sopenharmony_ci	bf	cr7*4+0,12f		/* 8 bytes? */
24062306a36Sopenharmony_cierr1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
24162306a36Sopenharmony_cierr1;	lwz	r6,4(r4)
24262306a36Sopenharmony_ci	addi	r4,r4,8
24362306a36Sopenharmony_cierr1;	stw	r0,0(r3)
24462306a36Sopenharmony_cierr1;	stw	r6,4(r3)
24562306a36Sopenharmony_ci	addi	r3,r3,8
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci12:	bf	cr7*4+1,13f		/* 4 bytes? */
24862306a36Sopenharmony_cierr1;	lwz	r0,0(r4)
24962306a36Sopenharmony_ci	addi	r4,r4,4
25062306a36Sopenharmony_cierr1;	stw	r0,0(r3)
25162306a36Sopenharmony_ci	addi	r3,r3,4
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci13:	bf	cr7*4+2,14f		/* 2 bytes? */
25462306a36Sopenharmony_cierr1;	lhz	r0,0(r4)
25562306a36Sopenharmony_ci	addi	r4,r4,2
25662306a36Sopenharmony_cierr1;	sth	r0,0(r3)
25762306a36Sopenharmony_ci	addi	r3,r3,2
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci14:	bf	cr7*4+3,15f		/* 1 byte? */
26062306a36Sopenharmony_cierr1;	lbz	r0,0(r4)
26162306a36Sopenharmony_cierr1;	stb	r0,0(r3)
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci15:	li	r3,0			/* return 0 */
26462306a36Sopenharmony_ci	blr
26562306a36Sopenharmony_ci
/*
 * Taken from .Lvmx_copy when enter_vmx_usercopy returned 0: pop the frame
 * that .Lvmx_copy pushed and do the copy with the non-VMX code instead.
 * r3/r4/r5 have already been reloaded with the original arguments by then.
 */
26662306a36Sopenharmony_ci.Lunwind_stack_nonvmx_copy:
26762306a36Sopenharmony_ci	addi	r1,r1,STACKFRAMESIZE
26862306a36Sopenharmony_ci	b	.Lnonvmx_copy
26962306a36Sopenharmony_ci
/*
 * VMX copy, selected at entry for counts above 3328 bytes when the ALTIVEC
 * feature is set.
 *
 * Steps:
 *   1. Save LR, push a frame and call enter_vmx_usercopy; a zero return
 *      sends us back to the non-VMX code.
 *   2. Program prefetch streams for source and destination.
 *   3. If (src ^ dest) has any of the low four bits set, use
 *      .Lvmx_unaligned_copy; otherwise continue here.
 *   4. Align the destination to 16B, then to 128B, copy 128 bytes per
 *      iteration with lvx/stvx, then copy the 64/32/16/8/4/2/1 byte tail.
 *   5. Pop the frame and tail-call exit_vmx_usercopy.
 *
 * Accesses are tagged err3, except inside the 128B loop, where r14-r16 hold
 * index offsets and the tag is err4.
 */
27062306a36Sopenharmony_ci.Lvmx_copy:
27162306a36Sopenharmony_ci#ifdef CONFIG_ALTIVEC
27262306a36Sopenharmony_ci	mflr	r0
27362306a36Sopenharmony_ci	std	r0,16(r1)		/* save LR before pushing the frame */
27462306a36Sopenharmony_ci	stdu	r1,-STACKFRAMESIZE(r1)
27562306a36Sopenharmony_ci	bl	CFUNC(enter_vmx_usercopy)
27662306a36Sopenharmony_ci	cmpwi	cr1,r3,0		/* cr1.eq = (return value == 0), tested below */
27762306a36Sopenharmony_ci	ld	r0,STACKFRAMESIZE+16(r1)
	/* Reload the dest/src/count values stored at function entry. */
27862306a36Sopenharmony_ci	ld	r3,STK_REG(R31)(r1)
27962306a36Sopenharmony_ci	ld	r4,STK_REG(R30)(r1)
28062306a36Sopenharmony_ci	ld	r5,STK_REG(R29)(r1)
28162306a36Sopenharmony_ci	mtlr	r0
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	/*
28462306a36Sopenharmony_ci	 * We prefetch both the source and destination using enhanced touch
28562306a36Sopenharmony_ci	 * instructions. We use a stream ID of 0 for the load side and
28662306a36Sopenharmony_ci	 * 1 for the store side.
28762306a36Sopenharmony_ci	 */
28862306a36Sopenharmony_ci	clrrdi	r6,r4,7			/* src rounded down to 128B */
28962306a36Sopenharmony_ci	clrrdi	r9,r3,7			/* dest rounded down to 128B */
29062306a36Sopenharmony_ci	ori	r9,r9,1		/* stream=1 */
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
29362306a36Sopenharmony_ci	cmpldi	r7,0x3FF
29462306a36Sopenharmony_ci	ble	1f
29562306a36Sopenharmony_ci	li	r7,0x3FF
29662306a36Sopenharmony_ci1:	lis	r0,0x0E00	/* depth=7 */
29762306a36Sopenharmony_ci	sldi	r7,r7,7
29862306a36Sopenharmony_ci	or	r7,r7,r0
29962306a36Sopenharmony_ci	ori	r10,r7,1	/* stream=1 */
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	lis	r8,0x8000	/* GO=1 */
30262306a36Sopenharmony_ci	clrldi	r8,r8,32		/* keep only the low 32 bits */
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	/* setup read stream 0 */
30562306a36Sopenharmony_ci	dcbt	0,r6,0b01000   /* addr from */
30662306a36Sopenharmony_ci	dcbt	0,r7,0b01010   /* length and depth from */
30762306a36Sopenharmony_ci	/* setup write stream 1 */
30862306a36Sopenharmony_ci	dcbtst	0,r9,0b01000   /* addr to */
30962306a36Sopenharmony_ci	dcbtst	0,r10,0b01010  /* length and depth to */
31062306a36Sopenharmony_ci	eieio
31162306a36Sopenharmony_ci	dcbt	0,r8,0b01010	/* all streams GO */
31262306a36Sopenharmony_ci
	/* enter_vmx_usercopy returned 0: drop the frame and use the non-VMX code. */
31362306a36Sopenharmony_ci	beq	cr1,.Lunwind_stack_nonvmx_copy
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	/*
31662306a36Sopenharmony_ci	 * If source and destination are not relatively aligned we use a
31762306a36Sopenharmony_ci	 * slower permute loop.
31862306a36Sopenharmony_ci	 */
31962306a36Sopenharmony_ci	xor	r6,r4,r3
32062306a36Sopenharmony_ci	rldicl.	r6,r6,0,(64-4)		/* low 4 bits of src ^ dest, sets cr0 */
32162306a36Sopenharmony_ci	bne	.Lvmx_unaligned_copy
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci	/* Get the destination 16B aligned */
32462306a36Sopenharmony_ci	neg	r6,r3
32562306a36Sopenharmony_ci	mtocrf	0x01,r6
32662306a36Sopenharmony_ci	clrldi	r6,r6,(64-4)		/* r6 = (-dest) & 15 = bytes copied below */
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	bf	cr7*4+3,1f		/* 1 byte? */
32962306a36Sopenharmony_cierr3;	lbz	r0,0(r4)
33062306a36Sopenharmony_ci	addi	r4,r4,1
33162306a36Sopenharmony_cierr3;	stb	r0,0(r3)
33262306a36Sopenharmony_ci	addi	r3,r3,1
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci1:	bf	cr7*4+2,2f		/* 2 bytes? */
33562306a36Sopenharmony_cierr3;	lhz	r0,0(r4)
33662306a36Sopenharmony_ci	addi	r4,r4,2
33762306a36Sopenharmony_cierr3;	sth	r0,0(r3)
33862306a36Sopenharmony_ci	addi	r3,r3,2
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci2:	bf	cr7*4+1,3f		/* 4 bytes? */
34162306a36Sopenharmony_cierr3;	lwz	r0,0(r4)
34262306a36Sopenharmony_ci	addi	r4,r4,4
34362306a36Sopenharmony_cierr3;	stw	r0,0(r3)
34462306a36Sopenharmony_ci	addi	r3,r3,4
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci3:	bf	cr7*4+0,4f		/* 8 bytes? */
34762306a36Sopenharmony_cierr3;	ld	r0,0(r4)
34862306a36Sopenharmony_ci	addi	r4,r4,8
34962306a36Sopenharmony_cierr3;	std	r0,0(r3)
35062306a36Sopenharmony_ci	addi	r3,r3,8
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci4:	sub	r5,r5,r6		/* count -= alignment bytes */
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	/* Get the destination 128B aligned */
35562306a36Sopenharmony_ci	neg	r6,r3
35662306a36Sopenharmony_ci	srdi	r7,r6,4
35762306a36Sopenharmony_ci	mtocrf	0x01,r7			/* cr7 bits select the 16/32/64 byte steps */
35862306a36Sopenharmony_ci	clrldi	r6,r6,(64-7)		/* r6 = (-dest) & 127 */
35962306a36Sopenharmony_ci
	/* Index offsets for lvx/stvx */
36062306a36Sopenharmony_ci	li	r9,16
36162306a36Sopenharmony_ci	li	r10,32
36262306a36Sopenharmony_ci	li	r11,48
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	bf	cr7*4+3,5f		/* 16 bytes? */
36562306a36Sopenharmony_cierr3;	lvx	v1,0,r4
36662306a36Sopenharmony_ci	addi	r4,r4,16
36762306a36Sopenharmony_cierr3;	stvx	v1,0,r3
36862306a36Sopenharmony_ci	addi	r3,r3,16
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci5:	bf	cr7*4+2,6f		/* 32 bytes? */
37162306a36Sopenharmony_cierr3;	lvx	v1,0,r4
37262306a36Sopenharmony_cierr3;	lvx	v0,r4,r9
37362306a36Sopenharmony_ci	addi	r4,r4,32
37462306a36Sopenharmony_cierr3;	stvx	v1,0,r3
37562306a36Sopenharmony_cierr3;	stvx	v0,r3,r9
37662306a36Sopenharmony_ci	addi	r3,r3,32
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci6:	bf	cr7*4+1,7f		/* 64 bytes? */
37962306a36Sopenharmony_cierr3;	lvx	v3,0,r4
38062306a36Sopenharmony_cierr3;	lvx	v2,r4,r9
38162306a36Sopenharmony_cierr3;	lvx	v1,r4,r10
38262306a36Sopenharmony_cierr3;	lvx	v0,r4,r11
38362306a36Sopenharmony_ci	addi	r4,r4,64
38462306a36Sopenharmony_cierr3;	stvx	v3,0,r3
38562306a36Sopenharmony_cierr3;	stvx	v2,r3,r9
38662306a36Sopenharmony_cierr3;	stvx	v1,r3,r10
38762306a36Sopenharmony_cierr3;	stvx	v0,r3,r11
38862306a36Sopenharmony_ci	addi	r3,r3,64
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci7:	sub	r5,r5,r6		/* count -= bytes used to reach 128B alignment */
39162306a36Sopenharmony_ci	srdi	r6,r5,7			/* number of 128B blocks */
39262306a36Sopenharmony_ci
	/* r14-r16 become index offsets in the loop; save them first. */
39362306a36Sopenharmony_ci	std	r14,STK_REG(R14)(r1)
39462306a36Sopenharmony_ci	std	r15,STK_REG(R15)(r1)
39562306a36Sopenharmony_ci	std	r16,STK_REG(R16)(r1)
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	li	r12,64
39862306a36Sopenharmony_ci	li	r14,80
39962306a36Sopenharmony_ci	li	r15,96
40062306a36Sopenharmony_ci	li	r16,112
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	mtctr	r6
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	/*
40562306a36Sopenharmony_ci	 * Now do cacheline sized loads and stores. By this stage the
40662306a36Sopenharmony_ci	 * cacheline stores are also cacheline aligned.
40762306a36Sopenharmony_ci	 */
40862306a36Sopenharmony_ci	.align	5
40962306a36Sopenharmony_ci8:
41062306a36Sopenharmony_cierr4;	lvx	v7,0,r4
41162306a36Sopenharmony_cierr4;	lvx	v6,r4,r9
41262306a36Sopenharmony_cierr4;	lvx	v5,r4,r10
41362306a36Sopenharmony_cierr4;	lvx	v4,r4,r11
41462306a36Sopenharmony_cierr4;	lvx	v3,r4,r12
41562306a36Sopenharmony_cierr4;	lvx	v2,r4,r14
41662306a36Sopenharmony_cierr4;	lvx	v1,r4,r15
41762306a36Sopenharmony_cierr4;	lvx	v0,r4,r16
41862306a36Sopenharmony_ci	addi	r4,r4,128
41962306a36Sopenharmony_cierr4;	stvx	v7,0,r3
42062306a36Sopenharmony_cierr4;	stvx	v6,r3,r9
42162306a36Sopenharmony_cierr4;	stvx	v5,r3,r10
42262306a36Sopenharmony_cierr4;	stvx	v4,r3,r11
42362306a36Sopenharmony_cierr4;	stvx	v3,r3,r12
42462306a36Sopenharmony_cierr4;	stvx	v2,r3,r14
42562306a36Sopenharmony_cierr4;	stvx	v1,r3,r15
42662306a36Sopenharmony_cierr4;	stvx	v0,r3,r16
42762306a36Sopenharmony_ci	addi	r3,r3,128
42862306a36Sopenharmony_ci	bdnz	8b
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
43162306a36Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
43262306a36Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	/* Up to 127B to go */
43562306a36Sopenharmony_ci	clrldi	r5,r5,(64-7)
43662306a36Sopenharmony_ci	srdi	r6,r5,4
43762306a36Sopenharmony_ci	mtocrf	0x01,r6			/* cr7 bits select the 64/32/16 byte steps */
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	bf	cr7*4+1,9f		/* 64 bytes? */
44062306a36Sopenharmony_cierr3;	lvx	v3,0,r4
44162306a36Sopenharmony_cierr3;	lvx	v2,r4,r9
44262306a36Sopenharmony_cierr3;	lvx	v1,r4,r10
44362306a36Sopenharmony_cierr3;	lvx	v0,r4,r11
44462306a36Sopenharmony_ci	addi	r4,r4,64
44562306a36Sopenharmony_cierr3;	stvx	v3,0,r3
44662306a36Sopenharmony_cierr3;	stvx	v2,r3,r9
44762306a36Sopenharmony_cierr3;	stvx	v1,r3,r10
44862306a36Sopenharmony_cierr3;	stvx	v0,r3,r11
44962306a36Sopenharmony_ci	addi	r3,r3,64
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci9:	bf	cr7*4+2,10f		/* 32 bytes? */
45262306a36Sopenharmony_cierr3;	lvx	v1,0,r4
45362306a36Sopenharmony_cierr3;	lvx	v0,r4,r9
45462306a36Sopenharmony_ci	addi	r4,r4,32
45562306a36Sopenharmony_cierr3;	stvx	v1,0,r3
45662306a36Sopenharmony_cierr3;	stvx	v0,r3,r9
45762306a36Sopenharmony_ci	addi	r3,r3,32
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ci10:	bf	cr7*4+3,11f		/* 16 bytes? */
46062306a36Sopenharmony_cierr3;	lvx	v1,0,r4
46162306a36Sopenharmony_ci	addi	r4,r4,16
46262306a36Sopenharmony_cierr3;	stvx	v1,0,r3
46362306a36Sopenharmony_ci	addi	r3,r3,16
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	/* Up to 15B to go */
46662306a36Sopenharmony_ci11:	clrldi	r5,r5,(64-4)
46762306a36Sopenharmony_ci	mtocrf	0x01,r5
46862306a36Sopenharmony_ci	bf	cr7*4+0,12f		/* 8 bytes? */
46962306a36Sopenharmony_cierr3;	ld	r0,0(r4)
47062306a36Sopenharmony_ci	addi	r4,r4,8
47162306a36Sopenharmony_cierr3;	std	r0,0(r3)
47262306a36Sopenharmony_ci	addi	r3,r3,8
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci12:	bf	cr7*4+1,13f		/* 4 bytes? */
47562306a36Sopenharmony_cierr3;	lwz	r0,0(r4)
47662306a36Sopenharmony_ci	addi	r4,r4,4
47762306a36Sopenharmony_cierr3;	stw	r0,0(r3)
47862306a36Sopenharmony_ci	addi	r3,r3,4
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_ci13:	bf	cr7*4+2,14f		/* 2 bytes? */
48162306a36Sopenharmony_cierr3;	lhz	r0,0(r4)
48262306a36Sopenharmony_ci	addi	r4,r4,2
48362306a36Sopenharmony_cierr3;	sth	r0,0(r3)
48462306a36Sopenharmony_ci	addi	r3,r3,2
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci14:	bf	cr7*4+3,15f		/* 1 byte? */
48762306a36Sopenharmony_cierr3;	lbz	r0,0(r4)
48862306a36Sopenharmony_cierr3;	stb	r0,0(r3)
48962306a36Sopenharmony_ci
	/*
	 * Done: pop the frame. LR was restored right after the
	 * enter_vmx_usercopy call, so exit_vmx_usercopy returns straight to
	 * our caller.
	 * NOTE(review): the value returned to the caller is whatever
	 * exit_vmx_usercopy leaves in r3 - presumably 0, confirm.
	 */
49062306a36Sopenharmony_ci15:	addi	r1,r1,STACKFRAMESIZE
49162306a36Sopenharmony_ci	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */
49262306a36Sopenharmony_ci
/*
 * VMX copy for the case where source and destination differ in their low
 * four address bits (branched to from .Lvmx_copy with the frame already
 * pushed).
 *
 * The destination is aligned to 16B and then 128B as in the aligned path.
 * Source data is fetched with lvx and realigned with VPERM: v16 holds the
 * permute control vector built by LVS from the source address, v0 always
 * carries the most recently loaded source vector into the next step, and
 * each output vector is VPERM(previous, next, v16). Because one source
 * vector is loaded ahead, r4 runs 16 bytes in front of the data consumed
 * until it is wound back before the final sub-16B tail.
 *
 * Accesses are tagged err3, except inside the 128B loop, where r14-r16 hold
 * index offsets and the tag is err4.
 */
49362306a36Sopenharmony_ci.Lvmx_unaligned_copy:
49462306a36Sopenharmony_ci	/* Get the destination 16B aligned */
49562306a36Sopenharmony_ci	neg	r6,r3
49662306a36Sopenharmony_ci	mtocrf	0x01,r6
49762306a36Sopenharmony_ci	clrldi	r6,r6,(64-4)		/* r6 = (-dest) & 15 = bytes copied below */
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci	bf	cr7*4+3,1f		/* 1 byte? */
50062306a36Sopenharmony_cierr3;	lbz	r0,0(r4)
50162306a36Sopenharmony_ci	addi	r4,r4,1
50262306a36Sopenharmony_cierr3;	stb	r0,0(r3)
50362306a36Sopenharmony_ci	addi	r3,r3,1
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci1:	bf	cr7*4+2,2f		/* 2 bytes? */
50662306a36Sopenharmony_cierr3;	lhz	r0,0(r4)
50762306a36Sopenharmony_ci	addi	r4,r4,2
50862306a36Sopenharmony_cierr3;	sth	r0,0(r3)
50962306a36Sopenharmony_ci	addi	r3,r3,2
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci2:	bf	cr7*4+1,3f		/* 4 bytes? */
51262306a36Sopenharmony_cierr3;	lwz	r0,0(r4)
51362306a36Sopenharmony_ci	addi	r4,r4,4
51462306a36Sopenharmony_cierr3;	stw	r0,0(r3)
51562306a36Sopenharmony_ci	addi	r3,r3,4
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci3:	bf	cr7*4+0,4f		/* 8 bytes? */
51862306a36Sopenharmony_cierr3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
51962306a36Sopenharmony_cierr3;	lwz	r7,4(r4)
52062306a36Sopenharmony_ci	addi	r4,r4,8
52162306a36Sopenharmony_cierr3;	stw	r0,0(r3)
52262306a36Sopenharmony_cierr3;	stw	r7,4(r3)
52362306a36Sopenharmony_ci	addi	r3,r3,8
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci4:	sub	r5,r5,r6		/* count -= alignment bytes */
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	/* Get the destination 128B aligned */
52862306a36Sopenharmony_ci	neg	r6,r3
52962306a36Sopenharmony_ci	srdi	r7,r6,4
53062306a36Sopenharmony_ci	mtocrf	0x01,r7			/* cr7 bits select the 16/32/64 byte steps */
53162306a36Sopenharmony_ci	clrldi	r6,r6,(64-7)		/* r6 = (-dest) & 127 */
53262306a36Sopenharmony_ci
	/* Index offsets for lvx/stvx */
53362306a36Sopenharmony_ci	li	r9,16
53462306a36Sopenharmony_ci	li	r10,32
53562306a36Sopenharmony_ci	li	r11,48
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci	LVS(v16,0,r4)		/* Setup permute control vector */
	/* Prime v0 with the first source vector; r4 is now 16 bytes ahead. */
53862306a36Sopenharmony_cierr3;	lvx	v0,0,r4
53962306a36Sopenharmony_ci	addi	r4,r4,16
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_ci	bf	cr7*4+3,5f		/* 16 bytes? */
54262306a36Sopenharmony_cierr3;	lvx	v1,0,r4
54362306a36Sopenharmony_ci	VPERM(v8,v0,v1,v16)
54462306a36Sopenharmony_ci	addi	r4,r4,16
54562306a36Sopenharmony_cierr3;	stvx	v8,0,r3
54662306a36Sopenharmony_ci	addi	r3,r3,16
54762306a36Sopenharmony_ci	vor	v0,v1,v1		/* v0 = v1: carry the last loaded vector */
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci5:	bf	cr7*4+2,6f		/* 32 bytes? */
55062306a36Sopenharmony_cierr3;	lvx	v1,0,r4
55162306a36Sopenharmony_ci	VPERM(v8,v0,v1,v16)
55262306a36Sopenharmony_cierr3;	lvx	v0,r4,r9
55362306a36Sopenharmony_ci	VPERM(v9,v1,v0,v16)
55462306a36Sopenharmony_ci	addi	r4,r4,32
55562306a36Sopenharmony_cierr3;	stvx	v8,0,r3
55662306a36Sopenharmony_cierr3;	stvx	v9,r3,r9
55762306a36Sopenharmony_ci	addi	r3,r3,32
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci6:	bf	cr7*4+1,7f		/* 64 bytes? */
56062306a36Sopenharmony_cierr3;	lvx	v3,0,r4
56162306a36Sopenharmony_ci	VPERM(v8,v0,v3,v16)
56262306a36Sopenharmony_cierr3;	lvx	v2,r4,r9
56362306a36Sopenharmony_ci	VPERM(v9,v3,v2,v16)
56462306a36Sopenharmony_cierr3;	lvx	v1,r4,r10
56562306a36Sopenharmony_ci	VPERM(v10,v2,v1,v16)
56662306a36Sopenharmony_cierr3;	lvx	v0,r4,r11
56762306a36Sopenharmony_ci	VPERM(v11,v1,v0,v16)
56862306a36Sopenharmony_ci	addi	r4,r4,64
56962306a36Sopenharmony_cierr3;	stvx	v8,0,r3
57062306a36Sopenharmony_cierr3;	stvx	v9,r3,r9
57162306a36Sopenharmony_cierr3;	stvx	v10,r3,r10
57262306a36Sopenharmony_cierr3;	stvx	v11,r3,r11
57362306a36Sopenharmony_ci	addi	r3,r3,64
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ci7:	sub	r5,r5,r6		/* count -= bytes used to reach 128B alignment */
57662306a36Sopenharmony_ci	srdi	r6,r5,7			/* number of 128B blocks */
57762306a36Sopenharmony_ci
	/* r14-r16 become index offsets in the loop; save them first. */
57862306a36Sopenharmony_ci	std	r14,STK_REG(R14)(r1)
57962306a36Sopenharmony_ci	std	r15,STK_REG(R15)(r1)
58062306a36Sopenharmony_ci	std	r16,STK_REG(R16)(r1)
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci	li	r12,64
58362306a36Sopenharmony_ci	li	r14,80
58462306a36Sopenharmony_ci	li	r15,96
58562306a36Sopenharmony_ci	li	r16,112
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	mtctr	r6
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	/*
59062306a36Sopenharmony_ci	 * Now do cacheline sized loads and stores. By this stage the
59162306a36Sopenharmony_ci	 * cacheline stores are also cacheline aligned.
59262306a36Sopenharmony_ci	 */
59362306a36Sopenharmony_ci	.align	5
59462306a36Sopenharmony_ci8:
59562306a36Sopenharmony_cierr4;	lvx	v7,0,r4
59662306a36Sopenharmony_ci	VPERM(v8,v0,v7,v16)
59762306a36Sopenharmony_cierr4;	lvx	v6,r4,r9
59862306a36Sopenharmony_ci	VPERM(v9,v7,v6,v16)
59962306a36Sopenharmony_cierr4;	lvx	v5,r4,r10
60062306a36Sopenharmony_ci	VPERM(v10,v6,v5,v16)
60162306a36Sopenharmony_cierr4;	lvx	v4,r4,r11
60262306a36Sopenharmony_ci	VPERM(v11,v5,v4,v16)
60362306a36Sopenharmony_cierr4;	lvx	v3,r4,r12
60462306a36Sopenharmony_ci	VPERM(v12,v4,v3,v16)
60562306a36Sopenharmony_cierr4;	lvx	v2,r4,r14
60662306a36Sopenharmony_ci	VPERM(v13,v3,v2,v16)
60762306a36Sopenharmony_cierr4;	lvx	v1,r4,r15
60862306a36Sopenharmony_ci	VPERM(v14,v2,v1,v16)
60962306a36Sopenharmony_cierr4;	lvx	v0,r4,r16
61062306a36Sopenharmony_ci	VPERM(v15,v1,v0,v16)
61162306a36Sopenharmony_ci	addi	r4,r4,128
61262306a36Sopenharmony_cierr4;	stvx	v8,0,r3
61362306a36Sopenharmony_cierr4;	stvx	v9,r3,r9
61462306a36Sopenharmony_cierr4;	stvx	v10,r3,r10
61562306a36Sopenharmony_cierr4;	stvx	v11,r3,r11
61662306a36Sopenharmony_cierr4;	stvx	v12,r3,r12
61762306a36Sopenharmony_cierr4;	stvx	v13,r3,r14
61862306a36Sopenharmony_cierr4;	stvx	v14,r3,r15
61962306a36Sopenharmony_cierr4;	stvx	v15,r3,r16
62062306a36Sopenharmony_ci	addi	r3,r3,128
62162306a36Sopenharmony_ci	bdnz	8b
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
62462306a36Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
62562306a36Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci	/* Up to 127B to go */
62862306a36Sopenharmony_ci	clrldi	r5,r5,(64-7)
62962306a36Sopenharmony_ci	srdi	r6,r5,4
63062306a36Sopenharmony_ci	mtocrf	0x01,r6			/* cr7 bits select the 64/32/16 byte steps */
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci	bf	cr7*4+1,9f		/* 64 bytes? */
63362306a36Sopenharmony_cierr3;	lvx	v3,0,r4
63462306a36Sopenharmony_ci	VPERM(v8,v0,v3,v16)
63562306a36Sopenharmony_cierr3;	lvx	v2,r4,r9
63662306a36Sopenharmony_ci	VPERM(v9,v3,v2,v16)
63762306a36Sopenharmony_cierr3;	lvx	v1,r4,r10
63862306a36Sopenharmony_ci	VPERM(v10,v2,v1,v16)
63962306a36Sopenharmony_cierr3;	lvx	v0,r4,r11
64062306a36Sopenharmony_ci	VPERM(v11,v1,v0,v16)
64162306a36Sopenharmony_ci	addi	r4,r4,64
64262306a36Sopenharmony_cierr3;	stvx	v8,0,r3
64362306a36Sopenharmony_cierr3;	stvx	v9,r3,r9
64462306a36Sopenharmony_cierr3;	stvx	v10,r3,r10
64562306a36Sopenharmony_cierr3;	stvx	v11,r3,r11
64662306a36Sopenharmony_ci	addi	r3,r3,64
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci9:	bf	cr7*4+2,10f		/* 32 bytes? */
64962306a36Sopenharmony_cierr3;	lvx	v1,0,r4
65062306a36Sopenharmony_ci	VPERM(v8,v0,v1,v16)
65162306a36Sopenharmony_cierr3;	lvx	v0,r4,r9
65262306a36Sopenharmony_ci	VPERM(v9,v1,v0,v16)
65362306a36Sopenharmony_ci	addi	r4,r4,32
65462306a36Sopenharmony_cierr3;	stvx	v8,0,r3
65562306a36Sopenharmony_cierr3;	stvx	v9,r3,r9
65662306a36Sopenharmony_ci	addi	r3,r3,32
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci10:	bf	cr7*4+3,11f		/* 16 bytes? */
65962306a36Sopenharmony_cierr3;	lvx	v1,0,r4
66062306a36Sopenharmony_ci	VPERM(v8,v0,v1,v16)
66162306a36Sopenharmony_ci	addi	r4,r4,16
66262306a36Sopenharmony_cierr3;	stvx	v8,0,r3
66362306a36Sopenharmony_ci	addi	r3,r3,16
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_ci	/* Up to 15B to go */
66662306a36Sopenharmony_ci11:	clrldi	r5,r5,(64-4)
66762306a36Sopenharmony_ci	addi	r4,r4,-16	/* Unwind the +16 load offset */
66862306a36Sopenharmony_ci	mtocrf	0x01,r5
66962306a36Sopenharmony_ci	bf	cr7*4+0,12f		/* 8 bytes? */
67062306a36Sopenharmony_cierr3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
67162306a36Sopenharmony_cierr3;	lwz	r6,4(r4)
67262306a36Sopenharmony_ci	addi	r4,r4,8
67362306a36Sopenharmony_cierr3;	stw	r0,0(r3)
67462306a36Sopenharmony_cierr3;	stw	r6,4(r3)
67562306a36Sopenharmony_ci	addi	r3,r3,8
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci12:	bf	cr7*4+1,13f		/* 4 bytes? */
67862306a36Sopenharmony_cierr3;	lwz	r0,0(r4)
67962306a36Sopenharmony_ci	addi	r4,r4,4
68062306a36Sopenharmony_cierr3;	stw	r0,0(r3)
68162306a36Sopenharmony_ci	addi	r3,r3,4
68262306a36Sopenharmony_ci
68362306a36Sopenharmony_ci13:	bf	cr7*4+2,14f		/* 2 bytes? */
68462306a36Sopenharmony_cierr3;	lhz	r0,0(r4)
68562306a36Sopenharmony_ci	addi	r4,r4,2
68662306a36Sopenharmony_cierr3;	sth	r0,0(r3)
68762306a36Sopenharmony_ci	addi	r3,r3,2
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci14:	bf	cr7*4+3,15f		/* 1 byte? */
69062306a36Sopenharmony_cierr3;	lbz	r0,0(r4)
69162306a36Sopenharmony_cierr3;	stb	r0,0(r3)
69262306a36Sopenharmony_ci
	/*
	 * Done: pop the frame and tail-call exit_vmx_usercopy.
	 * NOTE(review): the value returned to the caller is whatever
	 * exit_vmx_usercopy leaves in r3 - presumably 0, confirm.
	 */
69362306a36Sopenharmony_ci15:	addi	r1,r1,STACKFRAMESIZE
69462306a36Sopenharmony_ci	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */
69562306a36Sopenharmony_ci#endif /* CONFIG_ALTIVEC */
696