/* SPDX-License-Identifier: GPL-2.0 */
/* NG2memcpy.S: Niagara-2 optimized memcpy.
 *
 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
 */

/* Build configuration.
 *
 * Kernel builds take the linkage helpers, the VIS entry/exit macros and
 * the ASI constants from kernel headers and use %g7 as GLOBAL_SPARE.
 *
 * Any other build defines the few ASI/FPRS constants it needs locally,
 * provides its own VISEntryHalf/VISExitHalf and uses %g5 as GLOBAL_SPARE.
 */
#ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	%g7
#else
#define ASI_PNF 0x82
#define ASI_BLK_P 0xf0
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
#define FPRS_FEF  0x04
/* VISEntryHalf saves %fprs in %o5 and then writes FPRS_FEF to %fprs.
 * VISExitHalf masks the saved value with FPRS_FEF and writes it back.
 * The MEMCPY_DEBUG flavour of VISEntryHalf additionally clears
 * %g1, %g2, %g3, %g5 and rewrites the condition codes (subcc of %g0).
 */
#ifdef MEMCPY_DEBUG
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
#define GLOBAL_SPARE	%g5
#endif

/* Overridable hooks.
 *
 * Every macro below is only defined when the including file has not
 * already supplied its own version, so this body can be reused with
 * different access primitives.
 */

/* ASI used by STORE_INIT.  SIMULATE_NIAGARA_ON_NON_NIAGARA swaps the
 * Niagara block-init ASI for the plain primary ASI (0x80).
 */
#ifndef STORE_ASI
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#else
#define STORE_ASI	0x80		/* ASI_P */
#endif
#endif

/* EX_LD / EX_ST wrap an integer load / store, EX_LD_FP / EX_ST_FP wrap
 * the accesses made while the VIS unit is in use.  The first argument
 * is the instruction, the second names a fixup label.  These defaults
 * emit the instruction unchanged and ignore the label.
 * NOTE(review): presumably the files that include this one override
 * these to record the fixup label in an exception table - confirm.
 */
#ifndef EX_LD
#define EX_LD(x,y)	x
#endif
#ifndef EX_LD_FP
#define EX_LD_FP(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif
#ifndef EX_ST_FP
#define EX_ST_FP(x,y)	x
#endif

/* LOAD(type, addr, dest): "type [addr], dest".  Also used for prefetch. */
#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

/* LOAD_BLK(addr, dest): block load through ASI_BLK_P. */
#ifndef LOAD_BLK
#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_P, dest
#endif

/* STORE(type, src, addr): "type src, [addr]".  With MEMCPY_DEBUG the
 * alternate-space form of the instruction is used with ASI 0x80.
 */
#ifndef STORE
#ifndef MEMCPY_DEBUG
#define STORE(type,src,addr)	type src, [addr]
#else
#define STORE(type,src,addr)	type##a src, [addr] 0x80
#endif
#endif

/* STORE_BLK(src, addr): block store through ASI_BLK_P. */
#ifndef STORE_BLK
#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
#endif

/* STORE_INIT(src, addr): 8-byte store through STORE_ASI. */
#ifndef STORE_INIT
#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
#endif

/* Name of the routine emitted by this file. */
#ifndef FUNC_NAME
#define FUNC_NAME	NG2memcpy
#endif

/* Optional instructions placed right after the length sanity trap. */
#ifndef PREAMBLE
#define PREAMBLE
#endif

/* Condition-code register tested by the branches written with %XCC. */
#ifndef XCC
#define XCC xcc
#endif

/* FREG_FROB(x0..x8): run faligndata over the eight adjacent pairs
 * (x0,x1), (x1,x2), ... (x7,x8) and leave the results in %f0, %f2,
 * ... %f14.  Arguments are register names without the leading '%'.
 */
#define FREG_FROB(x0, x1, x2, x3, x4, x5, x6, x7, x8) \
	faligndata	%x0, %x1, %f0; \
	faligndata	%x1, %x2, %f2; \
	faligndata	%x2, %x3, %f4; \
	faligndata	%x3, %x4, %f6; \
	faligndata	%x4, %x5, %f8; \
	faligndata	%x5, %x6, %f10; \
	faligndata	%x6, %x7, %f12; \
	faligndata	%x7, %x8, %f14;

/* FREG_MOVE_n(x0..x(n-1)): copy n double registers with fsrc2 into
 * %f0, %f2, ... in order.  The block loops use this to carry the
 * source doubles not yet consumed into the next iteration.
 */
#define FREG_MOVE_1(x0) \
	fsrc2		%x0, %f0;
#define FREG_MOVE_2(x0, x1) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2;
#define FREG_MOVE_3(x0, x1, x2) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4;
#define FREG_MOVE_4(x0, x1, x2, x3) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6;
#define FREG_MOVE_5(x0, x1, x2, x3, x4) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6; \
	fsrc2		%x4, %f8;
#define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6; \
	fsrc2		%x4, %f8; \
	fsrc2		%x5, %f10;
#define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6; \
	fsrc2		%x4, %f8; \
	fsrc2		%x5, %f10; \
	fsrc2		%x6, %f12;
#define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6; \
	fsrc2		%x4, %f8; \
	fsrc2		%x5, %f10; \
	fsrc2		%x6, %f12; \
	fsrc2		%x7, %f14;
/* FREG_LOAD_n(base, x0..x(n-1)): load n consecutive doubles with ldd
 * from base + 0x00, base + 0x08, ... into the named registers.  Every
 * load goes through EX_LD_FP with the fixup name NG2_retl_o2_plus_g1.
 */
#define FREG_LOAD_1(base, x0) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
#define FREG_LOAD_2(base, x0, x1) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
#define FREG_LOAD_3(base, x0, x1, x2) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
#define FREG_LOAD_4(base, x0, x1, x2, x3) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);

1768c2ecf20Sopenharmony_ci	.register	%g2,#scratch
1778c2ecf20Sopenharmony_ci	.register	%g3,#scratch
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	.text
1808c2ecf20Sopenharmony_ci#ifndef EX_RETVAL
1818c2ecf20Sopenharmony_ci#define EX_RETVAL(x)	x
1828c2ecf20Sopenharmony_ci__restore_fp:
1838c2ecf20Sopenharmony_ci	VISExitHalf
1848c2ecf20Sopenharmony_ci__restore_asi:
1858c2ecf20Sopenharmony_ci	retl
1868c2ecf20Sopenharmony_ci	 wr	%g0, ASI_AIUS, %asi
1878c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2)
1888c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
1898c2ecf20Sopenharmony_ci	 mov	%o2, %o0
1908c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2)
1918c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_plus_1)
1928c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
1938c2ecf20Sopenharmony_ci	 add	%o2, 1, %o0
1948c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_plus_1)
1958c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_plus_4)
1968c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
1978c2ecf20Sopenharmony_ci	 add	%o2, 4, %o0
1988c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_plus_4)
1998c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_plus_8)
2008c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
2018c2ecf20Sopenharmony_ci	 add	%o2, 8, %o0
2028c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_plus_8)
2038c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_plus_o4_plus_1)
2048c2ecf20Sopenharmony_ci	add	%o4, 1, %o4
2058c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
2068c2ecf20Sopenharmony_ci	 add	%o2, %o4, %o0
2078c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_plus_o4_plus_1)
2088c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_plus_o4_plus_8)
2098c2ecf20Sopenharmony_ci	add	%o4, 8, %o4
2108c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
2118c2ecf20Sopenharmony_ci	 add	%o2, %o4, %o0
2128c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_plus_o4_plus_8)
2138c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_plus_o4_plus_16)
2148c2ecf20Sopenharmony_ci	add	%o4, 16, %o4
2158c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
2168c2ecf20Sopenharmony_ci	 add	%o2, %o4, %o0
2178c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_plus_o4_plus_16)
2188c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_plus_g1_fp)
2198c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_fp
2208c2ecf20Sopenharmony_ci	 add	%o2, %g1, %o0
2218c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_plus_g1_fp)
2228c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
2238c2ecf20Sopenharmony_ci	add	%g1, 64, %g1
2248c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_fp
2258c2ecf20Sopenharmony_ci	 add	%o2, %g1, %o0
2268c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
2278c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_plus_g1_plus_1)
2288c2ecf20Sopenharmony_ci	add	%g1, 1, %g1
2298c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
2308c2ecf20Sopenharmony_ci	 add	%o2, %g1, %o0
2318c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_plus_g1_plus_1)
2328c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_and_7_plus_o4)
2338c2ecf20Sopenharmony_ci	and	%o2, 7, %o2
2348c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
2358c2ecf20Sopenharmony_ci	 add	%o2, %o4, %o0
2368c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_and_7_plus_o4)
2378c2ecf20Sopenharmony_ciENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
2388c2ecf20Sopenharmony_ci	and	%o2, 7, %o2
2398c2ecf20Sopenharmony_ci	add	%o4, 8, %o4
2408c2ecf20Sopenharmony_ci	ba,pt	%xcc, __restore_asi
2418c2ecf20Sopenharmony_ci	 add	%o2, %o4, %o0
2428c2ecf20Sopenharmony_ciENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
2438c2ecf20Sopenharmony_ci#endif
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci	.align		64
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci	.globl	FUNC_NAME
2488c2ecf20Sopenharmony_ci	.type	FUNC_NAME,#function
2498c2ecf20Sopenharmony_ciFUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
2508c2ecf20Sopenharmony_ci	srlx		%o2, 31, %g2
2518c2ecf20Sopenharmony_ci	cmp		%g2, 0
2528c2ecf20Sopenharmony_ci	tne		%xcc, 5
2538c2ecf20Sopenharmony_ci	PREAMBLE
2548c2ecf20Sopenharmony_ci	mov		%o0, %o3
2558c2ecf20Sopenharmony_ci	cmp		%o2, 0
2568c2ecf20Sopenharmony_ci	be,pn		%XCC, 85f
2578c2ecf20Sopenharmony_ci	 or		%o0, %o1, GLOBAL_SPARE
2588c2ecf20Sopenharmony_ci	cmp		%o2, 16
2598c2ecf20Sopenharmony_ci	blu,a,pn	%XCC, 80f
2608c2ecf20Sopenharmony_ci	 or		GLOBAL_SPARE, %o2, GLOBAL_SPARE
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci	/* 2 blocks (128 bytes) is the minimum we can do the block
2638c2ecf20Sopenharmony_ci	 * copy with.  We need to ensure that we'll iterate at least
2648c2ecf20Sopenharmony_ci	 * once in the block copy loop.  At worst we'll need to align
2658c2ecf20Sopenharmony_ci	 * the destination to a 64-byte boundary which can chew up
2668c2ecf20Sopenharmony_ci	 * to (64 - 1) bytes from the length before we perform the
2678c2ecf20Sopenharmony_ci	 * block copy loop.
2688c2ecf20Sopenharmony_ci	 *
2698c2ecf20Sopenharmony_ci	 * However, the cut-off point, performance wise, is around
2708c2ecf20Sopenharmony_ci	 * 4 64-byte blocks.
2718c2ecf20Sopenharmony_ci	 */
2728c2ecf20Sopenharmony_ci	cmp		%o2, (4 * 64)
2738c2ecf20Sopenharmony_ci	blu,pt		%XCC, 75f
2748c2ecf20Sopenharmony_ci	 andcc		GLOBAL_SPARE, 0x7, %g0
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci	/* %o0:	dst
2778c2ecf20Sopenharmony_ci	 * %o1:	src
2788c2ecf20Sopenharmony_ci	 * %o2:	len  (known to be >= 128)
2798c2ecf20Sopenharmony_ci	 *
2808c2ecf20Sopenharmony_ci	 * The block copy loops can use %o4, %g2, %g3 as
2818c2ecf20Sopenharmony_ci	 * temporaries while copying the data.  %o5 must
2828c2ecf20Sopenharmony_ci	 * be preserved between VISEntryHalf and VISExitHalf
2838c2ecf20Sopenharmony_ci	 */
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci	LOAD(prefetch, %o1 + 0x000, #one_read)
2868c2ecf20Sopenharmony_ci	LOAD(prefetch, %o1 + 0x040, #one_read)
2878c2ecf20Sopenharmony_ci	LOAD(prefetch, %o1 + 0x080, #one_read)
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci	/* Align destination on 64-byte boundary.  */
2908c2ecf20Sopenharmony_ci	andcc		%o0, (64 - 1), %o4
2918c2ecf20Sopenharmony_ci	be,pt		%XCC, 2f
2928c2ecf20Sopenharmony_ci	 sub		%o4, 64, %o4
2938c2ecf20Sopenharmony_ci	sub		%g0, %o4, %o4	! bytes to align dst
2948c2ecf20Sopenharmony_ci	sub		%o2, %o4, %o2
2958c2ecf20Sopenharmony_ci1:	subcc		%o4, 1, %o4
2968c2ecf20Sopenharmony_ci	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
2978c2ecf20Sopenharmony_ci	EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
2988c2ecf20Sopenharmony_ci	add		%o1, 1, %o1
2998c2ecf20Sopenharmony_ci	bne,pt		%XCC, 1b
3008c2ecf20Sopenharmony_ci	add		%o0, 1, %o0
3018c2ecf20Sopenharmony_ci
2:
	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  We must preserve
	 * o5 from here until we hit VISExitHalf.
	 */
	VISEntryHalf

	membar		#Sync
	alignaddr	%o1, %g0, %g0

	/* Set up the block copy:
	 *	%o4 = src rounded up to a 64-byte boundary
	 *	%g1 = len & ~63, bytes handled by the block loop
	 *	%o2 = len & 63, tail left for the byte loop
	 *	%g2 = src & 63, selects the loop variant
	 *	%o1 = src + %g1, where the tail starts
	 *	%g3 = dst - %o4, so %o4 + %g3 addresses the destination
	 */
	add		%o1, (64 - 1), %o4
	andn		%o4, (64 - 1), %o4
	andn		%o2, (64 - 1), %g1
	sub		%o2, %g1, %o2

	and		%o1, (64 - 1), %g2
	add		%o1, %g1, %o1
	sub		%o0, %o4, %g3
	/* Pick a loop by 8-byte bucket of %g2:
	 *	0	-> 190f		32..39	-> 150f
	 *	1..7	-> 110f		40..47	-> 160f
	 *	8..15	-> 120f		48..55	-> 170f
	 *	16..23	-> 130f		56..63	-> 180f
	 *	24..31	-> 140f
	 * The annulled delay-slot compares execute only when their
	 * branch is taken and set up the test made at the target.
	 */
	brz,pt		%g2, 190f
	 cmp		%g2, 32
	blu,a		5f
	 cmp		%g2, 16
	cmp		%g2, 48
	blu,a		4f
	 cmp		%g2, 40
	cmp		%g2, 56
	blu		170f
	 nop
	ba,a,pt		%xcc, 180f
	 nop

4:	/* 32 <= low bits < 48 */
	blu		150f
	 nop
	ba,a,pt		%xcc, 160f
	 nop
5:	/* 0 < low bits < 32 */
	blu,a		6f
	 cmp		%g2, 8
	cmp		%g2, 24
	blu		130f
	 nop
	ba,a,pt		%xcc, 140f
	 nop
6:	/* 0 < low bits < 16 */
	bgeu		120f
	 nop
	/* fall through for 0 < low bits < 8 */
3498c2ecf20Sopenharmony_ci110:	sub		%o4, 64, %g2
3508c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
3518c2ecf20Sopenharmony_ci1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
3528c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
3538c2ecf20Sopenharmony_ci	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
3548c2ecf20Sopenharmony_ci	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
3558c2ecf20Sopenharmony_ci	FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
3568c2ecf20Sopenharmony_ci	subcc		%g1, 64, %g1
3578c2ecf20Sopenharmony_ci	add		%o4, 64, %o4
3588c2ecf20Sopenharmony_ci	bne,pt		%xcc, 1b
3598c2ecf20Sopenharmony_ci	 LOAD(prefetch, %o4 + 64, #one_read)
3608c2ecf20Sopenharmony_ci	ba,pt		%xcc, 195f
3618c2ecf20Sopenharmony_ci	 nop
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci120:	sub		%o4, 56, %g2
3648c2ecf20Sopenharmony_ci	FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
3658c2ecf20Sopenharmony_ci1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
3668c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
3678c2ecf20Sopenharmony_ci	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
3688c2ecf20Sopenharmony_ci	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
3698c2ecf20Sopenharmony_ci	FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
3708c2ecf20Sopenharmony_ci	subcc		%g1, 64, %g1
3718c2ecf20Sopenharmony_ci	add		%o4, 64, %o4
3728c2ecf20Sopenharmony_ci	bne,pt		%xcc, 1b
3738c2ecf20Sopenharmony_ci	 LOAD(prefetch, %o4 + 64, #one_read)
3748c2ecf20Sopenharmony_ci	ba,pt		%xcc, 195f
3758c2ecf20Sopenharmony_ci	 nop
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci130:	sub		%o4, 48, %g2
3788c2ecf20Sopenharmony_ci	FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
3798c2ecf20Sopenharmony_ci1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
3808c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
3818c2ecf20Sopenharmony_ci	FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
3828c2ecf20Sopenharmony_ci	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
3838c2ecf20Sopenharmony_ci	FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
3848c2ecf20Sopenharmony_ci	subcc		%g1, 64, %g1
3858c2ecf20Sopenharmony_ci	add		%o4, 64, %o4
3868c2ecf20Sopenharmony_ci	bne,pt		%xcc, 1b
3878c2ecf20Sopenharmony_ci	 LOAD(prefetch, %o4 + 64, #one_read)
3888c2ecf20Sopenharmony_ci	ba,pt		%xcc, 195f
3898c2ecf20Sopenharmony_ci	 nop
3908c2ecf20Sopenharmony_ci
3918c2ecf20Sopenharmony_ci140:	sub		%o4, 40, %g2
3928c2ecf20Sopenharmony_ci	FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
3938c2ecf20Sopenharmony_ci1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
3948c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
3958c2ecf20Sopenharmony_ci	FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
3968c2ecf20Sopenharmony_ci	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
3978c2ecf20Sopenharmony_ci	FREG_MOVE_5(f22, f24, f26, f28, f30)
3988c2ecf20Sopenharmony_ci	subcc		%g1, 64, %g1
3998c2ecf20Sopenharmony_ci	add		%o4, 64, %o4
4008c2ecf20Sopenharmony_ci	bne,pt		%xcc, 1b
4018c2ecf20Sopenharmony_ci	 LOAD(prefetch, %o4 + 64, #one_read)
4028c2ecf20Sopenharmony_ci	ba,pt		%xcc, 195f
4038c2ecf20Sopenharmony_ci	 nop
4048c2ecf20Sopenharmony_ci
4058c2ecf20Sopenharmony_ci150:	sub		%o4, 32, %g2
4068c2ecf20Sopenharmony_ci	FREG_LOAD_4(%g2, f0, f2, f4, f6)
4078c2ecf20Sopenharmony_ci1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
4088c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
4098c2ecf20Sopenharmony_ci	FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
4108c2ecf20Sopenharmony_ci	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
4118c2ecf20Sopenharmony_ci	FREG_MOVE_4(f24, f26, f28, f30)
4128c2ecf20Sopenharmony_ci	subcc		%g1, 64, %g1
4138c2ecf20Sopenharmony_ci	add		%o4, 64, %o4
4148c2ecf20Sopenharmony_ci	bne,pt		%xcc, 1b
4158c2ecf20Sopenharmony_ci	 LOAD(prefetch, %o4 + 64, #one_read)
4168c2ecf20Sopenharmony_ci	ba,pt		%xcc, 195f
4178c2ecf20Sopenharmony_ci	 nop
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci160:	sub		%o4, 24, %g2
4208c2ecf20Sopenharmony_ci	FREG_LOAD_3(%g2, f0, f2, f4)
4218c2ecf20Sopenharmony_ci1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
4228c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
4238c2ecf20Sopenharmony_ci	FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
4248c2ecf20Sopenharmony_ci	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
4258c2ecf20Sopenharmony_ci	FREG_MOVE_3(f26, f28, f30)
4268c2ecf20Sopenharmony_ci	subcc		%g1, 64, %g1
4278c2ecf20Sopenharmony_ci	add		%o4, 64, %o4
4288c2ecf20Sopenharmony_ci	bne,pt		%xcc, 1b
4298c2ecf20Sopenharmony_ci	 LOAD(prefetch, %o4 + 64, #one_read)
4308c2ecf20Sopenharmony_ci	ba,pt		%xcc, 195f
4318c2ecf20Sopenharmony_ci	 nop
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci170:	sub		%o4, 16, %g2
4348c2ecf20Sopenharmony_ci	FREG_LOAD_2(%g2, f0, f2)
4358c2ecf20Sopenharmony_ci1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
4368c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
4378c2ecf20Sopenharmony_ci	FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
4388c2ecf20Sopenharmony_ci	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
4398c2ecf20Sopenharmony_ci	FREG_MOVE_2(f28, f30)
4408c2ecf20Sopenharmony_ci	subcc		%g1, 64, %g1
4418c2ecf20Sopenharmony_ci	add		%o4, 64, %o4
4428c2ecf20Sopenharmony_ci	bne,pt		%xcc, 1b
4438c2ecf20Sopenharmony_ci	 LOAD(prefetch, %o4 + 64, #one_read)
4448c2ecf20Sopenharmony_ci	ba,pt		%xcc, 195f
4458c2ecf20Sopenharmony_ci	 nop
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ci180:	sub		%o4, 8, %g2
4488c2ecf20Sopenharmony_ci	FREG_LOAD_1(%g2, f0)
4498c2ecf20Sopenharmony_ci1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
4508c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
4518c2ecf20Sopenharmony_ci	FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
4528c2ecf20Sopenharmony_ci	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
4538c2ecf20Sopenharmony_ci	FREG_MOVE_1(f30)
4548c2ecf20Sopenharmony_ci	subcc		%g1, 64, %g1
4558c2ecf20Sopenharmony_ci	add		%o4, 64, %o4
4568c2ecf20Sopenharmony_ci	bne,pt		%xcc, 1b
4578c2ecf20Sopenharmony_ci	 LOAD(prefetch, %o4 + 64, #one_read)
4588c2ecf20Sopenharmony_ci	ba,pt		%xcc, 195f
4598c2ecf20Sopenharmony_ci	 nop
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci190:
4628c2ecf20Sopenharmony_ci1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
4638c2ecf20Sopenharmony_ci	subcc		%g1, 64, %g1
4648c2ecf20Sopenharmony_ci	EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
4658c2ecf20Sopenharmony_ci	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
4668c2ecf20Sopenharmony_ci	add		%o4, 64, %o4
4678c2ecf20Sopenharmony_ci	bne,pt		%xcc, 1b
4688c2ecf20Sopenharmony_ci	 LOAD(prefetch, %o4 + 64, #one_read)
4698c2ecf20Sopenharmony_ci
4708c2ecf20Sopenharmony_ci195:
4718c2ecf20Sopenharmony_ci	add		%o4, %g3, %o0
4728c2ecf20Sopenharmony_ci	membar		#Sync
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	VISExitHalf
4758c2ecf20Sopenharmony_ci
4768c2ecf20Sopenharmony_ci	/* %o2 contains any final bytes still needed to be copied
4778c2ecf20Sopenharmony_ci	 * over. If anything is left, we copy it one byte at a time.
4788c2ecf20Sopenharmony_ci	 */
4798c2ecf20Sopenharmony_ci	brz,pt		%o2, 85f
4808c2ecf20Sopenharmony_ci	 sub		%o0, %o1, GLOBAL_SPARE
4818c2ecf20Sopenharmony_ci	ba,a,pt		%XCC, 90f
4828c2ecf20Sopenharmony_ci	 nop
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci	.align		64
4858c2ecf20Sopenharmony_ci75: /* 16 < len <= 64 */
4868c2ecf20Sopenharmony_ci	bne,pn		%XCC, 75f
4878c2ecf20Sopenharmony_ci	 sub		%o0, %o1, GLOBAL_SPARE
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_ci72:
4908c2ecf20Sopenharmony_ci	andn		%o2, 0xf, %o4
4918c2ecf20Sopenharmony_ci	and		%o2, 0xf, %o2
4928c2ecf20Sopenharmony_ci1:	subcc		%o4, 0x10, %o4
4938c2ecf20Sopenharmony_ci	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
4948c2ecf20Sopenharmony_ci	add		%o1, 0x08, %o1
4958c2ecf20Sopenharmony_ci	EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
4968c2ecf20Sopenharmony_ci	sub		%o1, 0x08, %o1
4978c2ecf20Sopenharmony_ci	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
4988c2ecf20Sopenharmony_ci	add		%o1, 0x8, %o1
4998c2ecf20Sopenharmony_ci	EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
5008c2ecf20Sopenharmony_ci	bgu,pt		%XCC, 1b
5018c2ecf20Sopenharmony_ci	 add		%o1, 0x8, %o1
5028c2ecf20Sopenharmony_ci73:	andcc		%o2, 0x8, %g0
5038c2ecf20Sopenharmony_ci	be,pt		%XCC, 1f
5048c2ecf20Sopenharmony_ci	 nop
5058c2ecf20Sopenharmony_ci	sub		%o2, 0x8, %o2
5068c2ecf20Sopenharmony_ci	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
5078c2ecf20Sopenharmony_ci	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
5088c2ecf20Sopenharmony_ci	add		%o1, 0x8, %o1
5098c2ecf20Sopenharmony_ci1:	andcc		%o2, 0x4, %g0
5108c2ecf20Sopenharmony_ci	be,pt		%XCC, 1f
5118c2ecf20Sopenharmony_ci	 nop
5128c2ecf20Sopenharmony_ci	sub		%o2, 0x4, %o2
5138c2ecf20Sopenharmony_ci	EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
5148c2ecf20Sopenharmony_ci	EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
5158c2ecf20Sopenharmony_ci	add		%o1, 0x4, %o1
5168c2ecf20Sopenharmony_ci1:	cmp		%o2, 0
5178c2ecf20Sopenharmony_ci	be,pt		%XCC, 85f
5188c2ecf20Sopenharmony_ci	 nop
5198c2ecf20Sopenharmony_ci	ba,pt		%xcc, 90f
5208c2ecf20Sopenharmony_ci	 nop
5218c2ecf20Sopenharmony_ci
5228c2ecf20Sopenharmony_ci75:
5238c2ecf20Sopenharmony_ci	andcc		%o0, 0x7, %g1
5248c2ecf20Sopenharmony_ci	sub		%g1, 0x8, %g1
5258c2ecf20Sopenharmony_ci	be,pn		%icc, 2f
5268c2ecf20Sopenharmony_ci	 sub		%g0, %g1, %g1
5278c2ecf20Sopenharmony_ci	sub		%o2, %g1, %o2
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ci1:	subcc		%g1, 1, %g1
5308c2ecf20Sopenharmony_ci	EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
5318c2ecf20Sopenharmony_ci	EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
5328c2ecf20Sopenharmony_ci	bgu,pt		%icc, 1b
5338c2ecf20Sopenharmony_ci	 add		%o1, 1, %o1
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_ci2:	add		%o1, GLOBAL_SPARE, %o0
5368c2ecf20Sopenharmony_ci	andcc		%o1, 0x7, %g1
5378c2ecf20Sopenharmony_ci	bne,pt		%icc, 8f
5388c2ecf20Sopenharmony_ci	 sll		%g1, 3, %g1
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ci	cmp		%o2, 16
5418c2ecf20Sopenharmony_ci	bgeu,pt		%icc, 72b
5428c2ecf20Sopenharmony_ci	 nop
5438c2ecf20Sopenharmony_ci	ba,a,pt		%xcc, 73b
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci8:	mov		64, GLOBAL_SPARE
5468c2ecf20Sopenharmony_ci	andn		%o1, 0x7, %o1
5478c2ecf20Sopenharmony_ci	EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
5488c2ecf20Sopenharmony_ci	sub		GLOBAL_SPARE, %g1, GLOBAL_SPARE
5498c2ecf20Sopenharmony_ci	andn		%o2, 0x7, %o4
5508c2ecf20Sopenharmony_ci	sllx		%g2, %g1, %g2
5518c2ecf20Sopenharmony_ci1:	add		%o1, 0x8, %o1
5528c2ecf20Sopenharmony_ci	EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
5538c2ecf20Sopenharmony_ci	subcc		%o4, 0x8, %o4
5548c2ecf20Sopenharmony_ci	srlx		%g3, GLOBAL_SPARE, %o5
5558c2ecf20Sopenharmony_ci	or		%o5, %g2, %o5
5568c2ecf20Sopenharmony_ci	EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
5578c2ecf20Sopenharmony_ci	add		%o0, 0x8, %o0
5588c2ecf20Sopenharmony_ci	bgu,pt		%icc, 1b
5598c2ecf20Sopenharmony_ci	 sllx		%g3, %g1, %g2
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ci	srl		%g1, 3, %g1
5628c2ecf20Sopenharmony_ci	andcc		%o2, 0x7, %o2
5638c2ecf20Sopenharmony_ci	be,pn		%icc, 85f
5648c2ecf20Sopenharmony_ci	 add		%o1, %g1, %o1
5658c2ecf20Sopenharmony_ci	ba,pt		%xcc, 90f
5668c2ecf20Sopenharmony_ci	 sub		%o0, %o1, GLOBAL_SPARE
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci	.align		64
5698c2ecf20Sopenharmony_ci80: /* 0 < len <= 16 */
5708c2ecf20Sopenharmony_ci	andcc		GLOBAL_SPARE, 0x3, %g0
5718c2ecf20Sopenharmony_ci	bne,pn		%XCC, 90f
5728c2ecf20Sopenharmony_ci	 sub		%o0, %o1, GLOBAL_SPARE
5738c2ecf20Sopenharmony_ci
5748c2ecf20Sopenharmony_ci1:
5758c2ecf20Sopenharmony_ci	subcc		%o2, 4, %o2
5768c2ecf20Sopenharmony_ci	EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
5778c2ecf20Sopenharmony_ci	EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
5788c2ecf20Sopenharmony_ci	bgu,pt		%XCC, 1b
5798c2ecf20Sopenharmony_ci	 add		%o1, 4, %o1
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_ci85:	retl
5828c2ecf20Sopenharmony_ci	 mov		EX_RETVAL(%o3), %o0
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci	.align		32
5858c2ecf20Sopenharmony_ci90:
5868c2ecf20Sopenharmony_ci	subcc		%o2, 1, %o2
5878c2ecf20Sopenharmony_ci	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
5888c2ecf20Sopenharmony_ci	EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
5898c2ecf20Sopenharmony_ci	bgu,pt		%XCC, 90b
5908c2ecf20Sopenharmony_ci	 add		%o1, 1, %o1
5918c2ecf20Sopenharmony_ci	retl
5928c2ecf20Sopenharmony_ci	 mov		EX_RETVAL(%o3), %o0
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci	.size		FUNC_NAME, .-FUNC_NAME
595