/* SPDX-License-Identifier: GPL-2.0 */
/* NG2memcpy.S: Niagara-2 optimized memcpy.
 *
 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
 */

#ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	%g7
#else
#define ASI_PNF 0x82
#define ASI_BLK_P 0xf0
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
#define FPRS_FEF  0x04
#ifdef MEMCPY_DEBUG
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
#define GLOBAL_SPARE	%g5
#endif

/* Every hook below is only a default: each one is guarded by #ifndef,
 * so a definition made before this point takes precedence.
 */
#ifndef STORE_ASI
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#else
#define STORE_ASI	0x80		/* ASI_P */
#endif
#endif

/* Default EX_* wrappers emit the instruction 'x' unchanged and ignore
 * 'y', the name of one of the NG2_retl_* stubs defined further down.
 */
#ifndef EX_LD
#define EX_LD(x,y)	x
#endif
#ifndef EX_LD_FP
#define EX_LD_FP(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif
#ifndef EX_ST_FP
#define EX_ST_FP(x,y)	x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef LOAD_BLK
#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_P, dest
#endif

#ifndef STORE
#ifndef MEMCPY_DEBUG
#define STORE(type,src,addr)	type src, [addr]
#else
#define STORE(type,src,addr)	type##a src, [addr] 0x80
#endif
#endif

#ifndef STORE_BLK
#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
#endif

#ifndef STORE_INIT
#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	NG2memcpy
#endif

#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

/* faligndata over the eight adjacent pairs (x0,x1) ... (x7,x8);
 * the results land in %f0, %f2, ..., %f14.
 */
#define FREG_FROB(x0, x1, x2, x3, x4, x5, x6, x7, x8) \
	faligndata	%x0, %x1, %f0; \
	faligndata	%x1, %x2, %f2; \
	faligndata	%x2, %x3, %f4; \
	faligndata	%x3, %x4, %f6; \
	faligndata	%x4, %x5, %f8; \
	faligndata	%x5, %x6, %f10; \
	faligndata	%x6, %x7, %f12; \
	faligndata	%x7, %x8, %f14;

/* FREG_MOVE_n: fsrc2-copy n registers into %f0, %f2, ... in order. */
#define FREG_MOVE_1(x0) \
	fsrc2		%x0, %f0;
#define FREG_MOVE_2(x0, x1) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2;
#define FREG_MOVE_3(x0, x1, x2) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4;
#define FREG_MOVE_4(x0, x1, x2, x3) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6;
#define FREG_MOVE_5(x0, x1, x2, x3, x4) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6; \
	fsrc2		%x4, %f8;
#define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6; \
	fsrc2		%x4, %f8; \
	fsrc2		%x5, %f10;
#define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6; \
	fsrc2		%x4, %f8; \
	fsrc2		%x5, %f10; \
	fsrc2		%x6, %f12;
#define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \
	fsrc2		%x0, %f0; \
	fsrc2		%x1, %f2; \
	fsrc2		%x2, %f4; \
	fsrc2		%x3, %f6; \
	fsrc2		%x4, %f8; \
	fsrc2		%x5, %f10; \
	fsrc2		%x6, %f12; \
	fsrc2		%x7, %f14;

/* FREG_LOAD_n: ldd n consecutive 8-byte values starting at 'base'
 * into x0, x1, ...; every load is wrapped in EX_LD_FP.
 */
#define FREG_LOAD_1(base, x0) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
#define FREG_LOAD_2(base, x0, x1) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
#define FREG_LOAD_3(base, x0, x1, x2) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
#define FREG_LOAD_4(base, x0, x1, x2, x3) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
	EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
/* Return stubs, emitted only when EX_RETVAL was not already defined.
 * Each stub sets %o0 to the register expression spelled out in its
 * name and leaves through __restore_asi (retl, with %asi set to
 * ASI_AIUS in the delay slot) or through __restore_fp (VISExitHalf,
 * then falls into __restore_asi).
 *
 * NOTE(review): presumably %o0 is the count of bytes left uncopied and
 * these are exception fixup targets for the EX_* wrappers -- confirm
 * against the files that override EX_LD/EX_ST.
 *
 * NOTE(review): the FP call sites pass names without the _fp suffix
 * (NG2_retl_o2_plus_g1, NG2_retl_o2_plus_g1_plus_64) while the stubs
 * here carry it; presumably the overriding EX_LD_FP/EX_ST_FP append
 * _fp -- confirm.
 */
__restore_fp:
	VISExitHalf
__restore_asi:
	retl
	 wr	%g0, ASI_AIUS, %asi
ENTRY(NG2_retl_o2)
	ba,pt	%xcc, __restore_asi
	 mov	%o2, %o0
ENDPROC(NG2_retl_o2)
ENTRY(NG2_retl_o2_plus_1)
	ba,pt	%xcc, __restore_asi
	 add	%o2, 1, %o0
ENDPROC(NG2_retl_o2_plus_1)
ENTRY(NG2_retl_o2_plus_4)
	ba,pt	%xcc, __restore_asi
	 add	%o2, 4, %o0
ENDPROC(NG2_retl_o2_plus_4)
ENTRY(NG2_retl_o2_plus_8)
	ba,pt	%xcc, __restore_asi
	 add	%o2, 8, %o0
ENDPROC(NG2_retl_o2_plus_8)
ENTRY(NG2_retl_o2_plus_o4_plus_1)
	add	%o4, 1, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG2_retl_o2_plus_o4_plus_1)
ENTRY(NG2_retl_o2_plus_o4_plus_8)
	add	%o4, 8, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG2_retl_o2_plus_o4_plus_8)
ENTRY(NG2_retl_o2_plus_o4_plus_16)
	add	%o4, 16, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG2_retl_o2_plus_o4_plus_16)
ENTRY(NG2_retl_o2_plus_g1_fp)
	ba,pt	%xcc, __restore_fp
	 add	%o2, %g1, %o0
ENDPROC(NG2_retl_o2_plus_g1_fp)
ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
	add	%g1, 64, %g1
	ba,pt	%xcc, __restore_fp
	 add	%o2, %g1, %o0
ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
ENTRY(NG2_retl_o2_plus_g1_plus_1)
	add	%g1, 1, %g1
	ba,pt	%xcc, __restore_asi
	 add	%o2, %g1, %o0
ENDPROC(NG2_retl_o2_plus_g1_plus_1)
ENTRY(NG2_retl_o2_and_7_plus_o4)
	and	%o2, 7, %o2
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG2_retl_o2_and_7_plus_o4)
ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
	and	%o2, 7, %o2
	add	%o4, 8, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
#endif

	.align		64

	/* FUNC_NAME(dst, src, len)
	 *
	 * In:	%o0 = dst, %o1 = src, %o2 = len
	 * Out:	%o0 = EX_RETVAL(dst as passed in; saved in %o3)
	 *
	 * Scratch: %o3, %o4, %o5, %g1, %g2, %g3, GLOBAL_SPARE and the
	 * condition codes; the block-copy path additionally uses
	 * %f0-%f30 plus whatever VISEntryHalf touches.
	 *
	 * Raises software trap 5 (tne) when len >> 31 is non-zero.
	 *
	 * Dispatch on len:
	 *	len == 0	 -> 85  (plain return)
	 *	0 < len < 16	 -> 80  (word loop, else byte loop at 90)
	 *	16 <= len < 256	 -> 75  (integer 8/16-byte loops)
	 *	len >= 256	 -> VIS block copy, tail bytes via 90
	 */
	.globl	FUNC_NAME
	.type	FUNC_NAME,#function
FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
	srlx	%o2, 31, %g2
	cmp	%g2, 0
	tne	%xcc, 5
	PREAMBLE
	mov	%o0, %o3
	cmp	%o2, 0
	be,pn	%XCC, 85f
	 or	%o0, %o1, GLOBAL_SPARE
	cmp	%o2, 16
	blu,a,pn %XCC, 80f
	 or	GLOBAL_SPARE, %o2, GLOBAL_SPARE

	/* 2 blocks (128 bytes) is the minimum we can do the block
	 * copy with.  We need to ensure that we'll iterate at least
	 * once in the block copy loop.  At worst we'll need to align
	 * the destination to a 64-byte boundary which can chew up
	 * to (64 - 1) bytes from the length before we perform the
	 * block copy loop.
	 *
	 * However, the cut-off point, performance wise, is around
	 * 4 64-byte blocks.
	 */
	cmp	%o2, (4 * 64)
	blu,pt	%XCC, 75f
	 andcc	GLOBAL_SPARE, 0x7, %g0

	/* %o0:	dst
	 * %o1:	src
	 * %o2:	len  (known to be >= 256, i.e. 4 * 64, per the
	 *	      compare just above)
	 *
	 * The block copy loops can use %o4, %g2, %g3 as
	 * temporaries while copying the data.  %o5 must
	 * be preserved between VISEntryHalf and VISExitHalf
	 */

	LOAD(prefetch, %o1 + 0x000, #one_read)
	LOAD(prefetch, %o1 + 0x040, #one_read)
	LOAD(prefetch, %o1 + 0x080, #one_read)

	/* Align destination on 64-byte boundary.  */
	andcc	%o0, (64 - 1), %o4
	be,pt	%XCC, 2f
	 sub	%o4, 64, %o4
	sub	%g0, %o4, %o4	! bytes to align dst
	sub	%o2, %o4, %o2
1:	subcc	%o4, 1, %o4
	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
	EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
	add	%o1, 1, %o1
	bne,pt	%XCC, 1b
	 add	%o0, 1, %o0

2:
	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  We must preserve
	 * o5 from here until we hit VISExitHalf.
	 */
	VISEntryHalf

	membar	#Sync
	alignaddr %o1, %g0, %g0

	/* %o4 = src rounded up to a 64-byte boundary
	 * %g1 = byte count for the block loops (len & ~63)
	 * %o2 = tail bytes left for afterwards (len & 63)
	 * %g2 = src & 63, selects one of the loops below
	 * %o1 = src advanced past the block-copied region
	 * %g3 = dst - %o4, so every store goes to %o4 + %g3
	 */
	add	%o1, (64 - 1), %o4
	andn	%o4, (64 - 1), %o4
	andn	%o2, (64 - 1), %g1
	sub	%o2, %g1, %o2

	and	%o1, (64 - 1), %g2
	add	%o1, %g1, %o1
	sub	%o0, %o4, %g3
	brz,pt	%g2, 190f
	 cmp	%g2, 32
	blu,a	5f
	 cmp	%g2, 16
	cmp	%g2, 48
	blu,a	4f
	 cmp	%g2, 40
	cmp	%g2, 56
	blu	170f
	 nop
	ba,a,pt	%xcc, 180f
	 nop

4:	/* 32 <= low bits < 48 */
	blu	150f
	 nop
	ba,a,pt	%xcc, 160f
	 nop
5:	/* 0 < low bits < 32 */
	blu,a	6f
	 cmp	%g2, 8
	cmp	%g2, 24
	blu	130f
	 nop
	ba,a,pt	%xcc, 140f
	 nop
6:	/* 0 < low bits < 16 */
	bgeu	120f
	 nop
	/* fall through for 0 < low bits < 8 */
110:	sub	%o4, 64, %g2
	EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
	FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
	subcc	%g1, 64, %g1
	add	%o4, 64, %o4
	bne,pt	%xcc, 1b
	 LOAD(prefetch, %o4 + 64, #one_read)
	ba,pt	%xcc, 195f
	 nop

	/* 8 <= low bits < 16 */
120:	sub	%o4, 56, %g2
	FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
	FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
	subcc	%g1, 64, %g1
	add	%o4, 64, %o4
	bne,pt	%xcc, 1b
	 LOAD(prefetch, %o4 + 64, #one_read)
	ba,pt	%xcc, 195f
	 nop

	/* 16 <= low bits < 24 */
130:	sub	%o4, 48, %g2
	FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
	FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
	FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
	subcc	%g1, 64, %g1
	add	%o4, 64, %o4
	bne,pt	%xcc, 1b
	 LOAD(prefetch, %o4 + 64, #one_read)
	ba,pt	%xcc, 195f
	 nop

	/* 24 <= low bits < 32 */
140:	sub	%o4, 40, %g2
	FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
	FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
	FREG_MOVE_5(f22, f24, f26, f28, f30)
	subcc	%g1, 64, %g1
	add	%o4, 64, %o4
	bne,pt	%xcc, 1b
	 LOAD(prefetch, %o4 + 64, #one_read)
	ba,pt	%xcc, 195f
	 nop

	/* 32 <= low bits < 40 */
150:	sub	%o4, 32, %g2
	FREG_LOAD_4(%g2, f0, f2, f4, f6)
1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
	FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
	FREG_MOVE_4(f24, f26, f28, f30)
	subcc	%g1, 64, %g1
	add	%o4, 64, %o4
	bne,pt	%xcc, 1b
	 LOAD(prefetch, %o4 + 64, #one_read)
	ba,pt	%xcc, 195f
	 nop

	/* 40 <= low bits < 48 */
160:	sub	%o4, 24, %g2
	FREG_LOAD_3(%g2, f0, f2, f4)
1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
	FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
	FREG_MOVE_3(f26, f28, f30)
	subcc	%g1, 64, %g1
	add	%o4, 64, %o4
	bne,pt	%xcc, 1b
	 LOAD(prefetch, %o4 + 64, #one_read)
	ba,pt	%xcc, 195f
	 nop

	/* 48 <= low bits < 56 */
170:	sub	%o4, 16, %g2
	FREG_LOAD_2(%g2, f0, f2)
1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
	FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
	FREG_MOVE_2(f28, f30)
	subcc	%g1, 64, %g1
	add	%o4, 64, %o4
	bne,pt	%xcc, 1b
	 LOAD(prefetch, %o4 + 64, #one_read)
	ba,pt	%xcc, 195f
	 nop

	/* 56 <= low bits < 64 */
180:	sub	%o4, 8, %g2
	FREG_LOAD_1(%g2, f0)
1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
	FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
	FREG_MOVE_1(f30)
	subcc	%g1, 64, %g1
	add	%o4, 64, %o4
	bne,pt	%xcc, 1b
	 LOAD(prefetch, %o4 + 64, #one_read)
	ba,pt	%xcc, 195f
	 nop

	/* low bits == 0: src is 64-byte aligned, no faligndata needed.
	 * %g1 is decremented before the block load/store here, hence
	 * the _plus_64 stub names on those two accesses.
	 */
190:
1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
	subcc	%g1, 64, %g1
	EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
	add	%o4, 64, %o4
	bne,pt	%xcc, 1b
	 LOAD(prefetch, %o4 + 64, #one_read)

195:
	add	%o4, %g3, %o0
	membar	#Sync

	VISExitHalf

	/* %o2 contains any final bytes still needed to be copied
	 * over. If anything is left, we copy it one byte at a time.
	 */
	brz,pt	%o2, 85f
	 sub	%o0, %o1, GLOBAL_SPARE
	ba,a,pt	%XCC, 90f
	 nop

	.align		64
75: /* 16 <= len < 256 */
	/* Condition codes still hold (dst | src) & 7 from the andcc
	 * in the delay slot of the branch that got us here.
	 */
	bne,pn	%XCC, 75f
	 sub	%o0, %o1, GLOBAL_SPARE

	/* dst and src both 8-byte aligned: 16 bytes per iteration,
	 * stores addressed as %o1 + GLOBAL_SPARE (= dst - src).
	 */
72:
	andn	%o2, 0xf, %o4
	and	%o2, 0xf, %o2
1:	subcc	%o4, 0x10, %o4
	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
	add	%o1, 0x08, %o1
	EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
	sub	%o1, 0x08, %o1
	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
	add	%o1, 0x8, %o1
	EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
	bgu,pt	%XCC, 1b
	 add	%o1, 0x8, %o1
	/* Fewer than 16 bytes left: one 8-byte, then one 4-byte move
	 * as selected by the bits of %o2, the rest goes to 90.
	 */
73:	andcc	%o2, 0x8, %g0
	be,pt	%XCC, 1f
	 nop
	sub	%o2, 0x8, %o2
	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
	add	%o1, 0x8, %o1
1:	andcc	%o2, 0x4, %g0
	be,pt	%XCC, 1f
	 nop
	sub	%o2, 0x4, %o2
	EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
	EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
	add	%o1, 0x4, %o1
1:	cmp	%o2, 0
	be,pt	%XCC, 85f
	 nop
	ba,pt	%xcc, 90f
	 nop

	/* dst and src are not both 8-byte aligned.  Byte-copy
	 * 8 - (dst & 7) bytes so that dst becomes 8-byte aligned,
	 * unless it already is.
	 */
75:
	andcc	%o0, 0x7, %g1
	sub	%g1, 0x8, %g1
	be,pn	%icc, 2f
	 sub	%g0, %g1, %g1
	sub	%o2, %g1, %o2

1:	subcc	%g1, 1, %g1
	EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
	EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
	bgu,pt	%icc, 1b
	 add	%o1, 1, %o1

	/* dst is now 8-byte aligned.  If src is as well, reuse the
	 * aligned loops at 72/73; otherwise go to 8 with
	 * %g1 = (src & 7) * 8, the bit shift for the merge loop.
	 */
2:	add	%o1, GLOBAL_SPARE, %o0
	andcc	%o1, 0x7, %g1
	bne,pt	%icc, 8f
	 sll	%g1, 3, %g1

	cmp	%o2, 16
	bgeu,pt	%icc, 72b
	 nop
	ba,a,pt	%xcc, 73b

	/* Misaligned src: read aligned 8-byte words and assemble each
	 * output word from two neighbours, shifting left by %g1 and
	 * right by GLOBAL_SPARE = 64 - %g1.
	 */
8:	mov	64, GLOBAL_SPARE
	andn	%o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
	sub	GLOBAL_SPARE, %g1, GLOBAL_SPARE
	andn	%o2, 0x7, %o4
	sllx	%g2, %g1, %g2
1:	add	%o1, 0x8, %o1
	EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
	subcc	%o4, 0x8, %o4
	srlx	%g3, GLOBAL_SPARE, %o5
	or	%o5, %g2, %o5
	EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
	add	%o0, 0x8, %o0
	bgu,pt	%icc, 1b
	 sllx	%g3, %g1, %g2

	/* Turn %g1 back into a byte offset and restore the true
	 * (unaligned) src position before handling the last
	 * len & 7 bytes.
	 */
	srl	%g1, 3, %g1
	andcc	%o2, 0x7, %o2
	be,pn	%icc, 85f
	 add	%o1, %g1, %o1
	ba,pt	%xcc, 90f
	 sub	%o0, %o1, GLOBAL_SPARE

	.align		64
80: /* 0 < len < 16 */
	/* GLOBAL_SPARE is dst | src | len here; use the word loop
	 * only when all three are multiples of 4.
	 */
	andcc	GLOBAL_SPARE, 0x3, %g0
	bne,pn	%XCC, 90f
	 sub	%o0, %o1, GLOBAL_SPARE

1:
	subcc	%o2, 4, %o2
	EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
	EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
	bgu,pt	%XCC, 1b
	 add	%o1, 4, %o1

85:	retl
	 mov	EX_RETVAL(%o3), %o0

	/* Byte-at-a-time copy of %o2 (> 0) bytes; expects
	 * GLOBAL_SPARE = dst - src.
	 */
	.align		32
90:
	subcc	%o2, 1, %o2
	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
	EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
	bgu,pt	%XCC, 90b
	 add	%o1, 1, %o1
	retl
	 mov	EX_RETVAL(%o3), %o0

	.size	FUNC_NAME, .-FUNC_NAME