/* SPDX-License-Identifier: GPL-2.0 */
/* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimized memcpy.
 *
 * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
 * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
 */

/* Build configuration.
 *
 * With __KERNEL__ defined the spare global register is %g7 and the
 * remaining helper definitions are expected to come from the headers
 * included here.  Otherwise %g5 is the spare global and local fallbacks
 * for ASI_BLK_P, FPRS_FEF, VISEntry and VISExit are provided below.
 */
#ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/export.h>
#define GLOBAL_SPARE	g7
#else
#define GLOBAL_SPARE	g5
#define ASI_BLK_P 0xf0
#define FPRS_FEF  0x04
#ifdef MEMCPY_DEBUG
/* Debug flavour: besides saving %fprs in %o5 and writing FPRS_FEF to
 * %fprs, also clear %g1-%g3 and set the condition codes from a zero
 * result.
 */
#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
		 clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
/* VISEntry saves %fprs in %o5 and writes FPRS_FEF to %fprs.  VISExit
 * writes the saved value, masked with FPRS_FEF, back to %fprs, so %o5
 * has to survive from VISEntry to VISExit.
 */
#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
#endif

/* Hooks wrapped around every load (EX_LD*) and store (EX_ST*).
 *
 *   x - the instruction to emit
 *   y - name of the fixup stub associated with that instruction
 *
 * The defaults below emit 'x' and discard 'y'.  Each hook is guarded by
 * #ifndef, so it can be predefined before this file is assembled.
 * The _FP forms are the ones used between VISEntry and VISExit.
 */
#ifndef EX_LD
#define EX_LD(x,y)	x
#endif
#ifndef EX_LD_FP
#define EX_LD_FP(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif
#ifndef EX_ST_FP
#define EX_ST_FP(x,y)	x
#endif

/* Overridable memory-access primitives.
 *
 *   LOAD(type,addr,dest)   - "type [addr], dest"
 *   LOAD_BLK(addr,dest)    - block load through ASI_BLK_P
 *   STORE(type,src,addr)   - "type src, [addr]"
 *   STORE_BLK(src,addr)    - block store through ASI_BLK_P
 */
#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef LOAD_BLK
#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_P, dest
#endif

#ifndef STORE
#define STORE(type,src,addr)	type src, [addr]
#endif

#ifndef STORE_BLK
#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
#endif

/* Name of the routine being generated; memcpy unless predefined. */
#ifndef FUNC_NAME
#define FUNC_NAME	memcpy
#endif

/* Optional code inserted right after the entry-point length check. */
#ifndef PREAMBLE
#define PREAMBLE
#endif

/* Condition-code set used by the branches written with %XCC. */
#ifndef XCC
#define XCC xcc
#endif

/* Run faligndata over nine consecutive source registers f1..f9,
 * producing eight results in %f48-%f62 (one 64-byte output block).
 */
#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)		\
	faligndata		%f1, %f2, %f48;			\
	faligndata		%f2, %f3, %f50;			\
	faligndata		%f3, %f4, %f52;			\
	faligndata		%f4, %f5, %f54;			\
	faligndata		%f5, %f6, %f56;			\
	faligndata		%f6, %f7, %f58;			\
	faligndata		%f7, %f8, %f60;			\
	faligndata		%f8, %f9, %f62;

/* One step of the main loop: block-load the next 64 source bytes into
 * 'fdest', block-store 'fsrc' to 'dest', advance 'src', and take 0x40
 * off the block byte counter in %GLOBAL_SPARE.  Branches to 'jmptgt'
 * once that counter reaches zero; 'dest' is advanced in the delay slot,
 * i.e. on both paths.
 */
#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt)			\
	EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp);			\
	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);			\
	add			%src, 0x40, %src;			\
	subcc			%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE;	\
	be,pn			%xcc, jmptgt;				\
	 add			%dest, 0x40, %dest;			\

/* The three loop steps differ only in which register bank (%f0, %f16
 * or %f32) receives the freshly loaded block.
 */
#define LOOP_CHUNK1(src, dest, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f0,  f48, branch_dest)
#define LOOP_CHUNK2(src, dest, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
#define LOOP_CHUNK3(src, dest, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)

/* Block-store 'fsrc' to 'dest', advance 'dest' by 64, then membar #Sync. */
#define DO_SYNC			membar	#Sync;
#define STORE_SYNC(dest, fsrc)				\
	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);	\
	add			%dest, 0x40, %dest;	\
	DO_SYNC

/* Block-store 'fsrc' to 'dest', advance 'dest' by 64, then branch to
 * 'target' (nop in the delay slot).
 */
#define STORE_JUMP(dest, fsrc, target)			\
	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp);	\
	add			%dest, 0x40, %dest;	\
	ba,pt			%xcc, target;		\
	 nop;

/* Tail step: take 8 off %g3 and leave for 95f if the result is
 * negative.  Otherwise store the 8 bytes formed by faligndata of the
 * two given registers (computed in the delay slot) and advance 'dest'.
 * Note that the macro parameters are literally named f0/f1, so %f0 and
 * %f1 in the body are replaced by the arguments.
 */
#define FINISH_VISCHUNK(dest, f0, f1)			\
	subcc			%g3, 8, %g3;		\
	bl,pn			%xcc, 95f;		\
	 faligndata		%f0, %f1, %f48;		\
	EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp);	\
	add			%dest, 8, %dest;

/* Same counter test as FINISH_VISCHUNK, but nothing is stored: the
 * delay slot copies the first register argument into the second with
 * fsrc2, and execution then falls through.
 */
#define UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
	subcc			%g3, 8, %g3;	\
	bl,pn			%xcc, 95f;	\
	 fsrc2			%f0, %f1;

/* UNEVEN_VISCHUNK_LAST followed by an annulled branch to 93f. */
#define UNEVEN_VISCHUNK(dest, f0, f1)		\
	UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
	ba,a,pt			%xcc, 93f;

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
/* Fixup stubs, emitted only when EX_RETVAL has not been predefined.
 *
 * These are the routines named as the second argument of the EX_LD* /
 * EX_ST* hooks.  Each one returns in %o0 the sum spelled out by its
 * name (a register, plus an optional hexadecimal constant) added to
 * %o2; U1_g1_1_fp additionally adds %g2.  Presumably this is the number
 * of bytes left uncopied at the faulting access - confirm against the
 * users of the hooks.
 *
 * NOTE(review): VISExitHalf is not defined in this file; it presumably
 * comes from <asm/visasm.h>.
 */
#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
ENTRY(U1_g1_1_fp)
	VISExitHalf
	add		%g1, 1, %g1
	add		%g1, %g2, %g1
	retl
	 add		%g1, %o2, %o0
ENDPROC(U1_g1_1_fp)
ENTRY(U1_g2_0_fp)
	VISExitHalf
	retl
	 add		%g2, %o2, %o0
ENDPROC(U1_g2_0_fp)
ENTRY(U1_g2_8_fp)
	VISExitHalf
	add		%g2, 8, %g2
	retl
	 add		%g2, %o2, %o0
ENDPROC(U1_g2_8_fp)
ENTRY(U1_gs_0_fp)
	VISExitHalf
	add		%GLOBAL_SPARE, %g3, %o0
	retl
	 add		%o0, %o2, %o0
ENDPROC(U1_gs_0_fp)
ENTRY(U1_gs_80_fp)
	VISExitHalf
	add		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
	add		%GLOBAL_SPARE, %g3, %o0
	retl
	 add		%o0, %o2, %o0
ENDPROC(U1_gs_80_fp)
ENTRY(U1_gs_40_fp)
	VISExitHalf
	add		%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
	add		%GLOBAL_SPARE, %g3, %o0
	retl
	 add		%o0, %o2, %o0
ENDPROC(U1_gs_40_fp)
/* Remaining fixup stubs.  Each returns in %o0 the sum spelled out by
 * its name (register plus optional hexadecimal constant) added to %o2.
 * The *_o2_adj variants first reduce %o2 to its low three bits.
 * The *_fp stubs, and also U1_gs_0 / U1_gs_8 / U1_gs_10, invoke
 * VISExitHalf first.
 */
ENTRY(U1_g3_0_fp)
	VISExitHalf
	retl
	 add		%g3, %o2, %o0
ENDPROC(U1_g3_0_fp)
ENTRY(U1_g3_8_fp)
	VISExitHalf
	add		%g3, 8, %g3
	retl
	 add		%g3, %o2, %o0
ENDPROC(U1_g3_8_fp)
ENTRY(U1_o2_0_fp)
	VISExitHalf
	retl
	 mov		%o2, %o0
ENDPROC(U1_o2_0_fp)
ENTRY(U1_o2_1_fp)
	VISExitHalf
	retl
	 add		%o2, 1, %o0
ENDPROC(U1_o2_1_fp)
ENTRY(U1_gs_0)
	VISExitHalf
	retl
	 add		%GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_0)
ENTRY(U1_gs_8)
	VISExitHalf
	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
	retl
	 add		%GLOBAL_SPARE, 0x8, %o0
ENDPROC(U1_gs_8)
ENTRY(U1_gs_10)
	VISExitHalf
	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
	retl
	 add		%GLOBAL_SPARE, 0x10, %o0
ENDPROC(U1_gs_10)
ENTRY(U1_o2_0)
	retl
	 mov		%o2, %o0
ENDPROC(U1_o2_0)
ENTRY(U1_o2_8)
	retl
	 add		%o2, 8, %o0
ENDPROC(U1_o2_8)
ENTRY(U1_o2_4)
	retl
	 add		%o2, 4, %o0
ENDPROC(U1_o2_4)
ENTRY(U1_o2_1)
	retl
	 add		%o2, 1, %o0
ENDPROC(U1_o2_1)
ENTRY(U1_g1_0)
	retl
	 add		%g1, %o2, %o0
ENDPROC(U1_g1_0)
ENTRY(U1_g1_1)
	add		%g1, 1, %g1
	retl
	 add		%g1, %o2, %o0
ENDPROC(U1_g1_1)
ENTRY(U1_gs_0_o2_adj)
	and		%o2, 7, %o2
	retl
	 add		%GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_0_o2_adj)
ENTRY(U1_gs_8_o2_adj)
	and		%o2, 7, %o2
	add		%GLOBAL_SPARE, 8, %GLOBAL_SPARE
	retl
	 add		%GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_8_o2_adj)
#endif

	.align		64

	/* FUNC_NAME(%o0 = dst, %o1 = src, %o2 = len)
	 *
	 * Returns EX_RETVAL(%o4) in %o0, where %o4 is the original dst.
	 * Raises software trap 5 when len >> 31 is non-zero.
	 *
	 * Dispatch on len:
	 *	len == 0		-> 85f	(return immediately)
	 *	len <  16		-> 80f	(word or byte loop)
	 *	16 <= len < 5 * 64	-> 70f	(integer 8/16-byte loops)
	 *	len >= 5 * 64		-> VIS block-copy path below
	 *
	 * Instructions indented by one extra space sit in a branch
	 * delay slot.
	 */
	.globl		FUNC_NAME
	.type		FUNC_NAME,#function
FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
	srlx		%o2, 31, %g2
	cmp		%g2, 0
	tne		%xcc, 5
	PREAMBLE
	mov		%o0, %o4		/* keep dst for the return value */
	cmp		%o2, 0
	be,pn		%XCC, 85f
	 or		%o0, %o1, %o3		/* %o3 = dst | src */
	cmp		%o2, 16
	blu,a,pn	%XCC, 80f
	 or		%o3, %o2, %o3		/* annulled: only when branching */

	cmp		%o2, (5 * 64)
	blu,pt		%XCC, 70f
	 andcc		%o3, 0x7, %g0		/* flags consumed at 70: */

	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  */
	VISEntry

	/* Is 'dst' already aligned on a 64-byte boundary? */
	andcc		%o0, 0x3f, %g2
	be,pt		%XCC, 2f

	/* Compute abs((dst & 0x3f) - 0x40) into %g2.  This is the number
	 * of bytes to copy to make 'dst' 64-byte aligned.  We pre-
	 * subtract this from 'len'.
	 */
	 sub		%o0, %o1, %GLOBAL_SPARE	/* dst - src, for the stb below */
	sub		%g2, 0x40, %g2
	sub		%g0, %g2, %g2
	sub		%o2, %g2, %o2
	andcc		%g2, 0x7, %g1		/* %g1 = bytes for the byte loop */
	be,pt		%icc, 2f
	 and		%g2, 0x38, %g2		/* %g2 = bytes for the 8-byte loop */

	/* Byte loop: bring 'dst' up to an 8-byte boundary. */
1:	subcc		%g1, 0x1, %g1
	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
	EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
	bgu,pt		%XCC, 1b
	 add		%o1, 0x1, %o1

	add		%o1, %GLOBAL_SPARE, %o0	/* recompute dst from src */

	/* 8-byte loop: bring 'dst' up to a 64-byte boundary.  %g1 keeps
	 * the low three bits of 'src'; alignaddr rounds %o1 down.
	 */
2:	cmp		%g2, 0x0
	and		%o1, 0x7, %g1
	be,pt		%icc, 3f
	 alignaddr	%o1, %g0, %o1

	EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
	add		%o1, 0x8, %o1
	subcc		%g2, 0x8, %g2
	faligndata	%f4, %f6, %f0
	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
	be,pn		%icc, 3f
	 add		%o0, 0x8, %o0

	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
	add		%o1, 0x8, %o1
	subcc		%g2, 0x8, %g2
	faligndata	%f6, %f4, %f0
	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
	bne,pt		%icc, 1b
	 add		%o0, 0x8, %o0

	/* Destination is 64-byte aligned.  */
3:
	membar		  #LoadStore | #StoreStore | #StoreLoad

	/* Split what is left of 'len':
	 *	%GLOBAL_SPARE	bytes for the block loop: (len - 0x40)
	 *			rounded down to a multiple of 0x40
	 *	%g3		8-byte tail count: the remainder rounded
	 *			down to a multiple of 8, less 8
	 *	%o2		bytes left for the final byte loop
	 *	%g2		(src >> 3) & 7, selects the loop instance
	 *	%g1		src address at which the byte loop resumes
	 */
	subcc		%o2, 0x40, %GLOBAL_SPARE
	add		%o1, %g1, %g1
	andncc		%GLOBAL_SPARE, (0x40 - 1), %GLOBAL_SPARE
	srl		%g1, 3, %g2
	sub		%o2, %GLOBAL_SPARE, %g3
	andn		%o1, (0x40 - 1), %o1
	and		%g2, 7, %g2
	andncc		%g3, 0x7, %g3
	fsrc2		%f0, %f2
	sub		%g3, 0x8, %g3
	sub		%o2, %GLOBAL_SPARE, %o2

	add		%g1, %GLOBAL_SPARE, %g1
	subcc		%o2, %g3, %o2

	/* Prime all three register banks with the first 192 bytes. */
	EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
	add		%o1, 0x40, %o1
	add		%g1, %g3, %g1
	EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
	add		%o1, 0x40, %o1
	sub		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
	EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
	add		%o1, 0x40, %o1

	/* There are 8 instances of the unrolled loop,
	 * one for each possible alignment of the
	 * source buffer.  Each loop instance is 452
	 * bytes.
	 *
	 * The shifts below compute %g2 * 452 as
	 * ((%g2 * 8 - %g2) * 16 + %g2) * 4, which is then added to
	 * the address of the first instance.
	 */
	sll		%g2, 3, %o3
	sub		%o3, %g2, %o3
	sllx		%o3, 4, %o3
	add		%o3, %g2, %o3
	sllx		%o3, 2, %g2
1:	rd		%pc, %o3
	add		%o3, %lo(1f - 1b), %o3
	jmpl		%o3 + %g2, %g0
	 nop

	/* Layout of every instance: a three-step loop whose back edge
	 * targets "1b+4" because the first faligndata of the leading
	 * FREG_FROB is issued in the branch delay slot, followed by
	 * three exit stubs (1:, 2:, 3:), one per LOOP_CHUNK step, that
	 * store the two blocks still held in registers and jump to the
	 * matching tail entry (40-63).
	 */

	/* Instance 0 */
	.align		64
1:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f0, %f2, %f48
1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_SYNC(o0, f48)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_JUMP(o0, f48, 40f)
2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_SYNC(o0, f48)
	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_JUMP(o0, f48, 48f)
3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_SYNC(o0, f48)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_JUMP(o0, f48, 56f)

	/* Instance 1 */
1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f2, %f4, %f48
1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_SYNC(o0, f48)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_JUMP(o0, f48, 41f)
2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_SYNC(o0, f48)
	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_JUMP(o0, f48, 49f)
3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_SYNC(o0, f48)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_JUMP(o0, f48, 57f)

	/* Instance 2 */
1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f4, %f6, %f48
1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_SYNC(o0, f48)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_JUMP(o0, f48, 42f)
2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_SYNC(o0, f48)
	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_JUMP(o0, f48, 50f)
3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_SYNC(o0, f48)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_JUMP(o0, f48, 58f)

	/* Instance 3 */
1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f6, %f8, %f48
1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_SYNC(o0, f48)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_JUMP(o0, f48, 43f)
2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_SYNC(o0, f48)
	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_JUMP(o0, f48, 51f)
3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_SYNC(o0, f48)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_JUMP(o0, f48, 59f)

	/* Instance 4 */
1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f8, %f10, %f48
1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_SYNC(o0, f48)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_JUMP(o0, f48, 44f)
2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_SYNC(o0, f48)
	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_JUMP(o0, f48, 52f)
3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_SYNC(o0, f48)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_JUMP(o0, f48, 60f)

	/* Instance 5 */
1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f10, %f12, %f48
1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_SYNC(o0, f48)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_JUMP(o0, f48, 45f)
2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_SYNC(o0, f48)
	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_JUMP(o0, f48, 53f)
3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_SYNC(o0, f48)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_JUMP(o0, f48, 61f)

	/* Instance 6 */
1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f12, %f14, %f48
1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_SYNC(o0, f48)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_JUMP(o0, f48, 46f)
2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_SYNC(o0, f48)
	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_JUMP(o0, f48, 54f)
3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_SYNC(o0, f48)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_JUMP(o0, f48, 62f)

	/* Instance 7 */
1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f14, %f16, %f48
1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_SYNC(o0, f48)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_JUMP(o0, f48, 47f)
2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_SYNC(o0, f48)
	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_JUMP(o0, f48, 55f)
3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_SYNC(o0, f48)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_JUMP(o0, f48, 63f)

	/* 8-byte tail entries, reached from the STORE_JUMPs above.
	 * 40-47 work on the %f0 bank, 48-55 on %f16, 56-63 on %f32.
	 * Each entry falls through into the next one; the last entry of
	 * a bank moves its final register into %f0 and continues at 93.
	 * Any entry leaves for 95 once %g3 runs out.
	 */
40:	FINISH_VISCHUNK(o0, f0,  f2)
41:	FINISH_VISCHUNK(o0, f2,  f4)
42:	FINISH_VISCHUNK(o0, f4,  f6)
43:	FINISH_VISCHUNK(o0, f6,  f8)
44:	FINISH_VISCHUNK(o0, f8,  f10)
45:	FINISH_VISCHUNK(o0, f10, f12)
46:	FINISH_VISCHUNK(o0, f12, f14)
47:	UNEVEN_VISCHUNK(o0, f14, f0)
48:	FINISH_VISCHUNK(o0, f16, f18)
49:	FINISH_VISCHUNK(o0, f18, f20)
50:	FINISH_VISCHUNK(o0, f20, f22)
51:	FINISH_VISCHUNK(o0, f22, f24)
52:	FINISH_VISCHUNK(o0, f24, f26)
53:	FINISH_VISCHUNK(o0, f26, f28)
54:	FINISH_VISCHUNK(o0, f28, f30)
55:	UNEVEN_VISCHUNK(o0, f30, f0)
56:	FINISH_VISCHUNK(o0, f32, f34)
57:	FINISH_VISCHUNK(o0, f34, f36)
58:	FINISH_VISCHUNK(o0, f36, f38)
59:	FINISH_VISCHUNK(o0, f38, f40)
60:	FINISH_VISCHUNK(o0, f40, f42)
61:	FINISH_VISCHUNK(o0, f42, f44)
62:	FINISH_VISCHUNK(o0, f44, f46)
63:	UNEVEN_VISCHUNK_LAST(o0, f46, f0)

	/* Keep copying 8 bytes at a time with fresh loads, alternating
	 * %f2 and %f0 as the newest doubleword, until %g3 goes negative.
	 */
93:	EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
	add		%o1, 8, %o1
	subcc		%g3, 8, %g3
	faligndata	%f0, %f2, %f8
	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
	bl,pn		%xcc, 95f
	 add		%o0, 8, %o0
	EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
	add		%o1, 8, %o1
	subcc		%g3, 8, %g3
	faligndata	%f2, %f0, %f8
	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
	bge,pt		%xcc, 93b
	 add		%o0, 8, %o0

	/* Final bytes: %o2 of them, read from the address kept in %g1. */
95:	brz,pt		%o2, 2f
	 mov		%g1, %o1

1:	EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
	add		%o1, 1, %o1
	subcc		%o2, 1, %o2
	EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
	bne,pt		%xcc, 1b
	 add		%o0, 1, %o0

2:	membar		#StoreLoad | #StoreStore
	VISExit
	retl
	 mov		EX_RETVAL(%o4), %o0

	.align		64
70:	/* 16 <= len < (5 * 64) */
	bne,pn		%XCC, 75f		/* (dst | src) & 7 != 0 */
	 sub		%o0, %o1, %o3		/* %o3 = dst - src */

	/* dst and src both 8-byte aligned: 16 bytes per iteration,
	 * storing through src + (dst - src).
	 */
72:	andn		%o2, 0xf, %GLOBAL_SPARE
	and		%o2, 0xf, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
	EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
	subcc		%GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
	add		%o1, 0x8, %o1
	EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
	bgu,pt		%XCC, 1b
	 add		%o1, 0x8, %o1
	/* Fewer than 16 bytes left: one optional 8-byte move ... */
73:	andcc		%o2, 0x8, %g0
	be,pt		%XCC, 1f
	 nop
	EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
	sub		%o2, 0x8, %o2
	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
	add		%o1, 0x8, %o1
	/* ... one optional 4-byte move ... */
1:	andcc		%o2, 0x4, %g0
	be,pt		%XCC, 1f
	 nop
	EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
	sub		%o2, 0x4, %o2
	EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
	add		%o1, 0x4, %o1
	/* ... and whatever remains goes through the byte loop at 90. */
1:	cmp		%o2, 0
	be,pt		%XCC, 85f
	 nop
	ba,pt		%xcc, 90f
	 nop

	/* dst and/or src not 8-byte aligned.  If dst itself is
	 * unaligned, first copy 8 - (dst & 7) bytes to align it.
	 */
75:	andcc		%o0, 0x7, %g1
	sub		%g1, 0x8, %g1		/* leaves the andcc flags intact */
	be,pn		%icc, 2f
	 sub		%g0, %g1, %g1
	sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
	subcc		%g1, 1, %g1
	EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
	bgu,pt		%icc, 1b
	 add		%o1, 1, %o1

	/* dst is now 8-byte aligned.  If src is as well, rejoin the
	 * aligned code (72 when at least 16 bytes remain, otherwise
	 * 73); if not, use the shift-and-merge loop at 8.
	 */
2:	add		%o1, %o3, %o0
	andcc		%o1, 0x7, %g1
	bne,pt		%icc, 8f
	 sll		%g1, 3, %g1		/* %g1 = (src & 7) * 8, in bits */

	cmp		%o2, 16
	bgeu,pt		%icc, 72b
	 nop
	ba,a,pt		%xcc, 73b

	/* Shift-and-merge: read aligned doublewords from src and build
	 * each output doubleword from two neighbours.
	 *	%g1 = left shift count, %o3 = 64 - %g1 = right shift count
	 *	%g2 = previous doubleword, already shifted left
	 *	%GLOBAL_SPARE = len rounded down to a multiple of 8
	 */
8:	mov		64, %o3
	andn		%o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
	sub		%o3, %g1, %o3
	andn		%o2, 0x7, %GLOBAL_SPARE
	sllx		%g2, %g1, %g2
1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
	subcc		%GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
	add		%o1, 0x8, %o1
	srlx		%g3, %o3, %o5
	or		%o5, %g2, %o5
	EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
	add		%o0, 0x8, %o0
	bgu,pt		%icc, 1b
	 sllx		%g3, %g1, %g2

	/* Put the byte offset back into src, then finish the last
	 * len & 7 bytes (if any) in the byte loop at 90.
	 */
	srl		%g1, 3, %g1
	andcc		%o2, 0x7, %o2
	be,pn		%icc, 85f
	 add		%o1, %g1, %o1
	ba,pt		%xcc, 90f
	 sub		%o0, %o1, %o3		/* %o3 = dst - src for 90: */

	.align		64
80:	/* 0 < len < 16 */
	/* %o3 = dst | src | len here: use the word loop only when all
	 * three are multiples of 4, otherwise copy byte by byte.
	 */
	andcc		%o3, 0x3, %g0
	bne,pn		%XCC, 90f
	 sub		%o0, %o1, %o3

1:	EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
	subcc		%o2, 4, %o2
	EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
	bgu,pt		%XCC, 1b
	 add		%o1, 4, %o1

85:	retl
	 mov		EX_RETVAL(%o4), %o0

	/* Byte loop: copy %o2 bytes (must be non-zero on entry) from
	 * %o1 to %o1 + %o3.
	 */
	.align		32
90:	EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
	subcc		%o2, 1, %o2
	EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
	bgu,pt		%XCC, 90b
	 add		%o1, 1, %o1
	retl
	 mov		EX_RETVAL(%o4), %o0

	.size		FUNC_NAME, .-FUNC_NAME
EXPORT_SYMBOL(FUNC_NAME)