/* SPDX-License-Identifier: GPL-2.0 */
/* NG4memset.S: Niagara-4 optimized memset/bzero.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 */

#include <asm/asi.h>

	.register	%g2, #scratch
	.register	%g3, #scratch

/*
 * Register roles shared by both entry points once the common body at
 * the first "1:" label inside NG4bzero has been reached:
 *
 *   %o0  current store pointer (advanced as bytes are written)
 *   %o1  bytes still to be written by later stages (64-bit quantity)
 *   %o3  copy of the original destination, handed back in %o0 on return
 *   %o4  64-bit fill pattern (the fill byte replicated 8 times, or 0)
 *   %g1  byte counter of whichever loop is currently running
 *   %g2, %g3, %o5  constant store offsets used by the 64-byte block loops
 *
 * Clobbers: %g1, %g2, %g3, %o1-%o5 and the integer condition codes.
 *
 * Every branch that depends on a length or a loop counter tests %xcc.
 * Those values are 64 bits wide; testing %icc only looks at the low
 * 32 bits, so a length such as 4 GiB (low word == 0) would be treated
 * as zero and the loops would stop early.
 *
 * An instruction indented by one extra space sits in the delay slot of
 * the branch above it.
 */

	.text
	.align		32
	.globl		NG4memset
/*
 * NG4memset
 * In:   %o0 = destination, %o1 = fill value (only the low 8 bits are
 *       used), %o2 = length in bytes
 * Out:  %o0 = original destination
 *
 * Builds the 64-bit fill pattern in %o4, moves the length into %o1 and
 * joins the common body inside NG4bzero.
 */
NG4memset:
	andcc		%o1, 0xff, %o4		/* %o4 = fill byte; Z set if it is 0 */
	be,pt		%xcc, 1f		/* zero fill: pattern is already 0 */
	 mov		%o2, %o1		/* delay slot (always runs): %o1 = length */
	sllx		%o4, 8, %g1
	or		%g1, %o4, %o2		/* byte replicated to 16 bits */
	sllx		%o2, 16, %g1
	or		%g1, %o2, %o2		/* ... to 32 bits */
	sllx		%o2, 32, %g1
	ba,pt		%xcc, 1f
	 or		%g1, %o2, %o4		/* delay slot: ... to 64 bits, in %o4 */
	.size		NG4memset,.-NG4memset

	.align		32
	.globl		NG4bzero
/*
 * NG4bzero
 * In:   %o0 = destination, %o1 = length in bytes
 * Out:  %o0 = original destination (the exit path is shared with
 *       NG4memset)
 *
 * Stages: byte stores up to 8-byte alignment, 8-byte stores up to
 * 64-byte alignment, 64-byte blocks via ASI_BLK_INIT_QUAD_LDD_P, then
 * 8-byte, 4-byte and single-byte stores for the tail.
 */
NG4bzero:
	clr		%o4			/* fill pattern = 0 */
1:	cmp		%o1, 16
	ble		%xcc, .Ltiny		/* <= 16 bytes: plain byte loop */
	 mov		%o0, %o3		/* delay slot: remember return value */

	/* %g1 = (-dst) & 7 = bytes needed to reach 8-byte alignment. */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1
	brz,pt		%g1, .Laligned8
	 sub		%o1, %g1, %o1		/* delay slot: no-op when %g1 == 0 */
1:	stb		%o4, [%o0 + 0x00]
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%o0, 1, %o0

.Laligned8:
	/*
	 * Use the block path only when more than 64 + (64 - 8) bytes
	 * remain: up to 56 bytes may be spent reaching 64-byte alignment,
	 * and the block loops below test their counter at the bottom, so
	 * at least one full 64-byte block must be left afterwards.
	 */
	cmp		%o1, 64 + (64 - 8)
	ble		%xcc, .Lmedium
	 sub		%g0, %o0, %g1
	andcc		%g1, (64 - 1), %g1	/* bytes to 64-byte alignment */
	brz,pn		%g1, .Laligned64
	 sub		%o1, %g1, %o1		/* delay slot: no-op when %g1 == 0 */
1:	stx		%o4, [%o0 + 0x00]
	subcc		%g1, 8, %g1
	bne,pt		%xcc, 1b
	 add		%o0, 0x8, %o0

.Laligned64:
	andn		%o1, 64 - 1, %g1	/* %g1 = bytes in whole 64-byte blocks */
	sub		%o1, %g1, %o1		/* %o1 = tail bytes (< 64) */
	brnz,pn		%o4, .Lnon_bzero_loop
	 mov		0x20, %g2		/* delay slot: offset used by both loops */

	/*
	 * Zero-fill block loop: only two stores per 64 bytes, at offsets
	 * 0x00 and 0x20.
	 * NOTE(review): this presumably relies on the block-init ASI
	 * zeroing the remainder of each line it touches; confirm against
	 * the CPU documentation.
	 */
1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
	subcc		%g1, 0x40, %g1
	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
	bne,pt		%xcc, 1b
	 add		%o0, 0x40, %o0

.Lpostloop:
	cmp		%o1, 8
	bl,pn		%xcc, .Ltiny
	 membar		#StoreStore|#StoreLoad	/* delay slot: runs on both paths */

.Lmedium:
	/*
	 * At least 8 bytes remain on every path into this label, so the
	 * bottom-tested loop never starts with a zero counter.
	 */
	andn		%o1, 0x7, %g1		/* %g1 = bytes in whole 8-byte words */
	sub		%o1, %g1, %o1		/* %o1 = tail bytes (< 8) */
1:	stx		%o4, [%o0 + 0x00]
	subcc		%g1, 0x8, %g1
	bne,pt		%xcc, 1b
	 add		%o0, 0x08, %o0
	andcc		%o1, 0x4, %g1		/* one 4-byte word left? */
	be,pt		%xcc, .Ltiny
	 sub		%o1, %g1, %o1		/* delay slot: no-op when %g1 == 0 */
	stw		%o4, [%o0 + 0x00]
	add		%o0, 0x4, %o0

.Ltiny:
	cmp		%o1, 0
	be,pn		%xcc, .Lexit
	/*
	 * The subcc below is both the delay slot of the branch above and
	 * the head of the byte loop. When the branch to .Lexit is taken,
	 * its result is not used again.
	 */
1:	 subcc		%o1, 1, %o1
	stb		%o4, [%o0 + 0x00]
	bne,pt		%xcc, 1b
	 add		%o0, 1, %o0
.Lexit:
	retl
	 mov		%o3, %o0		/* return the original destination */

.Lnon_bzero_loop:
	/*
	 * Non-zero fill: all eight 8-byte words of each 64-byte block are
	 * stored explicitly. The first four stores hit offsets 0x00, 0x20,
	 * 0x08 and 0x28; after %o0 advances by 0x10 the same four offsets
	 * hit 0x10, 0x30, 0x18 and 0x38 of the block.
	 */
	mov		0x08, %g3
	mov		0x28, %o5
1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
	subcc		%g1, 0x40, %g1		/* flags consumed by the bne below */
	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x10, %o0
	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
	bne,pt		%xcc, 1b
	 add		%o0, 0x30, %o0		/* delay slot: 0x10 + 0x30 = next block */
	ba,a,pt		%xcc, .Lpostloop	/* annulled: the nop is not executed */
	 nop
	.size		NG4bzero,.-NG4bzero