162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */ 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/linkage.h> 762306a36Sopenharmony_ci#include <asm/cache.h> 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci/* 1062306a36Sopenharmony_ci * The memset implementation below is optimized to use prefetchw and prealloc 1162306a36Sopenharmony_ci * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) 1262306a36Sopenharmony_ci * If you want to implement optimized memset for other possible L1 data cache 1362306a36Sopenharmony_ci * line lengths (32B and 128B) you should rewrite code carefully checking 1462306a36Sopenharmony_ci * we don't call any prefetchw/prealloc instruction for L1 cache lines which 1562306a36Sopenharmony_ci * don't belongs to memset area. 1662306a36Sopenharmony_ci */ 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#if L1_CACHE_SHIFT == 6 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci.macro PREALLOC_INSTR reg, off 2162306a36Sopenharmony_ci prealloc [\reg, \off] 2262306a36Sopenharmony_ci.endm 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci.macro PREFETCHW_INSTR reg, off 2562306a36Sopenharmony_ci prefetchw [\reg, \off] 2662306a36Sopenharmony_ci.endm 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci#else 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci.macro PREALLOC_INSTR reg, off 3162306a36Sopenharmony_ci.endm 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci.macro PREFETCHW_INSTR reg, off 3462306a36Sopenharmony_ci.endm 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci#endif 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ciENTRY_CFI(memset) 3962306a36Sopenharmony_ci mov.f 0, r2 4062306a36Sopenharmony_ci;;; if size is zero 4162306a36Sopenharmony_ci jz.d [blink] 4262306a36Sopenharmony_ci mov r3, r0 ; don't clobber ret val 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci PREFETCHW_INSTR r0, 0 ; Prefetch the first write location 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci;;; if length < 8 4762306a36Sopenharmony_ci brls.d.nt r2, 8, .Lsmallchunk 4862306a36Sopenharmony_ci mov.f lp_count,r2 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci and.f r4, r0, 0x03 5162306a36Sopenharmony_ci rsub lp_count, r4, 4 5262306a36Sopenharmony_ci lpnz @.Laligndestination 5362306a36Sopenharmony_ci ;; LOOP BEGIN 5462306a36Sopenharmony_ci stb.ab r1, [r3,1] 5562306a36Sopenharmony_ci sub r2, r2, 1 5662306a36Sopenharmony_ci.Laligndestination: 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci;;; Destination is aligned 5962306a36Sopenharmony_ci and r1, r1, 0xFF 6062306a36Sopenharmony_ci asl r4, r1, 8 6162306a36Sopenharmony_ci or r4, r4, r1 6262306a36Sopenharmony_ci asl r5, r4, 16 6362306a36Sopenharmony_ci or r5, r5, r4 6462306a36Sopenharmony_ci mov r4, r5 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci sub3 lp_count, r2, 8 6762306a36Sopenharmony_ci cmp r2, 64 6862306a36Sopenharmony_ci bmsk.hi r2, r2, 5 6962306a36Sopenharmony_ci mov.ls lp_count, 0 7062306a36Sopenharmony_ci add3.hi r2, r2, 8 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci;;; Convert len to Dwords, unfold x8 7362306a36Sopenharmony_ci lsr.f lp_count, lp_count, 6 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci lpnz @.Lset64bytes 7662306a36Sopenharmony_ci ;; LOOP START 7762306a36Sopenharmony_ci PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci#ifdef CONFIG_ARC_HAS_LL64 8062306a36Sopenharmony_ci std.ab r4, [r3, 8] 8162306a36Sopenharmony_ci std.ab r4, [r3, 8] 8262306a36Sopenharmony_ci std.ab r4, [r3, 8] 8362306a36Sopenharmony_ci std.ab r4, [r3, 8] 8462306a36Sopenharmony_ci std.ab r4, [r3, 8] 8562306a36Sopenharmony_ci std.ab r4, [r3, 8] 8662306a36Sopenharmony_ci std.ab r4, [r3, 8] 8762306a36Sopenharmony_ci std.ab r4, [r3, 8] 8862306a36Sopenharmony_ci#else 8962306a36Sopenharmony_ci st.ab r4, [r3, 4] 9062306a36Sopenharmony_ci st.ab r4, [r3, 4] 9162306a36Sopenharmony_ci st.ab r4, [r3, 4] 9262306a36Sopenharmony_ci st.ab r4, [r3, 4] 9362306a36Sopenharmony_ci st.ab r4, [r3, 4] 9462306a36Sopenharmony_ci st.ab r4, [r3, 4] 9562306a36Sopenharmony_ci st.ab r4, [r3, 4] 9662306a36Sopenharmony_ci st.ab r4, [r3, 4] 9762306a36Sopenharmony_ci st.ab r4, [r3, 4] 9862306a36Sopenharmony_ci st.ab r4, [r3, 4] 9962306a36Sopenharmony_ci st.ab r4, [r3, 4] 10062306a36Sopenharmony_ci st.ab r4, [r3, 4] 10162306a36Sopenharmony_ci st.ab r4, [r3, 4] 10262306a36Sopenharmony_ci st.ab r4, [r3, 4] 10362306a36Sopenharmony_ci st.ab r4, [r3, 4] 10462306a36Sopenharmony_ci st.ab r4, [r3, 4] 10562306a36Sopenharmony_ci#endif 10662306a36Sopenharmony_ci.Lset64bytes: 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes 10962306a36Sopenharmony_ci lpnz .Lset32bytes 11062306a36Sopenharmony_ci ;; LOOP START 11162306a36Sopenharmony_ci#ifdef CONFIG_ARC_HAS_LL64 11262306a36Sopenharmony_ci std.ab r4, [r3, 8] 11362306a36Sopenharmony_ci std.ab r4, [r3, 8] 11462306a36Sopenharmony_ci std.ab r4, [r3, 8] 11562306a36Sopenharmony_ci std.ab r4, [r3, 8] 11662306a36Sopenharmony_ci#else 11762306a36Sopenharmony_ci st.ab r4, [r3, 4] 11862306a36Sopenharmony_ci st.ab r4, [r3, 4] 11962306a36Sopenharmony_ci st.ab r4, [r3, 4] 12062306a36Sopenharmony_ci st.ab r4, [r3, 4] 12162306a36Sopenharmony_ci st.ab r4, [r3, 4] 12262306a36Sopenharmony_ci st.ab r4, [r3, 4] 12362306a36Sopenharmony_ci st.ab r4, [r3, 4] 12462306a36Sopenharmony_ci st.ab r4, [r3, 4] 12562306a36Sopenharmony_ci#endif 12662306a36Sopenharmony_ci.Lset32bytes: 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci and.f lp_count, r2, 0x1F ;Last remaining 31 bytes 12962306a36Sopenharmony_ci.Lsmallchunk: 13062306a36Sopenharmony_ci lpnz .Lcopy3bytes 13162306a36Sopenharmony_ci ;; LOOP START 13262306a36Sopenharmony_ci stb.ab r1, [r3, 1] 13362306a36Sopenharmony_ci.Lcopy3bytes: 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci j [blink] 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ciEND_CFI(memset) 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ciENTRY_CFI(memzero) 14062306a36Sopenharmony_ci ; adjust bzero args to memset args 14162306a36Sopenharmony_ci mov r2, r1 14262306a36Sopenharmony_ci b.d memset ;tail call so need to tinker with blink 14362306a36Sopenharmony_ci mov r1, 0 14462306a36Sopenharmony_ciEND_CFI(memzero) 145