/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>
#include <asm/cache.h>

/*
 * The memset implementation below is optimized to use the prefetchw and
 * prealloc instructions on CPUs with a 64B L1 data cache line
 * (L1_CACHE_SHIFT == 6).
 * If you want to implement an optimized memset for the other possible L1 data
 * cache line lengths (32B and 128B), rewrite the code carefully, checking that
 * no prefetchw/prealloc instruction is issued for an L1 cache line which
 * doesn't belong to the memset area.
 */

/*
 * Cache-hint wrappers used by memset.
 *
 * PREALLOC_INSTR  reg, off  expands to  prealloc  [reg, off]
 * PREFETCHW_INSTR reg, off  expands to  prefetchw [reg, off]
 *
 * Both expand to nothing unless L1_CACHE_SHIFT == 6, so the hints are only
 * emitted for the 64-byte line size.
 */
#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else

.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif

3862306a36Sopenharmony_ciENTRY_CFI(memset)
3962306a36Sopenharmony_ci	mov.f	0, r2
4062306a36Sopenharmony_ci;;; if size is zero
4162306a36Sopenharmony_ci	jz.d	[blink]
4262306a36Sopenharmony_ci	mov	r3, r0		; don't clobber ret val
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci;;; if length < 8
4762306a36Sopenharmony_ci	brls.d.nt	r2, 8, .Lsmallchunk
4862306a36Sopenharmony_ci	mov.f	lp_count,r2
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	and.f	r4, r0, 0x03
5162306a36Sopenharmony_ci	rsub	lp_count, r4, 4
5262306a36Sopenharmony_ci	lpnz	@.Laligndestination
5362306a36Sopenharmony_ci	;; LOOP BEGIN
5462306a36Sopenharmony_ci	stb.ab	r1, [r3,1]
5562306a36Sopenharmony_ci	sub	r2, r2, 1
5662306a36Sopenharmony_ci.Laligndestination:
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci;;; Destination is aligned
5962306a36Sopenharmony_ci	and	r1, r1, 0xFF
6062306a36Sopenharmony_ci	asl	r4, r1, 8
6162306a36Sopenharmony_ci	or	r4, r4, r1
6262306a36Sopenharmony_ci	asl	r5, r4, 16
6362306a36Sopenharmony_ci	or	r5, r5, r4
6462306a36Sopenharmony_ci	mov	r4, r5
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	sub3	lp_count, r2, 8
6762306a36Sopenharmony_ci	cmp     r2, 64
6862306a36Sopenharmony_ci	bmsk.hi	r2, r2, 5
6962306a36Sopenharmony_ci	mov.ls	lp_count, 0
7062306a36Sopenharmony_ci	add3.hi	r2, r2, 8
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci;;; Convert len to Dwords, unfold x8
7362306a36Sopenharmony_ci	lsr.f	lp_count, lp_count, 6
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	lpnz	@.Lset64bytes
7662306a36Sopenharmony_ci	;; LOOP START
7762306a36Sopenharmony_ci	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci#ifdef CONFIG_ARC_HAS_LL64
8062306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
8162306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
8262306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
8362306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
8462306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
8562306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
8662306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
8762306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
8862306a36Sopenharmony_ci#else
8962306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9062306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9162306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9262306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9362306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9462306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9562306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9662306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9762306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9862306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
9962306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
10062306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
10162306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
10262306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
10362306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
10462306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
10562306a36Sopenharmony_ci#endif
10662306a36Sopenharmony_ci.Lset64bytes:
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	lsr.f	lp_count, r2, 5 ;Last remaining  max 124 bytes
10962306a36Sopenharmony_ci	lpnz	.Lset32bytes
11062306a36Sopenharmony_ci	;; LOOP START
11162306a36Sopenharmony_ci#ifdef CONFIG_ARC_HAS_LL64
11262306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
11362306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
11462306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
11562306a36Sopenharmony_ci	std.ab	r4, [r3, 8]
11662306a36Sopenharmony_ci#else
11762306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
11862306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
11962306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
12062306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
12162306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
12262306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
12362306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
12462306a36Sopenharmony_ci	st.ab	r4, [r3, 4]
12562306a36Sopenharmony_ci#endif
12662306a36Sopenharmony_ci.Lset32bytes:
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes
12962306a36Sopenharmony_ci.Lsmallchunk:
13062306a36Sopenharmony_ci	lpnz	.Lcopy3bytes
13162306a36Sopenharmony_ci	;; LOOP START
13262306a36Sopenharmony_ci	stb.ab	r1, [r3, 1]
13362306a36Sopenharmony_ci.Lcopy3bytes:
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	j	[blink]
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ciEND_CFI(memset)
13862306a36Sopenharmony_ci
/*
 * memzero: clear r1 bytes starting at r0.
 *
 * In:  r0 = destination, r1 = byte count
 *
 * Rearranges the arguments into memset form (r1 = 0, r2 = count) and
 * branches to memset, which returns directly to the original caller.
 */
ENTRY_CFI(memzero)
    ; adjust bzero args to memset args
    mov r2, r1
    b.d  memset    ; tail call, so no need to tinker with blink
    mov r1, 0      ; delay slot: fill value is zero
END_CFI(memzero)