1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
4  */
5 
6 #include <linux/linkage.h>
7 #include <asm/cache.h>
8 
/*
 * The memset implementation below is optimized to use the prefetchw and
 * prealloc instructions on CPUs with a 64B L1 data cache line
 * (L1_CACHE_SHIFT == 6). If you want to implement an optimized memset for
 * the other possible L1 data cache line lengths (32B and 128B), rewrite the
 * code carefully, checking that no prefetchw/prealloc instruction is issued
 * for an L1 cache line which doesn't belong to the memset area.
 */
17 
/*
 * Cache-hint wrappers used by memset.
 *
 * PREALLOC_INSTR  reg, off  ->  prealloc  [reg, off]
 * PREFETCHW_INSTR reg, off  ->  prefetchw [reg, off]
 *
 * The real instructions are emitted only when L1_CACHE_SHIFT == 6 (64-byte
 * L1 data cache lines). For any other line size both macros expand to
 * nothing, so the callers assemble unchanged and just issue no cache hints.
 */
#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else

/* Not a 64-byte line: deliberately empty expansions. */
.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif
37 
38 ENTRY_CFI(memset)
39 	mov.f	0, r2
40 ;;; if size is zero
41 	jz.d	[blink]
42 	mov	r3, r0		; don't clobber ret val
43 
44 	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
45 
46 ;;; if length < 8
47 	brls.d.nt	r2, 8, .Lsmallchunk
48 	mov.f	lp_count,r2
49 
50 	and.f	r4, r0, 0x03
51 	rsub	lp_count, r4, 4
52 	lpnz	@.Laligndestination
53 	;; LOOP BEGIN
54 	stb.ab	r1, [r3,1]
55 	sub	r2, r2, 1
56 .Laligndestination:
57 
58 ;;; Destination is aligned
59 	and	r1, r1, 0xFF
60 	asl	r4, r1, 8
61 	or	r4, r4, r1
62 	asl	r5, r4, 16
63 	or	r5, r5, r4
64 	mov	r4, r5
65 
66 	sub3	lp_count, r2, 8
67 	cmp     r2, 64
68 	bmsk.hi	r2, r2, 5
69 	mov.ls	lp_count, 0
70 	add3.hi	r2, r2, 8
71 
72 ;;; Convert len to Dwords, unfold x8
73 	lsr.f	lp_count, lp_count, 6
74 
75 	lpnz	@.Lset64bytes
76 	;; LOOP START
77 	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching
78 
79 #ifdef CONFIG_ARC_HAS_LL64
80 	std.ab	r4, [r3, 8]
81 	std.ab	r4, [r3, 8]
82 	std.ab	r4, [r3, 8]
83 	std.ab	r4, [r3, 8]
84 	std.ab	r4, [r3, 8]
85 	std.ab	r4, [r3, 8]
86 	std.ab	r4, [r3, 8]
87 	std.ab	r4, [r3, 8]
88 #else
89 	st.ab	r4, [r3, 4]
90 	st.ab	r4, [r3, 4]
91 	st.ab	r4, [r3, 4]
92 	st.ab	r4, [r3, 4]
93 	st.ab	r4, [r3, 4]
94 	st.ab	r4, [r3, 4]
95 	st.ab	r4, [r3, 4]
96 	st.ab	r4, [r3, 4]
97 	st.ab	r4, [r3, 4]
98 	st.ab	r4, [r3, 4]
99 	st.ab	r4, [r3, 4]
100 	st.ab	r4, [r3, 4]
101 	st.ab	r4, [r3, 4]
102 	st.ab	r4, [r3, 4]
103 	st.ab	r4, [r3, 4]
104 	st.ab	r4, [r3, 4]
105 #endif
106 .Lset64bytes:
107 
108 	lsr.f	lp_count, r2, 5 ;Last remaining  max 124 bytes
109 	lpnz	.Lset32bytes
110 	;; LOOP START
111 #ifdef CONFIG_ARC_HAS_LL64
112 	std.ab	r4, [r3, 8]
113 	std.ab	r4, [r3, 8]
114 	std.ab	r4, [r3, 8]
115 	std.ab	r4, [r3, 8]
116 #else
117 	st.ab	r4, [r3, 4]
118 	st.ab	r4, [r3, 4]
119 	st.ab	r4, [r3, 4]
120 	st.ab	r4, [r3, 4]
121 	st.ab	r4, [r3, 4]
122 	st.ab	r4, [r3, 4]
123 	st.ab	r4, [r3, 4]
124 	st.ab	r4, [r3, 4]
125 #endif
126 .Lset32bytes:
127 
128 	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes
129 .Lsmallchunk:
130 	lpnz	.Lcopy3bytes
131 	;; LOOP START
132 	stb.ab	r1, [r3, 1]
133 .Lcopy3bytes:
134 
135 	j	[blink]
136 
137 END_CFI(memset)
138 
/*
 * memzero - zero a buffer
 *
 * In:  r0 = destination, r1 = length in bytes
 *
 * Rearranges the arguments into memset form (r1 = 0, r2 = length) and
 * branches to memset without linking, so the "j [blink]" at the end of
 * memset returns straight to the caller of memzero.
 */
ENTRY_CFI(memzero)
    ; adjust bzero args to memset args
    mov r2, r1
    b.d  memset    ; tail call, so no need to tinker with blink
    mov r1, 0      ; (delay slot) fill value = 0
END_CFI(memzero)
145