/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

/*
 * void *memset(void *dest, int c, size_t n)
 *
 * ABI:      AAPCS64
 * In:       x0 = dest, w1 = c (only the low byte is used), x2 = n
 * Out:      x0 = dest (x0 is never written by this routine)
 * Clobbers: x1-x5, v0, NZCV flags
 *
 * Strategy: the fill byte is replicated into all 16 lanes of v0 and every
 * size class is handled with stores issued from both the start (dstin) and
 * the end (dstend) of the buffer.  These stores may overlap each other, which
 * avoids a byte loop and relies on unaligned accesses being permitted.
 * For long zero fills the DC ZVA instruction is used when the reported
 * zeroing block size is 64 bytes.
 */

/* Register aliases used throughout the routine.  */
#define dstin   x0      /* Destination pointer; also the return value.  */
#define val     x1      /* Fill value (64-bit view).  */
#define valw    w1      /* Fill value (32-bit view).  */
#define count   x2      /* Byte count; reused as loop counter.  */
#define dst     x3      /* Aligned working pointer for the long paths.  */
#define dstend  x4      /* One past the last byte to set.  */
#define zva_val x5      /* Scratch for the DCZID_EL0 value.  */

.global memset
.type memset,%function
memset:

	dup     v0.16B, valw            /* v0 = fill byte in all 16 lanes.  */
	add     dstend, dstin, count

	cmp     count, 96
	b.hi    .Lset_long
	cmp     count, 16
	b.hs    .Lset_medium
	mov     val, v0.D[0]            /* val = fill byte in all 8 bytes.  */

	/* Set 0..15 bytes.  Each size class uses one store from the start
	   and one from the end; the two may overlap.  */
	tbz     count, 3, 1f
	/* 8..15 bytes.  */
	str     val, [dstin]
	str     val, [dstend, -8]
	ret
	nop                             /* Padding; presumably for code alignment.  */
1:      tbz     count, 2, 2f
	/* 4..7 bytes.  */
	str     valw, [dstin]
	str     valw, [dstend, -4]
	ret
2:      cbz     count, 3f
	/* 1..3 bytes: first byte, then the last two bytes if count >= 2.  */
	strb    valw, [dstin]
	tbz     count, 1, 3f
	strh    valw, [dstend, -2]
3:      ret

	/* Set 16..96 bytes.  */
.Lset_medium:
	str     q0, [dstin]
	tbnz    count, 6, .Lset96       /* Bit 6 set: count is 64..96.  */
	str     q0, [dstend, -16]
	tbz     count, 5, 1f            /* Bit 5 clear: count is 16..31, done.  */
	str     q0, [dstin, 16]
	str     q0, [dstend, -32]
1:      ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  */
.Lset96:
	str     q0, [dstin, 16]
	stp     q0, q0, [dstin, 32]
	stp     q0, q0, [dstend, -32]
	ret

	/* Set more than 96 bytes.  */
	.p2align 4
.Lset_long:
	and     valw, valw, 255
	bic     dst, dstin, 15          /* dst = dstin rounded down to 16 bytes.  */
	str     q0, [dstin]             /* First 16 bytes, possibly unaligned.  */
	/* Use DC ZVA only when count >= 160 and the fill byte is zero.  If
	   count < 160 the ccmp forces NZCV to 0, so b.ne is taken.  */
	cmp     count, 160
	ccmp    valw, 0, 0, hs
	b.ne    .Lno_zva

#ifndef SKIP_ZVA_CHECK
	/* The low 5 bits of DCZID_EL0 hold the prohibit bit (bit 4) and the
	   log2 block size in words (bits 3:0); the value 4 therefore means
	   DC ZVA is permitted with a 64-byte block.  */
	mrs     zva_val, dczid_el0
	and     zva_val, zva_val, 31
	cmp     zva_val, 4              /* ZVA size is 64 bytes.  */
	b.ne    .Lno_zva
#endif
	/* Together with the store at dstin above, these cover every byte
	   below the first 64-byte block that the loop zeroes.  */
	str     q0, [dst, 16]
	stp     q0, q0, [dst, 32]
	bic     dst, dst, 63            /* dst = rounded down to 64 bytes.  */
	sub     count, dstend, dst      /* Count is now 64 too large.  */
	sub     count, count, 128       /* Adjust count and bias for loop.  */

	.p2align 4
.Lzva_loop:
	add     dst, dst, 64
	dc      zva, dst                /* Zero one 64-byte block at dst.  */
	subs    count, count, 64
	b.hi    .Lzva_loop
	/* The final 64 bytes are always written relative to dstend.  */
	stp     q0, q0, [dstend, -64]
	stp     q0, q0, [dstend, -32]
	ret

.Lno_zva:
	sub     count, dstend, dst      /* Count is 16 too large.  */
	sub     dst, dst, 16            /* Dst is biased by -32.  */
	sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
.Lno_zva_loop:
	/* 64 bytes per iteration: 32 at dst + 32, then dst is advanced by 64
	   (pre-index writeback) and 32 more are stored at the new dst.  */
	stp     q0, q0, [dst, 32]
	stp     q0, q0, [dst, 64]!
	subs    count, count, 64
	b.hi    .Lno_zva_loop
	/* The final 64 bytes are always written relative to dstend.  */
	stp     q0, q0, [dstend, -64]
	stp     q0, q0, [dstend, -32]
	ret

.size memset,.-memset
116