xref: /third_party/musl/src/string/aarch64/memset.S (revision 570af302)
1570af302Sopenharmony_ci/*
2570af302Sopenharmony_ci * memset - fill memory with a constant byte
3570af302Sopenharmony_ci *
4570af302Sopenharmony_ci * Copyright (c) 2012-2020, Arm Limited.
5570af302Sopenharmony_ci * SPDX-License-Identifier: MIT
6570af302Sopenharmony_ci */
7570af302Sopenharmony_ci
8570af302Sopenharmony_ci/* Assumptions:
9570af302Sopenharmony_ci *
10570af302Sopenharmony_ci * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
11570af302Sopenharmony_ci *
12570af302Sopenharmony_ci */
13570af302Sopenharmony_ci
/* Register aliases used by memset below.  */
/* dstin: x0, the destination pointer.  memset only reads it.  */
14570af302Sopenharmony_ci#define dstin   x0
/* val: x1, the fill value widened to 64 bits on the 0..15 byte path.  */
15570af302Sopenharmony_ci#define val     x1
/* valw: w1, the 32-bit view of the fill value as passed in.  */
16570af302Sopenharmony_ci#define valw    w1
/* count: x2, the byte count; reused as the loop counter on the long paths.  */
17570af302Sopenharmony_ci#define count   x2
/* dst: x3, aligned working pointer used only on the long (>96 byte) paths.  */
18570af302Sopenharmony_ci#define dst     x3
/* dstend: x4, computed as dstin + count (one past the last byte to set).  */
19570af302Sopenharmony_ci#define dstend  x4
/* zva_val: x5, scratch register that receives the dczid_el0 read.  */
20570af302Sopenharmony_ci#define zva_val x5
21570af302Sopenharmony_ci
/* memset: store the low byte of valw into count bytes starting at dstin.  */
/* In:     x0 = dstin, w1 = valw, x2 = count.  */
/* Out:    x0 is never written here, so it still holds dstin on return.  */
/* Writes: x1, x2, x3, x4, x5 (ZVA check only), v0 and the condition flags.  */
/* Method: every size class uses stores anchored at both dstin and dstend;  */
/*         the stores may overlap and may be unaligned, so there is no  */
/*         byte-by-byte tail loop.  */
22570af302Sopenharmony_ci.global memset
23570af302Sopenharmony_ci.type memset,%function
24570af302Sopenharmony_cimemset:
25570af302Sopenharmony_ci
	/* v0 = fill byte replicated into all 16 byte lanes.  */
26570af302Sopenharmony_ci	dup     v0.16B, valw
	/* dstend = one past the last byte to set.  */
27570af302Sopenharmony_ci	add     dstend, dstin, count
28570af302Sopenharmony_ci
	/* Dispatch on size: above 96 is long, 16..96 is medium, 0..15 falls through.  */
29570af302Sopenharmony_ci	cmp     count, 96
30570af302Sopenharmony_ci	b.hi    .Lset_long
31570af302Sopenharmony_ci	cmp     count, 16
32570af302Sopenharmony_ci	b.hs    .Lset_medium
	/* val = low 64 bits of v0, i.e. eight copies of the fill byte.  */
33570af302Sopenharmony_ci	mov     val, v0.D[0]
34570af302Sopenharmony_ci
35570af302Sopenharmony_ci	/* Set 0..15 bytes.  */
	/* Bit 3 set means 8..15: one 8-byte store from each end, possibly overlapping.  */
36570af302Sopenharmony_ci	tbz     count, 3, 1f
37570af302Sopenharmony_ci	str     val, [dstin]
38570af302Sopenharmony_ci	str     val, [dstend, -8]
39570af302Sopenharmony_ci	ret
	/* NOTE(review): this nop is unreachable; presumably alignment padding - confirm.  */
40570af302Sopenharmony_ci	nop
	/* Bit 2 set means 4..7: one 4-byte store from each end, possibly overlapping.  */
41570af302Sopenharmony_ci1:      tbz     count, 2, 2f
42570af302Sopenharmony_ci	str     valw, [dstin]
43570af302Sopenharmony_ci	str     valw, [dstend, -4]
44570af302Sopenharmony_ci	ret
	/* 0..3: nothing for 0; otherwise the first byte, plus the last two bytes  */
	/* when bit 1 is set (count is 2 or 3).  */
45570af302Sopenharmony_ci2:      cbz     count, 3f
46570af302Sopenharmony_ci	strb    valw, [dstin]
47570af302Sopenharmony_ci	tbz     count, 1, 3f
48570af302Sopenharmony_ci	strh    valw, [dstend, -2]
49570af302Sopenharmony_ci3:      ret
50570af302Sopenharmony_ci
51570af302Sopenharmony_ci	/* Set 16..96 bytes.  */
52570af302Sopenharmony_ci.Lset_medium:
	/* The first 16 bytes are always stored.  Bit 6 set means 64..96.  */
53570af302Sopenharmony_ci	str     q0, [dstin]
54570af302Sopenharmony_ci	tbnz    count, 6, .Lset96
	/* 16..63: store the last 16 bytes; when bit 5 is set (32..63) also store  */
	/* the second 16 bytes from the start and from the end.  */
55570af302Sopenharmony_ci	str     q0, [dstend, -16]
56570af302Sopenharmony_ci	tbz     count, 5, 1f
57570af302Sopenharmony_ci	str     q0, [dstin, 16]
58570af302Sopenharmony_ci	str     q0, [dstend, -32]
59570af302Sopenharmony_ci1:      ret
60570af302Sopenharmony_ci
61570af302Sopenharmony_ci	.p2align 4
62570af302Sopenharmony_ci	/* Set 64..96 bytes.  Write 64 bytes from the start and
63570af302Sopenharmony_ci	   32 bytes from the end.  */
64570af302Sopenharmony_ci.Lset96:
	/* Bytes 0..15 were already stored at .Lset_medium before the branch here.  */
65570af302Sopenharmony_ci	str     q0, [dstin, 16]
66570af302Sopenharmony_ci	stp     q0, q0, [dstin, 32]
67570af302Sopenharmony_ci	stp     q0, q0, [dstend, -32]
68570af302Sopenharmony_ci	ret
69570af302Sopenharmony_ci
70570af302Sopenharmony_ci	.p2align 4
	/* Set more than 96 bytes.  */
71570af302Sopenharmony_ci.Lset_long:
	/* Reduce valw to the fill byte so it can be compared with zero below.  */
72570af302Sopenharmony_ci	and     valw, valw, 255
	/* dst = dstin rounded down to 16 bytes; the first 16 bytes are stored unaligned.  */
73570af302Sopenharmony_ci	bic     dst, dstin, 15
74570af302Sopenharmony_ci	str     q0, [dstin]
	/* Use the DC ZVA path only when count >= 160 and the fill byte is zero:  */
	/* ccmp compares valw with 0 if hs holds, otherwise it sets nzcv to 0 so  */
	/* that the b.ne below is taken.  */
75570af302Sopenharmony_ci	cmp     count, 160
76570af302Sopenharmony_ci	ccmp    valw, 0, 0, hs
77570af302Sopenharmony_ci	b.ne    .Lno_zva
78570af302Sopenharmony_ci
	/* Unless SKIP_ZVA_CHECK is defined, require the low 5 bits of dczid_el0  */
	/* to equal 4.  Per the Arm ARM, bits [3:0] are log2 of the block size in  */
	/* words and bit 4 marks DC ZVA as prohibited.  */
79570af302Sopenharmony_ci#ifndef SKIP_ZVA_CHECK
80570af302Sopenharmony_ci	mrs     zva_val, dczid_el0
81570af302Sopenharmony_ci	and     zva_val, zva_val, 31
82570af302Sopenharmony_ci	cmp     zva_val, 4              /* ZVA size is 64 bytes.  */
83570af302Sopenharmony_ci	b.ne    .Lno_zva
84570af302Sopenharmony_ci#endif
	/* Store up to dst + 64 with ordinary stores so that every byte below the  */
	/* first zeroed block is covered, then round dst down to 64 bytes.  */
85570af302Sopenharmony_ci	str     q0, [dst, 16]
86570af302Sopenharmony_ci	stp     q0, q0, [dst, 32]
87570af302Sopenharmony_ci	bic     dst, dst, 63
88570af302Sopenharmony_ci	sub     count, dstend, dst      /* Count is now 64 too large.  */
89570af302Sopenharmony_ci	sub     count, count, 128       /* Adjust count and bias for loop.  */
90570af302Sopenharmony_ci
91570af302Sopenharmony_ci	.p2align 4
	/* Each pass advances dst by 64 and zeroes that block with dc zva; repeat  */
	/* while more than 64 bytes lie beyond the block just zeroed.  */
92570af302Sopenharmony_ci.Lzva_loop:
93570af302Sopenharmony_ci	add     dst, dst, 64
94570af302Sopenharmony_ci	dc      zva, dst
95570af302Sopenharmony_ci	subs    count, count, 64
96570af302Sopenharmony_ci	b.hi    .Lzva_loop
	/* Last 64 bytes, addressed from dstend; may overlap bytes already set.  */
97570af302Sopenharmony_ci	stp     q0, q0, [dstend, -64]
98570af302Sopenharmony_ci	stp     q0, q0, [dstend, -32]
99570af302Sopenharmony_ci	ret
100570af302Sopenharmony_ci
	/* Long path using ordinary stores only.  dst is dstin rounded down to 16  */
	/* and the first 16 bytes at dstin were already stored at .Lset_long.  */
	/* NOTE(review): the exact bias stated in the comments below was not  */
	/* re-derived in this review - confirm before relying on it.  */
101570af302Sopenharmony_ci.Lno_zva:
102570af302Sopenharmony_ci	sub     count, dstend, dst      /* Count is 16 too large.  */
103570af302Sopenharmony_ci	sub     dst, dst, 16            /* Dst is biased by -32.  */
104570af302Sopenharmony_ci	sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
	/* Each pass stores 64 bytes starting at dst + 32 and advances dst by 64  */
	/* (pre-index writeback on the second stp); repeat while more than 64  */
	/* bytes remain beyond what this pass stored.  */
105570af302Sopenharmony_ci.Lno_zva_loop:
106570af302Sopenharmony_ci	stp     q0, q0, [dst, 32]
107570af302Sopenharmony_ci	stp     q0, q0, [dst, 64]!
108570af302Sopenharmony_ci	subs    count, count, 64
109570af302Sopenharmony_ci	b.hi    .Lno_zva_loop
	/* Last 64 bytes, addressed from dstend; may overlap bytes already set.  */
110570af302Sopenharmony_ci	stp     q0, q0, [dstend, -64]
111570af302Sopenharmony_ci	stp     q0, q0, [dstend, -32]
112570af302Sopenharmony_ci	ret
113570af302Sopenharmony_ci
114570af302Sopenharmony_ci.size memset,.-memset
115570af302Sopenharmony_ci
116