1 // SPDX-License-Identifier: GPL-2.0-only
2 // Copyright (C) 2021 ARM Limited.
3 // Original author: Mark Brown <broonie@kernel.org>
4 //
5 // Scalable Matrix Extension ZA context switch test
6 // Repeatedly writes unique test patterns into each ZA tile
7 // and reads them back to verify integrity.
8 //
// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done
10 // (leave it running for as long as you want...)
11 // kill $pids
12 
13 #include <asm/unistd.h>
14 #include "assembler.h"
15 #include "asm-offsets.h"
16 #include "sme-inst.h"
17 
18 .arch_extension sve
19 
20 #define MAXVL     2048
21 #define MAXVL_B   (MAXVL / 8)
22 
// Backing storage used by the test, kept out of the code section:
//   zaref   - in-memory shadow of the ZA contents, sized for the largest
//             supported vector length (MAXVL_B rows of MAXVL_B bytes)
//   scratch - staging buffer holding a single vector (MAXVL_B bytes)
// The section stack is used so that whatever section was active before
// this point is restored afterwards.
.pushsection .data
.p2align 4
zaref:
	.skip	MAXVL_B * MAXVL_B
scratch:
	.skip	MAXVL_B
.popsection
33 
// Byte-at-a-time memory copy.
// x0: destination address
// x1: source address
// x2: number of bytes to copy (zero is allowed and copies nothing)
// Clobbers x0-x3
function memcpy
	cmp	x2, #0
	b.eq	.Lmemcpy_done		// empty copy: nothing to do

.Lmemcpy_next_byte:
	ldrb	w3, [x1], #1		// fetch a byte, advance the source
	subs	x2, x2, #1		// one fewer byte outstanding
	strb	w3, [x0], #1		// store it, advance the destination
	b.ne	.Lmemcpy_next_byte	// flags still come from the subs above

.Lmemcpy_done:
	ret
endfunction
45 
// Generate a test pattern for storage in ZA and write it to the scratch
// buffer.
// x0: pid
// x1: row in ZA
// x2: generation
// Clobbers x0, x1 and x3.
//
// These values are used to construct a 32-bit pattern; the whole scratch
// buffer (MAXVL_B bytes) is filled with consecutive 32-bit words, the lane
// index being incremented for each word:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:16	pid
// bits 15: 8	row number
// bits  7: 0	32-bit lane index

function pattern
	ubfiz	w3, w0, #16, #12	// PID in bits 27:16, everything else zero
	bfi	w3, w2, #28, #4		// Generation
	bfi	w3, w1, #8, #8		// Row

	mov	w1, #MAXVL_B / 4	// number of 32-bit words to emit
	ldr	x0, =scratch

.Lpattern_store_word:
	str	w3, [x0], #4
	subs	w1, w1, #1
	add	w3, w3, #1		// Lane (add leaves the flags alone)
	b.ne	.Lpattern_store_word

	ret
endfunction
74 
// Compute the address of the shadow data for ZA horizontal vector \xn:
//	\xd = zaref + \xn * (value produced by "rdsvl ..., 1")
// \xd:    destination register (written as a full register name, e.g. x5)
// \xn:    register holding the vector (row) number
// \nrtmp: bare register NUMBER; x\nrtmp is overwritten with the rdsvl
//         result and left holding it, which callers use as the row length
// \xd, \xn and x\nrtmp must all be different registers.
.macro _adrza xd, xn, nrtmp
	rdsvl	\nrtmp, 1
	ldr	\xd, =zaref
	madd	\xd, x\nrtmp, \xn, \xd
.endm
81 
// Write a fresh test pattern to one ZA horizontal vector and to its
// shadow copy in memory.
// x0: pid
// x1: row number
// x2: generation
// Clobbers x0-x5 and x12 (x30 is preserved via x4).
function setup_za
	mov	x12, x1			// Row number doubles as the ZA vector select
	mov	x4, x30			// bl below overwrites x30; keep the return address

	bl	pattern			// scratch buffer := pattern(pid, row, generation)

	_adrza	x0, x12, 2		// x0 = shadow slot for this row, x2 = row length
	ldr	x1, =scratch
	mov	x5, x0			// memcpy advances x0, so keep the slot address in x5
	bl	memcpy			// shadow slot := scratch; length was left in x2 by _adrza

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction
100 
// Trivial memory compare: compare x2 bytes starting at address x0 with
// bytes starting at address x1.
// Returns only if all bytes match; otherwise control is transferred to
// barf with x0, x1 and x2 still holding the values passed in.
// A zero length always compares equal.
// Clobbers x3-x5 and the condition flags; x0-x2 are left unmodified and
// the stack is not used.
function memcmp
	cbz	x2, 1f			// nothing to compare

	mov	x5, #0			// x5 = index of the byte being compared
0:	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	cmp	w3, w4
	b.ne	barf			// mismatch: x0-x2 are intact for the report

	add	x5, x5, #1
	cmp	x5, x2			// stop once all x2 bytes have been checked
	b.ne	0b

1:	ret
endfunction
126 
// Compare one ZA horizontal vector against its shadow copy in memory.
// Returns (through memcmp) only when they match; a mismatch ends up in
// barf via memcmp.
// x0: row number
// Clobbers x0-x7 and x12.
function check_za
	mov	x12, x0			// vector select for the ZA store below
	mov	x3, x30			// bl below overwrites x30; keep the return address

	ldr	x7, =scratch		// x7 is scratch
	_adrza	x5, x0, 6		// x5 = expected data (shadow), x6 = row length
	mov	x4, x0			// copy of the row number; not read again in this function

	mov	x1, x6			// Poison scratch so that stale contents
	mov	x0, x7			// cannot pass for freshly stored data
	bl	memfill_ae

	_str_za 12, 7			// save vector w12 to pointer x7

	mov	x2, x6			// memcmp(expected, actual, row length)
	mov	x1, x7
	mov	x0, x5
	mov	x30, x3			// tail call: memcmp returns straight to our caller
	b	memcmp
endfunction
150 
// Signal handler that bumps the interrupted context's signal counter.
// x2: pointer from which the saved registers are reached (ucontext_regs)
//
// Any SME register modified here can cause corruption in the main
// thread -- but *only* the locations modified here.
#define IRRITATOR_SIGCOUNT	(ucontext_regs + 8 * 23)
function irritator_handler
	// Increment the irritation signal count (saved x23):
	ldr	x0, [x2, #IRRITATOR_SIGCOUNT]
	add	x0, x0, #1
	str	x0, [x2, #IRRITATOR_SIGCOUNT]

	// Corrupt some random ZA data
	// (disabled; the instructions below only touch FPSIMD V registers)
#if 0
	adr	x0, .text + (irritator_handler - .text) / 16 * 16
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
#endif

	ret
endfunction
169 
// Signal handler whose only effect is to bump the interrupted context's
// signal counter.
// x2: pointer from which the saved registers are reached (ucontext_regs)
#define TICKLE_SIGCOUNT	(ucontext_regs + 8 * 23)
function tickle_handler
	// Increment the signal count (saved x23):
	ldr	x0, [x2, #TICKLE_SIGCOUNT]
	add	x0, x0, #1
	str	x0, [x2, #TICKLE_SIGCOUNT]

	ret
endfunction
178 
// Signal handler for orderly termination: prints the signal number
// together with the iteration count (saved x22) and signal count (saved
// x23) of the interrupted context, then exits with status 0.
// w0: signal number
// x2: pointer from which the saved registers are reached (ucontext_regs)
// Does not return.
function terminate_handler
	mov	x20, x2			// x0 is used for output below, so park
	mov	w21, w0			// both arguments in x20/w21 first

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec

	puts	", no error, iterations="
	ldr	x0, [x20, #ucontext_regs + 8 * 22]
	bl	putdec

	puts	", signals="
	ldr	x0, [x20, #ucontext_regs + 8 * 23]
	bl	putdecn

	mov	x8, #__NR_exit		// exit(0)
	mov	x0, #0
	svc	#0
endfunction
197 
// Install a signal handler with a raw rt_sigaction system call, using a
// zeroed sigaction structure built on the stack.
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
// If the system call reports an error, a message is printed and control
// goes to .Labort instead of returning.

// Stack frame: 16 bytes holding x30, followed by the sigaction structure
// rounded up to a multiple of 16 bytes.
#define SETSIGNAL_FRAME	((sa_sz + 15) / 16 * 16 + 16)
function setsignal
	str	x30, [sp, #-SETSIGNAL_FRAME]!

	mov	x5, x1			// park the arguments: x0-x2 are reused below
	mov	w6, w2
	mov	w4, w0

	mov	x1, #sa_sz		// zero the on-stack sigaction structure
	add	x0, sp, #16
	bl	memclr

	add	x1, sp, #16		// x1 = &sigaction
	str	w6, [x1, #sa_flags]
	str	x5, [x1, #sa_handler]
	mov	w0, w4			// signal number
	mov	x3, #sa_mask_sz		// size of the signal mask
	mov	x2, #0			// old action is not requested
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbz	w0, .Lsetsignal_ok

	puts	"sigaction failure\n"
	b	.Labort

.Lsetsignal_ok:
	ldr	x30, [sp], #SETSIGNAL_FRAME
	ret
endfunction
230 
// Main program entry point
//
// Installs the signal handlers, enables ZA, validates and reports the
// vector length, then loops forever: fill every ZA row (and its shadow)
// with a fresh pattern, yield the CPU, and verify every row.
//
// Registers that stay live across the loop:
//   x19  vector length in bits, as read at startup
//   x20  our PID
//   x21  row countdown for the fill and verify loops
//   x22  generation number (also read by terminate_handler as the
//        iteration count)
//   x23  signal count (incremented by the signal handlers)
//   x24  number of rows, used to turn the countdown into a row index
.globl _start
function _start
	mov	x23, #0		// signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	// No newline: this is the prefix of the "vector length" or
	// "bad vector length" message printed below.
	puts	"Streaming mode "
	smstart_za

	// Sanity-check and report the vector length

	rdsvl	19, 8
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #2048
	b.hi	1f
	tst	x19, #(8 - 1)
	b.eq	2f

1:	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8		// the vector length must never change under us
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1		// Set up ZA & shadow with test pattern
0:	mov	x0, x20
	sub	x1, x21, #1	// rows are filled from the last one down to 0
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	0b

	mov	x8, #__NR_sched_yield	// encourage preemption
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1			// Verify that the data made it through
	rdsvl	24, 1			// row index = x24 - x21, i.e. 0 upwards
0:	sub	x0, x24, x21
	bl	check_za
	subs	x21, x21, #1
	b.ne	0b

	add	x22, x22, #1	// Everything still working
	b	.Ltest_loop

	// Fatal error exit, also reached from setsignal.
	// Send SIGABRT to this process only: a pid argument of 0 would
	// signal every process in our process group.
.Labort:
	mov	x8, #__NR_getpid
	svc	#0			// x0 = our PID
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0

	// Not expected to get here; make sure we never run off the end
	// of the function.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
329 
// Report a data mismatch and abort the process.  Reached from memcmp.
// x0: pointer to the expected data
// x1: pointer to the data actually read back
// x2: size of the data in bytes
// Also reads state owned by the main loop: x20 (PID), x22 (iteration) and
// x21/x24 (verify loop countdown and row count).
// Does not return.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	// The verify loop counts x21 down from the row count held in x24
	// and checks row x24 - x21, so that is the row to report.
	sub	x0, x24, x21
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	mov	x8, #__NR_getpid
	svc	#0			// x0 = our PID, the target for kill below
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0

	// Not expected to get here; exit with a failure status rather than
	// running on into whatever code follows this function.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
374 
// Report an unexpected active vector length and exit with status 1.
// x0: the vector length that was read
// Does not return.
function vl_barf
	mov	x10, x0			// puts uses x0; keep the value to print

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1			// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
386 
// Report an unexpected SVCR value and exit with status 1.
// x0: the SVCR value that was read
// Does not return.
function svcr_barf
	mov	x10, x0			// puts uses x0; keep the value to print

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1			// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
398