// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021 ARM Limited.
// Original author: Mark Brown <broonie@kernel.org>
//
// Scalable Matrix Extension ZA context switch test
// Repeatedly writes unique test patterns into each ZA tile
// and reads them back to verify integrity.
//
// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
// (leave it running for as long as you want...)
// kill $pids

#include <asm/unistd.h>
#include "assembler.h"
#include "asm-offsets.h"
#include "sme-inst.h"

.arch_extension sve

#define MAXVL     2048
#define MAXVL_B   (MAXVL / 8)

// Backing storage for the test, placed in .data:
//   zaref   - in-memory shadow of ZA: MAXVL_B rows of MAXVL_B bytes each
//   scratch - staging buffer holding a single vector (MAXVL_B bytes)
// The section that was current before this point is restored afterwards.
.pushsection .data
.p2align 4				// 16-byte alignment
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection

// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
// Clobbers x0-x3
function memcpy
	cmp	x2, #0
	b.eq	.Lmemcpy_done			// zero length: nothing to do
.Lmemcpy_byte:
	ldrb	w3, [x1], #1			// fetch one byte, advance source
	strb	w3, [x0], #1			// store it, advance destination
	subs	x2, x2, #1
	b.ne	.Lmemcpy_byte
.Lmemcpy_done:
	ret
endfunction

// Generate a test pattern for storage in ZA
// x0: pid
// x1: row in ZA
// x2: generation
//
// The inputs are packed into a 32-bit word which is written to the
// scratch buffer MAXVL_B / 4 times, incrementing by one for each word:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:16	pid
// bits 15: 8	row number
// bits  7: 0	32-bit lane index
//
// Clobbers x0, x1 and x3.
function pattern
	mov	w3, #0				// lane index starts at zero
	bfi	w3, w2, #28, #4			// Generation
	bfi	w3, w0, #16, #12		// PID
	bfi	w3, w1, #8, #8			// Row

	mov	w1, #MAXVL_B / 4		// words left to write
	ldr	x0, =scratch			// write cursor

.Lpattern_word:
	str	w3, [x0], #4
	add	w3, w3, #1			// Lane
	subs	w1, w1, #1
	b.ne	.Lpattern_word

	ret
endfunction

// Get the address of shadow data for ZA horizontal vector xn
//   xd:    register that receives zaref + x<nrtmp> * xn
//   xn:    register holding the row index
//   nrtmp: *number* of a temporary X register; it is loaded by
//          "rdsvl <nrtmp>, 1" and left holding that value on exit
//          (callers below use it as the per-row length in bytes)
.macro _adrza xd, xn, nrtmp
	ldr	\xd, =zaref
	rdsvl	\nrtmp, 1
	madd	\xd, x\nrtmp, \xn, \xd
.endm

// Set up test pattern in a ZA horizontal vector
// x0: pid
// x1: row number
// x2: generation
//
// Builds the pattern in scratch, copies it into the row's slot in the
// zaref shadow buffer, then loads the ZA row from that shadow copy.
// The return address is parked in x4 because pattern and memcpy are
// called with bl.
function setup_za
	mov	x12, x1		// Use x12 for vector select
	mov	x4, x30		// keep the return address across the calls

	bl	pattern		// Get pattern in scratch buffer
	_adrza	x0, x12, 2	// Shadow buffer pointer to x0 and x5
	ldr	x1, =scratch
	mov	x5, x0
	bl	memcpy		// length set up in x2 by _adrza

	_ldr_za 12, 5		// load vector w12 from pointer x5

	ret	x4
endfunction

// Trivial memory compare: compare x2 bytes starting at address x0 with
// bytes starting at address x1.
// Returns only if all bytes match; otherwise, the program is aborted.
// Clobbers x0-x5.
//
// The three arguments are stashed on the stack and reloaded before
// leaving the loop, so that barf receives the original pointers and
// length rather than the partially consumed ones.
function memcmp
	cbz	x2, .Lmemcmp_ret

	stp	x0, x1, [sp, #-0x20]!
	str	x2, [sp, #0x10]

	mov	x5, xzr				// byte offset into both buffers
.Lmemcmp_byte:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	add	x5, x5, #1
	cmp	w3, w4
	b.ne	.Lmemcmp_exit			// mismatch: leave with NE set
	subs	x2, x2, #1
	b.ne	.Lmemcmp_byte			// falls through with EQ when done

.Lmemcmp_exit:
	// The loads below do not alter the condition flags, so NE here
	// still means "a byte differed".
	ldr	x2, [sp, #0x10]
	ldp	x0, x1, [sp], #0x20
	b.ne	barf

.Lmemcmp_ret:
	ret
endfunction

// Verify that a ZA vector matches its shadow in memory, else abort
// x0: row number
// Clobbers x0-x7 and x12.
//
// The scratch buffer is poisoned via memfill_ae before the ZA row is
// stored into it, so stale scratch contents cannot produce a false
// match. The compare is a tail call: x30 is restored first and memcmp
// returns directly to our caller.
// NOTE(review): relies on memfill_ae preserving x3 and x5-x7 -- it is
// defined outside this file, confirm against its definition.
function check_za
	mov	x3, x30		// keep the return address across bl

	mov	x12, x0		// row select for _str_za
	_adrza	x5, x0, 6	// pointer to expected value in x5
	mov	x4, x0
	ldr	x7, =scratch	// x7 is scratch

	mov	x1, x6		// Poison scratch
	mov	x0, x7
	bl	memfill_ae

	_str_za 12, 7		// save vector w12 to pointer x7

	mov	x2, x6		// length, as left in x6 by _adrza
	mov	x1, x7		// actual data
	mov	x0, x5		// expected data
	mov	x30, x3
	b	memcmp
endfunction

// Any SME register modified here can cause corruption in the main
// thread -- but *only* the locations modified here.
// Offset, from the pointer the handlers receive in x2, of the saved
// copy of general purpose register x<n>.
#define SAVED_XREG(n)	(ucontext_regs + 8 * (n))

// SIGUSR1 handler.
// x2: context pointer, indexed with ucontext_regs
function irritator_handler
	// Increment the irritation signal count (x23):
	ldr	x0, [x2, #SAVED_XREG(23)]
	add	x0, x0, #1
	str	x0, [x2, #SAVED_XREG(23)]

	// Corrupt some random ZA data
	// (currently compiled out)
#if 0
	adr	x0, .text + (irritator_handler - .text) / 16 * 16
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
#endif

	ret
endfunction

// SIGUSR2 handler: only counts the signal.
// x2: context pointer, indexed with ucontext_regs
function tickle_handler
	// Increment the signal count (x23):
	ldr	x0, [x2, #SAVED_XREG(23)]
	add	x0, x0, #1
	str	x0, [x2, #SAVED_XREG(23)]

	ret
endfunction

// SIGINT/SIGTERM handler: print a summary and exit with status 0.
// w0: signal number
// x2: context pointer, indexed with ucontext_regs
// Reports the interrupted code's x22 (iterations) and x23 (signals)
// as saved in the context. Does not return.
function terminate_handler
	mov	x20, x2			// context pointer, kept across the calls
	mov	w21, w0			// signal number, kept across the calls

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #SAVED_XREG(22)]
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #SAVED_XREG(23)]
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #0
	svc	#0
endfunction

// Install a signal handler via rt_sigaction.
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
//
// A zeroed sigaction structure is built on the stack at sp + 16; the
// frame is sa_sz rounded up to a multiple of 16, plus 16 bytes whose
// first slot holds x30. Aborts the program if the syscall fails.
function setsignal
	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!

	mov	w4, w0			// arguments must survive memclr
	mov	x5, x1
	mov	w6, w2

	add	x0, sp, #16
	mov	x1, #sa_sz
	bl	memclr

	mov	w0, w4
	add	x1, sp, #16
	str	w6, [x1, #sa_flags]
	str	x5, [x1, #sa_handler]
	mov	x2, #0			// old action is not wanted
	mov	x3, #sa_mask_sz
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbz	w0, 1f

	puts	"sigaction failure\n"
	b	.Labort

1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
	ret
endfunction

// Main program entry point
//
// Register usage in the main loop:
//   x19: vector length in bits, as read at startup
//   x20: PID
//   x21: row countdown for the setup and check loops
//   x22: generation number / iteration count
//   x23: signal count (incremented by the signal handlers)
//   x24: number of rows, used to turn the countdown into a row index
.globl _start
function _start
	mov	x23, #0		// signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	puts	"Streaming mode "
	smstart_za

	// Sanity-check and report the vector length

	rdsvl	19, 8			// x19 = vector length in bits
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #MAXVL		// storage is sized for MAXVL
	b.hi	1f
	tst	x19, #(8 - 1)		// must be a whole number of bytes
	b.eq	2f

1:	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8			// the vector length must not change
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1		// Set up ZA & shadow with test pattern
0:	mov	x0, x20
	sub	x1, x21, #1		// rows are filled from last to first
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	0b

	mov	x8, #__NR_sched_yield	// encourage preemption
1:
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1		// Verify that the data made it through
	rdsvl	24, 1		// x24 = number of rows; row = x24 - x21
0:	sub	x0, x24, x21
	bl	check_za
	subs	x21, x21, #1
	bne	0b

	add	x22, x22, #1	// Everything still working
	b	.Ltest_loop

.Labort:
	// NOTE(review): kill() with pid 0 signals every process in the
	// caller's process group, whereas barf below signals only this
	// process -- confirm that the wider scope is intended here.
	mov	x0, #0
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
endfunction

// Report a data mismatch and abort with SIGABRT.
// x0: expected data
// x1: actual data
// x2: data size
// Reached from memcmp with the main loop state intact: x20 = PID,
// x22 = iteration, x21 = row countdown, x24 = number of rows.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	sub	x0, x24, x21	// the row that was checked, not the countdown
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	mov	x8, #__NR_getpid
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT	// x0 still holds our PID from getpid
	mov	x8, #__NR_kill
	svc	#0
//	mov	x8, #__NR_exit
//	mov	x1, #1
//	svc	#0
endfunction

// Report an unexpected change of vector length and exit with status 1.
// x0: the vector length (in bits) that was read
function vl_barf
	mov	x10, x0

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #1		// exit status is the first syscall argument
	svc	#0
endfunction

// Report an unexpected SVCR value and exit with status 1.
// x0: the SVCR value that was read
function svcr_barf
	mov	x10, x0

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #1		// exit status is the first syscall argument
	svc	#0
endfunction