1// Copyright 2019, VIXL authors 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <cstdio>
#include <cstring>
#include <string>

#include "test-runner.h"

#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/test-utils-aarch64.h"
#include "test-disasm-aarch64.h"
#include "test-utils-aarch64.h"

namespace vixl {
namespace aarch64 {

// NOTE(review): SETUP, CLEANUP, COMPARE, COMPARE_MACRO and COMPARE_PREFIX are
// not defined in this file (presumably they come from test-disasm-aarch64.h).
// The comments below assume that each COMPARE* pairs an assembler or
// macro-assembler call with the disassembly text expected for it, and that
// COMPARE_PREFIX only checks the start of that text - confirm against the
// macro definitions.

// Smoke test over a mixed sample of SVE instructions (predicated and
// unpredicated, vector and predicate destinations).
TEST(sve) {
  SETUP();

  // TODO: Replace these tests when the disassembler is more capable.
  COMPARE(asrr(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()),
          "asrr z0.b, p7/m, z0.b, z1.b");
  COMPARE(fcmeq(p6.VnD(), p7.Zeroing(), z0.VnD(), z1.VnD()),
          "fcmeq p6.d, p7/z, z0.d, z1.d");
  COMPARE(mla(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()),
          "mla z0.b, p7/m, z0.b, z1.b");
  COMPARE(mla(z1.VnS(), p7.Merging(), z1.VnS(), z0.VnS()),
          "mla z1.s, p7/m, z1.s, z0.s");
  COMPARE(asr(z1.VnB(), z0.VnB(), 8), "asr z1.b, z0.b, #8");
  COMPARE(lsl(z0.VnH(), z1.VnH(), 15), "lsl z0.h, z1.h, #15");
  COMPARE(lsr(z1.VnS(), z0.VnS(), 32), "lsr z1.s, z0.s, #32");
  COMPARE(and_(p6.VnB(), p7.Zeroing(), p6.VnB(), p7.VnB()),
          "and p6.b, p7/z, p6.b, p7.b");
  COMPARE(rev(p7.VnB(), p6.VnB()), "rev p7.b, p6.b");
  COMPARE(splice(z0.VnH(), p7, z0.VnH(), z1.VnH()),
          "splice z0.h, p7, z0.h, z1.h");
  COMPARE(fnmad(z0.VnD(), p6.Merging(), z1.VnD(), z0.VnD()),
          "fnmad z0.d, p6/m, z1.d, z0.d");

  CLEANUP();
}

TEST(sve_unallocated_fp_byte_type) {
  // Ensure disassembly of FP instructions does not report byte-sized lanes.

  SETUP();

  // Every entry passes a literal 32-bit value to dci() and expects text that
  // starts with "unallocated".
  COMPARE_PREFIX(dci(0x650003ca), "unallocated");
  COMPARE_PREFIX(dci(0x6500230b), "unallocated");
  COMPARE_PREFIX(dci(0x6500424c), "unallocated");
  COMPARE_PREFIX(dci(0x6500618d), "unallocated");
  COMPARE_PREFIX(dci(0x6500a00f), "unallocated");
  COMPARE_PREFIX(dci(0x6500de91), "unallocated");
  COMPARE_PREFIX(dci(0x6500fdd2), "unallocated");
  COMPARE_PREFIX(dci(0x65011d13), "unallocated");
  COMPARE_PREFIX(dci(0x65015b95), "unallocated");
  COMPARE_PREFIX(dci(0x65017ad6), "unallocated");
  COMPARE_PREFIX(dci(0x65019a17), "unallocated");
  COMPARE_PREFIX(dci(0x6501b958), "unallocated");
  COMPARE_PREFIX(dci(0x6502941f), "unallocated");
  COMPARE_PREFIX(dci(0x6502b360), "unallocated");
  COMPARE_PREFIX(dci(0x6502d2a1), "unallocated");
  COMPARE_PREFIX(dci(0x65038e27), "unallocated");
  COMPARE_PREFIX(dci(0x6503ad68), "unallocated");
  COMPARE_PREFIX(dci(0x65042a6c), "unallocated");
  COMPARE_PREFIX(dci(0x6504882f), "unallocated");
  COMPARE_PREFIX(dci(0x6504a770), "unallocated");
  COMPARE_PREFIX(dci(0x65052474), "unallocated");
  COMPARE_PREFIX(dci(0x65058237), "unallocated");
  COMPARE_PREFIX(dci(0x65063dbd), "unallocated");
  COMPARE_PREFIX(dci(0x65069b80), "unallocated");
  COMPARE_PREFIX(dci(0x6506bac1), "unallocated");
  COMPARE_PREFIX(dci(0x65071884), "unallocated");
  COMPARE_PREFIX(dci(0x650737c5), "unallocated");
  COMPARE_PREFIX(dci(0x65079588), "unallocated");
  COMPARE_PREFIX(dci(0x6507b4c9), "unallocated");
  COMPARE_PREFIX(dci(0x65088f90), "unallocated");
  COMPARE_PREFIX(dci(0x65090c94), "unallocated");
  COMPARE_PREFIX(dci(0x65098998), "unallocated");
  COMPARE_PREFIX(dci(0x650a83a0), "unallocated");
  COMPARE_PREFIX(dci(0x650c96f1), "unallocated");
  COMPARE_PREFIX(dci(0x650d90f9), "unallocated");
  COMPARE_PREFIX(dci(0x65113a97), "unallocated");
  COMPARE_PREFIX(dci(0x65183010), "unallocated");
  COMPARE_PREFIX(dci(0x65200050), "unallocated");
  COMPARE_PREFIX(dci(0x65203ed2), "unallocated");
  COMPARE_PREFIX(dci(0x65205e13), "unallocated");
  COMPARE_PREFIX(dci(0x65207d54), "unallocated");
  COMPARE_PREFIX(dci(0x65209c95), "unallocated");
  COMPARE_PREFIX(dci(0x6520bbd6), "unallocated");
  COMPARE_PREFIX(dci(0x6520db17), "unallocated");
  COMPARE_PREFIX(dci(0x6520fa58), "unallocated");
  COMPARE_PREFIX(dci(0x650f31e1), "unallocated");
  COMPARE_PREFIX(dci(0x650e30f7), "unallocated");
  COMPARE_PREFIX(dci(0x6511376e), "unallocated");

  CLEANUP();
}

// adr with a vector base and a vector offset: sxtw/uxtw-extended offsets on D
// lanes, then unextended offsets on S and D lanes, each with no shift and with
// shift amounts 1 to 3.
TEST(sve_address_generation) {
  SETUP();

  COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW)),
          "adr z19.d, [z22.d, z11.d, sxtw]");
  COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 1)),
          "adr z19.d, [z22.d, z11.d, sxtw #1]");
  COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 2)),
          "adr z19.d, [z22.d, z11.d, sxtw #2]");
  COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 3)),
          "adr z19.d, [z22.d, z11.d, sxtw #3]");
  COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW)),
          "adr z30.d, [z14.d, z16.d, uxtw]");
  COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 1)),
          "adr z30.d, [z14.d, z16.d, uxtw #1]");
  COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 2)),
          "adr z30.d, [z14.d, z16.d, uxtw #2]");
  COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 3)),
          "adr z30.d, [z14.d, z16.d, uxtw #3]");
  COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS())),
          "adr z8.s, [z16.s, z16.s]");
  COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 1)),
          "adr z8.s, [z16.s, z16.s, lsl #1]");
  COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 2)),
          "adr z8.s, [z16.s, z16.s, lsl #2]");
  COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 3)),
          "adr z8.s, [z16.s, z16.s, lsl #3]");
  COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD())),
          "adr z9.d, [z1.d, z16.d]");
  COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 1)),
          "adr z9.d, [z1.d, z16.d, lsl #1]");
  COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 2)),
          "adr z9.d, [z1.d, z16.d, lsl #2]");
  COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 3)),
          "adr z9.d, [z1.d, z16.d, lsl #3]");


  CLEANUP();
}

// Expected output of CalculateSVEAddress for each SVEMemOperand form used
// below: base only, base plus immediate, base plus immediate with SVE_MUL_VL,
// and base plus register.
TEST(sve_calculate_sve_address) {
// The typedef below deliberately shadows a name, so -Wshadow is silenced for
// the duration of this test and restored by the matching pop at the end.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"

  // Shadow the `MacroAssembler` type so that the test macros work without
  // modification.
  typedef CalculateSVEAddressMacroAssembler MacroAssembler;

  SETUP();

  // It is important that we cover every branch in this test because most other
  // tests tend not to check every code path.

  // IsEquivalentToScalar()
  COMPARE_MACRO(CalculateSVEAddress(x0, SVEMemOperand(x1)), "mov x0, x1");
  COMPARE_MACRO(CalculateSVEAddress(x4, SVEMemOperand(x2, 0)), "mov x4, x2");
  COMPARE_MACRO(CalculateSVEAddress(x4, SVEMemOperand(x2, xzr, LSL, 2)),
                "mov x4, x2");

  // IsScalarPlusImmediate()
  // Simple immediates just pass through to 'Add'.
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 42)),
                "add x10, x0, #0x2a (42)");
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(sp, 42)),
                "add x10, sp, #0x2a (42)");
  // SVE_MUL_VL variants use `Addpl`, which has its own tests, but
  // `CalculateSVEAddress` needs to check and handle the access size.
  // The same operand with a final argument of 0, 1, 2, 3 expects addpl
  // immediates of 24, 12, 6, 3: each step halves the immediate.
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 3, SVE_MUL_VL), 0),
                "addpl x10, x0, #24");
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 3, SVE_MUL_VL), 1),
                "addpl x10, x0, #12");
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 3, SVE_MUL_VL), 2),
                "addpl x10, x0, #6");
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 3, SVE_MUL_VL), 3),
                "addpl x10, x0, #3");

  // IsScalarPlusScalar()
  // All forms pass through to `Add`, but SVE_LSL must be handled correctly.
  COMPARE_MACRO(CalculateSVEAddress(x22, SVEMemOperand(x2, x3)),
                "add x22, x2, x3");
  COMPARE_MACRO(CalculateSVEAddress(x22, SVEMemOperand(sp, x3)),
                "add x22, sp, x3");
  COMPARE_MACRO(CalculateSVEAddress(x22, SVEMemOperand(x2, x3, LSL, 2)),
                "add x22, x2, x3, lsl #2");
  COMPARE_MACRO(CalculateSVEAddress(x22, SVEMemOperand(sp, x3, LSL, 2)),
                "add x22, sp, x3, lsl #2");

  CLEANUP();

#pragma GCC diagnostic pop
}

// Bitwise operations with an immediate operand (and_/eor/orr/dupm), their
// bic/eon/orn aliases, and the choice between "dupm" and "mov" in the
// expected text.
TEST(sve_bitwise_imm) {
  SETUP();

  // The assembler will necessarily encode an immediate in the simplest bitset.
  COMPARE(and_(z2.VnD(), z2.VnD(), 0x0000ffff0000ffff),
          "and z2.s, z2.s, #0xffff");
  COMPARE(dupm(z15.VnS(), 0x7f007f00), "dupm z15.h, #0x7f00");
  COMPARE(eor(z26.VnH(), z26.VnH(), 0x7ff8), "eor z26.h, z26.h, #0x7ff8");
  COMPARE(orr(z13.VnB(), z13.VnB(), 0x78), "orr z13.b, z13.b, #0x78");

  // Logical aliases.
  // The expected strings show bic/eon/orn as and/eor/orr with the
  // lane-width bitwise inverse of the immediate that was passed in.
  COMPARE(bic(z21.VnD(), z21.VnD(), 0xffff00000000ffff),
          "and z21.d, z21.d, #0xffffffff0000");
  COMPARE(eon(z31.VnS(), z31.VnS(), 0x1ffe), "eor z31.s, z31.s, #0xffffe001");
  COMPARE(orn(z11.VnH(), z11.VnH(), 0x2), "orr z11.h, z11.h, #0xfffd");

  // Mov alias for dupm.
  COMPARE(mov(z0.VnH(), 0xf00f), "mov z0.h, #0xf00f");
  COMPARE_MACRO(Mov(z11.VnS(), 0xe0000003), "mov z11.s, #0xe0000003");
  COMPARE_MACRO(Mov(z22.VnD(), 0x8000), "dupm z22.d, #0x8000");

  // Test dupm versus mov disassembly.
  COMPARE(dupm(z0.VnH(), 0xfe), "dupm z0.h, #0xfe");
  COMPARE(dupm(z0.VnH(), 0xff), "dupm z0.h, #0xff");
  COMPARE(dupm(z0.VnH(), 0x1fe), "mov z0.h, #0x1fe");
  COMPARE(dupm(z0.VnH(), 0xfe00), "dupm z0.h, #0xfe00");
  COMPARE(dupm(z0.VnH(), 0xfe01), "mov z0.h, #0xfe01");
  COMPARE(dupm(z0.VnS(), 0xfe00), "dupm z0.s, #0xfe00");
  COMPARE(dupm(z0.VnS(), 0xfe000001), "mov z0.s, #0xfe000001");
  COMPARE(dupm(z0.VnS(), 0xffffff00), "dupm z0.s, #0xffffff00");
  COMPARE(dupm(z0.VnS(), 0xffffff01), "dupm z0.s, #0xffffff01");
  COMPARE(dupm(z0.VnS(), 0xfffffe01), "mov z0.s, #0xfffffe01");
  COMPARE(dupm(z0.VnS(), 0xfff), "mov z0.s, #0xfff");
  COMPARE(dupm(z0.VnD(), 0xffffffffffffff00), "dupm z0.d, #0xffffffffffffff00");
  COMPARE(dupm(z0.VnD(), 0x7fffffffffffff80), "mov z0.d, #0x7fffffffffffff80");
  COMPARE(dupm(z0.VnD(), 0x8000), "dupm z0.d, #0x8000");
  COMPARE(dupm(z0.VnD(), 0x10000), "mov z0.d, #0x10000");

  CLEANUP();
}

// Unpredicated vector and/bic/eor/orr on D lanes, plus the "mov" text
// expected when orr is given the same register for both sources.
TEST(sve_bitwise_logical_unpredicated) {
  SETUP();

  COMPARE(and_(z12.VnD(), z5.VnD(), z29.VnD()), "and z12.d, z5.d, z29.d");
  COMPARE(bic(z11.VnD(), z15.VnD(), z9.VnD()), "bic z11.d, z15.d, z9.d");
  COMPARE(eor(z9.VnD(), z31.VnD(), z29.VnD()), "eor z9.d, z31.d, z29.d");
  COMPARE(orr(z17.VnD(), z8.VnD(), z19.VnD()), "orr z17.d, z8.d, z19.d");

  // Check mov aliases.
  COMPARE(orr(z17.VnD(), z8.VnD(), z8.VnD()), "mov z17.d, z8.d");
  COMPARE(mov(z18, z9), "mov z18.d, z9.d");
  COMPARE_MACRO(Mov(z19, z10), "mov z19.d, z10.d");

  CLEANUP();
}

// Predicated (merging) shifts: by a vector of the same lane size, by a D-lane
// vector, by an immediate, and through the Asr/Lsl/Lsr/Asrd macros.
TEST(sve_bitwise_shift_predicated) {
  SETUP();

  // Shift by a vector with the same lane size, including the reversed-operand
  // instructions (asrr, lslr, lsrr), for every lane size.
  COMPARE(asrr(z20.VnB(), p3.Merging(), z20.VnB(), z11.VnB()),
          "asrr z20.b, p3/m, z20.b, z11.b");
  COMPARE(asrr(z20.VnH(), p3.Merging(), z20.VnH(), z11.VnH()),
          "asrr z20.h, p3/m, z20.h, z11.h");
  COMPARE(asrr(z20.VnS(), p3.Merging(), z20.VnS(), z11.VnS()),
          "asrr z20.s, p3/m, z20.s, z11.s");
  COMPARE(asrr(z20.VnD(), p3.Merging(), z20.VnD(), z11.VnD()),
          "asrr z20.d, p3/m, z20.d, z11.d");
  COMPARE(asr(z26.VnB(), p2.Merging(), z26.VnB(), z17.VnB()),
          "asr z26.b, p2/m, z26.b, z17.b");
  COMPARE(asr(z26.VnH(), p2.Merging(), z26.VnH(), z17.VnH()),
          "asr z26.h, p2/m, z26.h, z17.h");
  COMPARE(asr(z26.VnS(), p2.Merging(), z26.VnS(), z17.VnS()),
          "asr z26.s, p2/m, z26.s, z17.s");
  COMPARE(asr(z26.VnD(), p2.Merging(), z26.VnD(), z17.VnD()),
          "asr z26.d, p2/m, z26.d, z17.d");
  COMPARE(lslr(z30.VnB(), p1.Merging(), z30.VnB(), z26.VnB()),
          "lslr z30.b, p1/m, z30.b, z26.b");
  COMPARE(lslr(z30.VnH(), p1.Merging(), z30.VnH(), z26.VnH()),
          "lslr z30.h, p1/m, z30.h, z26.h");
  COMPARE(lslr(z30.VnS(), p1.Merging(), z30.VnS(), z26.VnS()),
          "lslr z30.s, p1/m, z30.s, z26.s");
  COMPARE(lslr(z30.VnD(), p1.Merging(), z30.VnD(), z26.VnD()),
          "lslr z30.d, p1/m, z30.d, z26.d");
  COMPARE(lsl(z14.VnB(), p6.Merging(), z14.VnB(), z25.VnB()),
          "lsl z14.b, p6/m, z14.b, z25.b");
  COMPARE(lsl(z14.VnH(), p6.Merging(), z14.VnH(), z25.VnH()),
          "lsl z14.h, p6/m, z14.h, z25.h");
  COMPARE(lsl(z14.VnS(), p6.Merging(), z14.VnS(), z25.VnS()),
          "lsl z14.s, p6/m, z14.s, z25.s");
  COMPARE(lsl(z14.VnD(), p6.Merging(), z14.VnD(), z25.VnD()),
          "lsl z14.d, p6/m, z14.d, z25.d");
  COMPARE(lsrr(z3.VnB(), p1.Merging(), z3.VnB(), z16.VnB()),
          "lsrr z3.b, p1/m, z3.b, z16.b");
  COMPARE(lsrr(z3.VnH(), p1.Merging(), z3.VnH(), z16.VnH()),
          "lsrr z3.h, p1/m, z3.h, z16.h");
  COMPARE(lsrr(z3.VnS(), p1.Merging(), z3.VnS(), z16.VnS()),
          "lsrr z3.s, p1/m, z3.s, z16.s");
  COMPARE(lsrr(z3.VnD(), p1.Merging(), z3.VnD(), z16.VnD()),
          "lsrr z3.d, p1/m, z3.d, z16.d");
  COMPARE(lsr(z29.VnB(), p7.Merging(), z29.VnB(), z13.VnB()),
          "lsr z29.b, p7/m, z29.b, z13.b");
  COMPARE(lsr(z29.VnH(), p7.Merging(), z29.VnH(), z13.VnH()),
          "lsr z29.h, p7/m, z29.h, z13.h");
  COMPARE(lsr(z29.VnS(), p7.Merging(), z29.VnS(), z13.VnS()),
          "lsr z29.s, p7/m, z29.s, z13.s");
  COMPARE(lsr(z29.VnD(), p7.Merging(), z29.VnD(), z13.VnD()),
          "lsr z29.d, p7/m, z29.d, z13.d");

  // B, H and S lanes shifted by a D-lane vector.
  COMPARE(asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnD()),
          "asr z4.b, p0/m, z4.b, z30.d");
  COMPARE(asr(z4.VnH(), p0.Merging(), z4.VnH(), z30.VnD()),
          "asr z4.h, p0/m, z4.h, z30.d");
  COMPARE(asr(z4.VnS(), p0.Merging(), z4.VnS(), z30.VnD()),
          "asr z4.s, p0/m, z4.s, z30.d");
  COMPARE(lsl(z13.VnB(), p7.Merging(), z13.VnB(), z18.VnD()),
          "lsl z13.b, p7/m, z13.b, z18.d");
  COMPARE(lsl(z13.VnH(), p7.Merging(), z13.VnH(), z18.VnD()),
          "lsl z13.h, p7/m, z13.h, z18.d");
  COMPARE(lsl(z13.VnS(), p7.Merging(), z13.VnS(), z18.VnD()),
          "lsl z13.s, p7/m, z13.s, z18.d");
  COMPARE(lsr(z1.VnB(), p4.Merging(), z1.VnB(), z14.VnD()),
          "lsr z1.b, p4/m, z1.b, z14.d");
  COMPARE(lsr(z1.VnH(), p4.Merging(), z1.VnH(), z14.VnD()),
          "lsr z1.h, p4/m, z1.h, z14.d");
  COMPARE(lsr(z1.VnS(), p4.Merging(), z1.VnS(), z14.VnD()),
          "lsr z1.s, p4/m, z1.s, z14.d");

  // Macro forms, three cases per operation:
  //  - destination equals the first source: the plain instruction;
  //  - destination equals the second source: the reversed instruction;
  //  - destination differs from both: movprfx followed by the instruction.
  COMPARE_MACRO(Asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnB()),
                "asr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Asr(z4.VnB(), p0.Merging(), z30.VnB(), z4.VnB()),
                "asrr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Asr(z4.VnB(), p0.Merging(), z10.VnB(), z14.VnB()),
                "movprfx z4.b, p0/m, z10.b\n"
                "asr z4.b, p0/m, z4.b, z14.b");
  COMPARE_MACRO(Lsl(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnB()),
                "lsl z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Lsl(z4.VnB(), p0.Merging(), z30.VnB(), z4.VnB()),
                "lslr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Lsl(z4.VnB(), p0.Merging(), z10.VnB(), z14.VnB()),
                "movprfx z4.b, p0/m, z10.b\n"
                "lsl z4.b, p0/m, z4.b, z14.b");
  COMPARE_MACRO(Lsr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnB()),
                "lsr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Lsr(z4.VnB(), p0.Merging(), z30.VnB(), z4.VnB()),
                "lsrr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Lsr(z4.VnB(), p0.Merging(), z10.VnB(), z14.VnB()),
                "movprfx z4.b, p0/m, z10.b\n"
                "lsr z4.b, p0/m, z4.b, z14.b");

  // Shift by immediate, for every lane size.
  COMPARE(asrd(z0.VnB(), p4.Merging(), z0.VnB(), 1),
          "asrd z0.b, p4/m, z0.b, #1");
  COMPARE(asrd(z0.VnH(), p4.Merging(), z0.VnH(), 1),
          "asrd z0.h, p4/m, z0.h, #1");
  COMPARE(asrd(z0.VnS(), p4.Merging(), z0.VnS(), 1),
          "asrd z0.s, p4/m, z0.s, #1");
  COMPARE(asrd(z0.VnD(), p4.Merging(), z0.VnD(), 1),
          "asrd z0.d, p4/m, z0.d, #1");
  COMPARE(asr(z8.VnB(), p7.Merging(), z8.VnB(), 3), "asr z8.b, p7/m, z8.b, #3");
  COMPARE(asr(z8.VnH(), p7.Merging(), z8.VnH(), 3), "asr z8.h, p7/m, z8.h, #3");
  COMPARE(asr(z8.VnS(), p7.Merging(), z8.VnS(), 3), "asr z8.s, p7/m, z8.s, #3");
  COMPARE(asr(z8.VnD(), p7.Merging(), z8.VnD(), 3), "asr z8.d, p7/m, z8.d, #3");
  COMPARE(lsl(z29.VnB(), p6.Merging(), z29.VnB(), 0),
          "lsl z29.b, p6/m, z29.b, #0");
  COMPARE(lsl(z29.VnH(), p6.Merging(), z29.VnH(), 5),
          "lsl z29.h, p6/m, z29.h, #5");
  COMPARE(lsl(z29.VnS(), p6.Merging(), z29.VnS(), 0),
          "lsl z29.s, p6/m, z29.s, #0");
  COMPARE(lsl(z29.VnD(), p6.Merging(), z29.VnD(), 63),
          "lsl z29.d, p6/m, z29.d, #63");
  COMPARE(lsr(z24.VnB(), p2.Merging(), z24.VnB(), 8),
          "lsr z24.b, p2/m, z24.b, #8");
  COMPARE(lsr(z24.VnH(), p2.Merging(), z24.VnH(), 16),
          "lsr z24.h, p2/m, z24.h, #16");
  COMPARE(lsr(z24.VnS(), p2.Merging(), z24.VnS(), 32),
          "lsr z24.s, p2/m, z24.s, #32");
  COMPARE(lsr(z24.VnD(), p2.Merging(), z24.VnD(), 64),
          "lsr z24.d, p2/m, z24.d, #64");

  // Immediate macro forms with a destination that differs from the source
  // expect a movprfx ahead of the shift.
  COMPARE_MACRO(Asrd(z0.VnB(), p4.Merging(), z8.VnB(), 1),
                "movprfx z0.b, p4/m, z8.b\n"
                "asrd z0.b, p4/m, z0.b, #1");
  COMPARE_MACRO(Asr(z8.VnH(), p7.Merging(), z29.VnH(), 3),
                "movprfx z8.h, p7/m, z29.h\n"
                "asr z8.h, p7/m, z8.h, #3");
  COMPARE_MACRO(Lsl(z29.VnS(), p6.Merging(), z24.VnS(), 0),
                "movprfx z29.s, p6/m, z24.s\n"
                "lsl z29.s, p6/m, z29.s, #0");
  COMPARE_MACRO(Lsr(z24.VnD(), p2.Merging(), z0.VnD(), 64),
                "movprfx z24.d, p2/m, z0.d\n"
                "lsr z24.d, p2/m, z24.d, #64");

  CLEANUP();
}

// Unpredicated shifts: two raw encodings expected to report "unimplemented",
// then shifts by immediate for every lane size and shifts of B/H/S lanes by a
// D-lane vector.
TEST(sve_bitwise_shift_unpredicated) {
  SETUP();

  // Test lsl with reserved D-sized lane field.
  COMPARE_PREFIX(dci(0x04ef8e15), "unimplemented");
  // Test asr with reserved tsz field.
  COMPARE_PREFIX(dci(0x04209345), "unimplemented");

  COMPARE(asr(z4.VnB(), z27.VnB(), 1), "asr z4.b, z27.b, #1");
  COMPARE(asr(z5.VnB(), z26.VnB(), 8), "asr z5.b, z26.b, #8");
  COMPARE(asr(z6.VnH(), z25.VnH(), 1), "asr z6.h, z25.h, #1");
  COMPARE(asr(z7.VnH(), z24.VnH(), 16), "asr z7.h, z24.h, #16");
  COMPARE(asr(z8.VnS(), z23.VnS(), 1), "asr z8.s, z23.s, #1");
  COMPARE(asr(z9.VnS(), z22.VnS(), 32), "asr z9.s, z22.s, #32");
  COMPARE(asr(z10.VnD(), z21.VnD(), 1), "asr z10.d, z21.d, #1");
  COMPARE(asr(z11.VnD(), z20.VnD(), 64), "asr z11.d, z20.d, #64");
  COMPARE(lsr(z4.VnB(), z27.VnB(), 3), "lsr z4.b, z27.b, #3");
  COMPARE(lsr(z5.VnB(), z26.VnB(), 7), "lsr z5.b, z26.b, #7");
  COMPARE(lsr(z6.VnH(), z25.VnH(), 8), "lsr z6.h, z25.h, #8");
  COMPARE(lsr(z7.VnH(), z24.VnH(), 15), "lsr z7.h, z24.h, #15");
  COMPARE(lsr(z8.VnS(), z23.VnS(), 14), "lsr z8.s, z23.s, #14");
  COMPARE(lsr(z9.VnS(), z22.VnS(), 31), "lsr z9.s, z22.s, #31");
  COMPARE(lsr(z10.VnD(), z21.VnD(), 30), "lsr z10.d, z21.d, #30");
  COMPARE(lsr(z11.VnD(), z20.VnD(), 63), "lsr z11.d, z20.d, #63");
  COMPARE(lsl(z4.VnB(), z27.VnB(), 4), "lsl z4.b, z27.b, #4");
  COMPARE(lsl(z5.VnB(), z26.VnB(), 6), "lsl z5.b, z26.b, #6");
  COMPARE(lsl(z6.VnH(), z25.VnH(), 10), "lsl z6.h, z25.h, #10");
  COMPARE(lsl(z7.VnH(), z24.VnH(), 14), "lsl z7.h, z24.h, #14");
  COMPARE(lsl(z8.VnS(), z23.VnS(), 21), "lsl z8.s, z23.s, #21");
  COMPARE(lsl(z9.VnS(), z22.VnS(), 30), "lsl z9.s, z22.s, #30");
  COMPARE(lsl(z10.VnD(), z21.VnD(), 44), "lsl z10.d, z21.d, #44");
  COMPARE(lsl(z11.VnD(), z20.VnD(), 62), "lsl z11.d, z20.d, #62");
  COMPARE(asr(z12.VnB(), z8.VnB(), z14.VnD()), "asr z12.b, z8.b, z14.d");
  COMPARE(asr(z14.VnH(), z8.VnH(), z12.VnD()), "asr z14.h, z8.h, z12.d");
  COMPARE(asr(z16.VnS(), z8.VnS(), z10.VnD()), "asr z16.s, z8.s, z10.d");
  COMPARE(lsl(z21.VnB(), z16.VnB(), z15.VnD()), "lsl z21.b, z16.b, z15.d");
  COMPARE(lsl(z23.VnH(), z16.VnH(), z13.VnD()), "lsl z23.h, z16.h, z13.d");
  COMPARE(lsl(z25.VnS(), z16.VnS(), z11.VnD()), "lsl z25.s, z16.s, z11.d");
  COMPARE(lsr(z16.VnB(), z19.VnB(), z2.VnD()), "lsr z16.b, z19.b, z2.d");
  COMPARE(lsr(z18.VnH(), z19.VnH(), z4.VnD()), "lsr z18.h, z19.h, z4.d");
  COMPARE(lsr(z20.VnS(), z19.VnS(), z6.VnD()), "lsr z20.s, z19.s, z6.d");

  CLEANUP();
}


// cntb/cntd/cnth/cntw with each pattern and multiplier used below. The
// expected strings omit the pattern for SVE_ALL when no multiplier is printed,
// omit a multiplier of 1, and print "all" once a multiplier above 1 is given.
TEST(sve_element_count) {
  SETUP();

  COMPARE_MACRO(Cntb(x7), "cntb x7");
  COMPARE_MACRO(Cntb(x7, SVE_POW2), "cntb x7, pow2");
  COMPARE_MACRO(Cntb(x7, SVE_VL1), "cntb x7, vl1");
  COMPARE_MACRO(Cntb(x7, SVE_VL2), "cntb x7, vl2");
  COMPARE_MACRO(Cntb(x7, SVE_VL16), "cntb x7, vl16");
  COMPARE_MACRO(Cntb(x7, SVE_VL256), "cntb x7, vl256");
  COMPARE_MACRO(Cntb(x7, SVE_MUL4), "cntb x7, mul4");
  COMPARE_MACRO(Cntb(x7, SVE_MUL3), "cntb x7, mul3");
  COMPARE_MACRO(Cntb(x7, SVE_ALL), "cntb x7");

  COMPARE_MACRO(Cntb(x7, SVE_POW2, 1), "cntb x7, pow2");
  COMPARE_MACRO(Cntb(x7, SVE_VL1, 16), "cntb x7, vl1, mul #16");
  COMPARE_MACRO(Cntb(x7, SVE_VL2, 15), "cntb x7, vl2, mul #15");
  COMPARE_MACRO(Cntb(x7, SVE_VL16, 14), "cntb x7, vl16, mul #14");
  COMPARE_MACRO(Cntb(x7, SVE_VL256, 8), "cntb x7, vl256, mul #8");
  COMPARE_MACRO(Cntb(x7, SVE_MUL4, 4), "cntb x7, mul4, mul #4");
  COMPARE_MACRO(Cntb(x7, SVE_MUL3, 3), "cntb x7, mul3, mul #3");
  COMPARE_MACRO(Cntb(x7, SVE_ALL, 2), "cntb x7, all, mul #2");

  COMPARE_MACRO(Cntb(x30), "cntb x30");
  COMPARE_MACRO(Cntd(xzr, SVE_POW2), "cntd xzr, pow2");
  COMPARE_MACRO(Cntd(xzr, SVE_MUL4, 1), "cntd xzr, mul4");
  COMPARE_MACRO(Cnth(x29, SVE_MUL3, 4), "cnth x29, mul3, mul #4");
  COMPARE_MACRO(Cntw(x28, SVE_VL256, 16), "cntw x28, vl256, mul #16");

  CLEANUP();
}

// dec{b,d,h,w} and inc{b,d,h,w} on X registers, using the same
// pattern/multiplier combinations as the element-count test above.
TEST(sve_incdec_reg_element_count) {
  SETUP();

  COMPARE_MACRO(Decb(x4), "decb x4");
  COMPARE_MACRO(Decb(x4, SVE_POW2), "decb x4, pow2");
  COMPARE_MACRO(Decb(x4, SVE_VL1), "decb x4, vl1");
  COMPARE_MACRO(Decb(x4, SVE_VL2), "decb x4, vl2");
  COMPARE_MACRO(Decb(x4, SVE_VL16), "decb x4, vl16");
  COMPARE_MACRO(Decb(x4, SVE_VL256), "decb x4, vl256");
  COMPARE_MACRO(Decb(x4, SVE_MUL4), "decb x4, mul4");
  COMPARE_MACRO(Decb(x4, SVE_MUL3), "decb x4, mul3");
  COMPARE_MACRO(Decb(x4, SVE_ALL), "decb x4");

  COMPARE_MACRO(Decb(x4, SVE_POW2, 1), "decb x4, pow2");
  COMPARE_MACRO(Decb(x4, SVE_VL1, 16), "decb x4, vl1, mul #16");
  COMPARE_MACRO(Decb(x4, SVE_VL2, 15), "decb x4, vl2, mul #15");
  COMPARE_MACRO(Decb(x4, SVE_VL16, 14), "decb x4, vl16, mul #14");
  COMPARE_MACRO(Decb(x4, SVE_VL256, 8), "decb x4, vl256, mul #8");
  COMPARE_MACRO(Decb(x4, SVE_MUL4, 4), "decb x4, mul4, mul #4");
  COMPARE_MACRO(Decb(x4, SVE_MUL3, 3), "decb x4, mul3, mul #3");
  COMPARE_MACRO(Decb(x4, SVE_ALL, 2), "decb x4, all, mul #2");

  COMPARE_MACRO(Decb(x30), "decb x30");
  COMPARE_MACRO(Decd(xzr, SVE_POW2), "decd xzr, pow2");
  COMPARE_MACRO(Decd(xzr, SVE_MUL4, 1), "decd xzr, mul4");
  COMPARE_MACRO(Dech(x29, SVE_MUL3, 4), "dech x29, mul3, mul #4");
  COMPARE_MACRO(Decw(x28, SVE_VL256, 16), "decw x28, vl256, mul #16");

  COMPARE_MACRO(Incb(x17), "incb x17");
  COMPARE_MACRO(Incb(x17, SVE_POW2), "incb x17, pow2");
  COMPARE_MACRO(Incb(x17, SVE_VL1), "incb x17, vl1");
  COMPARE_MACRO(Incb(x17, SVE_VL2), "incb x17, vl2");
  COMPARE_MACRO(Incb(x17, SVE_VL16), "incb x17, vl16");
  COMPARE_MACRO(Incb(x17, SVE_VL256), "incb x17, vl256");
  COMPARE_MACRO(Incb(x17, SVE_MUL4), "incb x17, mul4");
  COMPARE_MACRO(Incb(x17, SVE_MUL3), "incb x17, mul3");
  COMPARE_MACRO(Incb(x17, SVE_ALL), "incb x17");

  COMPARE_MACRO(Incb(x17, SVE_POW2, 1), "incb x17, pow2");
  COMPARE_MACRO(Incb(x17, SVE_VL1, 16), "incb x17, vl1, mul #16");
  COMPARE_MACRO(Incb(x17, SVE_VL2, 15), "incb x17, vl2, mul #15");
  COMPARE_MACRO(Incb(x17, SVE_VL16, 14), "incb x17, vl16, mul #14");
  COMPARE_MACRO(Incb(x17, SVE_VL256, 8), "incb x17, vl256, mul #8");
  COMPARE_MACRO(Incb(x17, SVE_MUL4, 4), "incb x17, mul4, mul #4");
  COMPARE_MACRO(Incb(x17, SVE_MUL3, 3), "incb x17, mul3, mul #3");
  COMPARE_MACRO(Incb(x17, SVE_ALL, 2), "incb x17, all, mul #2");

  COMPARE_MACRO(Incb(x30), "incb x30");
  COMPARE_MACRO(Incd(xzr, SVE_POW2), "incd xzr, pow2");
  COMPARE_MACRO(Incd(xzr, SVE_MUL4, 1), "incd xzr, mul4");
  COMPARE_MACRO(Inch(x29, SVE_MUL3, 4), "inch x29, mul3, mul #4");
  COMPARE_MACRO(Incw(x28, SVE_VL256, 16), "incw x28, vl256, mul #16");

  CLEANUP();
}

// sqdec*/sqinc* on scalar registers, first in the two-register (x, w) form and
// then in the single X-register form.
TEST(sve_signed_sat_incdec_reg_element_count) {
  SETUP();

  COMPARE_MACRO(Sqdecb(x12, w12), "sqdecb x12, w12");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_POW2), "sqdecb x12, w12, pow2");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL1), "sqdecb x12, w12, vl1");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL2), "sqdecb x12, w12, vl2");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL16), "sqdecb x12, w12, vl16");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL256), "sqdecb x12, w12, vl256");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_MUL4), "sqdecb x12, w12, mul4");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_MUL3), "sqdecb x12, w12, mul3");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_ALL), "sqdecb x12, w12");

  COMPARE_MACRO(Sqdecb(x12, w12, SVE_POW2, 1), "sqdecb x12, w12, pow2");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL1, 16), "sqdecb x12, w12, vl1, mul #16");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL2, 15), "sqdecb x12, w12, vl2, mul #15");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL16, 14),
                "sqdecb x12, w12, vl16, mul #14");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL256, 8),
                "sqdecb x12, w12, vl256, mul #8");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_MUL4, 4), "sqdecb x12, w12, mul4, mul #4");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_MUL3, 3), "sqdecb x12, w12, mul3, mul #3");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_ALL, 2), "sqdecb x12, w12, all, mul #2");

  COMPARE_MACRO(Sqdecb(xzr, wzr, SVE_POW2), "sqdecb xzr, wzr, pow2");
  COMPARE_MACRO(Sqdecb(xzr, wzr, SVE_MUL4, 1), "sqdecb xzr, wzr, mul4");

  COMPARE_MACRO(Sqincw(x20, w20, SVE_POW2, 1), "sqincw x20, w20, pow2");
  COMPARE_MACRO(Sqincd(x20, w20, SVE_VL1, 16), "sqincd x20, w20, vl1, mul #16");
  COMPARE_MACRO(Sqinch(x20, w20, SVE_VL2, 15), "sqinch x20, w20, vl2, mul #15");
  COMPARE_MACRO(Sqincw(x20, w20, SVE_VL16, 14),
                "sqincw x20, w20, vl16, mul #14");
  COMPARE_MACRO(Sqincd(x20, w20, SVE_VL256, 8),
                "sqincd x20, w20, vl256, mul #8");
  COMPARE_MACRO(Sqinch(x20, w20, SVE_MUL4, 4), "sqinch x20, w20, mul4, mul #4");
  COMPARE_MACRO(Sqincw(x20, w20, SVE_MUL3, 3), "sqincw x20, w20, mul3, mul #3");
  COMPARE_MACRO(Sqincd(x20, w20, SVE_ALL, 2), "sqincd x20, w20, all, mul #2");

  COMPARE_MACRO(Sqdecb(x5), "sqdecb x5");
  COMPARE_MACRO(Sqdecb(x5, SVE_POW2), "sqdecb x5, pow2");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL1), "sqdecb x5, vl1");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL2), "sqdecb x5, vl2");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL16), "sqdecb x5, vl16");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL256), "sqdecb x5, vl256");
  COMPARE_MACRO(Sqdecb(x5, SVE_MUL4), "sqdecb x5, mul4");
  COMPARE_MACRO(Sqdecb(x5, SVE_MUL3), "sqdecb x5, mul3");
  COMPARE_MACRO(Sqdecb(x5, SVE_ALL), "sqdecb x5");

  COMPARE_MACRO(Sqdecb(x5, SVE_POW2, 1), "sqdecb x5, pow2");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL1, 16), "sqdecb x5, vl1, mul #16");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL2, 15), "sqdecb x5, vl2, mul #15");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL16, 14), "sqdecb x5, vl16, mul #14");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL256, 8), "sqdecb x5, vl256, mul #8");
  COMPARE_MACRO(Sqdecb(x5, SVE_MUL4, 4), "sqdecb x5, mul4, mul #4");
  COMPARE_MACRO(Sqdecb(x5, SVE_MUL3, 3), "sqdecb x5, mul3, mul #3");
  COMPARE_MACRO(Sqdecb(x5, SVE_ALL, 2), "sqdecb x5, all, mul #2");

  COMPARE_MACRO(Sqdecb(xzr, SVE_POW2), "sqdecb xzr, pow2");
  COMPARE_MACRO(Sqdecb(xzr, SVE_MUL4, 1), "sqdecb xzr, mul4");

  COMPARE_MACRO(Sqincw(x7, SVE_POW2, 1), "sqincw x7, pow2");
  COMPARE_MACRO(Sqincd(x7, SVE_VL1, 16), "sqincd x7, vl1, mul #16");
  COMPARE_MACRO(Sqinch(x7, SVE_VL2, 15), "sqinch x7, vl2, mul #15");
  COMPARE_MACRO(Sqincw(x7, SVE_VL16, 14), "sqincw x7, vl16, mul #14");
  COMPARE_MACRO(Sqincd(x7, SVE_VL256, 8), "sqincd x7, vl256, mul #8");
  COMPARE_MACRO(Sqinch(x7, SVE_MUL4, 4), "sqinch x7, mul4, mul #4");
  COMPARE_MACRO(Sqincw(x7, SVE_MUL3, 3), "sqincw x7, mul3, mul #3");
  COMPARE_MACRO(Sqincd(x7, SVE_ALL, 2), "sqincd x7, all, mul #2");

  CLEANUP();
}

// uqdec*/uqinc* on scalar registers, with both W and X destinations.
TEST(sve_unsigned_sat_incdec_reg_element_count) {
  SETUP();

  COMPARE_MACRO(Uqdecb(w12), "uqdecb w12");
  COMPARE_MACRO(Uqdecb(w12, SVE_POW2), "uqdecb w12, pow2");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL1), "uqdecb w12, vl1");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL2), "uqdecb w12, vl2");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL16), "uqdecb w12, vl16");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL256), "uqdecb w12, vl256");
  COMPARE_MACRO(Uqdecb(w12, SVE_MUL4), "uqdecb w12, mul4");
  COMPARE_MACRO(Uqdecb(w12, SVE_MUL3), "uqdecb w12, mul3");
  COMPARE_MACRO(Uqdecb(w12, SVE_ALL), "uqdecb w12");

  COMPARE_MACRO(Uqdecb(w12, SVE_POW2, 1), "uqdecb w12, pow2");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL1, 16), "uqdecb w12, vl1, mul #16");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL2, 15), "uqdecb w12, vl2, mul #15");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL16, 14), "uqdecb w12, vl16, mul #14");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL256, 8), "uqdecb w12, vl256, mul #8");
  COMPARE_MACRO(Uqdecb(w12, SVE_MUL4, 4), "uqdecb w12, mul4, mul #4");
  COMPARE_MACRO(Uqdecb(w12, SVE_MUL3, 3), "uqdecb w12, mul3, mul #3");
  COMPARE_MACRO(Uqdecb(w12, SVE_ALL, 2), "uqdecb w12, all, mul #2");

  COMPARE_MACRO(Uqdecb(x30), "uqdecb x30");
  COMPARE_MACRO(Uqdecb(xzr, SVE_POW2), "uqdecb xzr, pow2");
  COMPARE_MACRO(Uqdecb(xzr, SVE_MUL4, 1), "uqdecb xzr, mul4");

  COMPARE_MACRO(Uqdecw(w22), "uqdecw w22");
  COMPARE_MACRO(Uqdecd(w22, SVE_POW2, 1), "uqdecd w22, pow2");
  COMPARE_MACRO(Uqdech(w22, SVE_VL1, 16), "uqdech w22, vl1, mul #16");
  COMPARE_MACRO(Uqdecw(x22, SVE_VL2, 15), "uqdecw x22, vl2, mul #15");
  COMPARE_MACRO(Uqdecd(x22, SVE_VL16, 14), "uqdecd x22, vl16, mul #14");
  COMPARE_MACRO(Uqdech(x22, SVE_VL256, 8), "uqdech x22, vl256, mul #8");
  COMPARE_MACRO(Uqdecw(w22, SVE_MUL4, 4), "uqdecw w22, mul4, mul #4");
  COMPARE_MACRO(Uqdecd(w22, SVE_MUL3, 3), "uqdecd w22, mul3, mul #3");
  COMPARE_MACRO(Uqdech(w22, SVE_ALL, 2), "uqdech w22, all, mul #2");

  COMPARE_MACRO(Uqincb(w29), "uqincb w29");
  COMPARE_MACRO(Uqincb(w29, SVE_POW2), "uqincb w29, pow2");
  COMPARE_MACRO(Uqincb(w29, SVE_VL1), "uqincb w29, vl1");
  COMPARE_MACRO(Uqincb(w29, SVE_VL2), "uqincb w29, vl2");
  COMPARE_MACRO(Uqincb(w29, SVE_VL16), "uqincb w29, vl16");
  COMPARE_MACRO(Uqincb(w29, SVE_VL256), "uqincb w29, vl256");
  COMPARE_MACRO(Uqincb(w29, SVE_MUL4), "uqincb w29, mul4");
  COMPARE_MACRO(Uqincb(w29, SVE_MUL3), "uqincb w29, mul3");
  COMPARE_MACRO(Uqincb(w29, SVE_ALL), "uqincb w29");

  COMPARE_MACRO(Uqincb(w29, SVE_POW2, 1), "uqincb w29, pow2");
  COMPARE_MACRO(Uqincb(w29, SVE_VL1, 16), "uqincb w29, vl1, mul #16");
  COMPARE_MACRO(Uqincb(w29, SVE_VL2, 15), "uqincb w29, vl2, mul #15");
  COMPARE_MACRO(Uqincb(w29, SVE_VL16, 14), "uqincb w29, vl16, mul #14");
  COMPARE_MACRO(Uqincb(w29, SVE_VL256, 8), "uqincb w29, vl256, mul #8");
  COMPARE_MACRO(Uqincb(w29, SVE_MUL4, 4), "uqincb w29, mul4, mul #4");
  COMPARE_MACRO(Uqincb(w29, SVE_MUL3, 3), "uqincb w29, mul3, mul #3");
  COMPARE_MACRO(Uqincb(w29, SVE_ALL, 2), "uqincb w29, all, mul #2");

  COMPARE_MACRO(Uqincb(x30), "uqincb x30");
  COMPARE_MACRO(Uqincb(xzr, SVE_POW2), "uqincb xzr, pow2");
  COMPARE_MACRO(Uqincb(xzr, SVE_MUL4, 1), "uqincb xzr, mul4");

  COMPARE_MACRO(Uqinch(w11), "uqinch w11");
  COMPARE_MACRO(Uqincw(w11, SVE_POW2), "uqincw w11, pow2");
  COMPARE_MACRO(Uqincd(w11, SVE_VL1), "uqincd w11, vl1");
  COMPARE_MACRO(Uqinch(x11, SVE_VL2), "uqinch x11, vl2");
  COMPARE_MACRO(Uqincw(x11, SVE_VL16), "uqincw x11, vl16");
  COMPARE_MACRO(Uqincd(x11, SVE_VL256), "uqincd x11, vl256");
  COMPARE_MACRO(Uqinch(w11, SVE_MUL4), "uqinch w11, mul4");
  COMPARE_MACRO(Uqincw(w11, SVE_MUL3), "uqincw w11, mul3");
  COMPARE_MACRO(Uqincd(w11, SVE_ALL), "uqincd w11");

  CLEANUP();
}

TEST(sve_vector_dec_element_count) {
  SETUP();

  COMPARE_MACRO(Decd(z14.VnD()), "decd z14.d");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_POW2), "decd z14.d, pow2");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_VL1), "decd z14.d, vl1");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_VL2), "decd z14.d, vl2");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_VL16), "decd z14.d, vl16");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_VL256), "decd z14.d, vl256");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_MUL4), "decd z14.d, mul4");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_MUL3), "decd z14.d, mul3");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_ALL), "decd z14.d");

  COMPARE_MACRO(Decd(z19.VnD(), SVE_POW2, 1), "decd z19.d, pow2");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_VL1, 16), "decd z19.d, vl1, mul #16");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_VL2, 15), "decd z19.d, vl2, mul #15");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_VL16, 14), "decd z19.d, vl16, mul #14");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_VL256, 8), "decd z19.d, vl256, mul #8");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_MUL4, 4), "decd z19.d, mul4, mul #4");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_MUL3, 3), "decd z19.d, mul3, mul #3");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_ALL, 2), "decd z19.d, all, mul #2");

  COMPARE_MACRO(Dech(z27.VnH(), SVE_POW2, 1), "dech z27.h, pow2");
  COMPARE_MACRO(Decw(z27.VnS(), SVE_VL1, 16), "decw z27.s, vl1, mul #16");
  COMPARE_MACRO(Dech(z27.VnH(), SVE_VL2, 15), "dech z27.h, vl2, mul #15");
  COMPARE_MACRO(Decw(z27.VnS(), SVE_VL16, 14), "decw z27.s, vl16, mul #14");
  COMPARE_MACRO(Dech(z27.VnH(), SVE_VL256, 8), "dech z27.h, vl256, mul #8");
  COMPARE_MACRO(Decw(z27.VnS(), SVE_MUL4, 4), "decw z27.s, mul4, mul #4");
  COMPARE_MACRO(Dech(z27.VnH(), SVE_MUL3, 3), "dech z27.h, mul3, mul #3");
  COMPARE_MACRO(Decw(z27.VnS(), SVE_ALL, 2), "decw z27.s, all, mul #2");

  COMPARE_MACRO(Sqdecd(z13.VnD()), "sqdecd z13.d");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_POW2), "sqdecd z13.d, pow2");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_VL1), "sqdecd z13.d, vl1");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_VL2), "sqdecd z13.d, vl2");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_VL16), "sqdecd z13.d, vl16");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_VL256), "sqdecd z13.d, vl256");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_MUL4), "sqdecd z13.d, mul4");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_MUL3), "sqdecd z13.d, mul3");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_ALL), "sqdecd z13.d");

  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_POW2, 1), "sqdecd z9.d, pow2");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_VL1, 16), "sqdecd z9.d, vl1, mul #16");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_VL2, 15), "sqdecd z9.d, vl2, mul #15");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_VL16,
14), "sqdecd z9.d, vl16, mul #14"); 735 COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_VL256, 8), "sqdecd z9.d, vl256, mul #8"); 736 COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_MUL4, 4), "sqdecd z9.d, mul4, mul #4"); 737 COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_MUL3, 3), "sqdecd z9.d, mul3, mul #3"); 738 COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_ALL, 2), "sqdecd z9.d, all, mul #2"); 739 740 COMPARE_MACRO(Sqdech(z31.VnH(), SVE_POW2, 1), "sqdech z31.h, pow2"); 741 COMPARE_MACRO(Sqdecw(z31.VnS(), SVE_VL1, 16), "sqdecw z31.s, vl1, mul #16"); 742 COMPARE_MACRO(Sqdech(z31.VnH(), SVE_VL2, 15), "sqdech z31.h, vl2, mul #15"); 743 COMPARE_MACRO(Sqdecw(z31.VnS(), SVE_VL16, 14), "sqdecw z31.s, vl16, mul #14"); 744 COMPARE_MACRO(Sqdech(z31.VnH(), SVE_VL256, 8), "sqdech z31.h, vl256, mul #8"); 745 COMPARE_MACRO(Sqdecw(z31.VnS(), SVE_MUL4, 4), "sqdecw z31.s, mul4, mul #4"); 746 COMPARE_MACRO(Sqdech(z31.VnH(), SVE_MUL3, 3), "sqdech z31.h, mul3, mul #3"); 747 COMPARE_MACRO(Sqdecw(z31.VnS(), SVE_ALL, 2), "sqdecw z31.s, all, mul #2"); 748 749 COMPARE_MACRO(Uqdecd(z1.VnD()), "uqdecd z1.d"); 750 COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_POW2), "uqdecd z1.d, pow2"); 751 COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_VL1), "uqdecd z1.d, vl1"); 752 COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_VL2), "uqdecd z1.d, vl2"); 753 COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_VL16), "uqdecd z1.d, vl16"); 754 COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_VL256), "uqdecd z1.d, vl256"); 755 COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_MUL4), "uqdecd z1.d, mul4"); 756 COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_MUL3), "uqdecd z1.d, mul3"); 757 COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_ALL), "uqdecd z1.d"); 758 759 COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_POW2, 1), "uqdecd z7.d, pow2"); 760 COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_VL1, 16), "uqdecd z7.d, vl1, mul #16"); 761 COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_VL2, 15), "uqdecd z7.d, vl2, mul #15"); 762 COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_VL16, 14), "uqdecd z7.d, vl16, mul #14"); 763 COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_VL256, 8), "uqdecd z7.d, vl256, mul #8"); 764 
COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_MUL4, 4), "uqdecd z7.d, mul4, mul #4"); 765 COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_MUL3, 3), "uqdecd z7.d, mul3, mul #3"); 766 COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_ALL, 2), "uqdecd z7.d, all, mul #2"); 767 768 COMPARE_MACRO(Uqdech(z26.VnH(), SVE_POW2, 1), "uqdech z26.h, pow2"); 769 COMPARE_MACRO(Uqdecw(z26.VnS(), SVE_VL1, 16), "uqdecw z26.s, vl1, mul #16"); 770 COMPARE_MACRO(Uqdech(z26.VnH(), SVE_VL2, 15), "uqdech z26.h, vl2, mul #15"); 771 COMPARE_MACRO(Uqdecw(z26.VnS(), SVE_VL16, 14), "uqdecw z26.s, vl16, mul #14"); 772 COMPARE_MACRO(Uqdech(z26.VnH(), SVE_VL256, 8), "uqdech z26.h, vl256, mul #8"); 773 COMPARE_MACRO(Uqdecw(z26.VnS(), SVE_MUL4, 4), "uqdecw z26.s, mul4, mul #4"); 774 COMPARE_MACRO(Uqdech(z26.VnH(), SVE_MUL3, 3), "uqdech z26.h, mul3, mul #3"); 775 COMPARE_MACRO(Uqdecw(z26.VnS(), SVE_ALL, 2), "uqdecw z26.s, all, mul #2"); 776 777 CLEANUP(); 778} 779 780TEST(sve_vector_inc_element_count) { 781 SETUP(); 782 783 COMPARE_MACRO(Incd(z16.VnD()), "incd z16.d"); 784 COMPARE_MACRO(Incd(z16.VnD(), SVE_POW2), "incd z16.d, pow2"); 785 COMPARE_MACRO(Incd(z16.VnD(), SVE_VL1), "incd z16.d, vl1"); 786 COMPARE_MACRO(Incd(z16.VnD(), SVE_VL2), "incd z16.d, vl2"); 787 COMPARE_MACRO(Incd(z16.VnD(), SVE_VL16), "incd z16.d, vl16"); 788 COMPARE_MACRO(Incd(z16.VnD(), SVE_VL256), "incd z16.d, vl256"); 789 COMPARE_MACRO(Incd(z16.VnD(), SVE_MUL4), "incd z16.d, mul4"); 790 COMPARE_MACRO(Incd(z16.VnD(), SVE_MUL3), "incd z16.d, mul3"); 791 COMPARE_MACRO(Incd(z16.VnD(), SVE_ALL), "incd z16.d"); 792 793 COMPARE_MACRO(Incd(z18.VnD(), SVE_POW2, 1), "incd z18.d, pow2"); 794 COMPARE_MACRO(Incd(z18.VnD(), SVE_VL1, 16), "incd z18.d, vl1, mul #16"); 795 COMPARE_MACRO(Incd(z18.VnD(), SVE_VL2, 15), "incd z18.d, vl2, mul #15"); 796 COMPARE_MACRO(Incd(z18.VnD(), SVE_VL16, 14), "incd z18.d, vl16, mul #14"); 797 COMPARE_MACRO(Incd(z18.VnD(), SVE_VL256, 8), "incd z18.d, vl256, mul #8"); 798 COMPARE_MACRO(Incd(z18.VnD(), SVE_MUL4, 4), "incd z18.d, mul4, mul #4"); 799 
COMPARE_MACRO(Incd(z18.VnD(), SVE_MUL3, 3), "incd z18.d, mul3, mul #3"); 800 COMPARE_MACRO(Incd(z18.VnD(), SVE_ALL, 2), "incd z18.d, all, mul #2"); 801 802 COMPARE_MACRO(Inch(z24.VnH(), SVE_POW2, 1), "inch z24.h, pow2"); 803 COMPARE_MACRO(Incw(z24.VnS(), SVE_VL1, 16), "incw z24.s, vl1, mul #16"); 804 COMPARE_MACRO(Inch(z24.VnH(), SVE_VL2, 15), "inch z24.h, vl2, mul #15"); 805 COMPARE_MACRO(Incw(z24.VnS(), SVE_VL16, 14), "incw z24.s, vl16, mul #14"); 806 COMPARE_MACRO(Inch(z24.VnH(), SVE_VL256, 8), "inch z24.h, vl256, mul #8"); 807 COMPARE_MACRO(Incw(z24.VnS(), SVE_MUL4, 4), "incw z24.s, mul4, mul #4"); 808 COMPARE_MACRO(Inch(z24.VnH(), SVE_MUL3, 3), "inch z24.h, mul3, mul #3"); 809 COMPARE_MACRO(Incw(z24.VnS(), SVE_ALL, 2), "incw z24.s, all, mul #2"); 810 811 COMPARE_MACRO(Sqincd(z10.VnD()), "sqincd z10.d"); 812 COMPARE_MACRO(Sqincd(z10.VnD(), SVE_POW2), "sqincd z10.d, pow2"); 813 COMPARE_MACRO(Sqincd(z10.VnD(), SVE_VL1), "sqincd z10.d, vl1"); 814 COMPARE_MACRO(Sqincd(z10.VnD(), SVE_VL2), "sqincd z10.d, vl2"); 815 COMPARE_MACRO(Sqincd(z10.VnD(), SVE_VL16), "sqincd z10.d, vl16"); 816 COMPARE_MACRO(Sqincd(z10.VnD(), SVE_VL256), "sqincd z10.d, vl256"); 817 COMPARE_MACRO(Sqincd(z10.VnD(), SVE_MUL4), "sqincd z10.d, mul4"); 818 COMPARE_MACRO(Sqincd(z10.VnD(), SVE_MUL3), "sqincd z10.d, mul3"); 819 COMPARE_MACRO(Sqincd(z10.VnD(), SVE_ALL), "sqincd z10.d"); 820 821 COMPARE_MACRO(Sqincd(z3.VnD(), SVE_POW2, 1), "sqincd z3.d, pow2"); 822 COMPARE_MACRO(Sqincd(z3.VnD(), SVE_VL1, 16), "sqincd z3.d, vl1, mul #16"); 823 COMPARE_MACRO(Sqincd(z3.VnD(), SVE_VL2, 15), "sqincd z3.d, vl2, mul #15"); 824 COMPARE_MACRO(Sqincd(z3.VnD(), SVE_VL16, 14), "sqincd z3.d, vl16, mul #14"); 825 COMPARE_MACRO(Sqincd(z3.VnD(), SVE_VL256, 8), "sqincd z3.d, vl256, mul #8"); 826 COMPARE_MACRO(Sqincd(z3.VnD(), SVE_MUL4, 4), "sqincd z3.d, mul4, mul #4"); 827 COMPARE_MACRO(Sqincd(z3.VnD(), SVE_MUL3, 3), "sqincd z3.d, mul3, mul #3"); 828 COMPARE_MACRO(Sqincd(z3.VnD(), SVE_ALL, 2), "sqincd z3.d, all, mul 
#2"); 829 830 COMPARE_MACRO(Sqinch(z24.VnH(), SVE_POW2, 1), "sqinch z24.h, pow2"); 831 COMPARE_MACRO(Sqincw(z24.VnS(), SVE_VL1, 16), "sqincw z24.s, vl1, mul #16"); 832 COMPARE_MACRO(Sqinch(z24.VnH(), SVE_VL2, 15), "sqinch z24.h, vl2, mul #15"); 833 COMPARE_MACRO(Sqincw(z24.VnS(), SVE_VL16, 14), "sqincw z24.s, vl16, mul #14"); 834 COMPARE_MACRO(Sqinch(z24.VnH(), SVE_VL256, 8), "sqinch z24.h, vl256, mul #8"); 835 COMPARE_MACRO(Sqincw(z24.VnS(), SVE_MUL4, 4), "sqincw z24.s, mul4, mul #4"); 836 COMPARE_MACRO(Sqinch(z24.VnH(), SVE_MUL3, 3), "sqinch z24.h, mul3, mul #3"); 837 COMPARE_MACRO(Sqincw(z24.VnS(), SVE_ALL, 2), "sqincw z24.s, all, mul #2"); 838 839 COMPARE_MACRO(Uqincd(z10.VnD()), "uqincd z10.d"); 840 COMPARE_MACRO(Uqincd(z10.VnD(), SVE_POW2), "uqincd z10.d, pow2"); 841 COMPARE_MACRO(Uqincd(z10.VnD(), SVE_VL1), "uqincd z10.d, vl1"); 842 COMPARE_MACRO(Uqincd(z10.VnD(), SVE_VL2), "uqincd z10.d, vl2"); 843 COMPARE_MACRO(Uqincd(z10.VnD(), SVE_VL16), "uqincd z10.d, vl16"); 844 COMPARE_MACRO(Uqincd(z10.VnD(), SVE_VL256), "uqincd z10.d, vl256"); 845 COMPARE_MACRO(Uqincd(z10.VnD(), SVE_MUL4), "uqincd z10.d, mul4"); 846 COMPARE_MACRO(Uqincd(z10.VnD(), SVE_MUL3), "uqincd z10.d, mul3"); 847 COMPARE_MACRO(Uqincd(z10.VnD(), SVE_ALL), "uqincd z10.d"); 848 849 COMPARE_MACRO(Uqincd(z6.VnD(), SVE_POW2, 1), "uqincd z6.d, pow2"); 850 COMPARE_MACRO(Uqincd(z6.VnD(), SVE_VL1, 16), "uqincd z6.d, vl1, mul #16"); 851 COMPARE_MACRO(Uqincd(z6.VnD(), SVE_VL2, 15), "uqincd z6.d, vl2, mul #15"); 852 COMPARE_MACRO(Uqincd(z6.VnD(), SVE_VL16, 14), "uqincd z6.d, vl16, mul #14"); 853 COMPARE_MACRO(Uqincd(z6.VnD(), SVE_VL256, 8), "uqincd z6.d, vl256, mul #8"); 854 COMPARE_MACRO(Uqincd(z6.VnD(), SVE_MUL4, 4), "uqincd z6.d, mul4, mul #4"); 855 COMPARE_MACRO(Uqincd(z6.VnD(), SVE_MUL3, 3), "uqincd z6.d, mul3, mul #3"); 856 COMPARE_MACRO(Uqincd(z6.VnD(), SVE_ALL, 2), "uqincd z6.d, all, mul #2"); 857 858 COMPARE_MACRO(Uqinch(z20.VnH(), SVE_POW2, 1), "uqinch z20.h, pow2"); 859 
COMPARE_MACRO(Uqincw(z20.VnS(), SVE_VL1, 16), "uqincw z20.s, vl1, mul #16"); 860 COMPARE_MACRO(Uqinch(z20.VnH(), SVE_VL2, 15), "uqinch z20.h, vl2, mul #15"); 861 COMPARE_MACRO(Uqincw(z20.VnS(), SVE_VL16, 14), "uqincw z20.s, vl16, mul #14"); 862 COMPARE_MACRO(Uqinch(z20.VnH(), SVE_VL256, 8), "uqinch z20.h, vl256, mul #8"); 863 COMPARE_MACRO(Uqincw(z20.VnS(), SVE_MUL4, 4), "uqincw z20.s, mul4, mul #4"); 864 COMPARE_MACRO(Uqinch(z20.VnH(), SVE_MUL3, 3), "uqinch z20.h, mul3, mul #3"); 865 COMPARE_MACRO(Uqincw(z20.VnS(), SVE_ALL, 2), "uqincw z20.s, all, mul #2"); 866 867 CLEANUP(); 868} 869 870TEST(sve_fp_accumulating_reduction) { 871 SETUP(); 872 873 COMPARE(fadda(h10, p2, h10, z0.VnH()), "fadda h10, p2, h10, z0.h"); 874 COMPARE(fadda(s10, p2, s10, z0.VnS()), "fadda s10, p2, s10, z0.s"); 875 COMPARE(fadda(d10, p2, d10, z0.VnD()), "fadda d10, p2, d10, z0.d"); 876 877 CLEANUP(); 878} 879 880TEST(sve_fp_arithmetic_predicated) { 881 SETUP(); 882 883 COMPARE(fdiv(z9.VnH(), p4.Merging(), z9.VnH(), z4.VnH()), 884 "fdiv z9.h, p4/m, z9.h, z4.h"); 885 COMPARE(fdiv(z19.VnS(), p5.Merging(), z19.VnS(), z14.VnS()), 886 "fdiv z19.s, p5/m, z19.s, z14.s"); 887 COMPARE(fdiv(z29.VnD(), p6.Merging(), z29.VnD(), z24.VnD()), 888 "fdiv z29.d, p6/m, z29.d, z24.d"); 889 COMPARE(fdivr(z21.VnH(), p3.Merging(), z21.VnH(), z11.VnH()), 890 "fdivr z21.h, p3/m, z21.h, z11.h"); 891 COMPARE(fdivr(z23.VnS(), p5.Merging(), z23.VnS(), z15.VnS()), 892 "fdivr z23.s, p5/m, z23.s, z15.s"); 893 COMPARE(fdivr(z25.VnD(), p7.Merging(), z25.VnD(), z19.VnD()), 894 "fdivr z25.d, p7/m, z25.d, z19.d"); 895 COMPARE(fmax(z4.VnH(), p1.Merging(), z4.VnH(), z29.VnH()), 896 "fmax z4.h, p1/m, z4.h, z29.h"); 897 COMPARE(fmax(z14.VnS(), p3.Merging(), z14.VnS(), z29.VnS()), 898 "fmax z14.s, p3/m, z14.s, z29.s"); 899 COMPARE(fmax(z24.VnD(), p5.Merging(), z24.VnD(), z29.VnD()), 900 "fmax z24.d, p5/m, z24.d, z29.d"); 901 COMPARE(fmin(z1.VnH(), p2.Merging(), z1.VnH(), z30.VnH()), 902 "fmin z1.h, p2/m, z1.h, z30.h"); 903 
COMPARE(fmin(z11.VnS(), p4.Merging(), z11.VnS(), z30.VnS()), 904 "fmin z11.s, p4/m, z11.s, z30.s"); 905 COMPARE(fmin(z21.VnD(), p6.Merging(), z21.VnD(), z30.VnD()), 906 "fmin z21.d, p6/m, z21.d, z30.d"); 907 908 COMPARE(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 0), 909 "ftmad z21.h, z21.h, z22.h, #0"); 910 COMPARE(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 2), 911 "ftmad z21.h, z21.h, z22.h, #2"); 912 COMPARE(ftmad(z2.VnH(), z2.VnH(), z21.VnH(), 7), 913 "ftmad z2.h, z2.h, z21.h, #7"); 914 COMPARE(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 0), 915 "ftmad z21.s, z21.s, z22.s, #0"); 916 COMPARE(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 2), 917 "ftmad z21.s, z21.s, z22.s, #2"); 918 COMPARE(ftmad(z2.VnS(), z2.VnS(), z21.VnS(), 7), 919 "ftmad z2.s, z2.s, z21.s, #7"); 920 COMPARE(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 0), 921 "ftmad z21.d, z21.d, z22.d, #0"); 922 COMPARE(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 2), 923 "ftmad z21.d, z21.d, z22.d, #2"); 924 COMPARE(ftmad(z2.VnD(), z2.VnD(), z21.VnD(), 7), 925 "ftmad z2.d, z2.d, z21.d, #7"); 926 927 COMPARE_MACRO(Ftmad(z3.VnH(), z2.VnH(), z1.VnH(), 1), 928 "movprfx z3, z2\n" 929 "ftmad z3.h, z3.h, z1.h, #1"); 930 COMPARE_MACRO(Ftmad(z6.VnS(), z4.VnS(), z6.VnS(), 1), 931 "mov z31.d, z6.d\n" 932 "movprfx z6, z4\n" 933 "ftmad z6.s, z6.s, z31.s, #1"); 934 935 COMPARE(fabd(z31.VnH(), p7.Merging(), z31.VnH(), z17.VnH()), 936 "fabd z31.h, p7/m, z31.h, z17.h"); 937 COMPARE(fabd(z31.VnS(), p7.Merging(), z31.VnS(), z17.VnS()), 938 "fabd z31.s, p7/m, z31.s, z17.s"); 939 COMPARE(fabd(z31.VnD(), p7.Merging(), z31.VnD(), z17.VnD()), 940 "fabd z31.d, p7/m, z31.d, z17.d"); 941 COMPARE(fadd(z24.VnH(), p2.Merging(), z24.VnH(), z15.VnH()), 942 "fadd z24.h, p2/m, z24.h, z15.h"); 943 COMPARE(fadd(z24.VnS(), p2.Merging(), z24.VnS(), z15.VnS()), 944 "fadd z24.s, p2/m, z24.s, z15.s"); 945 COMPARE(fadd(z24.VnD(), p2.Merging(), z24.VnD(), z15.VnD()), 946 "fadd z24.d, p2/m, z24.d, z15.d"); 947 COMPARE(fmaxnm(z15.VnH(), p4.Merging(), z15.VnH(), z3.VnH()), 948 "fmaxnm 
z15.h, p4/m, z15.h, z3.h"); 949 COMPARE(fmaxnm(z15.VnS(), p4.Merging(), z15.VnS(), z3.VnS()), 950 "fmaxnm z15.s, p4/m, z15.s, z3.s"); 951 COMPARE(fmaxnm(z15.VnD(), p4.Merging(), z15.VnD(), z3.VnD()), 952 "fmaxnm z15.d, p4/m, z15.d, z3.d"); 953 COMPARE(fminnm(z19.VnH(), p2.Merging(), z19.VnH(), z29.VnH()), 954 "fminnm z19.h, p2/m, z19.h, z29.h"); 955 COMPARE(fminnm(z19.VnS(), p2.Merging(), z19.VnS(), z29.VnS()), 956 "fminnm z19.s, p2/m, z19.s, z29.s"); 957 COMPARE(fminnm(z19.VnD(), p2.Merging(), z19.VnD(), z29.VnD()), 958 "fminnm z19.d, p2/m, z19.d, z29.d"); 959 COMPARE(fmulx(z30.VnH(), p6.Merging(), z30.VnH(), z20.VnH()), 960 "fmulx z30.h, p6/m, z30.h, z20.h"); 961 COMPARE(fmulx(z30.VnS(), p6.Merging(), z30.VnS(), z20.VnS()), 962 "fmulx z30.s, p6/m, z30.s, z20.s"); 963 COMPARE(fmulx(z30.VnD(), p6.Merging(), z30.VnD(), z20.VnD()), 964 "fmulx z30.d, p6/m, z30.d, z20.d"); 965 COMPARE(fmul(z26.VnH(), p2.Merging(), z26.VnH(), z6.VnH()), 966 "fmul z26.h, p2/m, z26.h, z6.h"); 967 COMPARE(fmul(z26.VnS(), p2.Merging(), z26.VnS(), z6.VnS()), 968 "fmul z26.s, p2/m, z26.s, z6.s"); 969 COMPARE(fmul(z26.VnD(), p2.Merging(), z26.VnD(), z6.VnD()), 970 "fmul z26.d, p2/m, z26.d, z6.d"); 971 COMPARE(fscale(z8.VnH(), p3.Merging(), z8.VnH(), z6.VnH()), 972 "fscale z8.h, p3/m, z8.h, z6.h"); 973 COMPARE(fscale(z8.VnS(), p3.Merging(), z8.VnS(), z6.VnS()), 974 "fscale z8.s, p3/m, z8.s, z6.s"); 975 COMPARE(fscale(z8.VnD(), p3.Merging(), z8.VnD(), z6.VnD()), 976 "fscale z8.d, p3/m, z8.d, z6.d"); 977 COMPARE(fsubr(z16.VnH(), p5.Merging(), z16.VnH(), z15.VnH()), 978 "fsubr z16.h, p5/m, z16.h, z15.h"); 979 COMPARE(fsubr(z16.VnS(), p5.Merging(), z16.VnS(), z15.VnS()), 980 "fsubr z16.s, p5/m, z16.s, z15.s"); 981 COMPARE(fsubr(z16.VnD(), p5.Merging(), z16.VnD(), z15.VnD()), 982 "fsubr z16.d, p5/m, z16.d, z15.d"); 983 COMPARE(fsub(z16.VnH(), p5.Merging(), z16.VnH(), z26.VnH()), 984 "fsub z16.h, p5/m, z16.h, z26.h"); 985 COMPARE(fsub(z16.VnS(), p5.Merging(), z16.VnS(), z26.VnS()), 986 "fsub z16.s, 
p5/m, z16.s, z26.s"); 987 COMPARE(fsub(z16.VnD(), p5.Merging(), z16.VnD(), z26.VnD()), 988 "fsub z16.d, p5/m, z16.d, z26.d"); 989 990 COMPARE_MACRO(Fsub(z0.VnH(), p0.Merging(), z1.VnH(), z0.VnH()), 991 "fsubr z0.h, p0/m, z0.h, z1.h"); 992 993 COMPARE_MACRO(Fadd(z0.VnH(), 994 p0.Merging(), 995 z1.VnH(), 996 z2.VnH(), 997 FastNaNPropagation), 998 "movprfx z0.h, p0/m, z1.h\n" 999 "fadd z0.h, p0/m, z0.h, z2.h"); 1000 COMPARE_MACRO(Fadd(z0.VnH(), 1001 p0.Merging(), 1002 z1.VnH(), 1003 z0.VnH(), 1004 FastNaNPropagation), 1005 "fadd z0.h, p0/m, z0.h, z1.h"); 1006 COMPARE_MACRO(Fadd(z0.VnH(), 1007 p0.Merging(), 1008 z1.VnH(), 1009 z0.VnH(), 1010 StrictNaNPropagation), 1011 "movprfx z31.h, p0/m, z1.h\n" 1012 "fadd z31.h, p0/m, z31.h, z0.h\n" 1013 "mov z0.d, z31.d"); 1014 COMPARE_MACRO(Fmul(z1.VnS(), 1015 p1.Merging(), 1016 z2.VnS(), 1017 z3.VnS(), 1018 FastNaNPropagation), 1019 "movprfx z1.s, p1/m, z2.s\n" 1020 "fmul z1.s, p1/m, z1.s, z3.s"); 1021 COMPARE_MACRO(Fmulx(z2.VnD(), 1022 p2.Merging(), 1023 z3.VnD(), 1024 z4.VnD(), 1025 FastNaNPropagation), 1026 "movprfx z2.d, p2/m, z3.d\n" 1027 "fmulx z2.d, p2/m, z2.d, z4.d"); 1028 COMPARE_MACRO(Fminnm(z3.VnH(), 1029 p3.Merging(), 1030 z4.VnH(), 1031 z5.VnH(), 1032 FastNaNPropagation), 1033 "movprfx z3.h, p3/m, z4.h\n" 1034 "fminnm z3.h, p3/m, z3.h, z5.h"); 1035 COMPARE_MACRO(Fmaxnm(z4.VnS(), 1036 p4.Merging(), 1037 z5.VnS(), 1038 z6.VnS(), 1039 FastNaNPropagation), 1040 "movprfx z4.s, p4/m, z5.s\n" 1041 "fmaxnm z4.s, p4/m, z4.s, z6.s"); 1042 COMPARE_MACRO(Fsub(z5.VnD(), p5.Merging(), z6.VnD(), z7.VnD()), 1043 "movprfx z5.d, p5/m, z6.d\n" 1044 "fsub z5.d, p5/m, z5.d, z7.d"); 1045 COMPARE_MACRO(Fscale(z6.VnH(), p6.Merging(), z7.VnH(), z8.VnH()), 1046 "movprfx z6.h, p6/m, z7.h\n" 1047 "fscale z6.h, p6/m, z6.h, z8.h"); 1048 COMPARE_MACRO(Fscale(z7.VnS(), p7.Merging(), z8.VnS(), z7.VnS()), 1049 "mov z31.d, z7.d\n" 1050 "movprfx z7.s, p7/m, z8.s\n" 1051 "fscale z7.s, p7/m, z7.s, z31.s"); 1052 1053 COMPARE(fadd(z18.VnH(), p0.Merging(), 
z18.VnH(), 0.5), 1054 "fadd z18.h, p0/m, z18.h, #0.5"); 1055 COMPARE(fadd(z18.VnS(), p0.Merging(), z18.VnS(), 1.0), 1056 "fadd z18.s, p0/m, z18.s, #1.0"); 1057 COMPARE(fadd(z18.VnD(), p0.Merging(), z18.VnD(), 1.0), 1058 "fadd z18.d, p0/m, z18.d, #1.0"); 1059 COMPARE(fmaxnm(z6.VnH(), p1.Merging(), z6.VnH(), 0.0), 1060 "fmaxnm z6.h, p1/m, z6.h, #0.0"); 1061 COMPARE(fmaxnm(z6.VnS(), p1.Merging(), z6.VnS(), 1.0), 1062 "fmaxnm z6.s, p1/m, z6.s, #1.0"); 1063 COMPARE(fmaxnm(z6.VnD(), p1.Merging(), z6.VnD(), 1.0), 1064 "fmaxnm z6.d, p1/m, z6.d, #1.0"); 1065 COMPARE(fmax(z8.VnH(), p6.Merging(), z8.VnH(), 0.0), 1066 "fmax z8.h, p6/m, z8.h, #0.0"); 1067 COMPARE(fmax(z8.VnS(), p6.Merging(), z8.VnS(), 0.0), 1068 "fmax z8.s, p6/m, z8.s, #0.0"); 1069 COMPARE(fmax(z8.VnD(), p6.Merging(), z8.VnD(), 1.0), 1070 "fmax z8.d, p6/m, z8.d, #1.0"); 1071 COMPARE(fminnm(z26.VnH(), p0.Merging(), z26.VnH(), 1.0), 1072 "fminnm z26.h, p0/m, z26.h, #1.0"); 1073 COMPARE(fminnm(z26.VnS(), p0.Merging(), z26.VnS(), 0.0), 1074 "fminnm z26.s, p0/m, z26.s, #0.0"); 1075 COMPARE(fminnm(z26.VnD(), p0.Merging(), z26.VnD(), 1.0), 1076 "fminnm z26.d, p0/m, z26.d, #1.0"); 1077 COMPARE(fmin(z22.VnH(), p0.Merging(), z22.VnH(), 1.0), 1078 "fmin z22.h, p0/m, z22.h, #1.0"); 1079 COMPARE(fmin(z22.VnS(), p0.Merging(), z22.VnS(), 1.0), 1080 "fmin z22.s, p0/m, z22.s, #1.0"); 1081 COMPARE(fmin(z22.VnD(), p0.Merging(), z22.VnD(), 0.0), 1082 "fmin z22.d, p0/m, z22.d, #0.0"); 1083 COMPARE(fmul(z21.VnH(), p3.Merging(), z21.VnH(), 0.5), 1084 "fmul z21.h, p3/m, z21.h, #0.5"); 1085 COMPARE(fmul(z21.VnS(), p3.Merging(), z21.VnS(), 2.0), 1086 "fmul z21.s, p3/m, z21.s, #2.0"); 1087 COMPARE(fmul(z21.VnD(), p3.Merging(), z21.VnD(), 2.0), 1088 "fmul z21.d, p3/m, z21.d, #2.0"); 1089 COMPARE(fsubr(z21.VnH(), p3.Merging(), z21.VnH(), 1.0), 1090 "fsubr z21.h, p3/m, z21.h, #1.0"); 1091 COMPARE(fsubr(z21.VnS(), p3.Merging(), z21.VnS(), 0.5), 1092 "fsubr z21.s, p3/m, z21.s, #0.5"); 1093 COMPARE(fsubr(z21.VnD(), p3.Merging(), z21.VnD(), 
1.0), 1094 "fsubr z21.d, p3/m, z21.d, #1.0"); 1095 COMPARE(fsub(z26.VnH(), p4.Merging(), z26.VnH(), 0.5), 1096 "fsub z26.h, p4/m, z26.h, #0.5"); 1097 COMPARE(fsub(z26.VnS(), p4.Merging(), z26.VnS(), 1.0), 1098 "fsub z26.s, p4/m, z26.s, #1.0"); 1099 COMPARE(fsub(z26.VnD(), p4.Merging(), z26.VnD(), 0.5), 1100 "fsub z26.d, p4/m, z26.d, #0.5"); 1101 1102 COMPARE_MACRO(Fadd(z18.VnH(), p0.Merging(), z8.VnH(), 1.0), 1103 "movprfx z18.h, p0/m, z8.h\n" 1104 "fadd z18.h, p0/m, z18.h, #1.0"); 1105 COMPARE_MACRO(Fsub(z19.VnH(), p1.Merging(), z9.VnH(), 0.5), 1106 "movprfx z19.h, p1/m, z9.h\n" 1107 "fsub z19.h, p1/m, z19.h, #0.5"); 1108 COMPARE_MACRO(Fsub(z20.VnH(), p2.Merging(), 1.0, z10.VnH()), 1109 "movprfx z20.h, p2/m, z10.h\n" 1110 "fsubr z20.h, p2/m, z20.h, #1.0"); 1111 COMPARE_MACRO(Fmul(z21.VnH(), p3.Merging(), z11.VnH(), 2.0), 1112 "movprfx z21.h, p3/m, z11.h\n" 1113 "fmul z21.h, p3/m, z21.h, #2.0"); 1114 COMPARE_MACRO(Fmin(z22.VnH(), p4.Merging(), z12.VnH(), 0.0), 1115 "movprfx z22.h, p4/m, z12.h\n" 1116 "fmin z22.h, p4/m, z22.h, #0.0"); 1117 COMPARE_MACRO(Fminnm(z22.VnH(), p4.Merging(), z12.VnH(), 0.0), 1118 "movprfx z22.h, p4/m, z12.h\n" 1119 "fminnm z22.h, p4/m, z22.h, #0.0"); 1120 COMPARE_MACRO(Fmax(z23.VnH(), p5.Merging(), z13.VnH(), 1.0), 1121 "movprfx z23.h, p5/m, z13.h\n" 1122 "fmax z23.h, p5/m, z23.h, #1.0"); 1123 COMPARE_MACRO(Fmaxnm(z23.VnH(), p5.Merging(), z13.VnH(), 1.0), 1124 "movprfx z23.h, p5/m, z13.h\n" 1125 "fmaxnm z23.h, p5/m, z23.h, #1.0"); 1126 CLEANUP(); 1127} 1128 1129TEST(sve_fp_arithmetic_predicated_macro_fast_nan_propagation) { 1130#pragma GCC diagnostic push 1131#pragma GCC diagnostic ignored "-Wshadow" 1132 1133 // Shadow the `MacroAssembler` type so that the test macros work without 1134 // modification. 
1135 typedef FastNaNPropagationMacroAssembler MacroAssembler; 1136 1137 SETUP(); 1138 1139 COMPARE_MACRO(Fdiv(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()), 1140 "fdiv z8.h, p4/m, z8.h, z4.h"); 1141 COMPARE_MACRO(Fdiv(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()), 1142 "fdiv z18.s, p5/m, z18.s, z14.s"); 1143 COMPARE_MACRO(Fdiv(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()), 1144 "fdiv z28.d, p6/m, z28.d, z24.d"); 1145 COMPARE_MACRO(Fdiv(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()), 1146 "fdiv z17.h, p2/m, z17.h, z17.h"); 1147 COMPARE_MACRO(Fdiv(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()), 1148 "fdivr z20.s, p5/m, z20.s, z23.s"); 1149 COMPARE_MACRO(Fdiv(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()), 1150 "movprfx z15.d, p6/m, z30.d\n" 1151 "fdiv z15.d, p6/m, z15.d, z8.d"); 1152 1153 COMPARE_MACRO(Fmax(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()), 1154 "fmax z8.h, p4/m, z8.h, z4.h"); 1155 COMPARE_MACRO(Fmax(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()), 1156 "fmax z18.s, p5/m, z18.s, z14.s"); 1157 COMPARE_MACRO(Fmax(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()), 1158 "fmax z28.d, p6/m, z28.d, z24.d"); 1159 COMPARE_MACRO(Fmax(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()), 1160 "fmax z17.h, p2/m, z17.h, z17.h"); 1161 COMPARE_MACRO(Fmax(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()), 1162 "fmax z20.s, p5/m, z20.s, z23.s"); 1163 COMPARE_MACRO(Fmax(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()), 1164 "movprfx z15.d, p6/m, z30.d\n" 1165 "fmax z15.d, p6/m, z15.d, z8.d"); 1166 1167 COMPARE_MACRO(Fmin(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()), 1168 "fmin z8.h, p4/m, z8.h, z4.h"); 1169 COMPARE_MACRO(Fmin(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()), 1170 "fmin z18.s, p5/m, z18.s, z14.s"); 1171 COMPARE_MACRO(Fmin(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()), 1172 "fmin z28.d, p6/m, z28.d, z24.d"); 1173 COMPARE_MACRO(Fmin(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()), 1174 "fmin z17.h, p2/m, z17.h, z17.h"); 1175 COMPARE_MACRO(Fmin(z20.VnS(), p5.Merging(), z23.VnS(), 
z20.VnS()), 1176 "fmin z20.s, p5/m, z20.s, z23.s"); 1177 COMPARE_MACRO(Fmin(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()), 1178 "movprfx z15.d, p6/m, z30.d\n" 1179 "fmin z15.d, p6/m, z15.d, z8.d"); 1180 1181 CLEANUP(); 1182 1183#pragma GCC diagnostic pop 1184} 1185 1186TEST(sve_fp_arithmetic_predicated_macro_strict_nan_propagation) { 1187#pragma GCC diagnostic push 1188#pragma GCC diagnostic ignored "-Wshadow" 1189 1190 // Shadow the `MacroAssembler` type so that the test macros work without 1191 // modification. 1192 typedef StrictNaNPropagationMacroAssembler MacroAssembler; 1193 1194 SETUP(); 1195 1196 COMPARE_MACRO(Fdiv(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()), 1197 "fdiv z8.h, p4/m, z8.h, z4.h"); 1198 COMPARE_MACRO(Fdiv(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()), 1199 "fdiv z18.s, p5/m, z18.s, z14.s"); 1200 COMPARE_MACRO(Fdiv(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()), 1201 "fdiv z28.d, p6/m, z28.d, z24.d"); 1202 COMPARE_MACRO(Fdiv(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()), 1203 "fdiv z17.h, p2/m, z17.h, z17.h"); 1204 COMPARE_MACRO(Fdiv(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()), 1205 "fdivr z20.s, p5/m, z20.s, z23.s"); 1206 COMPARE_MACRO(Fdiv(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()), 1207 "movprfx z15.d, p6/m, z30.d\n" 1208 "fdiv z15.d, p6/m, z15.d, z8.d"); 1209 1210 COMPARE_MACRO(Fmax(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()), 1211 "fmax z8.h, p4/m, z8.h, z4.h"); 1212 COMPARE_MACRO(Fmax(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()), 1213 "fmax z18.s, p5/m, z18.s, z14.s"); 1214 COMPARE_MACRO(Fmax(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()), 1215 "fmax z28.d, p6/m, z28.d, z24.d"); 1216 COMPARE_MACRO(Fmax(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()), 1217 "fmax z17.h, p2/m, z17.h, z17.h"); 1218 COMPARE_MACRO(Fmax(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()), 1219 "movprfx z31.s, p5/m, z23.s\n" 1220 "fmax z31.s, p5/m, z31.s, z20.s\n" 1221 "mov z20.d, z31.d"); 1222 COMPARE_MACRO(Fmax(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()), 
1223 "movprfx z15.d, p6/m, z30.d\n" 1224 "fmax z15.d, p6/m, z15.d, z8.d"); 1225 1226 COMPARE_MACRO(Fmin(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()), 1227 "fmin z8.h, p4/m, z8.h, z4.h"); 1228 COMPARE_MACRO(Fmin(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()), 1229 "fmin z18.s, p5/m, z18.s, z14.s"); 1230 COMPARE_MACRO(Fmin(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()), 1231 "fmin z28.d, p6/m, z28.d, z24.d"); 1232 COMPARE_MACRO(Fmin(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()), 1233 "fmin z17.h, p2/m, z17.h, z17.h"); 1234 COMPARE_MACRO(Fmin(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()), 1235 "movprfx z31.s, p5/m, z23.s\n" 1236 "fmin z31.s, p5/m, z31.s, z20.s\n" 1237 "mov z20.d, z31.d"); 1238 COMPARE_MACRO(Fmin(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()), 1239 "movprfx z15.d, p6/m, z30.d\n" 1240 "fmin z15.d, p6/m, z15.d, z8.d"); 1241 1242 CLEANUP(); 1243 1244#pragma GCC diagnostic pop 1245} 1246 1247TEST(sve_fp_arithmetic_unpredicated) { 1248 SETUP(); 1249 1250 COMPARE(fadd(z5.VnH(), z24.VnH(), z11.VnH()), "fadd z5.h, z24.h, z11.h"); 1251 COMPARE(fadd(z15.VnS(), z14.VnS(), z12.VnS()), "fadd z15.s, z14.s, z12.s"); 1252 COMPARE(fadd(z25.VnD(), z4.VnD(), z13.VnD()), "fadd z25.d, z4.d, z13.d"); 1253 COMPARE(fmul(z9.VnH(), z24.VnH(), z10.VnH()), "fmul z9.h, z24.h, z10.h"); 1254 COMPARE(fmul(z19.VnS(), z14.VnS(), z0.VnS()), "fmul z19.s, z14.s, z0.s"); 1255 COMPARE(fmul(z29.VnD(), z4.VnD(), z20.VnD()), "fmul z29.d, z4.d, z20.d"); 1256 COMPARE(fsub(z4.VnH(), z14.VnH(), z29.VnH()), "fsub z4.h, z14.h, z29.h"); 1257 COMPARE(fsub(z14.VnS(), z24.VnS(), z9.VnS()), "fsub z14.s, z24.s, z9.s"); 1258 COMPARE(fsub(z14.VnD(), z4.VnD(), z19.VnD()), "fsub z14.d, z4.d, z19.d"); 1259 COMPARE(frecps(z14.VnH(), z29.VnH(), z18.VnH()), 1260 "frecps z14.h, z29.h, z18.h"); 1261 COMPARE(frecps(z14.VnS(), z29.VnS(), z18.VnS()), 1262 "frecps z14.s, z29.s, z18.s"); 1263 COMPARE(frecps(z14.VnD(), z29.VnD(), z18.VnD()), 1264 "frecps z14.d, z29.d, z18.d"); 1265 COMPARE(frsqrts(z5.VnH(), z6.VnH(), 
z28.VnH()), "frsqrts z5.h, z6.h, z28.h"); 1266 COMPARE(frsqrts(z5.VnS(), z6.VnS(), z28.VnS()), "frsqrts z5.s, z6.s, z28.s"); 1267 COMPARE(frsqrts(z5.VnD(), z6.VnD(), z28.VnD()), "frsqrts z5.d, z6.d, z28.d"); 1268 COMPARE(ftsmul(z21.VnH(), z17.VnH(), z24.VnH()), 1269 "ftsmul z21.h, z17.h, z24.h"); 1270 COMPARE(ftsmul(z21.VnS(), z17.VnS(), z24.VnS()), 1271 "ftsmul z21.s, z17.s, z24.s"); 1272 COMPARE(ftsmul(z21.VnD(), z17.VnD(), z24.VnD()), 1273 "ftsmul z21.d, z17.d, z24.d"); 1274 1275 CLEANUP(); 1276} 1277 1278TEST(sve_fp_compare_vectors) { 1279 SETUP(); 1280 1281 1282 COMPARE(facge(p1.VnH(), p3.Zeroing(), z22.VnH(), z25.VnH()), 1283 "facge p1.h, p3/z, z22.h, z25.h"); 1284 COMPARE(facge(p1.VnS(), p3.Zeroing(), z22.VnS(), z25.VnS()), 1285 "facge p1.s, p3/z, z22.s, z25.s"); 1286 COMPARE(facge(p1.VnD(), p3.Zeroing(), z22.VnD(), z25.VnD()), 1287 "facge p1.d, p3/z, z22.d, z25.d"); 1288 COMPARE(facgt(p8.VnH(), p7.Zeroing(), z25.VnH(), z17.VnH()), 1289 "facgt p8.h, p7/z, z25.h, z17.h"); 1290 COMPARE(facgt(p8.VnS(), p7.Zeroing(), z25.VnS(), z17.VnS()), 1291 "facgt p8.s, p7/z, z25.s, z17.s"); 1292 COMPARE(facgt(p8.VnD(), p7.Zeroing(), z25.VnD(), z17.VnD()), 1293 "facgt p8.d, p7/z, z25.d, z17.d"); 1294 COMPARE(fcmeq(p10.VnH(), p2.Zeroing(), z1.VnH(), z17.VnH()), 1295 "fcmeq p10.h, p2/z, z1.h, z17.h"); 1296 COMPARE(fcmeq(p10.VnS(), p2.Zeroing(), z1.VnS(), z17.VnS()), 1297 "fcmeq p10.s, p2/z, z1.s, z17.s"); 1298 COMPARE(fcmeq(p10.VnD(), p2.Zeroing(), z1.VnD(), z17.VnD()), 1299 "fcmeq p10.d, p2/z, z1.d, z17.d"); 1300 COMPARE(fcmge(p0.VnH(), p0.Zeroing(), z1.VnH(), z0.VnH()), 1301 "fcmge p0.h, p0/z, z1.h, z0.h"); 1302 COMPARE(fcmge(p0.VnS(), p0.Zeroing(), z1.VnS(), z0.VnS()), 1303 "fcmge p0.s, p0/z, z1.s, z0.s"); 1304 COMPARE(fcmge(p0.VnD(), p0.Zeroing(), z1.VnD(), z0.VnD()), 1305 "fcmge p0.d, p0/z, z1.d, z0.d"); 1306 COMPARE(fcmgt(p15.VnH(), p5.Zeroing(), z26.VnH(), z5.VnH()), 1307 "fcmgt p15.h, p5/z, z26.h, z5.h"); 1308 COMPARE(fcmgt(p15.VnS(), p5.Zeroing(), z26.VnS(), 
z5.VnS()), 1309 "fcmgt p15.s, p5/z, z26.s, z5.s"); 1310 COMPARE(fcmgt(p15.VnD(), p5.Zeroing(), z26.VnD(), z5.VnD()), 1311 "fcmgt p15.d, p5/z, z26.d, z5.d"); 1312 COMPARE(fcmne(p2.VnH(), p1.Zeroing(), z9.VnH(), z4.VnH()), 1313 "fcmne p2.h, p1/z, z9.h, z4.h"); 1314 COMPARE(fcmne(p2.VnS(), p1.Zeroing(), z9.VnS(), z4.VnS()), 1315 "fcmne p2.s, p1/z, z9.s, z4.s"); 1316 COMPARE(fcmne(p2.VnD(), p1.Zeroing(), z9.VnD(), z4.VnD()), 1317 "fcmne p2.d, p1/z, z9.d, z4.d"); 1318 COMPARE(fcmuo(p6.VnH(), p4.Zeroing(), z10.VnH(), z21.VnH()), 1319 "fcmuo p6.h, p4/z, z10.h, z21.h"); 1320 COMPARE(fcmuo(p6.VnS(), p4.Zeroing(), z10.VnS(), z21.VnS()), 1321 "fcmuo p6.s, p4/z, z10.s, z21.s"); 1322 COMPARE(fcmuo(p6.VnD(), p4.Zeroing(), z10.VnD(), z21.VnD()), 1323 "fcmuo p6.d, p4/z, z10.d, z21.d"); 1324 1325 COMPARE_MACRO(Facle(p2.VnH(), p0.Zeroing(), z11.VnH(), z15.VnH()), 1326 "facge p2.h, p0/z, z15.h, z11.h"); 1327 COMPARE_MACRO(Facle(p2.VnS(), p0.Zeroing(), z11.VnS(), z15.VnS()), 1328 "facge p2.s, p0/z, z15.s, z11.s"); 1329 COMPARE_MACRO(Facle(p2.VnD(), p0.Zeroing(), z11.VnD(), z15.VnD()), 1330 "facge p2.d, p0/z, z15.d, z11.d"); 1331 COMPARE_MACRO(Faclt(p9.VnH(), p4.Zeroing(), z27.VnH(), z5.VnH()), 1332 "facgt p9.h, p4/z, z5.h, z27.h"); 1333 COMPARE_MACRO(Faclt(p9.VnS(), p4.Zeroing(), z27.VnS(), z5.VnS()), 1334 "facgt p9.s, p4/z, z5.s, z27.s"); 1335 COMPARE_MACRO(Faclt(p9.VnD(), p4.Zeroing(), z27.VnD(), z5.VnD()), 1336 "facgt p9.d, p4/z, z5.d, z27.d"); 1337 1338 COMPARE_MACRO(Fcmle(p12.VnH(), p2.Zeroing(), z21.VnH(), z29.VnH()), 1339 "fcmge p12.h, p2/z, z29.h, z21.h"); 1340 COMPARE_MACRO(Fcmle(p12.VnS(), p2.Zeroing(), z21.VnS(), z29.VnS()), 1341 "fcmge p12.s, p2/z, z29.s, z21.s"); 1342 COMPARE_MACRO(Fcmle(p12.VnD(), p2.Zeroing(), z21.VnD(), z29.VnD()), 1343 "fcmge p12.d, p2/z, z29.d, z21.d"); 1344 COMPARE_MACRO(Fcmlt(p7.VnH(), p3.Zeroing(), z7.VnH(), z14.VnH()), 1345 "fcmgt p7.h, p3/z, z14.h, z7.h"); 1346 COMPARE_MACRO(Fcmlt(p7.VnS(), p3.Zeroing(), z7.VnS(), z14.VnS()), 1347 "fcmgt p7.s, 
p3/z, z14.s, z7.s"); 1348 COMPARE_MACRO(Fcmlt(p7.VnD(), p3.Zeroing(), z7.VnD(), z14.VnD()), 1349 "fcmgt p7.d, p3/z, z14.d, z7.d"); 1350 1351 CLEANUP(); 1352} 1353 1354TEST(sve_fp_compare_with_zero) { 1355 SETUP(); 1356 1357 COMPARE(fcmeq(p9.VnH(), p1.Zeroing(), z17.VnH(), 0), 1358 "fcmeq p9.h, p1/z, z17.h, #0.0"); 1359 COMPARE(fcmeq(p9.VnS(), p1.Zeroing(), z17.VnS(), 0), 1360 "fcmeq p9.s, p1/z, z17.s, #0.0"); 1361 COMPARE(fcmeq(p9.VnD(), p1.Zeroing(), z17.VnD(), 0), 1362 "fcmeq p9.d, p1/z, z17.d, #0.0"); 1363 COMPARE(fcmge(p13.VnH(), p3.Zeroing(), z13.VnH(), 0), 1364 "fcmge p13.h, p3/z, z13.h, #0.0"); 1365 COMPARE(fcmge(p13.VnS(), p3.Zeroing(), z13.VnS(), 0), 1366 "fcmge p13.s, p3/z, z13.s, #0.0"); 1367 COMPARE(fcmge(p13.VnD(), p3.Zeroing(), z13.VnD(), 0), 1368 "fcmge p13.d, p3/z, z13.d, #0.0"); 1369 COMPARE(fcmgt(p10.VnH(), p2.Zeroing(), z24.VnH(), 0), 1370 "fcmgt p10.h, p2/z, z24.h, #0.0"); 1371 COMPARE(fcmgt(p10.VnS(), p2.Zeroing(), z24.VnS(), 0), 1372 "fcmgt p10.s, p2/z, z24.s, #0.0"); 1373 COMPARE(fcmgt(p10.VnD(), p2.Zeroing(), z24.VnD(), 0), 1374 "fcmgt p10.d, p2/z, z24.d, #0.0"); 1375 COMPARE(fcmle(p4.VnH(), p7.Zeroing(), z1.VnH(), 0), 1376 "fcmle p4.h, p7/z, z1.h, #0.0"); 1377 COMPARE(fcmle(p4.VnS(), p7.Zeroing(), z1.VnS(), 0), 1378 "fcmle p4.s, p7/z, z1.s, #0.0"); 1379 COMPARE(fcmle(p4.VnD(), p7.Zeroing(), z1.VnD(), 0), 1380 "fcmle p4.d, p7/z, z1.d, #0.0"); 1381 COMPARE(fcmlt(p15.VnH(), p7.Zeroing(), z9.VnH(), 0), 1382 "fcmlt p15.h, p7/z, z9.h, #0.0"); 1383 COMPARE(fcmlt(p15.VnS(), p7.Zeroing(), z9.VnS(), 0), 1384 "fcmlt p15.s, p7/z, z9.s, #0.0"); 1385 COMPARE(fcmlt(p15.VnD(), p7.Zeroing(), z9.VnD(), 0), 1386 "fcmlt p15.d, p7/z, z9.d, #0.0"); 1387 COMPARE(fcmne(p14.VnH(), p7.Zeroing(), z28.VnH(), 0), 1388 "fcmne p14.h, p7/z, z28.h, #0.0"); 1389 COMPARE(fcmne(p14.VnS(), p7.Zeroing(), z28.VnS(), 0), 1390 "fcmne p14.s, p7/z, z28.s, #0.0"); 1391 COMPARE(fcmne(p14.VnD(), p7.Zeroing(), z28.VnD(), 0), 1392 "fcmne p14.d, p7/z, z28.d, #0.0"); 1393 1394 CLEANUP(); 
1395} 1396 1397TEST(sve_fp_complex_addition) { 1398 SETUP(); 1399 1400 COMPARE(fcadd(z12.VnH(), p5.Merging(), z12.VnH(), z13.VnH(), 90), 1401 "fcadd z12.h, p5/m, z12.h, z13.h, #90"); 1402 COMPARE(fcadd(z12.VnS(), p5.Merging(), z12.VnS(), z13.VnS(), 90), 1403 "fcadd z12.s, p5/m, z12.s, z13.s, #90"); 1404 COMPARE(fcadd(z12.VnD(), p5.Merging(), z12.VnD(), z13.VnD(), 90), 1405 "fcadd z12.d, p5/m, z12.d, z13.d, #90"); 1406 COMPARE(fcadd(z22.VnH(), p0.Merging(), z22.VnH(), z23.VnH(), 270), 1407 "fcadd z22.h, p0/m, z22.h, z23.h, #270"); 1408 COMPARE(fcadd(z22.VnS(), p0.Merging(), z22.VnS(), z23.VnS(), 270), 1409 "fcadd z22.s, p0/m, z22.s, z23.s, #270"); 1410 COMPARE(fcadd(z22.VnD(), p0.Merging(), z22.VnD(), z23.VnD(), 270), 1411 "fcadd z22.d, p0/m, z22.d, z23.d, #270"); 1412 1413 COMPARE_MACRO(Fcadd(z12.VnH(), p5.Merging(), z1.VnH(), z13.VnH(), 90), 1414 "movprfx z12.h, p5/m, z1.h\n" 1415 "fcadd z12.h, p5/m, z12.h, z13.h, #90"); 1416 COMPARE_MACRO(Fcadd(z12.VnH(), p5.Merging(), z1.VnH(), z12.VnH(), 90), 1417 "movprfx z31.h, p5/m, z1.h\n" 1418 "fcadd z31.h, p5/m, z31.h, z12.h, #90\n" 1419 "mov z12.d, z31.d"); 1420 CLEANUP(); 1421} 1422 1423TEST(sve_fp_complex_mul_add) { 1424 SETUP(); 1425 1426 COMPARE_MACRO(Fcmla(z19.VnH(), 1427 p7.Merging(), 1428 z19.VnH(), 1429 z16.VnH(), 1430 z0.VnH(), 1431 90), 1432 "fcmla z19.h, p7/m, z16.h, z0.h, #90"); 1433 COMPARE_MACRO(Fcmla(z19.VnS(), 1434 p7.Merging(), 1435 z19.VnS(), 1436 z16.VnS(), 1437 z0.VnS(), 1438 90), 1439 "fcmla z19.s, p7/m, z16.s, z0.s, #90"); 1440 COMPARE_MACRO(Fcmla(z19.VnD(), 1441 p7.Merging(), 1442 z19.VnD(), 1443 z16.VnD(), 1444 z0.VnD(), 1445 90), 1446 "fcmla z19.d, p7/m, z16.d, z0.d, #90"); 1447 1448 COMPARE_MACRO(Fcmla(z20.VnD(), 1449 p6.Merging(), 1450 z20.VnD(), 1451 z15.VnD(), 1452 z1.VnD(), 1453 0), 1454 "fcmla z20.d, p6/m, z15.d, z1.d, #0"); 1455 COMPARE_MACRO(Fcmla(z20.VnD(), 1456 p6.Merging(), 1457 z20.VnD(), 1458 z15.VnD(), 1459 z1.VnD(), 1460 180), 1461 "fcmla z20.d, p6/m, z15.d, z1.d, #180"); 1462 
COMPARE_MACRO(Fcmla(z20.VnD(), 1463 p6.Merging(), 1464 z20.VnD(), 1465 z15.VnD(), 1466 z1.VnD(), 1467 270), 1468 "fcmla z20.d, p6/m, z15.d, z1.d, #270"); 1469 1470 COMPARE_MACRO(Fcmla(z20.VnD(), 1471 p6.Merging(), 1472 z20.VnD(), 1473 z15.VnD(), 1474 z20.VnD(), 1475 270), 1476 "fcmla z20.d, p6/m, z15.d, z20.d, #270"); 1477 COMPARE_MACRO(Fcmla(z20.VnD(), 1478 p6.Merging(), 1479 z21.VnD(), 1480 z15.VnD(), 1481 z1.VnD(), 1482 270), 1483 "movprfx z20.d, p6/m, z21.d\n" 1484 "fcmla z20.d, p6/m, z15.d, z1.d, #270"); 1485 COMPARE_MACRO(Fcmla(z20.VnD(), 1486 p6.Merging(), 1487 z21.VnD(), 1488 z20.VnD(), 1489 z1.VnD(), 1490 270), 1491 "movprfx z31, z21\n" 1492 "fcmla z31.d, p6/m, z20.d, z1.d, #270\n" 1493 "mov z20.d, p6/m, z31.d"); 1494 COMPARE_MACRO(Fcmla(z20.VnD(), 1495 p6.Merging(), 1496 z21.VnD(), 1497 z15.VnD(), 1498 z20.VnD(), 1499 270), 1500 "movprfx z31, z21\n" 1501 "fcmla z31.d, p6/m, z15.d, z20.d, #270\n" 1502 "mov z20.d, p6/m, z31.d"); 1503 COMPARE_MACRO(Fcmla(z20.VnD(), 1504 p6.Merging(), 1505 z21.VnD(), 1506 z20.VnD(), 1507 z20.VnD(), 1508 270), 1509 "movprfx z31, z21\n" 1510 "fcmla z31.d, p6/m, z20.d, z20.d, #270\n" 1511 "mov z20.d, p6/m, z31.d"); 1512 1513 CLEANUP(); 1514} 1515 1516TEST(sve_fp_complex_mul_add_index) { 1517 SETUP(); 1518 1519 COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 0), 1520 "fcmla z30.h, z20.h, z3.h[0], #0"); 1521 COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 1, 0), 1522 "fcmla z30.h, z20.h, z3.h[1], #0"); 1523 COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 2, 90), 1524 "fcmla z30.h, z20.h, z3.h[2], #90"); 1525 COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 270), 1526 "fcmla z30.h, z20.h, z3.h[0], #270"); 1527 COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 0), 1528 "fcmla z10.s, z20.s, z1.s[0], #0"); 1529 COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 0), 1530 "fcmla z10.s, z20.s, z1.s[1], #0"); 1531 COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 90), 1532 "fcmla z10.s, z20.s, z1.s[1], #90"); 1533 COMPARE(fcmla(z10.VnS(), 
z20.VnS(), z1.VnS(), 0, 270), 1534 "fcmla z10.s, z20.s, z1.s[0], #270"); 1535 1536 CLEANUP(); 1537} 1538 1539TEST(sve_fp_fast_reduction) { 1540 SETUP(); 1541 1542 COMPARE(faddv(h26, p6, z16.VnH()), "faddv h26, p6, z16.h"); 1543 COMPARE(faddv(s26, p6, z16.VnS()), "faddv s26, p6, z16.s"); 1544 COMPARE(faddv(d26, p6, z16.VnD()), "faddv d26, p6, z16.d"); 1545 COMPARE(fmaxnmv(h28, p1, z0.VnH()), "fmaxnmv h28, p1, z0.h"); 1546 COMPARE(fmaxnmv(s28, p1, z0.VnS()), "fmaxnmv s28, p1, z0.s"); 1547 COMPARE(fmaxnmv(d28, p1, z0.VnD()), "fmaxnmv d28, p1, z0.d"); 1548 COMPARE(fmaxv(h3, p1, z23.VnH()), "fmaxv h3, p1, z23.h"); 1549 COMPARE(fmaxv(s3, p1, z23.VnS()), "fmaxv s3, p1, z23.s"); 1550 COMPARE(fmaxv(d3, p1, z23.VnD()), "fmaxv d3, p1, z23.d"); 1551 COMPARE(fminnmv(h20, p6, z21.VnH()), "fminnmv h20, p6, z21.h"); 1552 COMPARE(fminnmv(s20, p6, z21.VnS()), "fminnmv s20, p6, z21.s"); 1553 COMPARE(fminnmv(d20, p6, z21.VnD()), "fminnmv d20, p6, z21.d"); 1554 COMPARE(fminv(h10, p4, z27.VnH()), "fminv h10, p4, z27.h"); 1555 COMPARE(fminv(s10, p4, z27.VnS()), "fminv s10, p4, z27.s"); 1556 COMPARE(fminv(d10, p4, z27.VnD()), "fminv d10, p4, z27.d"); 1557 1558 CLEANUP(); 1559} 1560 1561TEST(sve_fp_mul_add) { 1562 SETUP(); 1563 1564 COMPARE(fmad(z31.VnH(), p2.Merging(), z8.VnH(), z1.VnH()), 1565 "fmad z31.h, p2/m, z8.h, z1.h"); 1566 COMPARE(fmad(z31.VnS(), p2.Merging(), z8.VnS(), z1.VnS()), 1567 "fmad z31.s, p2/m, z8.s, z1.s"); 1568 COMPARE(fmad(z31.VnD(), p2.Merging(), z8.VnD(), z1.VnD()), 1569 "fmad z31.d, p2/m, z8.d, z1.d"); 1570 COMPARE(fmla(z26.VnH(), p7.Merging(), z19.VnH(), z16.VnH()), 1571 "fmla z26.h, p7/m, z19.h, z16.h"); 1572 COMPARE(fmla(z26.VnS(), p7.Merging(), z19.VnS(), z16.VnS()), 1573 "fmla z26.s, p7/m, z19.s, z16.s"); 1574 COMPARE(fmla(z26.VnD(), p7.Merging(), z19.VnD(), z16.VnD()), 1575 "fmla z26.d, p7/m, z19.d, z16.d"); 1576 COMPARE(fmls(z20.VnH(), p6.Merging(), z28.VnH(), z0.VnH()), 1577 "fmls z20.h, p6/m, z28.h, z0.h"); 1578 COMPARE(fmls(z20.VnS(), p6.Merging(), 
z28.VnS(), z0.VnS()), 1579 "fmls z20.s, p6/m, z28.s, z0.s"); 1580 COMPARE(fmls(z20.VnD(), p6.Merging(), z28.VnD(), z0.VnD()), 1581 "fmls z20.d, p6/m, z28.d, z0.d"); 1582 COMPARE(fmsb(z3.VnH(), p4.Merging(), z8.VnH(), z22.VnH()), 1583 "fmsb z3.h, p4/m, z8.h, z22.h"); 1584 COMPARE(fmsb(z3.VnS(), p4.Merging(), z8.VnS(), z22.VnS()), 1585 "fmsb z3.s, p4/m, z8.s, z22.s"); 1586 COMPARE(fmsb(z3.VnD(), p4.Merging(), z8.VnD(), z22.VnD()), 1587 "fmsb z3.d, p4/m, z8.d, z22.d"); 1588 COMPARE(fnmad(z0.VnH(), p5.Merging(), z20.VnH(), z17.VnH()), 1589 "fnmad z0.h, p5/m, z20.h, z17.h"); 1590 COMPARE(fnmad(z0.VnS(), p5.Merging(), z20.VnS(), z17.VnS()), 1591 "fnmad z0.s, p5/m, z20.s, z17.s"); 1592 COMPARE(fnmad(z0.VnD(), p5.Merging(), z20.VnD(), z17.VnD()), 1593 "fnmad z0.d, p5/m, z20.d, z17.d"); 1594 COMPARE(fnmla(z31.VnH(), p6.Merging(), z14.VnH(), z8.VnH()), 1595 "fnmla z31.h, p6/m, z14.h, z8.h"); 1596 COMPARE(fnmla(z31.VnS(), p6.Merging(), z14.VnS(), z8.VnS()), 1597 "fnmla z31.s, p6/m, z14.s, z8.s"); 1598 COMPARE(fnmla(z31.VnD(), p6.Merging(), z14.VnD(), z8.VnD()), 1599 "fnmla z31.d, p6/m, z14.d, z8.d"); 1600 COMPARE(fnmls(z2.VnH(), p1.Merging(), z23.VnH(), z15.VnH()), 1601 "fnmls z2.h, p1/m, z23.h, z15.h"); 1602 COMPARE(fnmls(z2.VnS(), p1.Merging(), z23.VnS(), z15.VnS()), 1603 "fnmls z2.s, p1/m, z23.s, z15.s"); 1604 COMPARE(fnmls(z2.VnD(), p1.Merging(), z23.VnD(), z15.VnD()), 1605 "fnmls z2.d, p1/m, z23.d, z15.d"); 1606 COMPARE(fnmsb(z28.VnH(), p3.Merging(), z26.VnH(), z11.VnH()), 1607 "fnmsb z28.h, p3/m, z26.h, z11.h"); 1608 COMPARE(fnmsb(z28.VnS(), p3.Merging(), z26.VnS(), z11.VnS()), 1609 "fnmsb z28.s, p3/m, z26.s, z11.s"); 1610 COMPARE(fnmsb(z28.VnD(), p3.Merging(), z26.VnD(), z11.VnD()), 1611 "fnmsb z28.d, p3/m, z26.d, z11.d"); 1612 1613 CLEANUP(); 1614} 1615 1616TEST(sve_fp_mul_add_macro_strict_nan_propagation) { 1617#pragma GCC diagnostic push 1618#pragma GCC diagnostic ignored "-Wshadow" 1619 1620 // Shadow the `MacroAssembler` type so that the test macros work without 
1621 // modification. 1622 typedef StrictNaNPropagationMacroAssembler MacroAssembler; 1623 1624 SETUP(); 1625 1626 COMPARE_MACRO(Fmla(z0.VnH(), p1.Merging(), z0.VnH(), z2.VnH(), z4.VnH()), 1627 "fmla z0.h, p1/m, z2.h, z4.h"); 1628 COMPARE_MACRO(Fmla(z3.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), z5.VnH()), 1629 "fmad z3.h, p2/m, z5.h, z4.h"); 1630 COMPARE_MACRO(Fmla(z4.VnS(), p3.Merging(), z5.VnS(), z6.VnS(), z4.VnS()), 1631 "movprfx z31.s, p3/m, z5.s\n" 1632 "fmla z31.s, p3/m, z6.s, z4.s\n" 1633 "mov z4.d, z31.d"); 1634 COMPARE_MACRO(Fmla(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()), 1635 "movprfx z5.d, p4/m, z6.d\n" 1636 "fmla z5.d, p4/m, z7.d, z8.d"); 1637 1638 COMPARE_MACRO(Fmls(z0.VnD(), p1.Merging(), z0.VnD(), z2.VnD(), z4.VnD()), 1639 "fmls z0.d, p1/m, z2.d, z4.d"); 1640 COMPARE_MACRO(Fmls(z3.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), z5.VnS()), 1641 "fmsb z3.s, p2/m, z5.s, z4.s"); 1642 COMPARE_MACRO(Fmls(z4.VnH(), p3.Merging(), z5.VnH(), z6.VnH(), z4.VnH()), 1643 "movprfx z31.h, p3/m, z5.h\n" 1644 "fmls z31.h, p3/m, z6.h, z4.h\n" 1645 "mov z4.d, z31.d"); 1646 COMPARE_MACRO(Fmls(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()), 1647 "movprfx z5.d, p4/m, z6.d\n" 1648 "fmls z5.d, p4/m, z7.d, z8.d"); 1649 1650 COMPARE_MACRO(Fnmla(z10.VnH(), p5.Merging(), z10.VnH(), z12.VnH(), z14.VnH()), 1651 "fnmla z10.h, p5/m, z12.h, z14.h"); 1652 COMPARE_MACRO(Fnmla(z13.VnH(), p6.Merging(), z14.VnH(), z13.VnH(), z15.VnH()), 1653 "fnmad z13.h, p6/m, z15.h, z14.h"); 1654 COMPARE_MACRO(Fnmla(z14.VnS(), p7.Merging(), z15.VnS(), z16.VnS(), z14.VnS()), 1655 "movprfx z31.s, p7/m, z15.s\n" 1656 "fnmla z31.s, p7/m, z16.s, z14.s\n" 1657 "mov z14.d, z31.d"); 1658 COMPARE_MACRO(Fnmla(z15.VnD(), p0.Merging(), z16.VnD(), z17.VnD(), z18.VnD()), 1659 "movprfx z15.d, p0/m, z16.d\n" 1660 "fnmla z15.d, p0/m, z17.d, z18.d"); 1661 1662 COMPARE_MACRO(Fnmls(z10.VnD(), p5.Merging(), z10.VnD(), z12.VnD(), z14.VnD()), 1663 "fnmls z10.d, p5/m, z12.d, z14.d"); 1664 
COMPARE_MACRO(Fnmls(z13.VnS(), p6.Merging(), z14.VnS(), z13.VnS(), z15.VnS()), 1665 "fnmsb z13.s, p6/m, z15.s, z14.s"); 1666 COMPARE_MACRO(Fnmls(z14.VnH(), p7.Merging(), z15.VnH(), z16.VnH(), z14.VnH()), 1667 "movprfx z31.h, p7/m, z15.h\n" 1668 "fnmls z31.h, p7/m, z16.h, z14.h\n" 1669 "mov z14.d, z31.d"); 1670 COMPARE_MACRO(Fnmls(z15.VnD(), p0.Merging(), z16.VnD(), z17.VnD(), z18.VnD()), 1671 "movprfx z15.d, p0/m, z16.d\n" 1672 "fnmls z15.d, p0/m, z17.d, z18.d"); 1673 1674 CLEANUP(); 1675 1676#pragma GCC diagnostic pop 1677} 1678 1679TEST(sve_fp_mul_add_macro_fast_nan_propagation) { 1680#pragma GCC diagnostic push 1681#pragma GCC diagnostic ignored "-Wshadow" 1682 1683 // Shadow the `MacroAssembler` type so that the test macros work without 1684 // modification. 1685 typedef FastNaNPropagationMacroAssembler MacroAssembler; 1686 1687 SETUP(); 1688 1689 COMPARE_MACRO(Fmla(z0.VnH(), p1.Merging(), z0.VnH(), z2.VnH(), z4.VnH()), 1690 "fmla z0.h, p1/m, z2.h, z4.h"); 1691 COMPARE_MACRO(Fmla(z3.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), z5.VnH()), 1692 "fmad z3.h, p2/m, z5.h, z4.h"); 1693 COMPARE_MACRO(Fmla(z4.VnS(), p3.Merging(), z5.VnS(), z6.VnS(), z4.VnS()), 1694 "fmad z4.s, p3/m, z6.s, z5.s"); 1695 COMPARE_MACRO(Fmla(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()), 1696 "movprfx z5.d, p4/m, z6.d\n" 1697 "fmla z5.d, p4/m, z7.d, z8.d"); 1698 1699 COMPARE_MACRO(Fmls(z0.VnD(), p1.Merging(), z0.VnD(), z2.VnD(), z4.VnD()), 1700 "fmls z0.d, p1/m, z2.d, z4.d"); 1701 COMPARE_MACRO(Fmls(z3.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), z5.VnS()), 1702 "fmsb z3.s, p2/m, z5.s, z4.s"); 1703 COMPARE_MACRO(Fmls(z4.VnH(), p3.Merging(), z5.VnH(), z6.VnH(), z4.VnH()), 1704 "fmsb z4.h, p3/m, z6.h, z5.h"); 1705 COMPARE_MACRO(Fmls(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()), 1706 "movprfx z5.d, p4/m, z6.d\n" 1707 "fmls z5.d, p4/m, z7.d, z8.d"); 1708 1709 COMPARE_MACRO(Fnmla(z10.VnH(), p5.Merging(), z10.VnH(), z12.VnH(), z14.VnH()), 1710 "fnmla z10.h, p5/m, z12.h, z14.h"); 1711 
COMPARE_MACRO(Fnmla(z13.VnH(), p6.Merging(), z14.VnH(), z13.VnH(), z15.VnH()), 1712 "fnmad z13.h, p6/m, z15.h, z14.h"); 1713 COMPARE_MACRO(Fnmla(z14.VnS(), p7.Merging(), z15.VnS(), z16.VnS(), z14.VnS()), 1714 "fnmad z14.s, p7/m, z16.s, z15.s"); 1715 COMPARE_MACRO(Fnmla(z15.VnD(), p0.Merging(), z16.VnD(), z17.VnD(), z18.VnD()), 1716 "movprfx z15.d, p0/m, z16.d\n" 1717 "fnmla z15.d, p0/m, z17.d, z18.d"); 1718 1719 COMPARE_MACRO(Fnmls(z10.VnD(), p5.Merging(), z10.VnD(), z12.VnD(), z14.VnD()), 1720 "fnmls z10.d, p5/m, z12.d, z14.d"); 1721 COMPARE_MACRO(Fnmls(z13.VnS(), p6.Merging(), z14.VnS(), z13.VnS(), z15.VnS()), 1722 "fnmsb z13.s, p6/m, z15.s, z14.s"); 1723 COMPARE_MACRO(Fnmls(z14.VnH(), p7.Merging(), z15.VnH(), z16.VnH(), z14.VnH()), 1724 "fnmsb z14.h, p7/m, z16.h, z15.h"); 1725 COMPARE_MACRO(Fnmls(z15.VnD(), p0.Merging(), z16.VnD(), z17.VnD(), z18.VnD()), 1726 "movprfx z15.d, p0/m, z16.d\n" 1727 "fnmls z15.d, p0/m, z17.d, z18.d"); 1728 1729 CLEANUP(); 1730 1731#pragma GCC diagnostic pop 1732} 1733 1734TEST(sve_fp_mul_add_index) { 1735 SETUP(); 1736 1737 COMPARE(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 0), "fmla z25.d, z9.d, z1.d[0]"); 1738 COMPARE(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 1), "fmla z25.d, z9.d, z1.d[1]"); 1739 1740 COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 0), "fmla z13.h, z7.h, z7.h[0]"); 1741 COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 2), "fmla z13.h, z7.h, z7.h[2]"); 1742 COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 5), "fmla z13.h, z7.h, z7.h[5]"); 1743 COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 7), "fmla z13.h, z7.h, z7.h[7]"); 1744 1745 COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 0), 1746 "fmla z17.s, z27.s, z2.s[0]"); 1747 COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 1), 1748 "fmla z17.s, z27.s, z2.s[1]"); 1749 COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 2), 1750 "fmla z17.s, z27.s, z2.s[2]"); 1751 COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 3), 1752 "fmla z17.s, z27.s, z2.s[3]"); 1753 1754 COMPARE(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 0), "fmls 
z28.d, z2.d, z0.d[0]"); 1755 COMPARE(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 1), "fmls z28.d, z2.d, z0.d[1]"); 1756 1757 COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 1), 1758 "fmls z30.h, z29.h, z7.h[1]"); 1759 COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 4), 1760 "fmls z30.h, z29.h, z7.h[4]"); 1761 COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 3), 1762 "fmls z30.h, z29.h, z7.h[3]"); 1763 COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 6), 1764 "fmls z30.h, z29.h, z7.h[6]"); 1765 1766 COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 0), "fmls z30.s, z1.s, z6.s[0]"); 1767 COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 1), "fmls z30.s, z1.s, z6.s[1]"); 1768 COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 2), "fmls z30.s, z1.s, z6.s[2]"); 1769 COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 3), "fmls z30.s, z1.s, z6.s[3]"); 1770 1771 COMPARE_MACRO(Fmla(z10.VnH(), z11.VnH(), z12.VnH(), z4.VnH(), 7), 1772 "movprfx z10, z11\n" 1773 "fmla z10.h, z12.h, z4.h[7]"); 1774 COMPARE_MACRO(Fmla(z10.VnH(), z10.VnH(), z12.VnH(), z4.VnH(), 6), 1775 "fmla z10.h, z12.h, z4.h[6]"); 1776 COMPARE_MACRO(Fmla(z11.VnS(), z12.VnS(), z11.VnS(), z5.VnS(), 3), 1777 "movprfx z31, z12\n" 1778 "fmla z31.s, z11.s, z5.s[3]\n" 1779 "mov z11.d, z31.d"); 1780 COMPARE_MACRO(Fmla(z12.VnD(), z13.VnD(), z14.VnD(), z12.VnD(), 1), 1781 "movprfx z31, z13\n" 1782 "fmla z31.d, z14.d, z12.d[1]\n" 1783 "mov z12.d, z31.d"); 1784 1785 COMPARE_MACRO(Fmls(z10.VnH(), z11.VnH(), z12.VnH(), z4.VnH(), 7), 1786 "movprfx z10, z11\n" 1787 "fmls z10.h, z12.h, z4.h[7]"); 1788 COMPARE_MACRO(Fmls(z10.VnH(), z10.VnH(), z12.VnH(), z4.VnH(), 6), 1789 "fmls z10.h, z12.h, z4.h[6]"); 1790 COMPARE_MACRO(Fmls(z11.VnS(), z12.VnS(), z11.VnS(), z5.VnS(), 3), 1791 "movprfx z31, z12\n" 1792 "fmls z31.s, z11.s, z5.s[3]\n" 1793 "mov z11.d, z31.d"); 1794 COMPARE_MACRO(Fmls(z12.VnD(), z13.VnD(), z14.VnD(), z12.VnD(), 1), 1795 "movprfx z31, z13\n" 1796 "fmls z31.d, z14.d, z12.d[1]\n" 1797 "mov z12.d, z31.d"); 1798 1799 CLEANUP(); 1800} 1801 1802TEST(sve_fp_mul_index) { 
1803 SETUP(); 1804 1805 COMPARE(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 0), "fmul z12.d, z3.d, z4.d[0]"); 1806 COMPARE(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 1), "fmul z12.d, z3.d, z4.d[1]"); 1807 1808 COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 0), "fmul z22.h, z2.h, z3.h[0]"); 1809 COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 3), "fmul z22.h, z2.h, z3.h[3]"); 1810 COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 4), "fmul z22.h, z2.h, z3.h[4]"); 1811 COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 7), "fmul z22.h, z2.h, z3.h[7]"); 1812 1813 COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 0), "fmul z2.s, z8.s, z7.s[0]"); 1814 COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 1), "fmul z2.s, z8.s, z7.s[1]"); 1815 COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 2), "fmul z2.s, z8.s, z7.s[2]"); 1816 COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 3), "fmul z2.s, z8.s, z7.s[3]"); 1817 1818 CLEANUP(); 1819} 1820 1821TEST(sve_fp_unary_op_predicated) { 1822 SETUP(); 1823 1824 COMPARE(fcvtzs(z29.VnS(), p5.Merging(), z8.VnD()), 1825 "fcvtzs z29.s, p5/m, z8.d"); 1826 COMPARE(fcvtzs(z30.VnD(), p5.Merging(), z8.VnD()), 1827 "fcvtzs z30.d, p5/m, z8.d"); 1828 COMPARE(fcvtzs(z14.VnH(), p1.Merging(), z29.VnH()), 1829 "fcvtzs z14.h, p1/m, z29.h"); 1830 COMPARE(fcvtzs(z11.VnS(), p3.Merging(), z16.VnH()), 1831 "fcvtzs z11.s, p3/m, z16.h"); 1832 COMPARE(fcvtzs(z4.VnD(), p7.Merging(), z4.VnH()), "fcvtzs z4.d, p7/m, z4.h"); 1833 COMPARE(fcvtzs(z24.VnS(), p1.Merging(), z4.VnS()), 1834 "fcvtzs z24.s, p1/m, z4.s"); 1835 COMPARE(fcvtzs(z25.VnD(), p4.Merging(), z24.VnS()), 1836 "fcvtzs z25.d, p4/m, z24.s"); 1837 COMPARE(fcvtzu(z16.VnS(), p7.Merging(), z14.VnD()), 1838 "fcvtzu z16.s, p7/m, z14.d"); 1839 COMPARE(fcvtzu(z31.VnD(), p1.Merging(), z16.VnD()), 1840 "fcvtzu z31.d, p1/m, z16.d"); 1841 COMPARE(fcvtzu(z12.VnH(), p2.Merging(), z27.VnH()), 1842 "fcvtzu z12.h, p2/m, z27.h"); 1843 COMPARE(fcvtzu(z26.VnS(), p6.Merging(), z29.VnH()), 1844 "fcvtzu z26.s, p6/m, z29.h"); 1845 COMPARE(fcvtzu(z29.VnD(), p5.Merging(), z27.VnH()), 1846 
"fcvtzu z29.d, p5/m, z27.h"); 1847 COMPARE(fcvtzu(z13.VnS(), p2.Merging(), z17.VnS()), 1848 "fcvtzu z13.s, p2/m, z17.s"); 1849 COMPARE(fcvtzu(z25.VnD(), p7.Merging(), z28.VnS()), 1850 "fcvtzu z25.d, p7/m, z28.s"); 1851 COMPARE(scvtf(z16.VnH(), p6.Merging(), z5.VnH()), "scvtf z16.h, p6/m, z5.h"); 1852 COMPARE(scvtf(z31.VnD(), p5.Merging(), z26.VnS()), 1853 "scvtf z31.d, p5/m, z26.s"); 1854 COMPARE(scvtf(z0.VnH(), p7.Merging(), z0.VnS()), "scvtf z0.h, p7/m, z0.s"); 1855 COMPARE(scvtf(z12.VnS(), p7.Merging(), z0.VnS()), "scvtf z12.s, p7/m, z0.s"); 1856 COMPARE(scvtf(z17.VnD(), p1.Merging(), z17.VnD()), 1857 "scvtf z17.d, p1/m, z17.d"); 1858 COMPARE(scvtf(z2.VnH(), p0.Merging(), z9.VnD()), "scvtf z2.h, p0/m, z9.d"); 1859 COMPARE(scvtf(z26.VnS(), p5.Merging(), z4.VnD()), "scvtf z26.s, p5/m, z4.d"); 1860 COMPARE(ucvtf(z27.VnH(), p4.Merging(), z25.VnH()), 1861 "ucvtf z27.h, p4/m, z25.h"); 1862 COMPARE(ucvtf(z3.VnD(), p4.Merging(), z3.VnS()), "ucvtf z3.d, p4/m, z3.s"); 1863 COMPARE(ucvtf(z24.VnH(), p2.Merging(), z29.VnS()), 1864 "ucvtf z24.h, p2/m, z29.s"); 1865 COMPARE(ucvtf(z29.VnS(), p5.Merging(), z14.VnS()), 1866 "ucvtf z29.s, p5/m, z14.s"); 1867 COMPARE(ucvtf(z7.VnD(), p2.Merging(), z14.VnD()), "ucvtf z7.d, p2/m, z14.d"); 1868 COMPARE(ucvtf(z20.VnH(), p2.Merging(), z14.VnD()), 1869 "ucvtf z20.h, p2/m, z14.d"); 1870 COMPARE(ucvtf(z26.VnS(), p1.Merging(), z18.VnD()), 1871 "ucvtf z26.s, p1/m, z18.d"); 1872 COMPARE(frinta(z11.VnH(), p0.Merging(), z3.VnH()), 1873 "frinta z11.h, p0/m, z3.h"); 1874 COMPARE(frinta(z11.VnS(), p0.Merging(), z3.VnS()), 1875 "frinta z11.s, p0/m, z3.s"); 1876 COMPARE(frinta(z11.VnD(), p0.Merging(), z3.VnD()), 1877 "frinta z11.d, p0/m, z3.d"); 1878 COMPARE(frinti(z17.VnH(), p0.Merging(), z16.VnH()), 1879 "frinti z17.h, p0/m, z16.h"); 1880 COMPARE(frinti(z17.VnS(), p0.Merging(), z16.VnS()), 1881 "frinti z17.s, p0/m, z16.s"); 1882 COMPARE(frinti(z17.VnD(), p0.Merging(), z16.VnD()), 1883 "frinti z17.d, p0/m, z16.d"); 1884 COMPARE(frintm(z2.VnH(), 
p7.Merging(), z15.VnH()), 1885 "frintm z2.h, p7/m, z15.h"); 1886 COMPARE(frintm(z2.VnS(), p7.Merging(), z15.VnS()), 1887 "frintm z2.s, p7/m, z15.s"); 1888 COMPARE(frintm(z2.VnD(), p7.Merging(), z15.VnD()), 1889 "frintm z2.d, p7/m, z15.d"); 1890 COMPARE(frintn(z14.VnH(), p5.Merging(), z18.VnH()), 1891 "frintn z14.h, p5/m, z18.h"); 1892 COMPARE(frintn(z14.VnS(), p5.Merging(), z18.VnS()), 1893 "frintn z14.s, p5/m, z18.s"); 1894 COMPARE(frintn(z14.VnD(), p5.Merging(), z18.VnD()), 1895 "frintn z14.d, p5/m, z18.d"); 1896 COMPARE(frintp(z20.VnH(), p6.Merging(), z23.VnH()), 1897 "frintp z20.h, p6/m, z23.h"); 1898 COMPARE(frintp(z20.VnS(), p6.Merging(), z23.VnS()), 1899 "frintp z20.s, p6/m, z23.s"); 1900 COMPARE(frintp(z20.VnD(), p6.Merging(), z23.VnD()), 1901 "frintp z20.d, p6/m, z23.d"); 1902 COMPARE(frintx(z2.VnH(), p6.Merging(), z18.VnH()), 1903 "frintx z2.h, p6/m, z18.h"); 1904 COMPARE(frintx(z2.VnS(), p6.Merging(), z18.VnS()), 1905 "frintx z2.s, p6/m, z18.s"); 1906 COMPARE(frintx(z2.VnD(), p6.Merging(), z18.VnD()), 1907 "frintx z2.d, p6/m, z18.d"); 1908 COMPARE(frintz(z26.VnH(), p7.Merging(), z25.VnH()), 1909 "frintz z26.h, p7/m, z25.h"); 1910 COMPARE(frintz(z26.VnS(), p7.Merging(), z25.VnS()), 1911 "frintz z26.s, p7/m, z25.s"); 1912 COMPARE(frintz(z26.VnD(), p7.Merging(), z25.VnD()), 1913 "frintz z26.d, p7/m, z25.d"); 1914 COMPARE(fcvt(z5.VnH(), p2.Merging(), z11.VnD()), "fcvt z5.h, p2/m, z11.d"); 1915 COMPARE(fcvt(z30.VnS(), p7.Merging(), z0.VnD()), "fcvt z30.s, p7/m, z0.d"); 1916 COMPARE(fcvt(z10.VnD(), p0.Merging(), z17.VnH()), "fcvt z10.d, p0/m, z17.h"); 1917 COMPARE(fcvt(z28.VnS(), p3.Merging(), z27.VnH()), "fcvt z28.s, p3/m, z27.h"); 1918 COMPARE(fcvt(z9.VnD(), p7.Merging(), z0.VnS()), "fcvt z9.d, p7/m, z0.s"); 1919 COMPARE(fcvt(z27.VnH(), p7.Merging(), z9.VnS()), "fcvt z27.h, p7/m, z9.s"); 1920 COMPARE(frecpx(z16.VnH(), p1.Merging(), z29.VnH()), 1921 "frecpx z16.h, p1/m, z29.h"); 1922 COMPARE(frecpx(z16.VnS(), p1.Merging(), z29.VnS()), 1923 "frecpx z16.s, 
p1/m, z29.s"); 1924 COMPARE(frecpx(z16.VnD(), p1.Merging(), z29.VnD()), 1925 "frecpx z16.d, p1/m, z29.d"); 1926 COMPARE(fsqrt(z30.VnH(), p3.Merging(), z13.VnH()), 1927 "fsqrt z30.h, p3/m, z13.h"); 1928 COMPARE(fsqrt(z30.VnS(), p3.Merging(), z13.VnS()), 1929 "fsqrt z30.s, p3/m, z13.s"); 1930 COMPARE(fsqrt(z30.VnD(), p3.Merging(), z13.VnD()), 1931 "fsqrt z30.d, p3/m, z13.d"); 1932 1933 CLEANUP(); 1934} 1935 1936TEST(sve_fp_unary_op_predicated_macro) { 1937 SETUP(); 1938 1939 COMPARE_MACRO(Fcvt(z5.VnH(), p2.Zeroing(), z11.VnD()), 1940 "movprfx z5.d, p2/z, z11.d\n" 1941 "fcvt z5.h, p2/m, z11.d"); 1942 COMPARE_MACRO(Fcvt(z30.VnS(), p7.Zeroing(), z0.VnD()), 1943 "movprfx z30.d, p7/z, z0.d\n" 1944 "fcvt z30.s, p7/m, z0.d"); 1945 COMPARE_MACRO(Fcvt(z10.VnD(), p0.Zeroing(), z17.VnH()), 1946 "movprfx z10.d, p0/z, z17.d\n" 1947 "fcvt z10.d, p0/m, z17.h"); 1948 COMPARE_MACRO(Fcvt(z28.VnS(), p3.Zeroing(), z27.VnH()), 1949 "movprfx z28.s, p3/z, z27.s\n" 1950 "fcvt z28.s, p3/m, z27.h"); 1951 COMPARE_MACRO(Fcvt(z9.VnD(), p7.Zeroing(), z0.VnS()), 1952 "movprfx z9.d, p7/z, z0.d\n" 1953 "fcvt z9.d, p7/m, z0.s"); 1954 COMPARE_MACRO(Fcvt(z27.VnH(), p7.Zeroing(), z9.VnS()), 1955 "movprfx z27.s, p7/z, z9.s\n" 1956 "fcvt z27.h, p7/m, z9.s"); 1957 COMPARE_MACRO(Frecpx(z16.VnH(), p1.Zeroing(), z29.VnH()), 1958 "movprfx z16.h, p1/z, z29.h\n" 1959 "frecpx z16.h, p1/m, z29.h"); 1960 COMPARE_MACRO(Frecpx(z17.VnS(), p2.Zeroing(), z30.VnS()), 1961 "movprfx z17.s, p2/z, z30.s\n" 1962 "frecpx z17.s, p2/m, z30.s"); 1963 COMPARE_MACRO(Frecpx(z18.VnD(), p3.Zeroing(), z31.VnD()), 1964 "movprfx z18.d, p3/z, z31.d\n" 1965 "frecpx z18.d, p3/m, z31.d"); 1966 COMPARE_MACRO(Frinta(z6.VnD(), p3.Zeroing(), z12.VnD()), 1967 "movprfx z6.d, p3/z, z12.d\n" 1968 "frinta z6.d, p3/m, z12.d"); 1969 COMPARE_MACRO(Frinti(z7.VnS(), p3.Zeroing(), z11.VnS()), 1970 "movprfx z7.s, p3/z, z11.s\n" 1971 "frinti z7.s, p3/m, z11.s"); 1972 COMPARE_MACRO(Frintm(z8.VnH(), p3.Zeroing(), z10.VnH()), 1973 "movprfx z8.h, p3/z, z10.h\n" 
1974 "frintm z8.h, p3/m, z10.h"); 1975 COMPARE_MACRO(Frintn(z9.VnD(), p3.Zeroing(), z9.VnD()), 1976 "movprfx z9.d, p3/z, z9.d\n" 1977 "frintn z9.d, p3/m, z9.d"); 1978 COMPARE_MACRO(Frintp(z10.VnS(), p3.Zeroing(), z8.VnS()), 1979 "movprfx z10.s, p3/z, z8.s\n" 1980 "frintp z10.s, p3/m, z8.s"); 1981 COMPARE_MACRO(Frintx(z11.VnH(), p3.Zeroing(), z7.VnH()), 1982 "movprfx z11.h, p3/z, z7.h\n" 1983 "frintx z11.h, p3/m, z7.h"); 1984 COMPARE_MACRO(Frintz(z12.VnD(), p3.Zeroing(), z6.VnD()), 1985 "movprfx z12.d, p3/z, z6.d\n" 1986 "frintz z12.d, p3/m, z6.d"); 1987 COMPARE_MACRO(Fsqrt(z30.VnH(), p3.Zeroing(), z13.VnH()), 1988 "movprfx z30.h, p3/z, z13.h\n" 1989 "fsqrt z30.h, p3/m, z13.h"); 1990 COMPARE_MACRO(Fsqrt(z29.VnS(), p3.Zeroing(), z14.VnS()), 1991 "movprfx z29.s, p3/z, z14.s\n" 1992 "fsqrt z29.s, p3/m, z14.s"); 1993 COMPARE_MACRO(Fsqrt(z28.VnD(), p3.Zeroing(), z15.VnD()), 1994 "movprfx z28.d, p3/z, z15.d\n" 1995 "fsqrt z28.d, p3/m, z15.d"); 1996 1997 CLEANUP(); 1998} 1999 2000TEST(sve_fp_unary_op_unpredicated) { 2001 SETUP(); 2002 2003 COMPARE(frecpe(z0.VnH(), z2.VnH()), "frecpe z0.h, z2.h"); 2004 COMPARE(frecpe(z0.VnS(), z2.VnS()), "frecpe z0.s, z2.s"); 2005 COMPARE(frecpe(z0.VnD(), z2.VnD()), "frecpe z0.d, z2.d"); 2006 COMPARE(frsqrte(z27.VnH(), z14.VnH()), "frsqrte z27.h, z14.h"); 2007 COMPARE(frsqrte(z27.VnS(), z14.VnS()), "frsqrte z27.s, z14.s"); 2008 COMPARE(frsqrte(z27.VnD(), z14.VnD()), "frsqrte z27.d, z14.d"); 2009 2010 CLEANUP(); 2011} 2012 2013TEST(sve_inc_dec_by_predicate_count) { 2014 SETUP(); 2015 2016 COMPARE(decp(x17, p0.VnB()), "decp x17, p0.b"); 2017 COMPARE(decp(x17, p0.VnH()), "decp x17, p0.h"); 2018 COMPARE(decp(x17, p0.VnS()), "decp x17, p0.s"); 2019 COMPARE(decp(x17, p0.VnD()), "decp x17, p0.d"); 2020 COMPARE(decp(z2.VnH(), p11), "decp z2.h, p11"); 2021 COMPARE(decp(z2.VnS(), p11), "decp z2.s, p11"); 2022 COMPARE(decp(z2.VnD(), p11), "decp z2.d, p11"); 2023 COMPARE(incp(x26, p8.VnB()), "incp x26, p8.b"); 2024 COMPARE(incp(x26, p8.VnH()), "incp 
x26, p8.h"); 2025 COMPARE(incp(x26, p8.VnS()), "incp x26, p8.s"); 2026 COMPARE(incp(x26, p8.VnD()), "incp x26, p8.d"); 2027 COMPARE(incp(z27.VnH(), p9), "incp z27.h, p9"); 2028 COMPARE(incp(z27.VnS(), p9), "incp z27.s, p9"); 2029 COMPARE(incp(z27.VnD(), p9), "incp z27.d, p9"); 2030 COMPARE(sqdecp(x12, p7.VnB(), w12), "sqdecp x12, p7.b, w12"); 2031 COMPARE(sqdecp(x12, p7.VnH(), w12), "sqdecp x12, p7.h, w12"); 2032 COMPARE(sqdecp(x12, p7.VnS(), w12), "sqdecp x12, p7.s, w12"); 2033 COMPARE(sqdecp(x12, p7.VnD(), w12), "sqdecp x12, p7.d, w12"); 2034 COMPARE(sqdecp(x30, p5.VnB()), "sqdecp x30, p5.b"); 2035 COMPARE(sqdecp(x30, p5.VnH()), "sqdecp x30, p5.h"); 2036 COMPARE(sqdecp(x30, p5.VnS()), "sqdecp x30, p5.s"); 2037 COMPARE(sqdecp(x30, p5.VnD()), "sqdecp x30, p5.d"); 2038 COMPARE(sqdecp(z13.VnH(), p1), "sqdecp z13.h, p1"); 2039 COMPARE(sqdecp(z13.VnS(), p1), "sqdecp z13.s, p1"); 2040 COMPARE(sqdecp(z13.VnD(), p1), "sqdecp z13.d, p1"); 2041 COMPARE(sqincp(x26, p5.VnB(), w26), "sqincp x26, p5.b, w26"); 2042 COMPARE(sqincp(x26, p5.VnH(), w26), "sqincp x26, p5.h, w26"); 2043 COMPARE(sqincp(x26, p5.VnS(), w26), "sqincp x26, p5.s, w26"); 2044 COMPARE(sqincp(x26, p5.VnD(), w26), "sqincp x26, p5.d, w26"); 2045 COMPARE(sqincp(x5, p15.VnB()), "sqincp x5, p15.b"); 2046 COMPARE(sqincp(x5, p15.VnH()), "sqincp x5, p15.h"); 2047 COMPARE(sqincp(x5, p15.VnS()), "sqincp x5, p15.s"); 2048 COMPARE(sqincp(x5, p15.VnD()), "sqincp x5, p15.d"); 2049 COMPARE(sqincp(z14.VnH(), p4), "sqincp z14.h, p4"); 2050 COMPARE(sqincp(z14.VnS(), p4), "sqincp z14.s, p4"); 2051 COMPARE(sqincp(z14.VnD(), p4), "sqincp z14.d, p4"); 2052 COMPARE(uqdecp(w3, p13.VnB()), "uqdecp w3, p13.b"); 2053 COMPARE(uqdecp(w3, p13.VnH()), "uqdecp w3, p13.h"); 2054 COMPARE(uqdecp(w3, p13.VnS()), "uqdecp w3, p13.s"); 2055 COMPARE(uqdecp(w3, p13.VnD()), "uqdecp w3, p13.d"); 2056 COMPARE(uqdecp(x19, p0.VnB()), "uqdecp x19, p0.b"); 2057 COMPARE(uqdecp(x19, p0.VnH()), "uqdecp x19, p0.h"); 2058 COMPARE(uqdecp(x19, p0.VnS()), "uqdecp 
x19, p0.s"); 2059 COMPARE(uqdecp(x19, p0.VnD()), "uqdecp x19, p0.d"); 2060 COMPARE(uqdecp(z15.VnH(), p9), "uqdecp z15.h, p9"); 2061 COMPARE(uqdecp(z15.VnS(), p9), "uqdecp z15.s, p9"); 2062 COMPARE(uqdecp(z15.VnD(), p9), "uqdecp z15.d, p9"); 2063 COMPARE(uqincp(w18, p1.VnB()), "uqincp w18, p1.b"); 2064 COMPARE(uqincp(w18, p1.VnH()), "uqincp w18, p1.h"); 2065 COMPARE(uqincp(w18, p1.VnS()), "uqincp w18, p1.s"); 2066 COMPARE(uqincp(w18, p1.VnD()), "uqincp w18, p1.d"); 2067 COMPARE(uqincp(x17, p15.VnB()), "uqincp x17, p15.b"); 2068 COMPARE(uqincp(x17, p15.VnH()), "uqincp x17, p15.h"); 2069 COMPARE(uqincp(x17, p15.VnS()), "uqincp x17, p15.s"); 2070 COMPARE(uqincp(x17, p15.VnD()), "uqincp x17, p15.d"); 2071 COMPARE(uqincp(z4.VnH(), p3), "uqincp z4.h, p3"); 2072 COMPARE(uqincp(z4.VnS(), p3), "uqincp z4.s, p3"); 2073 COMPARE(uqincp(z4.VnD(), p3), "uqincp z4.d, p3"); 2074 2075 CLEANUP(); 2076} 2077 2078TEST(sve_inc_dec_by_predicate_count_macro) { 2079 SETUP(); 2080 2081 // The MacroAssembler automatically generates movprfx where it can. 
2082 COMPARE_MACRO(Decp(z0.VnD(), p1), "decp z0.d, p1"); 2083 COMPARE_MACRO(Decp(z2.VnS(), p3, z2.VnS()), "decp z2.s, p3"); 2084 COMPARE_MACRO(Decp(z3.VnS(), p3, z3.VnS()), "decp z3.s, p3"); 2085 COMPARE_MACRO(Decp(z4.VnH(), p5, z6.VnH()), 2086 "movprfx z4, z6\n" 2087 "decp z4.h, p5"); 2088 COMPARE_MACRO(Incp(z7.VnD(), p8), "incp z7.d, p8"); 2089 COMPARE_MACRO(Incp(z9.VnS(), p10, z9.VnS()), "incp z9.s, p10"); 2090 COMPARE_MACRO(Incp(z10.VnS(), p10, z10.VnS()), "incp z10.s, p10"); 2091 COMPARE_MACRO(Incp(z10.VnH(), p11, z12.VnH()), 2092 "movprfx z10, z12\n" 2093 "incp z10.h, p11"); 2094 COMPARE_MACRO(Sqdecp(z0.VnD(), p1), "sqdecp z0.d, p1"); 2095 COMPARE_MACRO(Sqdecp(z2.VnS(), p3, z2.VnS()), "sqdecp z2.s, p3"); 2096 COMPARE_MACRO(Sqdecp(z3.VnS(), p3, z3.VnS()), "sqdecp z3.s, p3"); 2097 COMPARE_MACRO(Sqdecp(z4.VnH(), p5, z6.VnH()), 2098 "movprfx z4, z6\n" 2099 "sqdecp z4.h, p5"); 2100 COMPARE_MACRO(Sqincp(z7.VnD(), p8), "sqincp z7.d, p8"); 2101 COMPARE_MACRO(Sqincp(z9.VnS(), p10, z9.VnS()), "sqincp z9.s, p10"); 2102 COMPARE_MACRO(Sqincp(z10.VnS(), p10, z10.VnS()), "sqincp z10.s, p10"); 2103 COMPARE_MACRO(Sqincp(z10.VnH(), p11, z12.VnH()), 2104 "movprfx z10, z12\n" 2105 "sqincp z10.h, p11"); 2106 COMPARE_MACRO(Uqdecp(z0.VnD(), p1), "uqdecp z0.d, p1"); 2107 COMPARE_MACRO(Uqdecp(z2.VnS(), p3, z2.VnS()), "uqdecp z2.s, p3"); 2108 COMPARE_MACRO(Uqdecp(z3.VnS(), p3, z3.VnS()), "uqdecp z3.s, p3"); 2109 COMPARE_MACRO(Uqdecp(z4.VnH(), p5, z6.VnH()), 2110 "movprfx z4, z6\n" 2111 "uqdecp z4.h, p5"); 2112 COMPARE_MACRO(Uqincp(z7.VnD(), p8), "uqincp z7.d, p8"); 2113 COMPARE_MACRO(Uqincp(z9.VnS(), p10, z9.VnS()), "uqincp z9.s, p10"); 2114 COMPARE_MACRO(Uqincp(z10.VnS(), p10, z10.VnS()), "uqincp z10.s, p10"); 2115 COMPARE_MACRO(Uqincp(z10.VnH(), p11, z12.VnH()), 2116 "movprfx z10, z12\n" 2117 "uqincp z10.h, p11"); 2118 2119 // Sqdecp cannot write into a W register, but Uqdecp can. 
2120 COMPARE_MACRO(Uqdecp(w6, p7.VnD()), "uqdecp w6, p7.d"); 2121 COMPARE_MACRO(Uqdecp(x10, p11.VnH()), "uqdecp x10, p11.h"); 2122 COMPARE_MACRO(Uqdecp(x12, p13.VnS()), "uqdecp x12, p13.s"); 2123 COMPARE_MACRO(Uqdecp(w14, p15.VnD()), "uqdecp w14, p15.d"); 2124 2125 CLEANUP(); 2126} 2127 2128TEST(sve_index_generation) { 2129 SETUP(); 2130 2131 COMPARE(index(z21.VnB(), -16, 15), "index z21.b, #-16, #15"); 2132 COMPARE(index(z22.VnB(), -2, 1), "index z22.b, #-2, #1"); 2133 COMPARE(index(z23.VnH(), -1, 0), "index z23.h, #-1, #0"); 2134 COMPARE(index(z24.VnS(), 0, -1), "index z24.s, #0, #-1"); 2135 COMPARE(index(z25.VnD(), 1, -2), "index z25.d, #1, #-2"); 2136 COMPARE(index(z26.VnB(), 15, -16), "index z26.b, #15, #-16"); 2137 COMPARE(index(z23.VnB(), -16, w8), "index z23.b, #-16, w8"); 2138 COMPARE(index(z24.VnH(), -1, x9), "index z24.h, #-1, w9"); 2139 COMPARE(index(z25.VnS(), 0, w10), "index z25.s, #0, w10"); 2140 COMPARE(index(z26.VnD(), 15, x11), "index z26.d, #15, x11"); 2141 COMPARE(index(z14.VnB(), w15, 15), "index z14.b, w15, #15"); 2142 COMPARE(index(z15.VnH(), x16, 1), "index z15.h, w16, #1"); 2143 COMPARE(index(z16.VnS(), w17, 0), "index z16.s, w17, #0"); 2144 COMPARE(index(z17.VnD(), x18, -16), "index z17.d, x18, #-16"); 2145 COMPARE(index(z20.VnB(), w23, w21), "index z20.b, w23, w21"); 2146 COMPARE(index(z21.VnH(), x24, w22), "index z21.h, w24, w22"); 2147 COMPARE(index(z22.VnS(), w25, x23), "index z22.s, w25, w23"); 2148 COMPARE(index(z23.VnD(), x26, x24), "index z23.d, x26, x24"); 2149 2150 // Simple pass-through macros. 
2151 COMPARE_MACRO(Index(z21.VnB(), -16, 15), "index z21.b, #-16, #15"); 2152 COMPARE_MACRO(Index(z22.VnB(), -2, 1), "index z22.b, #-2, #1"); 2153 COMPARE_MACRO(Index(z23.VnH(), -1, 0), "index z23.h, #-1, #0"); 2154 COMPARE_MACRO(Index(z24.VnS(), 0, -1), "index z24.s, #0, #-1"); 2155 COMPARE_MACRO(Index(z25.VnD(), 1, -2), "index z25.d, #1, #-2"); 2156 COMPARE_MACRO(Index(z26.VnB(), 15, -16), "index z26.b, #15, #-16"); 2157 COMPARE_MACRO(Index(z23.VnB(), -16, w8), "index z23.b, #-16, w8"); 2158 COMPARE_MACRO(Index(z24.VnH(), -1, x9), "index z24.h, #-1, w9"); 2159 COMPARE_MACRO(Index(z25.VnS(), 0, w10), "index z25.s, #0, w10"); 2160 COMPARE_MACRO(Index(z26.VnD(), 15, x11), "index z26.d, #15, x11"); 2161 COMPARE_MACRO(Index(z14.VnB(), w15, 15), "index z14.b, w15, #15"); 2162 COMPARE_MACRO(Index(z15.VnH(), x16, 1), "index z15.h, w16, #1"); 2163 COMPARE_MACRO(Index(z16.VnS(), w17, 0), "index z16.s, w17, #0"); 2164 COMPARE_MACRO(Index(z17.VnD(), x18, -16), "index z17.d, x18, #-16"); 2165 COMPARE_MACRO(Index(z20.VnB(), w23, w21), "index z20.b, w23, w21"); 2166 COMPARE_MACRO(Index(z21.VnH(), x24, w22), "index z21.h, w24, w22"); 2167 COMPARE_MACRO(Index(z22.VnS(), w25, x23), "index z22.s, w25, w23"); 2168 COMPARE_MACRO(Index(z23.VnD(), x26, x24), "index z23.d, x26, x24"); 2169 2170 // Argument synthesis. 
2171 COMPARE_MACRO(Index(z0.VnB(), 16, -17), 2172 "mov w16, #0x10\n" 2173 "mov w17, #0xffffffef\n" 2174 "index z0.b, w16, w17"); 2175 COMPARE_MACRO(Index(z1.VnH(), x2, -17), 2176 "mov w16, #0xffffffef\n" 2177 "index z1.h, w2, w16"); 2178 COMPARE_MACRO(Index(z3.VnS(), 16, w4), 2179 "mov w16, #0x10\n" 2180 "index z3.s, w16, w4"); 2181 COMPARE_MACRO(Index(z4.VnD(), -17, 16), 2182 "mov x16, #0xffffffffffffffef\n" 2183 "mov x17, #0x10\n" 2184 "index z4.d, x16, x17"); 2185 2186 CLEANUP(); 2187} 2188 2189TEST(sve_int_arithmetic_unpredicated) { 2190 SETUP(); 2191 2192 COMPARE(add(z23.VnB(), z30.VnB(), z31.VnB()), "add z23.b, z30.b, z31.b"); 2193 COMPARE(add(z24.VnH(), z29.VnH(), z30.VnH()), "add z24.h, z29.h, z30.h"); 2194 COMPARE(add(z25.VnS(), z28.VnS(), z29.VnS()), "add z25.s, z28.s, z29.s"); 2195 COMPARE(add(z26.VnD(), z27.VnD(), z28.VnD()), "add z26.d, z27.d, z28.d"); 2196 COMPARE(sqadd(z26.VnB(), z21.VnB(), z1.VnB()), "sqadd z26.b, z21.b, z1.b"); 2197 COMPARE(sqadd(z25.VnH(), z20.VnH(), z2.VnH()), "sqadd z25.h, z20.h, z2.h"); 2198 COMPARE(sqadd(z24.VnS(), z19.VnS(), z3.VnS()), "sqadd z24.s, z19.s, z3.s"); 2199 COMPARE(sqadd(z23.VnD(), z18.VnD(), z4.VnD()), "sqadd z23.d, z18.d, z4.d"); 2200 COMPARE(sqsub(z1.VnB(), z10.VnB(), z0.VnB()), "sqsub z1.b, z10.b, z0.b"); 2201 COMPARE(sqsub(z2.VnH(), z11.VnH(), z1.VnH()), "sqsub z2.h, z11.h, z1.h"); 2202 COMPARE(sqsub(z3.VnS(), z12.VnS(), z2.VnS()), "sqsub z3.s, z12.s, z2.s"); 2203 COMPARE(sqsub(z4.VnD(), z13.VnD(), z3.VnD()), "sqsub z4.d, z13.d, z3.d"); 2204 COMPARE(sub(z9.VnB(), z7.VnB(), z25.VnB()), "sub z9.b, z7.b, z25.b"); 2205 COMPARE(sub(z8.VnH(), z8.VnH(), z26.VnH()), "sub z8.h, z8.h, z26.h"); 2206 COMPARE(sub(z7.VnS(), z9.VnS(), z27.VnS()), "sub z7.s, z9.s, z27.s"); 2207 COMPARE(sub(z6.VnD(), z10.VnD(), z28.VnD()), "sub z6.d, z10.d, z28.d"); 2208 COMPARE(uqadd(z13.VnB(), z15.VnB(), z3.VnB()), "uqadd z13.b, z15.b, z3.b"); 2209 COMPARE(uqadd(z12.VnH(), z16.VnH(), z2.VnH()), "uqadd z12.h, z16.h, z2.h"); 2210 
COMPARE(uqadd(z11.VnS(), z17.VnS(), z1.VnS()), "uqadd z11.s, z17.s, z1.s"); 2211 COMPARE(uqadd(z10.VnD(), z18.VnD(), z0.VnD()), "uqadd z10.d, z18.d, z0.d"); 2212 COMPARE(uqsub(z9.VnB(), z13.VnB(), z13.VnB()), "uqsub z9.b, z13.b, z13.b"); 2213 COMPARE(uqsub(z11.VnH(), z15.VnH(), z11.VnH()), "uqsub z11.h, z15.h, z11.h"); 2214 COMPARE(uqsub(z13.VnS(), z17.VnS(), z13.VnS()), "uqsub z13.s, z17.s, z13.s"); 2215 COMPARE(uqsub(z15.VnD(), z19.VnD(), z15.VnD()), "uqsub z15.d, z19.d, z15.d"); 2216 2217 CLEANUP(); 2218} 2219 2220TEST(sve_int_binary_arithmetic_predicated) { 2221 SETUP(); 2222 2223 COMPARE(add(z22.VnB(), p4.Merging(), z22.VnB(), z20.VnB()), 2224 "add z22.b, p4/m, z22.b, z20.b"); 2225 COMPARE(add(z22.VnH(), p4.Merging(), z22.VnH(), z20.VnH()), 2226 "add z22.h, p4/m, z22.h, z20.h"); 2227 COMPARE(add(z22.VnS(), p4.Merging(), z22.VnS(), z20.VnS()), 2228 "add z22.s, p4/m, z22.s, z20.s"); 2229 COMPARE(add(z22.VnD(), p4.Merging(), z22.VnD(), z20.VnD()), 2230 "add z22.d, p4/m, z22.d, z20.d"); 2231 COMPARE(and_(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB()), 2232 "and z22.b, p3/m, z22.b, z3.b"); 2233 COMPARE(and_(z22.VnH(), p3.Merging(), z22.VnH(), z3.VnH()), 2234 "and z22.h, p3/m, z22.h, z3.h"); 2235 COMPARE(and_(z22.VnS(), p3.Merging(), z22.VnS(), z3.VnS()), 2236 "and z22.s, p3/m, z22.s, z3.s"); 2237 COMPARE(and_(z22.VnD(), p3.Merging(), z22.VnD(), z3.VnD()), 2238 "and z22.d, p3/m, z22.d, z3.d"); 2239 COMPARE(bic(z17.VnB(), p7.Merging(), z17.VnB(), z10.VnB()), 2240 "bic z17.b, p7/m, z17.b, z10.b"); 2241 COMPARE(bic(z17.VnH(), p7.Merging(), z17.VnH(), z10.VnH()), 2242 "bic z17.h, p7/m, z17.h, z10.h"); 2243 COMPARE(bic(z17.VnS(), p7.Merging(), z17.VnS(), z10.VnS()), 2244 "bic z17.s, p7/m, z17.s, z10.s"); 2245 COMPARE(bic(z17.VnD(), p7.Merging(), z17.VnD(), z10.VnD()), 2246 "bic z17.d, p7/m, z17.d, z10.d"); 2247 COMPARE(eor(z23.VnB(), p4.Merging(), z23.VnB(), z15.VnB()), 2248 "eor z23.b, p4/m, z23.b, z15.b"); 2249 COMPARE(eor(z23.VnH(), p4.Merging(), z23.VnH(), z15.VnH()), 
2250 "eor z23.h, p4/m, z23.h, z15.h"); 2251 COMPARE(eor(z23.VnS(), p4.Merging(), z23.VnS(), z15.VnS()), 2252 "eor z23.s, p4/m, z23.s, z15.s"); 2253 COMPARE(eor(z23.VnD(), p4.Merging(), z23.VnD(), z15.VnD()), 2254 "eor z23.d, p4/m, z23.d, z15.d"); 2255 COMPARE(mul(z15.VnB(), p5.Merging(), z15.VnB(), z15.VnB()), 2256 "mul z15.b, p5/m, z15.b, z15.b"); 2257 COMPARE(mul(z15.VnH(), p5.Merging(), z15.VnH(), z15.VnH()), 2258 "mul z15.h, p5/m, z15.h, z15.h"); 2259 COMPARE(mul(z15.VnS(), p5.Merging(), z15.VnS(), z15.VnS()), 2260 "mul z15.s, p5/m, z15.s, z15.s"); 2261 COMPARE(mul(z15.VnD(), p5.Merging(), z15.VnD(), z15.VnD()), 2262 "mul z15.d, p5/m, z15.d, z15.d"); 2263 COMPARE(orr(z9.VnB(), p1.Merging(), z9.VnB(), z28.VnB()), 2264 "orr z9.b, p1/m, z9.b, z28.b"); 2265 COMPARE(orr(z9.VnH(), p1.Merging(), z9.VnH(), z28.VnH()), 2266 "orr z9.h, p1/m, z9.h, z28.h"); 2267 COMPARE(orr(z9.VnS(), p1.Merging(), z9.VnS(), z28.VnS()), 2268 "orr z9.s, p1/m, z9.s, z28.s"); 2269 COMPARE(orr(z9.VnD(), p1.Merging(), z9.VnD(), z28.VnD()), 2270 "orr z9.d, p1/m, z9.d, z28.d"); 2271 COMPARE(sabd(z11.VnB(), p6.Merging(), z11.VnB(), z31.VnB()), 2272 "sabd z11.b, p6/m, z11.b, z31.b"); 2273 COMPARE(sabd(z11.VnH(), p6.Merging(), z11.VnH(), z31.VnH()), 2274 "sabd z11.h, p6/m, z11.h, z31.h"); 2275 COMPARE(sabd(z11.VnS(), p6.Merging(), z11.VnS(), z31.VnS()), 2276 "sabd z11.s, p6/m, z11.s, z31.s"); 2277 COMPARE(sabd(z11.VnD(), p6.Merging(), z11.VnD(), z31.VnD()), 2278 "sabd z11.d, p6/m, z11.d, z31.d"); 2279 COMPARE(sdivr(z20.VnS(), p5.Merging(), z20.VnS(), z23.VnS()), 2280 "sdivr z20.s, p5/m, z20.s, z23.s"); 2281 COMPARE(sdiv(z15.VnD(), p6.Merging(), z15.VnD(), z8.VnD()), 2282 "sdiv z15.d, p6/m, z15.d, z8.d"); 2283 COMPARE(smax(z30.VnB(), p4.Merging(), z30.VnB(), z30.VnB()), 2284 "smax z30.b, p4/m, z30.b, z30.b"); 2285 COMPARE(smax(z30.VnH(), p4.Merging(), z30.VnH(), z30.VnH()), 2286 "smax z30.h, p4/m, z30.h, z30.h"); 2287 COMPARE(smax(z30.VnS(), p4.Merging(), z30.VnS(), z30.VnS()), 2288 "smax z30.s, 
p4/m, z30.s, z30.s"); 2289 COMPARE(smax(z30.VnD(), p4.Merging(), z30.VnD(), z30.VnD()), 2290 "smax z30.d, p4/m, z30.d, z30.d"); 2291 COMPARE(smin(z20.VnB(), p7.Merging(), z20.VnB(), z19.VnB()), 2292 "smin z20.b, p7/m, z20.b, z19.b"); 2293 COMPARE(smin(z20.VnH(), p7.Merging(), z20.VnH(), z19.VnH()), 2294 "smin z20.h, p7/m, z20.h, z19.h"); 2295 COMPARE(smin(z20.VnS(), p7.Merging(), z20.VnS(), z19.VnS()), 2296 "smin z20.s, p7/m, z20.s, z19.s"); 2297 COMPARE(smin(z20.VnD(), p7.Merging(), z20.VnD(), z19.VnD()), 2298 "smin z20.d, p7/m, z20.d, z19.d"); 2299 COMPARE(smulh(z23.VnB(), p0.Merging(), z23.VnB(), z3.VnB()), 2300 "smulh z23.b, p0/m, z23.b, z3.b"); 2301 COMPARE(smulh(z23.VnH(), p0.Merging(), z23.VnH(), z3.VnH()), 2302 "smulh z23.h, p0/m, z23.h, z3.h"); 2303 COMPARE(smulh(z23.VnS(), p0.Merging(), z23.VnS(), z3.VnS()), 2304 "smulh z23.s, p0/m, z23.s, z3.s"); 2305 COMPARE(smulh(z23.VnD(), p0.Merging(), z23.VnD(), z3.VnD()), 2306 "smulh z23.d, p0/m, z23.d, z3.d"); 2307 COMPARE(subr(z1.VnB(), p6.Merging(), z1.VnB(), z1.VnB()), 2308 "subr z1.b, p6/m, z1.b, z1.b"); 2309 COMPARE(subr(z1.VnH(), p6.Merging(), z1.VnH(), z1.VnH()), 2310 "subr z1.h, p6/m, z1.h, z1.h"); 2311 COMPARE(subr(z1.VnS(), p6.Merging(), z1.VnS(), z1.VnS()), 2312 "subr z1.s, p6/m, z1.s, z1.s"); 2313 COMPARE(subr(z1.VnD(), p6.Merging(), z1.VnD(), z1.VnD()), 2314 "subr z1.d, p6/m, z1.d, z1.d"); 2315 COMPARE(sub(z28.VnB(), p2.Merging(), z28.VnB(), z0.VnB()), 2316 "sub z28.b, p2/m, z28.b, z0.b"); 2317 COMPARE(sub(z28.VnH(), p2.Merging(), z28.VnH(), z0.VnH()), 2318 "sub z28.h, p2/m, z28.h, z0.h"); 2319 COMPARE(sub(z28.VnS(), p2.Merging(), z28.VnS(), z0.VnS()), 2320 "sub z28.s, p2/m, z28.s, z0.s"); 2321 COMPARE(sub(z28.VnD(), p2.Merging(), z28.VnD(), z0.VnD()), 2322 "sub z28.d, p2/m, z28.d, z0.d"); 2323 COMPARE(uabd(z14.VnB(), p6.Merging(), z14.VnB(), z22.VnB()), 2324 "uabd z14.b, p6/m, z14.b, z22.b"); 2325 COMPARE(uabd(z14.VnH(), p6.Merging(), z14.VnH(), z22.VnH()), 2326 "uabd z14.h, p6/m, z14.h, z22.h"); 
2327 COMPARE(uabd(z14.VnS(), p6.Merging(), z14.VnS(), z22.VnS()), 2328 "uabd z14.s, p6/m, z14.s, z22.s"); 2329 COMPARE(uabd(z14.VnD(), p6.Merging(), z14.VnD(), z22.VnD()), 2330 "uabd z14.d, p6/m, z14.d, z22.d"); 2331 COMPARE(udivr(z27.VnS(), p5.Merging(), z27.VnS(), z31.VnS()), 2332 "udivr z27.s, p5/m, z27.s, z31.s"); 2333 COMPARE(udiv(z13.VnD(), p4.Merging(), z13.VnD(), z11.VnD()), 2334 "udiv z13.d, p4/m, z13.d, z11.d"); 2335 COMPARE(umax(z0.VnB(), p5.Merging(), z0.VnB(), z14.VnB()), 2336 "umax z0.b, p5/m, z0.b, z14.b"); 2337 COMPARE(umax(z0.VnH(), p5.Merging(), z0.VnH(), z14.VnH()), 2338 "umax z0.h, p5/m, z0.h, z14.h"); 2339 COMPARE(umax(z0.VnS(), p5.Merging(), z0.VnS(), z14.VnS()), 2340 "umax z0.s, p5/m, z0.s, z14.s"); 2341 COMPARE(umax(z0.VnD(), p5.Merging(), z0.VnD(), z14.VnD()), 2342 "umax z0.d, p5/m, z0.d, z14.d"); 2343 COMPARE(umin(z26.VnB(), p5.Merging(), z26.VnB(), z12.VnB()), 2344 "umin z26.b, p5/m, z26.b, z12.b"); 2345 COMPARE(umin(z26.VnH(), p5.Merging(), z26.VnH(), z12.VnH()), 2346 "umin z26.h, p5/m, z26.h, z12.h"); 2347 COMPARE(umin(z26.VnS(), p5.Merging(), z26.VnS(), z12.VnS()), 2348 "umin z26.s, p5/m, z26.s, z12.s"); 2349 COMPARE(umin(z26.VnD(), p5.Merging(), z26.VnD(), z12.VnD()), 2350 "umin z26.d, p5/m, z26.d, z12.d"); 2351 COMPARE(umulh(z12.VnB(), p2.Merging(), z12.VnB(), z17.VnB()), 2352 "umulh z12.b, p2/m, z12.b, z17.b"); 2353 COMPARE(umulh(z12.VnH(), p2.Merging(), z12.VnH(), z17.VnH()), 2354 "umulh z12.h, p2/m, z12.h, z17.h"); 2355 COMPARE(umulh(z12.VnS(), p2.Merging(), z12.VnS(), z17.VnS()), 2356 "umulh z12.s, p2/m, z12.s, z17.s"); 2357 COMPARE(umulh(z12.VnD(), p2.Merging(), z12.VnD(), z17.VnD()), 2358 "umulh z12.d, p2/m, z12.d, z17.d"); 2359 CLEANUP(); 2360} 2361 2362TEST(sve_int_binary_arithmetic_predicated_macro) { 2363 SETUP(); 2364 2365 COMPARE_MACRO(Add(z22.VnB(), p4.Merging(), z22.VnB(), z20.VnB()), 2366 "add z22.b, p4/m, z22.b, z20.b"); 2367 COMPARE_MACRO(Add(z22.VnH(), p4.Merging(), z20.VnH(), z22.VnH()), 2368 "add z22.h, p4/m, 
z22.h, z20.h"); 2369 COMPARE_MACRO(Add(z22.VnS(), p4.Merging(), z21.VnS(), z20.VnS()), 2370 "movprfx z22.s, p4/m, z21.s\n" 2371 "add z22.s, p4/m, z22.s, z20.s"); 2372 2373 COMPARE_MACRO(And(z22.VnH(), p3.Merging(), z22.VnH(), z3.VnH()), 2374 "and z22.h, p3/m, z22.h, z3.h"); 2375 COMPARE_MACRO(And(z22.VnS(), p3.Merging(), z3.VnS(), z22.VnS()), 2376 "and z22.s, p3/m, z22.s, z3.s"); 2377 COMPARE_MACRO(And(z22.VnD(), p3.Merging(), z2.VnD(), z3.VnD()), 2378 "movprfx z22.d, p3/m, z2.d\n" 2379 "and z22.d, p3/m, z22.d, z3.d"); 2380 2381 COMPARE_MACRO(Bic(z17.VnB(), p7.Merging(), z17.VnB(), z10.VnB()), 2382 "bic z17.b, p7/m, z17.b, z10.b"); 2383 COMPARE_MACRO(Bic(z17.VnS(), p7.Merging(), z10.VnS(), z17.VnS()), 2384 "mov z31.d, z17.d\n" 2385 "movprfx z17.s, p7/m, z10.s\n" 2386 "bic z17.s, p7/m, z17.s, z31.s"); 2387 COMPARE_MACRO(Bic(z17.VnD(), p7.Merging(), z7.VnD(), z27.VnD()), 2388 "movprfx z17.d, p7/m, z7.d\n" 2389 "bic z17.d, p7/m, z17.d, z27.d"); 2390 2391 COMPARE_MACRO(Eor(z23.VnB(), p4.Merging(), z23.VnB(), z15.VnB()), 2392 "eor z23.b, p4/m, z23.b, z15.b"); 2393 COMPARE_MACRO(Eor(z23.VnH(), p4.Merging(), z23.VnH(), z15.VnH()), 2394 "eor z23.h, p4/m, z23.h, z15.h"); 2395 COMPARE_MACRO(Eor(z23.VnD(), p4.Merging(), z18.VnD(), z15.VnD()), 2396 "movprfx z23.d, p4/m, z18.d\n" 2397 "eor z23.d, p4/m, z23.d, z15.d"); 2398 2399 COMPARE_MACRO(Mul(z15.VnB(), p5.Merging(), z15.VnB(), z15.VnB()), 2400 "mul z15.b, p5/m, z15.b, z15.b"); 2401 COMPARE_MACRO(Mul(z15.VnH(), p5.Merging(), z7.VnH(), z15.VnH()), 2402 "mul z15.h, p5/m, z15.h, z7.h"); 2403 COMPARE_MACRO(Mul(z15.VnS(), p5.Merging(), z0.VnS(), z1.VnS()), 2404 "movprfx z15.s, p5/m, z0.s\n" 2405 "mul z15.s, p5/m, z15.s, z1.s"); 2406 2407 COMPARE_MACRO(Orr(z9.VnH(), p1.Merging(), z9.VnH(), z28.VnH()), 2408 "orr z9.h, p1/m, z9.h, z28.h"); 2409 COMPARE_MACRO(Orr(z9.VnS(), p1.Merging(), z9.VnS(), z28.VnS()), 2410 "orr z9.s, p1/m, z9.s, z28.s"); 2411 COMPARE_MACRO(Orr(z9.VnD(), p1.Merging(), z6.VnD(), z7.VnD()), 2412 "movprfx z9.d, 
p1/m, z6.d\n" 2413 "orr z9.d, p1/m, z9.d, z7.d"); 2414 2415 COMPARE_MACRO(Sabd(z11.VnB(), p6.Merging(), z11.VnB(), z31.VnB()), 2416 "sabd z11.b, p6/m, z11.b, z31.b"); 2417 COMPARE_MACRO(Sabd(z11.VnH(), p6.Merging(), z31.VnH(), z11.VnH()), 2418 "sabd z11.h, p6/m, z11.h, z31.h"); 2419 COMPARE_MACRO(Sabd(z11.VnS(), p6.Merging(), z21.VnS(), z31.VnS()), 2420 "movprfx z11.s, p6/m, z21.s\n" 2421 "sabd z11.s, p6/m, z11.s, z31.s"); 2422 2423 COMPARE_MACRO(Sdiv(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()), 2424 "sdivr z20.s, p5/m, z20.s, z23.s"); 2425 COMPARE_MACRO(Sdiv(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()), 2426 "movprfx z15.d, p6/m, z30.d\n" 2427 "sdiv z15.d, p6/m, z15.d, z8.d"); 2428 2429 COMPARE_MACRO(Smax(z30.VnB(), p4.Merging(), z30.VnB(), z31.VnB()), 2430 "smax z30.b, p4/m, z30.b, z31.b"); 2431 COMPARE_MACRO(Smax(z30.VnS(), p4.Merging(), z3.VnS(), z30.VnS()), 2432 "smax z30.s, p4/m, z30.s, z3.s"); 2433 COMPARE_MACRO(Smax(z30.VnD(), p4.Merging(), z1.VnD(), z5.VnD()), 2434 "movprfx z30.d, p4/m, z1.d\n" 2435 "smax z30.d, p4/m, z30.d, z5.d"); 2436 2437 COMPARE_MACRO(Smin(z20.VnH(), p7.Merging(), z20.VnH(), z19.VnH()), 2438 "smin z20.h, p7/m, z20.h, z19.h"); 2439 COMPARE_MACRO(Smin(z20.VnS(), p7.Merging(), z19.VnS(), z20.VnS()), 2440 "smin z20.s, p7/m, z20.s, z19.s"); 2441 COMPARE_MACRO(Smin(z20.VnD(), p7.Merging(), z14.VnD(), z15.VnD()), 2442 "movprfx z20.d, p7/m, z14.d\n" 2443 "smin z20.d, p7/m, z20.d, z15.d"); 2444 2445 COMPARE_MACRO(Smulh(z23.VnB(), p0.Merging(), z23.VnB(), z3.VnB()), 2446 "smulh z23.b, p0/m, z23.b, z3.b"); 2447 COMPARE_MACRO(Smulh(z23.VnH(), p0.Merging(), z13.VnH(), z23.VnH()), 2448 "smulh z23.h, p0/m, z23.h, z13.h"); 2449 COMPARE_MACRO(Smulh(z23.VnD(), p0.Merging(), z30.VnD(), z31.VnD()), 2450 "movprfx z23.d, p0/m, z30.d\n" 2451 "smulh z23.d, p0/m, z23.d, z31.d"); 2452 2453 COMPARE_MACRO(Sub(z28.VnB(), p2.Merging(), z28.VnB(), z0.VnB()), 2454 "sub z28.b, p2/m, z28.b, z0.b"); 2455 COMPARE_MACRO(Sub(z28.VnH(), p2.Merging(), z14.VnH(), 
z28.VnH()), 2456 "subr z28.h, p2/m, z28.h, z14.h"); 2457 COMPARE_MACRO(Sub(z28.VnS(), p2.Merging(), z7.VnS(), z13.VnS()), 2458 "movprfx z28.s, p2/m, z7.s\n" 2459 "sub z28.s, p2/m, z28.s, z13.s"); 2460 2461 COMPARE_MACRO(Uabd(z14.VnH(), p6.Merging(), z14.VnH(), z22.VnH()), 2462 "uabd z14.h, p6/m, z14.h, z22.h"); 2463 COMPARE_MACRO(Uabd(z14.VnS(), p6.Merging(), z14.VnS(), z22.VnS()), 2464 "uabd z14.s, p6/m, z14.s, z22.s"); 2465 COMPARE_MACRO(Uabd(z14.VnD(), p6.Merging(), z13.VnD(), z22.VnD()), 2466 "movprfx z14.d, p6/m, z13.d\n" 2467 "uabd z14.d, p6/m, z14.d, z22.d"); 2468 2469 COMPARE_MACRO(Udiv(z27.VnS(), p5.Merging(), z16.VnS(), z27.VnS()), 2470 "udivr z27.s, p5/m, z27.s, z16.s"); 2471 COMPARE_MACRO(Udiv(z13.VnD(), p4.Merging(), z22.VnD(), z11.VnD()), 2472 "movprfx z13.d, p4/m, z22.d\n" 2473 "udiv z13.d, p4/m, z13.d, z11.d"); 2474 2475 COMPARE_MACRO(Umax(z0.VnB(), p5.Merging(), z0.VnB(), z14.VnB()), 2476 "umax z0.b, p5/m, z0.b, z14.b"); 2477 COMPARE_MACRO(Umax(z0.VnS(), p5.Merging(), z14.VnS(), z0.VnS()), 2478 "umax z0.s, p5/m, z0.s, z14.s"); 2479 COMPARE_MACRO(Umax(z0.VnD(), p5.Merging(), z29.VnD(), z14.VnD()), 2480 "movprfx z0.d, p5/m, z29.d\n" 2481 "umax z0.d, p5/m, z0.d, z14.d"); 2482 2483 COMPARE_MACRO(Umin(z26.VnB(), p5.Merging(), z26.VnB(), z12.VnB()), 2484 "umin z26.b, p5/m, z26.b, z12.b"); 2485 COMPARE_MACRO(Umin(z26.VnH(), p5.Merging(), z27.VnH(), z26.VnH()), 2486 "umin z26.h, p5/m, z26.h, z27.h"); 2487 COMPARE_MACRO(Umin(z26.VnD(), p5.Merging(), z13.VnD(), z12.VnD()), 2488 "movprfx z26.d, p5/m, z13.d\n" 2489 "umin z26.d, p5/m, z26.d, z12.d"); 2490 2491 COMPARE_MACRO(Umulh(z12.VnB(), p2.Merging(), z12.VnB(), z17.VnB()), 2492 "umulh z12.b, p2/m, z12.b, z17.b"); 2493 COMPARE_MACRO(Umulh(z12.VnH(), p2.Merging(), z0.VnH(), z12.VnH()), 2494 "umulh z12.h, p2/m, z12.h, z0.h"); 2495 COMPARE_MACRO(Umulh(z12.VnS(), p2.Merging(), z25.VnS(), z17.VnS()), 2496 "movprfx z12.s, p2/m, z25.s\n" 2497 "umulh z12.s, p2/m, z12.s, z17.s"); 2498 2499 CLEANUP(); 2500} 2501 
2502TEST(sve_int_compare_scalars) { 2503 SETUP(); 2504 2505 COMPARE(ctermeq(w30, w26), "ctermeq w30, w26"); 2506 COMPARE(ctermne(x21, x18), "ctermne x21, x18"); 2507 COMPARE(whilele(p10.VnB(), x11, x6), "whilele p10.b, x11, x6"); 2508 COMPARE(whilele(p10.VnH(), w11, w6), "whilele p10.h, w11, w6"); 2509 COMPARE(whilele(p10.VnH(), x11, x6), "whilele p10.h, x11, x6"); 2510 COMPARE(whilele(p10.VnS(), w11, w6), "whilele p10.s, w11, w6"); 2511 COMPARE(whilele(p10.VnD(), x11, x6), "whilele p10.d, x11, x6"); 2512 COMPARE(whilelo(p4.VnB(), w3, w25), "whilelo p4.b, w3, w25"); 2513 COMPARE(whilelo(p4.VnH(), x3, x25), "whilelo p4.h, x3, x25"); 2514 COMPARE(whilelo(p4.VnS(), w3, w25), "whilelo p4.s, w3, w25"); 2515 COMPARE(whilelo(p4.VnD(), x3, x25), "whilelo p4.d, x3, x25"); 2516 COMPARE(whilels(p7.VnB(), w15, w15), "whilels p7.b, w15, w15"); 2517 COMPARE(whilels(p7.VnH(), x15, x15), "whilels p7.h, x15, x15"); 2518 COMPARE(whilels(p7.VnS(), w15, w15), "whilels p7.s, w15, w15"); 2519 COMPARE(whilels(p7.VnD(), x15, x15), "whilels p7.d, x15, x15"); 2520 COMPARE(whilelt(p14.VnB(), w11, w14), "whilelt p14.b, w11, w14"); 2521 COMPARE(whilelt(p14.VnH(), x11, x14), "whilelt p14.h, x11, x14"); 2522 COMPARE(whilelt(p14.VnS(), w11, w14), "whilelt p14.s, w11, w14"); 2523 COMPARE(whilelt(p14.VnD(), x11, x14), "whilelt p14.d, x11, x14"); 2524 2525 CLEANUP(); 2526} 2527 2528TEST(sve_int_compare_signed_imm) { 2529 SETUP(); 2530 2531 COMPARE(cmpeq(p0.VnB(), p3.Zeroing(), z1.VnB(), 15), 2532 "cmpeq p0.b, p3/z, z1.b, #15"); 2533 COMPARE(cmpeq(p0.VnH(), p3.Zeroing(), z1.VnH(), 7), 2534 "cmpeq p0.h, p3/z, z1.h, #7"); 2535 COMPARE(cmpeq(p0.VnS(), p3.Zeroing(), z1.VnS(), -3), 2536 "cmpeq p0.s, p3/z, z1.s, #-3"); 2537 COMPARE(cmpeq(p0.VnD(), p3.Zeroing(), z1.VnD(), -14), 2538 "cmpeq p0.d, p3/z, z1.d, #-14"); 2539 COMPARE(cmpge(p9.VnB(), p6.Zeroing(), z12.VnB(), 14), 2540 "cmpge p9.b, p6/z, z12.b, #14"); 2541 COMPARE(cmpge(p9.VnH(), p6.Zeroing(), z12.VnH(), 6), 2542 "cmpge p9.h, p6/z, z12.h, #6"); 
2543 COMPARE(cmpge(p9.VnS(), p6.Zeroing(), z12.VnS(), -4), 2544 "cmpge p9.s, p6/z, z12.s, #-4"); 2545 COMPARE(cmpge(p9.VnD(), p6.Zeroing(), z12.VnD(), -13), 2546 "cmpge p9.d, p6/z, z12.d, #-13"); 2547 COMPARE(cmpgt(p15.VnB(), p4.Zeroing(), z23.VnB(), 13), 2548 "cmpgt p15.b, p4/z, z23.b, #13"); 2549 COMPARE(cmpgt(p15.VnH(), p4.Zeroing(), z23.VnH(), 5), 2550 "cmpgt p15.h, p4/z, z23.h, #5"); 2551 COMPARE(cmpgt(p15.VnS(), p4.Zeroing(), z23.VnS(), -12), 2552 "cmpgt p15.s, p4/z, z23.s, #-12"); 2553 COMPARE(cmpgt(p15.VnD(), p4.Zeroing(), z23.VnD(), -5), 2554 "cmpgt p15.d, p4/z, z23.d, #-5"); 2555 COMPARE(cmple(p4.VnB(), p3.Zeroing(), z5.VnB(), 12), 2556 "cmple p4.b, p3/z, z5.b, #12"); 2557 COMPARE(cmple(p4.VnH(), p3.Zeroing(), z5.VnH(), 4), 2558 "cmple p4.h, p3/z, z5.h, #4"); 2559 COMPARE(cmple(p4.VnS(), p3.Zeroing(), z5.VnS(), -11), 2560 "cmple p4.s, p3/z, z5.s, #-11"); 2561 COMPARE(cmple(p4.VnD(), p3.Zeroing(), z5.VnD(), -6), 2562 "cmple p4.d, p3/z, z5.d, #-6"); 2563 COMPARE(cmplt(p3.VnB(), p7.Zeroing(), z15.VnB(), 11), 2564 "cmplt p3.b, p7/z, z15.b, #11"); 2565 COMPARE(cmplt(p3.VnH(), p7.Zeroing(), z15.VnH(), 3), 2566 "cmplt p3.h, p7/z, z15.h, #3"); 2567 COMPARE(cmplt(p3.VnS(), p7.Zeroing(), z15.VnS(), -10), 2568 "cmplt p3.s, p7/z, z15.s, #-10"); 2569 COMPARE(cmplt(p3.VnD(), p7.Zeroing(), z15.VnD(), -7), 2570 "cmplt p3.d, p7/z, z15.d, #-7"); 2571 COMPARE(cmpne(p13.VnB(), p5.Zeroing(), z20.VnB(), 10), 2572 "cmpne p13.b, p5/z, z20.b, #10"); 2573 COMPARE(cmpne(p13.VnH(), p5.Zeroing(), z20.VnH(), 2), 2574 "cmpne p13.h, p5/z, z20.h, #2"); 2575 COMPARE(cmpne(p13.VnS(), p5.Zeroing(), z20.VnS(), -9), 2576 "cmpne p13.s, p5/z, z20.s, #-9"); 2577 COMPARE(cmpne(p13.VnD(), p5.Zeroing(), z20.VnD(), -8), 2578 "cmpne p13.d, p5/z, z20.d, #-8"); 2579 2580 CLEANUP(); 2581} 2582 2583TEST(sve_int_compare_unsigned_imm) { 2584 SETUP(); 2585 2586 COMPARE(cmphi(p8.VnB(), p6.Zeroing(), z1.VnB(), 127), 2587 "cmphi p8.b, p6/z, z1.b, #127"); 2588 COMPARE(cmphi(p8.VnH(), p6.Zeroing(), z1.VnH(), 
126), 2589 "cmphi p8.h, p6/z, z1.h, #126"); 2590 COMPARE(cmphi(p8.VnS(), p6.Zeroing(), z1.VnS(), 99), 2591 "cmphi p8.s, p6/z, z1.s, #99"); 2592 COMPARE(cmphi(p8.VnD(), p6.Zeroing(), z1.VnD(), 78), 2593 "cmphi p8.d, p6/z, z1.d, #78"); 2594 COMPARE(cmphs(p11.VnB(), p2.Zeroing(), z8.VnB(), 67), 2595 "cmphs p11.b, p2/z, z8.b, #67"); 2596 COMPARE(cmphs(p11.VnH(), p2.Zeroing(), z8.VnH(), 63), 2597 "cmphs p11.h, p2/z, z8.h, #63"); 2598 COMPARE(cmphs(p11.VnS(), p2.Zeroing(), z8.VnS(), 51), 2599 "cmphs p11.s, p2/z, z8.s, #51"); 2600 COMPARE(cmphs(p11.VnD(), p2.Zeroing(), z8.VnD(), 40), 2601 "cmphs p11.d, p2/z, z8.d, #40"); 2602 COMPARE(cmplo(p9.VnB(), p4.Zeroing(), z4.VnB(), 32), 2603 "cmplo p9.b, p4/z, z4.b, #32"); 2604 COMPARE(cmplo(p9.VnH(), p4.Zeroing(), z4.VnH(), 22), 2605 "cmplo p9.h, p4/z, z4.h, #22"); 2606 COMPARE(cmplo(p9.VnS(), p4.Zeroing(), z4.VnS(), 15), 2607 "cmplo p9.s, p4/z, z4.s, #15"); 2608 COMPARE(cmplo(p9.VnD(), p4.Zeroing(), z4.VnD(), 11), 2609 "cmplo p9.d, p4/z, z4.d, #11"); 2610 COMPARE(cmpls(p14.VnB(), p5.Zeroing(), z9.VnB(), 7), 2611 "cmpls p14.b, p5/z, z9.b, #7"); 2612 COMPARE(cmpls(p14.VnH(), p5.Zeroing(), z9.VnH(), 4), 2613 "cmpls p14.h, p5/z, z9.h, #4"); 2614 COMPARE(cmpls(p14.VnS(), p5.Zeroing(), z9.VnS(), 3), 2615 "cmpls p14.s, p5/z, z9.s, #3"); 2616 COMPARE(cmpls(p14.VnD(), p5.Zeroing(), z9.VnD(), 1), 2617 "cmpls p14.d, p5/z, z9.d, #1"); 2618 2619 CLEANUP(); 2620} 2621 2622TEST(sve_int_compare_vectors) { 2623 SETUP(); 2624 2625 COMPARE(cmpeq(p13.VnB(), p0.Zeroing(), z26.VnB(), z10.VnD()), 2626 "cmpeq p13.b, p0/z, z26.b, z10.d"); 2627 COMPARE(cmpeq(p13.VnH(), p0.Zeroing(), z26.VnH(), z10.VnD()), 2628 "cmpeq p13.h, p0/z, z26.h, z10.d"); 2629 COMPARE(cmpeq(p13.VnS(), p0.Zeroing(), z26.VnS(), z10.VnD()), 2630 "cmpeq p13.s, p0/z, z26.s, z10.d"); 2631 COMPARE(cmpeq(p14.VnB(), p3.Zeroing(), z18.VnB(), z15.VnB()), 2632 "cmpeq p14.b, p3/z, z18.b, z15.b"); 2633 COMPARE(cmpeq(p14.VnH(), p3.Zeroing(), z18.VnH(), z15.VnH()), 2634 "cmpeq p14.h, p3/z, z18.h, 
z15.h"); 2635 COMPARE(cmpeq(p14.VnS(), p3.Zeroing(), z18.VnS(), z15.VnS()), 2636 "cmpeq p14.s, p3/z, z18.s, z15.s"); 2637 COMPARE(cmpeq(p14.VnD(), p3.Zeroing(), z18.VnD(), z15.VnD()), 2638 "cmpeq p14.d, p3/z, z18.d, z15.d"); 2639 COMPARE(cmpge(p8.VnB(), p3.Zeroing(), z13.VnB(), z0.VnD()), 2640 "cmpge p8.b, p3/z, z13.b, z0.d"); 2641 COMPARE(cmpge(p8.VnH(), p3.Zeroing(), z13.VnH(), z0.VnD()), 2642 "cmpge p8.h, p3/z, z13.h, z0.d"); 2643 COMPARE(cmpge(p8.VnS(), p3.Zeroing(), z13.VnS(), z0.VnD()), 2644 "cmpge p8.s, p3/z, z13.s, z0.d"); 2645 COMPARE(cmpge(p3.VnB(), p4.Zeroing(), z6.VnB(), z1.VnB()), 2646 "cmpge p3.b, p4/z, z6.b, z1.b"); 2647 COMPARE(cmpge(p3.VnH(), p4.Zeroing(), z6.VnH(), z1.VnH()), 2648 "cmpge p3.h, p4/z, z6.h, z1.h"); 2649 COMPARE(cmpge(p3.VnS(), p4.Zeroing(), z6.VnS(), z1.VnS()), 2650 "cmpge p3.s, p4/z, z6.s, z1.s"); 2651 COMPARE(cmpge(p3.VnD(), p4.Zeroing(), z6.VnD(), z1.VnD()), 2652 "cmpge p3.d, p4/z, z6.d, z1.d"); 2653 COMPARE(cmpgt(p4.VnB(), p2.Zeroing(), z24.VnB(), z1.VnD()), 2654 "cmpgt p4.b, p2/z, z24.b, z1.d"); 2655 COMPARE(cmpgt(p4.VnH(), p2.Zeroing(), z24.VnH(), z1.VnD()), 2656 "cmpgt p4.h, p2/z, z24.h, z1.d"); 2657 COMPARE(cmpgt(p4.VnS(), p2.Zeroing(), z24.VnS(), z1.VnD()), 2658 "cmpgt p4.s, p2/z, z24.s, z1.d"); 2659 COMPARE(cmpgt(p10.VnB(), p3.Zeroing(), z23.VnB(), z19.VnB()), 2660 "cmpgt p10.b, p3/z, z23.b, z19.b"); 2661 COMPARE(cmpgt(p10.VnH(), p3.Zeroing(), z23.VnH(), z19.VnH()), 2662 "cmpgt p10.h, p3/z, z23.h, z19.h"); 2663 COMPARE(cmpgt(p10.VnS(), p3.Zeroing(), z23.VnS(), z19.VnS()), 2664 "cmpgt p10.s, p3/z, z23.s, z19.s"); 2665 COMPARE(cmpgt(p10.VnD(), p3.Zeroing(), z23.VnD(), z19.VnD()), 2666 "cmpgt p10.d, p3/z, z23.d, z19.d"); 2667 COMPARE(cmphi(p10.VnB(), p6.Zeroing(), z6.VnB(), z11.VnD()), 2668 "cmphi p10.b, p6/z, z6.b, z11.d"); 2669 COMPARE(cmphi(p10.VnH(), p6.Zeroing(), z6.VnH(), z11.VnD()), 2670 "cmphi p10.h, p6/z, z6.h, z11.d"); 2671 COMPARE(cmphi(p10.VnS(), p6.Zeroing(), z6.VnS(), z11.VnD()), 2672 "cmphi p10.s, p6/z, z6.s, 
z11.d"); 2673 COMPARE(cmphi(p1.VnB(), p0.Zeroing(), z4.VnB(), z2.VnB()), 2674 "cmphi p1.b, p0/z, z4.b, z2.b"); 2675 COMPARE(cmphi(p1.VnH(), p0.Zeroing(), z4.VnH(), z2.VnH()), 2676 "cmphi p1.h, p0/z, z4.h, z2.h"); 2677 COMPARE(cmphi(p1.VnS(), p0.Zeroing(), z4.VnS(), z2.VnS()), 2678 "cmphi p1.s, p0/z, z4.s, z2.s"); 2679 COMPARE(cmphi(p1.VnD(), p0.Zeroing(), z4.VnD(), z2.VnD()), 2680 "cmphi p1.d, p0/z, z4.d, z2.d"); 2681 COMPARE(cmphs(p10.VnB(), p5.Zeroing(), z22.VnB(), z5.VnD()), 2682 "cmphs p10.b, p5/z, z22.b, z5.d"); 2683 COMPARE(cmphs(p10.VnH(), p5.Zeroing(), z22.VnH(), z5.VnD()), 2684 "cmphs p10.h, p5/z, z22.h, z5.d"); 2685 COMPARE(cmphs(p10.VnS(), p5.Zeroing(), z22.VnS(), z5.VnD()), 2686 "cmphs p10.s, p5/z, z22.s, z5.d"); 2687 COMPARE(cmphs(p12.VnB(), p6.Zeroing(), z20.VnB(), z24.VnB()), 2688 "cmphs p12.b, p6/z, z20.b, z24.b"); 2689 COMPARE(cmphs(p12.VnH(), p6.Zeroing(), z20.VnH(), z24.VnH()), 2690 "cmphs p12.h, p6/z, z20.h, z24.h"); 2691 COMPARE(cmphs(p12.VnS(), p6.Zeroing(), z20.VnS(), z24.VnS()), 2692 "cmphs p12.s, p6/z, z20.s, z24.s"); 2693 COMPARE(cmphs(p12.VnD(), p6.Zeroing(), z20.VnD(), z24.VnD()), 2694 "cmphs p12.d, p6/z, z20.d, z24.d"); 2695 COMPARE(cmple(p11.VnB(), p2.Zeroing(), z18.VnB(), z0.VnD()), 2696 "cmple p11.b, p2/z, z18.b, z0.d"); 2697 COMPARE(cmple(p11.VnH(), p2.Zeroing(), z18.VnH(), z0.VnD()), 2698 "cmple p11.h, p2/z, z18.h, z0.d"); 2699 COMPARE(cmple(p11.VnS(), p2.Zeroing(), z18.VnS(), z0.VnD()), 2700 "cmple p11.s, p2/z, z18.s, z0.d"); 2701 COMPARE(cmplo(p12.VnB(), p6.Zeroing(), z21.VnB(), z10.VnD()), 2702 "cmplo p12.b, p6/z, z21.b, z10.d"); 2703 COMPARE(cmplo(p12.VnH(), p6.Zeroing(), z21.VnH(), z10.VnD()), 2704 "cmplo p12.h, p6/z, z21.h, z10.d"); 2705 COMPARE(cmplo(p12.VnS(), p6.Zeroing(), z21.VnS(), z10.VnD()), 2706 "cmplo p12.s, p6/z, z21.s, z10.d"); 2707 COMPARE(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnD()), 2708 "cmpls p8.b, p4/z, z9.b, z15.d"); 2709 COMPARE(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnD()), 2710 "cmpls p8.h, 
p4/z, z9.h, z15.d"); 2711 COMPARE(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnD()), 2712 "cmpls p8.s, p4/z, z9.s, z15.d"); 2713 COMPARE(cmplt(p6.VnB(), p6.Zeroing(), z4.VnB(), z8.VnD()), 2714 "cmplt p6.b, p6/z, z4.b, z8.d"); 2715 COMPARE(cmplt(p6.VnH(), p6.Zeroing(), z4.VnH(), z8.VnD()), 2716 "cmplt p6.h, p6/z, z4.h, z8.d"); 2717 COMPARE(cmplt(p6.VnS(), p6.Zeroing(), z4.VnS(), z8.VnD()), 2718 "cmplt p6.s, p6/z, z4.s, z8.d"); 2719 COMPARE(cmpne(p1.VnB(), p6.Zeroing(), z31.VnB(), z16.VnD()), 2720 "cmpne p1.b, p6/z, z31.b, z16.d"); 2721 COMPARE(cmpne(p1.VnH(), p6.Zeroing(), z31.VnH(), z16.VnD()), 2722 "cmpne p1.h, p6/z, z31.h, z16.d"); 2723 COMPARE(cmpne(p1.VnS(), p6.Zeroing(), z31.VnS(), z16.VnD()), 2724 "cmpne p1.s, p6/z, z31.s, z16.d"); 2725 COMPARE(cmpne(p11.VnB(), p1.Zeroing(), z3.VnB(), z24.VnB()), 2726 "cmpne p11.b, p1/z, z3.b, z24.b"); 2727 COMPARE(cmpne(p11.VnH(), p1.Zeroing(), z3.VnH(), z24.VnH()), 2728 "cmpne p11.h, p1/z, z3.h, z24.h"); 2729 COMPARE(cmpne(p11.VnS(), p1.Zeroing(), z3.VnS(), z24.VnS()), 2730 "cmpne p11.s, p1/z, z3.s, z24.s"); 2731 COMPARE(cmpne(p11.VnD(), p1.Zeroing(), z3.VnD(), z24.VnD()), 2732 "cmpne p11.d, p1/z, z3.d, z24.d"); 2733 COMPARE(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnB()), 2734 "cmphs p8.b, p4/z, z15.b, z9.b"); 2735 COMPARE(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnH()), 2736 "cmphs p8.h, p4/z, z15.h, z9.h"); 2737 COMPARE(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnS()), 2738 "cmphs p8.s, p4/z, z15.s, z9.s"); 2739 COMPARE(cmpls(p8.VnD(), p4.Zeroing(), z9.VnD(), z15.VnD()), 2740 "cmphs p8.d, p4/z, z15.d, z9.d"); 2741 COMPARE(cmplo(p10.VnB(), p3.Zeroing(), z14.VnB(), z20.VnB()), 2742 "cmphi p10.b, p3/z, z20.b, z14.b"); 2743 COMPARE(cmplo(p10.VnH(), p3.Zeroing(), z14.VnH(), z20.VnH()), 2744 "cmphi p10.h, p3/z, z20.h, z14.h"); 2745 COMPARE(cmplo(p10.VnS(), p3.Zeroing(), z14.VnS(), z20.VnS()), 2746 "cmphi p10.s, p3/z, z20.s, z14.s"); 2747 COMPARE(cmplo(p10.VnD(), p3.Zeroing(), z14.VnD(), z20.VnD()), 2748 "cmphi p10.d, 
p3/z, z20.d, z14.d"); 2749 COMPARE(cmple(p12.VnB(), p2.Zeroing(), z19.VnB(), z25.VnB()), 2750 "cmpge p12.b, p2/z, z25.b, z19.b"); 2751 COMPARE(cmple(p12.VnH(), p2.Zeroing(), z19.VnH(), z25.VnH()), 2752 "cmpge p12.h, p2/z, z25.h, z19.h"); 2753 COMPARE(cmple(p12.VnS(), p2.Zeroing(), z19.VnS(), z25.VnS()), 2754 "cmpge p12.s, p2/z, z25.s, z19.s"); 2755 COMPARE(cmple(p12.VnD(), p2.Zeroing(), z19.VnD(), z25.VnD()), 2756 "cmpge p12.d, p2/z, z25.d, z19.d"); 2757 COMPARE(cmplt(p14.VnB(), p1.Zeroing(), z24.VnB(), z30.VnB()), 2758 "cmpgt p14.b, p1/z, z30.b, z24.b"); 2759 COMPARE(cmplt(p14.VnH(), p1.Zeroing(), z24.VnH(), z30.VnH()), 2760 "cmpgt p14.h, p1/z, z30.h, z24.h"); 2761 COMPARE(cmplt(p14.VnS(), p1.Zeroing(), z24.VnS(), z30.VnS()), 2762 "cmpgt p14.s, p1/z, z30.s, z24.s"); 2763 COMPARE(cmplt(p14.VnD(), p1.Zeroing(), z24.VnD(), z30.VnD()), 2764 "cmpgt p14.d, p1/z, z30.d, z24.d"); 2765 2766 CLEANUP(); 2767} 2768 2769TEST(sve_int_misc_unpredicated) { 2770 SETUP(); 2771 2772 COMPARE(fexpa(z5.VnH(), z9.VnH()), "fexpa z5.h, z9.h"); 2773 COMPARE(fexpa(z5.VnS(), z9.VnS()), "fexpa z5.s, z9.s"); 2774 COMPARE(fexpa(z5.VnD(), z9.VnD()), "fexpa z5.d, z9.d"); 2775 COMPARE(ftssel(z17.VnH(), z24.VnH(), z14.VnH()), 2776 "ftssel z17.h, z24.h, z14.h"); 2777 COMPARE(ftssel(z17.VnS(), z24.VnS(), z14.VnS()), 2778 "ftssel z17.s, z24.s, z14.s"); 2779 COMPARE(ftssel(z17.VnD(), z24.VnD(), z14.VnD()), 2780 "ftssel z17.d, z24.d, z14.d"); 2781 COMPARE(movprfx(z24, z1), "movprfx z24, z1"); 2782 2783 CLEANUP(); 2784} 2785 2786TEST(sve_int_mul_add_predicated) { 2787 SETUP(); 2788 2789 COMPARE(mad(z29.VnB(), p6.Merging(), z22.VnB(), z21.VnB()), 2790 "mad z29.b, p6/m, z22.b, z21.b"); 2791 COMPARE(mad(z29.VnH(), p6.Merging(), z22.VnH(), z21.VnH()), 2792 "mad z29.h, p6/m, z22.h, z21.h"); 2793 COMPARE(mad(z29.VnS(), p6.Merging(), z22.VnS(), z21.VnS()), 2794 "mad z29.s, p6/m, z22.s, z21.s"); 2795 COMPARE(mad(z29.VnD(), p6.Merging(), z22.VnD(), z21.VnD()), 2796 "mad z29.d, p6/m, z22.d, z21.d"); 2797 
COMPARE(mla(z23.VnB(), p1.Merging(), z21.VnB(), z23.VnB()), 2798 "mla z23.b, p1/m, z21.b, z23.b"); 2799 COMPARE(mla(z23.VnH(), p1.Merging(), z21.VnH(), z23.VnH()), 2800 "mla z23.h, p1/m, z21.h, z23.h"); 2801 COMPARE(mla(z23.VnS(), p1.Merging(), z21.VnS(), z23.VnS()), 2802 "mla z23.s, p1/m, z21.s, z23.s"); 2803 COMPARE(mla(z23.VnD(), p1.Merging(), z21.VnD(), z23.VnD()), 2804 "mla z23.d, p1/m, z21.d, z23.d"); 2805 COMPARE(mls(z4.VnB(), p6.Merging(), z17.VnB(), z28.VnB()), 2806 "mls z4.b, p6/m, z17.b, z28.b"); 2807 COMPARE(mls(z4.VnH(), p6.Merging(), z17.VnH(), z28.VnH()), 2808 "mls z4.h, p6/m, z17.h, z28.h"); 2809 COMPARE(mls(z4.VnS(), p6.Merging(), z17.VnS(), z28.VnS()), 2810 "mls z4.s, p6/m, z17.s, z28.s"); 2811 COMPARE(mls(z4.VnD(), p6.Merging(), z17.VnD(), z28.VnD()), 2812 "mls z4.d, p6/m, z17.d, z28.d"); 2813 COMPARE(msb(z27.VnB(), p7.Merging(), z29.VnB(), z1.VnB()), 2814 "msb z27.b, p7/m, z29.b, z1.b"); 2815 COMPARE(msb(z27.VnH(), p7.Merging(), z29.VnH(), z1.VnH()), 2816 "msb z27.h, p7/m, z29.h, z1.h"); 2817 COMPARE(msb(z27.VnS(), p7.Merging(), z29.VnS(), z1.VnS()), 2818 "msb z27.s, p7/m, z29.s, z1.s"); 2819 COMPARE(msb(z27.VnD(), p7.Merging(), z29.VnD(), z1.VnD()), 2820 "msb z27.d, p7/m, z29.d, z1.d"); 2821 2822 CLEANUP(); 2823} 2824 2825TEST(sve_int_mul_add_predicated_macro) { 2826 SETUP(); 2827 2828 COMPARE_MACRO(Mla(z0.VnB(), p1.Merging(), z0.VnB(), z2.VnB(), z4.VnB()), 2829 "mla z0.b, p1/m, z2.b, z4.b"); 2830 COMPARE_MACRO(Mla(z3.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), z5.VnH()), 2831 "mad z3.h, p2/m, z5.h, z4.h"); 2832 COMPARE_MACRO(Mla(z4.VnS(), p3.Merging(), z5.VnS(), z6.VnS(), z4.VnS()), 2833 "mad z4.s, p3/m, z6.s, z5.s"); 2834 COMPARE_MACRO(Mla(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()), 2835 "movprfx z5.d, p4/m, z6.d\n" 2836 "mla z5.d, p4/m, z7.d, z8.d"); 2837 2838 COMPARE_MACRO(Mls(z0.VnD(), p1.Merging(), z0.VnD(), z2.VnD(), z4.VnD()), 2839 "mls z0.d, p1/m, z2.d, z4.d"); 2840 COMPARE_MACRO(Mls(z3.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), 
z5.VnS()), 2841 "msb z3.s, p2/m, z5.s, z4.s"); 2842 COMPARE_MACRO(Mls(z4.VnH(), p3.Merging(), z5.VnH(), z6.VnH(), z4.VnH()), 2843 "msb z4.h, p3/m, z6.h, z5.h"); 2844 COMPARE_MACRO(Mls(z5.VnB(), p4.Merging(), z6.VnB(), z7.VnB(), z8.VnB()), 2845 "movprfx z5.b, p4/m, z6.b\n" 2846 "mls z5.b, p4/m, z7.b, z8.b"); 2847 2848 CLEANUP(); 2849} 2850 2851TEST(sve_int_mul_add_unpredicated) { 2852 SETUP(); 2853 2854 COMPARE(sdot(z13.VnS(), z12.VnB(), z12.VnB()), "sdot z13.s, z12.b, z12.b"); 2855 COMPARE(sdot(z18.VnD(), z27.VnH(), z22.VnH()), "sdot z18.d, z27.h, z22.h"); 2856 COMPARE(udot(z23.VnS(), z22.VnB(), z11.VnB()), "udot z23.s, z22.b, z11.b"); 2857 COMPARE(udot(z21.VnD(), z27.VnH(), z27.VnH()), "udot z21.d, z27.h, z27.h"); 2858 2859 CLEANUP(); 2860} 2861 2862TEST(sve_int_mul_add_unpredicated_macro) { 2863 SETUP(); 2864 2865 COMPARE_MACRO(Sdot(z0.VnS(), z0.VnS(), z2.VnB(), z4.VnB()), 2866 "sdot z0.s, z2.b, z4.b"); 2867 COMPARE_MACRO(Sdot(z3.VnD(), z4.VnD(), z3.VnH(), z5.VnH()), 2868 "movprfx z31, z4\n" 2869 "sdot z31.d, z3.h, z5.h\n" 2870 "mov z3.d, z31.d"); 2871 COMPARE_MACRO(Sdot(z4.VnS(), z5.VnS(), z6.VnB(), z4.VnB()), 2872 "movprfx z31, z5\n" 2873 "sdot z31.s, z6.b, z4.b\n" 2874 "mov z4.d, z31.d"); 2875 COMPARE_MACRO(Sdot(z6.VnD(), z7.VnD(), z8.VnH(), z9.VnH()), 2876 "movprfx z6, z7\n" 2877 "sdot z6.d, z8.h, z9.h"); 2878 COMPARE_MACRO(Sdot(z5.VnD(), z5.VnD(), z5.VnH(), z5.VnH()), 2879 "sdot z5.d, z5.h, z5.h"); 2880 2881 COMPARE_MACRO(Udot(z0.VnD(), z0.VnD(), z2.VnH(), z4.VnH()), 2882 "udot z0.d, z2.h, z4.h"); 2883 COMPARE_MACRO(Udot(z3.VnS(), z4.VnS(), z3.VnB(), z5.VnB()), 2884 "movprfx z31, z4\n" 2885 "udot z31.s, z3.b, z5.b\n" 2886 "mov z3.d, z31.d"); 2887 COMPARE_MACRO(Udot(z4.VnD(), z5.VnD(), z6.VnH(), z4.VnH()), 2888 "movprfx z31, z5\n" 2889 "udot z31.d, z6.h, z4.h\n" 2890 "mov z4.d, z31.d"); 2891 COMPARE_MACRO(Udot(z6.VnS(), z7.VnS(), z8.VnB(), z9.VnB()), 2892 "movprfx z6, z7\n" 2893 "udot z6.s, z8.b, z9.b"); 2894 COMPARE_MACRO(Udot(z5.VnS(), z5.VnS(), z5.VnB(), 
z5.VnB()), 2895 "udot z5.s, z5.b, z5.b"); 2896 CLEANUP(); 2897} 2898 2899TEST(sve_int_reduction) { 2900 SETUP(); 2901 2902 COMPARE(andv(b15, p1, z4.VnB()), "andv b15, p1, z4.b"); 2903 COMPARE(andv(h14, p2, z3.VnH()), "andv h14, p2, z3.h"); 2904 COMPARE(andv(s13, p3, z2.VnS()), "andv s13, p3, z2.s"); 2905 COMPARE(andv(d12, p4, z1.VnD()), "andv d12, p4, z1.d"); 2906 COMPARE(eorv(b12, p0, z30.VnB()), "eorv b12, p0, z30.b"); 2907 COMPARE(eorv(h11, p1, z29.VnH()), "eorv h11, p1, z29.h"); 2908 COMPARE(eorv(s10, p2, z28.VnS()), "eorv s10, p2, z28.s"); 2909 COMPARE(eorv(d9, p3, z27.VnD()), "eorv d9, p3, z27.d"); 2910 COMPARE(movprfx(z30.VnB(), p2.Zeroing(), z23.VnB()), 2911 "movprfx z30.b, p2/z, z23.b"); 2912 COMPARE(movprfx(z10.VnH(), p0.Merging(), z10.VnH()), 2913 "movprfx z10.h, p0/m, z10.h"); 2914 COMPARE(movprfx(z0.VnS(), p2.Zeroing(), z23.VnS()), 2915 "movprfx z0.s, p2/z, z23.s"); 2916 COMPARE(movprfx(z31.VnD(), p7.Merging(), z23.VnD()), 2917 "movprfx z31.d, p7/m, z23.d"); 2918 COMPARE(orv(b4, p0, z16.VnB()), "orv b4, p0, z16.b"); 2919 COMPARE(orv(h6, p2, z18.VnH()), "orv h6, p2, z18.h"); 2920 COMPARE(orv(s8, p4, z20.VnS()), "orv s8, p4, z20.s"); 2921 COMPARE(orv(d10, p6, z22.VnD()), "orv d10, p6, z22.d"); 2922 COMPARE(saddv(d20, p1, z12.VnB()), "saddv d20, p1, z12.b"); 2923 COMPARE(saddv(d22, p3, z15.VnH()), "saddv d22, p3, z15.h"); 2924 COMPARE(saddv(d24, p5, z18.VnS()), "saddv d24, p5, z18.s"); 2925 COMPARE(smaxv(b9, p3, z1.VnB()), "smaxv b9, p3, z1.b"); 2926 COMPARE(smaxv(h19, p2, z1.VnH()), "smaxv h19, p2, z1.h"); 2927 COMPARE(smaxv(s29, p1, z1.VnS()), "smaxv s29, p1, z1.s"); 2928 COMPARE(smaxv(d9, p0, z1.VnD()), "smaxv d9, p0, z1.d"); 2929 COMPARE(sminv(b8, p3, z14.VnB()), "sminv b8, p3, z14.b"); 2930 COMPARE(sminv(h18, p2, z4.VnH()), "sminv h18, p2, z4.h"); 2931 COMPARE(sminv(s28, p1, z4.VnS()), "sminv s28, p1, z4.s"); 2932 COMPARE(sminv(d8, p0, z24.VnD()), "sminv d8, p0, z24.d"); 2933 COMPARE(uaddv(d13, p0, z15.VnB()), "uaddv d13, p0, z15.b"); 2934 
COMPARE(uaddv(d15, p2, z20.VnH()), "uaddv d15, p2, z20.h"); 2935 COMPARE(uaddv(d17, p4, z25.VnS()), "uaddv d17, p4, z25.s"); 2936 COMPARE(uaddv(d19, p6, z30.VnD()), "uaddv d19, p6, z30.d"); 2937 COMPARE(umaxv(b28, p3, z4.VnB()), "umaxv b28, p3, z4.b"); 2938 COMPARE(umaxv(h18, p6, z2.VnH()), "umaxv h18, p6, z2.h"); 2939 COMPARE(umaxv(s18, p4, z29.VnS()), "umaxv s18, p4, z29.s"); 2940 COMPARE(umaxv(d28, p1, z24.VnD()), "umaxv d28, p1, z24.d"); 2941 COMPARE(uminv(b16, p3, z0.VnB()), "uminv b16, p3, z0.b"); 2942 COMPARE(uminv(h16, p2, z3.VnH()), "uminv h16, p2, z3.h"); 2943 COMPARE(uminv(s16, p1, z5.VnS()), "uminv s16, p1, z5.s"); 2944 COMPARE(uminv(d16, p7, z7.VnD()), "uminv d16, p7, z7.d"); 2945 2946 CLEANUP(); 2947} 2948 2949TEST(sve_int_unary_arithmetic_predicated) { 2950 SETUP(); 2951 2952 COMPARE(abs(z5.VnB(), p5.Merging(), z31.VnB()), "abs z5.b, p5/m, z31.b"); 2953 COMPARE(abs(z29.VnH(), p5.Merging(), z17.VnH()), "abs z29.h, p5/m, z17.h"); 2954 COMPARE(abs(z6.VnS(), p4.Merging(), z24.VnS()), "abs z6.s, p4/m, z24.s"); 2955 COMPARE(abs(z19.VnD(), p3.Merging(), z25.VnD()), "abs z19.d, p3/m, z25.d"); 2956 COMPARE(cls(z4.VnB(), p0.Merging(), z20.VnB()), "cls z4.b, p0/m, z20.b"); 2957 COMPARE(cls(z11.VnH(), p0.Merging(), z26.VnH()), "cls z11.h, p0/m, z26.h"); 2958 COMPARE(cls(z10.VnS(), p1.Merging(), z10.VnS()), "cls z10.s, p1/m, z10.s"); 2959 COMPARE(cls(z5.VnD(), p1.Merging(), z4.VnD()), "cls z5.d, p1/m, z4.d"); 2960 COMPARE(clz(z18.VnB(), p3.Merging(), z1.VnB()), "clz z18.b, p3/m, z1.b"); 2961 COMPARE(clz(z13.VnH(), p4.Merging(), z18.VnH()), "clz z13.h, p4/m, z18.h"); 2962 COMPARE(clz(z15.VnS(), p4.Merging(), z24.VnS()), "clz z15.s, p4/m, z24.s"); 2963 COMPARE(clz(z29.VnD(), p2.Merging(), z22.VnD()), "clz z29.d, p2/m, z22.d"); 2964 COMPARE(cnot(z16.VnB(), p6.Merging(), z20.VnB()), "cnot z16.b, p6/m, z20.b"); 2965 COMPARE(cnot(z10.VnH(), p5.Merging(), z12.VnH()), "cnot z10.h, p5/m, z12.h"); 2966 COMPARE(cnot(z8.VnS(), p5.Merging(), z21.VnS()), "cnot z8.s, p5/m, 
z21.s"); 2967 COMPARE(cnot(z3.VnD(), p3.Merging(), z18.VnD()), "cnot z3.d, p3/m, z18.d"); 2968 COMPARE(cnt(z29.VnB(), p3.Merging(), z7.VnB()), "cnt z29.b, p3/m, z7.b"); 2969 COMPARE(cnt(z3.VnH(), p6.Merging(), z31.VnH()), "cnt z3.h, p6/m, z31.h"); 2970 COMPARE(cnt(z2.VnS(), p4.Merging(), z16.VnS()), "cnt z2.s, p4/m, z16.s"); 2971 COMPARE(cnt(z0.VnD(), p0.Merging(), z24.VnD()), "cnt z0.d, p0/m, z24.d"); 2972 COMPARE(fabs(z17.VnH(), p7.Merging(), z15.VnH()), "fabs z17.h, p7/m, z15.h"); 2973 COMPARE(fabs(z18.VnS(), p0.Merging(), z29.VnS()), "fabs z18.s, p0/m, z29.s"); 2974 COMPARE(fabs(z17.VnD(), p1.Merging(), z9.VnD()), "fabs z17.d, p1/m, z9.d"); 2975 COMPARE(fneg(z25.VnH(), p1.Merging(), z28.VnH()), "fneg z25.h, p1/m, z28.h"); 2976 COMPARE(fneg(z5.VnS(), p1.Merging(), z25.VnS()), "fneg z5.s, p1/m, z25.s"); 2977 COMPARE(fneg(z6.VnD(), p1.Merging(), z17.VnD()), "fneg z6.d, p1/m, z17.d"); 2978 COMPARE(neg(z25.VnB(), p4.Merging(), z8.VnB()), "neg z25.b, p4/m, z8.b"); 2979 COMPARE(neg(z30.VnH(), p3.Merging(), z23.VnH()), "neg z30.h, p3/m, z23.h"); 2980 COMPARE(neg(z7.VnS(), p2.Merging(), z26.VnS()), "neg z7.s, p2/m, z26.s"); 2981 COMPARE(neg(z21.VnD(), p3.Merging(), z5.VnD()), "neg z21.d, p3/m, z5.d"); 2982 COMPARE(not_(z24.VnB(), p1.Merging(), z27.VnB()), "not z24.b, p1/m, z27.b"); 2983 COMPARE(not_(z31.VnH(), p6.Merging(), z19.VnH()), "not z31.h, p6/m, z19.h"); 2984 COMPARE(not_(z18.VnS(), p5.Merging(), z13.VnS()), "not z18.s, p5/m, z13.s"); 2985 COMPARE(not_(z12.VnD(), p2.Merging(), z28.VnD()), "not z12.d, p2/m, z28.d"); 2986 COMPARE(sxtb(z19.VnH(), p7.Merging(), z3.VnH()), "sxtb z19.h, p7/m, z3.h"); 2987 COMPARE(sxtb(z3.VnS(), p1.Merging(), z17.VnS()), "sxtb z3.s, p1/m, z17.s"); 2988 COMPARE(sxtb(z27.VnD(), p0.Merging(), z12.VnD()), "sxtb z27.d, p0/m, z12.d"); 2989 COMPARE(sxth(z6.VnS(), p1.Merging(), z17.VnS()), "sxth z6.s, p1/m, z17.s"); 2990 COMPARE(sxth(z8.VnD(), p6.Merging(), z2.VnD()), "sxth z8.d, p6/m, z2.d"); 2991 COMPARE(sxtw(z13.VnD(), p3.Merging(), 
z27.VnD()), "sxtw z13.d, p3/m, z27.d"); 2992 COMPARE(uxtb(z23.VnH(), p3.Merging(), z21.VnH()), "uxtb z23.h, p3/m, z21.h"); 2993 COMPARE(uxtb(z0.VnS(), p2.Merging(), z13.VnS()), "uxtb z0.s, p2/m, z13.s"); 2994 COMPARE(uxtb(z1.VnD(), p3.Merging(), z13.VnD()), "uxtb z1.d, p3/m, z13.d"); 2995 COMPARE(uxth(z27.VnS(), p0.Merging(), z29.VnS()), "uxth z27.s, p0/m, z29.s"); 2996 COMPARE(uxth(z22.VnD(), p4.Merging(), z20.VnD()), "uxth z22.d, p4/m, z20.d"); 2997 COMPARE(uxtw(z14.VnD(), p1.Merging(), z13.VnD()), "uxtw z14.d, p1/m, z13.d"); 2998 2999 // Check related but undefined encodings. 3000 COMPARE(dci(0x0410a000), "unallocated (Unallocated)"); // sxtb b 3001 COMPARE(dci(0x0412a000), "unallocated (Unallocated)"); // sxth b 3002 COMPARE(dci(0x0452a000), "unallocated (Unallocated)"); // sxth h 3003 COMPARE(dci(0x0414a000), "unallocated (Unallocated)"); // sxtw b 3004 COMPARE(dci(0x0454a000), "unallocated (Unallocated)"); // sxtw h 3005 COMPARE(dci(0x0494a000), "unallocated (Unallocated)"); // sxtw s 3006 3007 COMPARE(dci(0x0411a000), "unallocated (Unallocated)"); // uxtb b 3008 COMPARE(dci(0x0413a000), "unallocated (Unallocated)"); // uxth b 3009 COMPARE(dci(0x0453a000), "unallocated (Unallocated)"); // uxth h 3010 COMPARE(dci(0x0415a000), "unallocated (Unallocated)"); // uxtw b 3011 COMPARE(dci(0x0455a000), "unallocated (Unallocated)"); // uxtw h 3012 COMPARE(dci(0x0495a000), "unallocated (Unallocated)"); // uxtw s 3013 3014 COMPARE(dci(0x041ca000), "unallocated (Unallocated)"); // fabs b 3015 COMPARE(dci(0x041da000), "unallocated (Unallocated)"); // fneg b 3016 3017 CLEANUP(); 3018} 3019 3020TEST(sve_neg_macro) { 3021 SETUP(); 3022 3023 COMPARE_MACRO(Neg(z0.VnB(), z0.VnB()), "subr z0.b, z0.b, #0"); 3024 COMPARE_MACRO(Neg(z1.VnH(), z2.VnH()), 3025 "movprfx z1, z2\n" 3026 "subr z1.h, z1.h, #0"); 3027 COMPARE_MACRO(Neg(z29.VnS(), z29.VnS()), "subr z29.s, z29.s, #0"); 3028 COMPARE_MACRO(Neg(z30.VnD(), z31.VnD()), 3029 "movprfx z30, z31\n" 3030 "subr z30.d, z30.d, #0"); 3031 
3032 CLEANUP(); 3033} 3034 3035TEST(sve_cpy_fcpy_imm) { 3036 SETUP(); 3037 3038 COMPARE(cpy(z25.VnB(), p13.Zeroing(), -1), "mov z25.b, p13/z, #-1"); 3039 COMPARE(cpy(z25.VnB(), p13.Merging(), -1), "mov z25.b, p13/m, #-1"); 3040 COMPARE(cpy(z25.VnH(), p13.Merging(), 127), "mov z25.h, p13/m, #127"); 3041 COMPARE(cpy(z25.VnS(), p13.Merging(), 10752), 3042 "mov z25.s, p13/m, #42, lsl #8"); 3043 COMPARE(cpy(z25.VnD(), p13.Merging(), -10752), 3044 "mov z25.d, p13/m, #-42, lsl #8"); 3045 COMPARE(mov(z25.VnD(), p13.Merging(), -10752), 3046 "mov z25.d, p13/m, #-42, lsl #8"); 3047 3048 COMPARE(fcpy(z20.VnH(), p11.Merging(), 29.0), 3049 "fmov z20.h, p11/m, #0x3d (29.0000)"); 3050 COMPARE(fmov(z20.VnS(), p11.Merging(), -31.0), 3051 "fmov z20.s, p11/m, #0xbf (-31.0000)"); 3052 COMPARE(fcpy(z20.VnD(), p11.Merging(), 1.0), 3053 "fmov z20.d, p11/m, #0x70 (1.0000)"); 3054 3055 CLEANUP(); 3056} 3057 3058TEST(sve_fmov_zero) { 3059 SETUP(); 3060 3061 // Predicated `fmov` is an alias for either `fcpy` or `cpy`. 3062 COMPARE(fmov(z13.VnS(), p0.Merging(), 1.0), 3063 "fmov z13.s, p0/m, #0x70 (1.0000)"); 3064 COMPARE(fmov(z13.VnS(), p0.Merging(), 0.0), "mov z13.s, p0/m, #0"); 3065 COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), 1.0), 3066 "fmov z13.d, p0/m, #0x70 (1.0000)"); 3067 COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), 0.0), "mov z13.d, p0/m, #0"); 3068 3069 // Unpredicated `fmov` is an alias for either `fdup` or `dup`. 3070 COMPARE(fmov(z13.VnS(), 1.0), "fmov z13.s, #0x70 (1.0000)"); 3071 COMPARE(fmov(z13.VnS(), 0.0), "mov z13.s, #0"); 3072 COMPARE_MACRO(Fmov(z13.VnD(), 1.0), "fmov z13.d, #0x70 (1.0000)"); 3073 COMPARE_MACRO(Fmov(z13.VnD(), 0.0), "mov z13.d, #0"); 3074 3075 // -0.0 cannot be encoded by this alias, but is handled by the MacroAssembler. 
3076 COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), -0.0), 3077 "mov x16, #0x8000000000000000\n" 3078 "mov z13.d, p0/m, x16"); 3079 COMPARE_MACRO(Fmov(z13.VnD(), -0.0), "mov z13.d, #0x8000000000000000"); 3080 3081 CLEANUP(); 3082} 3083 3084TEST(sve_int_wide_imm_unpredicated) { 3085 SETUP(); 3086 3087 COMPARE(add(z12.VnB(), z12.VnB(), 0), "add z12.b, z12.b, #0"); 3088 COMPARE(add(z13.VnH(), z13.VnH(), 255), "add z13.h, z13.h, #255"); 3089 COMPARE(add(z14.VnS(), z14.VnS(), 256), "add z14.s, z14.s, #1, lsl #8"); 3090 COMPARE(add(z15.VnD(), z15.VnD(), 255 * 256), 3091 "add z15.d, z15.d, #255, lsl #8"); 3092 3093 COMPARE(dup(z6.VnB(), -128), "mov z6.b, #-128"); 3094 COMPARE(dup(z7.VnH(), 127), "mov z7.h, #127"); 3095 COMPARE(dup(z8.VnS(), -128 * 256), "mov z8.s, #-128, lsl #8"); 3096 COMPARE(dup(z9.VnD(), 127 * 256), "mov z9.d, #127, lsl #8"); 3097 COMPARE(mov(z8.VnS(), -128 * 256, -1), "mov z8.s, #-128, lsl #8"); 3098 COMPARE(mov(z9.VnD(), 127 * 256, -1), "mov z9.d, #127, lsl #8"); 3099 3100 COMPARE(sqadd(z7.VnB(), z7.VnB(), 124), "sqadd z7.b, z7.b, #124"); 3101 COMPARE(sqadd(z8.VnH(), z8.VnH(), 131), "sqadd z8.h, z8.h, #131"); 3102 COMPARE(sqadd(z9.VnS(), z9.VnS(), 252 * 256), 3103 "sqadd z9.s, z9.s, #252, lsl #8"); 3104 COMPARE(sqadd(z10.VnD(), z10.VnD(), 20 * 256), 3105 "sqadd z10.d, z10.d, #20, lsl #8"); 3106 3107 COMPARE(sqsub(z31.VnB(), z31.VnB(), 132), "sqsub z31.b, z31.b, #132"); 3108 COMPARE(sqsub(z30.VnH(), z30.VnH(), 251), "sqsub z30.h, z30.h, #251"); 3109 COMPARE(sqsub(z29.VnS(), z29.VnS(), 21 * 256), 3110 "sqsub z29.s, z29.s, #21, lsl #8"); 3111 COMPARE(sqsub(z28.VnD(), z28.VnD(), 123 * 256), 3112 "sqsub z28.d, z28.d, #123, lsl #8"); 3113 3114 COMPARE(subr(z20.VnB(), z20.VnB(), 250), "subr z20.b, z20.b, #250"); 3115 COMPARE(subr(z21.VnH(), z21.VnH(), 22), "subr z21.h, z21.h, #22"); 3116 COMPARE(subr(z22.VnS(), z22.VnS(), 122 * 256), 3117 "subr z22.s, z22.s, #122, lsl #8"); 3118 COMPARE(subr(z23.VnD(), z23.VnD(), 133 * 256), 3119 "subr z23.d, z23.d, #133, lsl 
#8"); 3120 3121 COMPARE(sub(z18.VnB(), z18.VnB(), 23), "sub z18.b, z18.b, #23"); 3122 COMPARE(sub(z19.VnH(), z19.VnH(), 121), "sub z19.h, z19.h, #121"); 3123 COMPARE(sub(z20.VnS(), z20.VnS(), 134 * 256), 3124 "sub z20.s, z20.s, #134, lsl #8"); 3125 COMPARE(sub(z21.VnD(), z21.VnD(), 249 * 256), 3126 "sub z21.d, z21.d, #249, lsl #8"); 3127 3128 COMPARE(uqadd(z21.VnB(), z21.VnB(), 246), "uqadd z21.b, z21.b, #246"); 3129 COMPARE(uqadd(z22.VnH(), z22.VnH(), 26), "uqadd z22.h, z22.h, #26"); 3130 COMPARE(uqadd(z23.VnS(), z23.VnS(), 118 * 256), 3131 "uqadd z23.s, z23.s, #118, lsl #8"); 3132 COMPARE(uqadd(z24.VnD(), z24.VnD(), 137 * 256), 3133 "uqadd z24.d, z24.d, #137, lsl #8"); 3134 3135 COMPARE(uqsub(z10.VnB(), z10.VnB(), 27), "uqsub z10.b, z10.b, #27"); 3136 COMPARE(uqsub(z11.VnH(), z11.VnH(), 117), "uqsub z11.h, z11.h, #117"); 3137 COMPARE(uqsub(z12.VnS(), z12.VnS(), 138 * 256), 3138 "uqsub z12.s, z12.s, #138, lsl #8"); 3139 COMPARE(uqsub(z13.VnD(), z13.VnD(), 245 * 256), 3140 "uqsub z13.d, z13.d, #245, lsl #8"); 3141 3142 COMPARE(fdup(z26.VnH(), Float16(-5.0f)), "fmov z26.h, #0x94 (-5.0000)"); 3143 COMPARE(fdup(z27.VnS(), -13.0f), "fmov z27.s, #0xaa (-13.0000)"); 3144 COMPARE(fdup(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)"); 3145 COMPARE(fmov(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)"); 3146 3147 COMPARE(mul(z15.VnB(), z15.VnB(), -128), "mul z15.b, z15.b, #-128"); 3148 COMPARE(mul(z16.VnH(), z16.VnH(), -1), "mul z16.h, z16.h, #-1"); 3149 COMPARE(mul(z17.VnS(), z17.VnS(), 17), "mul z17.s, z17.s, #17"); 3150 COMPARE(mul(z18.VnD(), z18.VnD(), 127), "mul z18.d, z18.d, #127"); 3151 3152 COMPARE(smax(z7.VnB(), z7.VnB(), -2), "smax z7.b, z7.b, #-2"); 3153 COMPARE(smax(z8.VnH(), z8.VnH(), 18), "smax z8.h, z8.h, #18"); 3154 COMPARE(smax(z9.VnS(), z9.VnS(), 126), "smax z9.s, z9.s, #126"); 3155 COMPARE(smax(z10.VnD(), z10.VnD(), -127), "smax z10.d, z10.d, #-127"); 3156 3157 COMPARE(smin(z5.VnB(), z5.VnB(), 19), "smin z5.b, z5.b, #19"); 3158 COMPARE(smin(z6.VnH(), 
z6.VnH(), 125), "smin z6.h, z6.h, #125"); 3159 COMPARE(smin(z7.VnS(), z7.VnS(), -126), "smin z7.s, z7.s, #-126"); 3160 COMPARE(smin(z8.VnD(), z8.VnD(), -3), "smin z8.d, z8.d, #-3"); 3161 3162 COMPARE(umax(z15.VnB(), z15.VnB(), 120), "umax z15.b, z15.b, #120"); 3163 COMPARE(umax(z16.VnH(), z16.VnH(), 135), "umax z16.h, z16.h, #135"); 3164 COMPARE(umax(z17.VnS(), z17.VnS(), 248), "umax z17.s, z17.s, #248"); 3165 COMPARE(umax(z18.VnD(), z18.VnD(), 24), "umax z18.d, z18.d, #24"); 3166 3167 COMPARE(umin(z22.VnB(), z22.VnB(), 136), "umin z22.b, z22.b, #136"); 3168 COMPARE(umin(z23.VnH(), z23.VnH(), 247), "umin z23.h, z23.h, #247"); 3169 COMPARE(umin(z24.VnS(), z24.VnS(), 25), "umin z24.s, z24.s, #25"); 3170 COMPARE(umin(z25.VnD(), z25.VnD(), 119), "umin z25.d, z25.d, #119"); 3171 3172 CLEANUP(); 3173} 3174 3175TEST(sve_add_sub_imm_macro) { 3176 SETUP(); 3177 3178 // The MacroAssembler automatically generates movprfx where necessary. 3179 COMPARE_MACRO(Add(z12.VnB(), z13.VnB(), 0), 3180 "movprfx z12, z13\n" 3181 "add z12.b, z12.b, #0"); 3182 COMPARE_MACRO(Sub(z20.VnB(), 250, z2.VnB()), 3183 "movprfx z20, z2\n" 3184 "subr z20.b, z20.b, #250"); 3185 COMPARE_MACRO(Sub(z19.VnH(), z4.VnH(), 121), 3186 "movprfx z19, z4\n" 3187 "sub z19.h, z19.h, #121"); 3188 3189 // Add and Sub can make use of two's complement equivalences. 3190 COMPARE_MACRO(Add(z13.VnH(), z13.VnH(), 0xffff), "sub z13.h, z13.h, #1"); 3191 COMPARE_MACRO(Add(z15.VnD(), z15.VnD(), 0xffffffffffffffd6), 3192 "sub z15.d, z15.d, #42"); 3193 COMPARE_MACRO(Add(z16.VnH(), z16.VnH(), 0xff00), 3194 "add z16.h, z16.h, #255, lsl #8"); 3195 COMPARE_MACRO(Sub(z17.VnH(), z17.VnH(), 0xfffe), "add z17.h, z17.h, #2"); 3196 COMPARE_MACRO(Sub(z14.VnB(), z14.VnB(), 0x80), "sub z14.b, z14.b, #128"); 3197 3198 // The MacroAssembler automatically generates dup if an immediate isn't 3199 // encodable. 
3200 COMPARE_MACRO(Add(z15.VnD(), z20.VnD(), 1234567890), 3201 "mov x16, #0x2d2\n" 3202 "movk x16, #0x4996, lsl #16\n" 3203 "mov z31.d, x16\n" 3204 "add z15.d, z20.d, z31.d"); 3205 COMPARE_MACRO(Sub(z22.VnS(), 256 * 256, z2.VnS()), 3206 "mov z31.s, #0x10000\n" 3207 "sub z22.s, z31.s, z2.s"); 3208 COMPARE_MACRO(Sub(z21.VnD(), z11.VnD(), 111111111111), 3209 "mov x16, #0x1c7\n" 3210 "movk x16, #0xdebd, lsl #16\n" 3211 "movk x16, #0x19, lsl #32\n" 3212 "mov z31.d, x16\n" 3213 "sub z21.d, z11.d, z31.d"); 3214 3215 CLEANUP(); 3216} 3217 3218TEST(sve_uqadd_uqsub_imm_macro) { 3219 SETUP(); 3220 3221 // The MacroAssembler automatically generates movprfx where necessary. 3222 COMPARE_MACRO(Uqadd(z21.VnB(), z14.VnB(), 246), 3223 "movprfx z21, z14\n" 3224 "uqadd z21.b, z21.b, #246"); 3225 COMPARE_MACRO(Uqsub(z10.VnB(), z27.VnB(), 27), 3226 "movprfx z10, z27\n" 3227 "uqsub z10.b, z10.b, #27"); 3228 COMPARE_MACRO(Uqadd(z1.VnS(), z2.VnS(), 42 * 256), 3229 "movprfx z1, z2\n" 3230 "uqadd z1.s, z1.s, #42, lsl #8"); 3231 3232 COMPARE_MACRO(Uqsub(z3.VnB(), z3.VnB(), 0xff), "uqsub z3.b, z3.b, #255"); 3233 COMPARE_MACRO(Uqadd(z8.VnS(), z8.VnS(), 0xff00), 3234 "uqadd z8.s, z8.s, #255, lsl #8"); 3235 3236 CLEANUP(); 3237} 3238 3239TEST(sve_sqadd_sqsub_imm_macro) { 3240 SETUP(); 3241 3242 // The MacroAssembler automatically generates movprfx where necessary. 
3243 COMPARE_MACRO(Sqadd(z21.VnB(), z14.VnB(), 123), 3244 "movprfx z21, z14\n" 3245 "sqadd z21.b, z21.b, #123"); 3246 COMPARE_MACRO(Sqsub(z10.VnB(), z27.VnB(), 27), 3247 "movprfx z10, z27\n" 3248 "sqsub z10.b, z10.b, #27"); 3249 COMPARE_MACRO(Sqadd(z22.VnS(), z15.VnS(), 256), 3250 "movprfx z22, z15\n" 3251 "sqadd z22.s, z22.s, #1, lsl #8"); 3252 3253 COMPARE_MACRO(Sqsub(z3.VnB(), z3.VnB(), 0xff), "sqsub z3.b, z3.b, #255"); 3254 COMPARE_MACRO(Sqadd(z4.VnH(), z4.VnH(), 0xff00), 3255 "sqadd z4.h, z4.h, #255, lsl #8"); 3256 3257 CLEANUP(); 3258} 3259 3260TEST(sve_int_wide_imm_unpredicated_macro) { 3261 SETUP(); 3262 3263 // The MacroAssembler automatically generates movprfx where it can. 3264 COMPARE_MACRO(Mul(z1.VnD(), z18.VnD(), 127), 3265 "movprfx z1, z18\n" 3266 "mul z1.d, z1.d, #127"); 3267 COMPARE_MACRO(Smax(z3.VnS(), z9.VnS(), 126), 3268 "movprfx z3, z9\n" 3269 "smax z3.s, z3.s, #126"); 3270 COMPARE_MACRO(Smin(z26.VnH(), z6.VnH(), 125), 3271 "movprfx z26, z6\n" 3272 "smin z26.h, z26.h, #125"); 3273 COMPARE_MACRO(Umax(z25.VnB(), z15.VnB(), 120), 3274 "movprfx z25, z15\n" 3275 "umax z25.b, z25.b, #120"); 3276 COMPARE_MACRO(Umin(z13.VnD(), z25.VnD(), 119), 3277 "movprfx z13, z25\n" 3278 "umin z13.d, z13.d, #119"); 3279 COMPARE_MACRO(Dup(z8.VnS(), -7654321), 3280 "mov w16, #0x344f\n" 3281 "movk w16, #0xff8b, lsl #16\n" 3282 "mov z8.s, w16"); 3283 3284 // The MacroAssembler automatically generates dup if an immediate isn't 3285 // encodable, when it is out-of-range for example. 
3286 COMPARE_MACRO(Dup(z9.VnD(), 0x80000000), "mov z9.d, #0x80000000"); 3287 COMPARE_MACRO(Mov(z9.VnD(), 0x80000000), "mov z9.d, #0x80000000"); 3288 COMPARE_MACRO(Fdup(z26.VnH(), Float16(0.0)), "mov z26.h, #0"); 3289 COMPARE_MACRO(Fdup(z26.VnH(), Float16(0.0)), "mov z26.h, #0"); 3290 COMPARE_MACRO(Fdup(z27.VnS(), 255.0f), 3291 "mov w16, #0x437f0000\n" 3292 "mov z27.s, w16"); 3293 COMPARE_MACRO(Fdup(z28.VnD(), 12.3456), 3294 "mov x16, #0xfec5\n" 3295 "movk x16, #0x7bb2, lsl #16\n" 3296 "movk x16, #0xb0f2, lsl #32\n" 3297 "movk x16, #0x4028, lsl #48\n" 3298 "mov z28.d, x16"); 3299 COMPARE_MACRO(Fmov(z26.VnH(), Float16(0.0)), "mov z26.h, #0"); 3300 COMPARE_MACRO(Fmov(z26.VnH(), Float16(0.0)), "mov z26.h, #0"); 3301 COMPARE_MACRO(Fmov(z27.VnS(), 255.0f), 3302 "mov w16, #0x437f0000\n" 3303 "mov z27.s, w16"); 3304 COMPARE_MACRO(Fmov(z28.VnD(), 12.3456), 3305 "mov x16, #0xfec5\n" 3306 "movk x16, #0x7bb2, lsl #16\n" 3307 "movk x16, #0xb0f2, lsl #32\n" 3308 "movk x16, #0x4028, lsl #48\n" 3309 "mov z28.d, x16"); 3310 3311 // Only predicated version of instruction is supported for unencodable 3312 // immediate. 
3313 { 3314 UseScratchRegisterScope temps(&masm); 3315 temps.Include(p7, p15); 3316 COMPARE_MACRO(Mul(z18.VnD(), z18.VnD(), -1270000000), 3317 "ptrue p7.d\n" 3318 "mov x16, #0xffffffffffff5680\n" 3319 "movk x16, #0xb44d, lsl #16\n" 3320 "mov z31.d, x16\n" 3321 "mul z18.d, p7/m, z18.d, z31.d"); 3322 COMPARE_MACRO(Smax(z9.VnS(), z11.VnS(), -0x70000001), 3323 "ptrue p7.s\n" 3324 "mov z9.s, #0x8fffffff\n" 3325 "smax z9.s, p7/m, z9.s, z11.s"); 3326 COMPARE_MACRO(Smin(z6.VnH(), z6.VnH(), -0x7eef), 3327 "ptrue p7.h\n" 3328 "mov w16, #0xffff8111\n" 3329 "mov z31.h, w16\n" 3330 "smin z6.h, p7/m, z6.h, z31.h"); 3331 COMPARE_MACRO(Umax(z15.VnH(), z7.VnH(), 0xfeee), 3332 "ptrue p7.h\n" 3333 "mov w16, #0xfeee\n" 3334 "mov z15.h, w16\n" 3335 "umax z15.h, p7/m, z15.h, z7.h"); 3336 COMPARE_MACRO(Umin(z25.VnD(), z25.VnD(), 123123123), 3337 "ptrue p7.d\n" 3338 "mov x16, #0xb5b3\n" 3339 "movk x16, #0x756, lsl #16\n" 3340 "mov z31.d, x16\n" 3341 "umin z25.d, p7/m, z25.d, z31.d"); 3342 } 3343} 3344 3345TEST(sve_mem_32bit_gather_vector_plus_immediate_macro) { 3346 SETUP(); 3347 3348 // Simple cases. 
3349 COMPARE_MACRO(Ld1b(z4.VnS(), p4.Zeroing(), SVEMemOperand(z12.VnS(), 31)), 3350 "ld1b {z4.s}, p4/z, [z12.s, #31]"); 3351 COMPARE_MACRO(Ld1h(z10.VnS(), p6.Zeroing(), SVEMemOperand(z4.VnS(), 10)), 3352 "ld1h {z10.s}, p6/z, [z4.s, #10]"); 3353 COMPARE_MACRO(Ld1w(z16.VnS(), p0.Zeroing(), SVEMemOperand(z26.VnS(), 124)), 3354 "ld1w {z16.s}, p0/z, [z26.s, #124]"); 3355 COMPARE_MACRO(Ld1sb(z9.VnS(), p3.Zeroing(), SVEMemOperand(z22.VnS())), 3356 "ld1sb {z9.s}, p3/z, [z22.s]"); 3357 COMPARE_MACRO(Ld1sh(z22.VnS(), p1.Zeroing(), SVEMemOperand(z9.VnS(), 62)), 3358 "ld1sh {z22.s}, p1/z, [z9.s, #62]"); 3359 COMPARE_MACRO(Ldff1b(z17.VnS(), p2.Zeroing(), SVEMemOperand(z29.VnS(), 0)), 3360 "ldff1b {z17.s}, p2/z, [z29.s]"); 3361 COMPARE_MACRO(Ldff1h(z16.VnS(), p3.Zeroing(), SVEMemOperand(z15.VnS())), 3362 "ldff1h {z16.s}, p3/z, [z15.s]"); 3363 COMPARE_MACRO(Ldff1w(z7.VnS(), p3.Zeroing(), SVEMemOperand(z20.VnS(), 4)), 3364 "ldff1w {z7.s}, p3/z, [z20.s, #4]"); 3365 COMPARE_MACRO(Ldff1sb(z7.VnS(), p1.Zeroing(), SVEMemOperand(z10.VnS(), 21)), 3366 "ldff1sb {z7.s}, p1/z, [z10.s, #21]"); 3367 COMPARE_MACRO(Ldff1sh(z0.VnS(), p1.Zeroing(), SVEMemOperand(z23.VnS(), 42)), 3368 "ldff1sh {z0.s}, p1/z, [z23.s, #42]"); 3369 3370 // Unencodable cases use a scalar-plus-vector form. 
3371 COMPARE_MACRO(Ld1b(z17.VnS(), p2.Zeroing(), SVEMemOperand(z11.VnS(), 32)), 3372 "mov x16, #0x20\n" 3373 "ld1b {z17.s}, p2/z, [x16, z11.s, uxtw]"); 3374 COMPARE_MACRO(Ld1h(z11.VnS(), p1.Zeroing(), SVEMemOperand(z8.VnS(), -2)), 3375 "mov x16, #0xfffffffffffffffe\n" 3376 "ld1h {z11.s}, p1/z, [x16, z8.s, uxtw]"); 3377 COMPARE_MACRO(Ld1w(z5.VnS(), p6.Zeroing(), SVEMemOperand(z9.VnS(), 42)), 3378 "mov x16, #0x2a\n" 3379 "ld1w {z5.s}, p6/z, [x16, z9.s, uxtw]"); 3380 COMPARE_MACRO(Ld1sb(z28.VnS(), p5.Zeroing(), SVEMemOperand(z12.VnS(), -1)), 3381 "mov x16, #0xffffffffffffffff\n" 3382 "ld1sb {z28.s}, p5/z, [x16, z12.s, uxtw]"); 3383 COMPARE_MACRO(Ld1sh(z30.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), 64)), 3384 "mov x16, #0x40\n" 3385 "ld1sh {z30.s}, p5/z, [x16, z21.s, uxtw]"); 3386 COMPARE_MACRO(Ldff1b(z19.VnS(), p6.Zeroing(), SVEMemOperand(z24.VnS(), 32)), 3387 "mov x16, #0x20\n" 3388 "ldff1b {z19.s}, p6/z, [x16, z24.s, uxtw]"); 3389 COMPARE_MACRO(Ldff1h(z5.VnS(), p1.Zeroing(), SVEMemOperand(z24.VnS(), -2)), 3390 "mov x16, #0xfffffffffffffffe\n" 3391 "ldff1h {z5.s}, p1/z, [x16, z24.s, uxtw]"); 3392 COMPARE_MACRO(Ldff1w(z17.VnS(), p6.Zeroing(), SVEMemOperand(z18.VnS(), 42)), 3393 "mov x16, #0x2a\n" 3394 "ldff1w {z17.s}, p6/z, [x16, z18.s, uxtw]"); 3395 COMPARE_MACRO(Ldff1sb(z31.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), -1)), 3396 "mov x16, #0xffffffffffffffff\n" 3397 "ldff1sb {z31.s}, p5/z, [x16, z21.s, uxtw]"); 3398 COMPARE_MACRO(Ldff1sh(z6.VnS(), p5.Zeroing(), SVEMemOperand(z22.VnS(), 64)), 3399 "mov x16, #0x40\n" 3400 "ldff1sh {z6.s}, p5/z, [x16, z22.s, uxtw]"); 3401 3402 CLEANUP(); 3403} 3404 3405TEST(sve_mem_32bit_gather_and_unsized_contiguous) { 3406 SETUP(); 3407 3408 // 32-bit gather load in scalar-plus-vector vform with unscaled offset. 
3409 COMPARE(ld1b(z9.VnS(), p5.Zeroing(), SVEMemOperand(x2, z1.VnS(), SXTW)), 3410 "ld1b {z9.s}, p5/z, [x2, z1.s, sxtw]"); 3411 COMPARE(ld1b(z9.VnS(), p5.Zeroing(), SVEMemOperand(sp, z1.VnS(), UXTW)), 3412 "ld1b {z9.s}, p5/z, [sp, z1.s, uxtw]"); 3413 COMPARE(ld1h(z17.VnS(), p2.Zeroing(), SVEMemOperand(x11, z24.VnS(), SXTW)), 3414 "ld1h {z17.s}, p2/z, [x11, z24.s, sxtw]"); 3415 COMPARE(ld1w(z22.VnS(), p6.Zeroing(), SVEMemOperand(sp, z5.VnS(), UXTW)), 3416 "ld1w {z22.s}, p6/z, [sp, z5.s, uxtw]"); 3417 COMPARE(ld1sb(z12.VnS(), p7.Zeroing(), SVEMemOperand(x17, z23.VnS(), UXTW)), 3418 "ld1sb {z12.s}, p7/z, [x17, z23.s, uxtw]"); 3419 COMPARE(ld1sb(z22.VnS(), p3.Zeroing(), SVEMemOperand(x23, z23.VnS(), SXTW)), 3420 "ld1sb {z22.s}, p3/z, [x23, z23.s, sxtw]"); 3421 COMPARE(ld1sh(z11.VnS(), p2.Zeroing(), SVEMemOperand(x18, z10.VnS(), UXTW)), 3422 "ld1sh {z11.s}, p2/z, [x18, z10.s, uxtw]"); 3423 3424 // 32-bit gather load in scalar-plus-vector vform with scaled offset. 3425 COMPARE(ld1h(z9.VnS(), p3.Zeroing(), SVEMemOperand(sp, z4.VnS(), UXTW, 1)), 3426 "ld1h {z9.s}, p3/z, [sp, z4.s, uxtw #1]"); 3427 COMPARE(ld1w(z0.VnS(), p6.Zeroing(), SVEMemOperand(x28, z21.VnS(), SXTW, 2)), 3428 "ld1w {z0.s}, p6/z, [x28, z21.s, sxtw #2]"); 3429 COMPARE(ld1sh(z11.VnS(), p4.Zeroing(), SVEMemOperand(sp, z0.VnS(), SXTW, 1)), 3430 "ld1sh {z11.s}, p4/z, [sp, z0.s, sxtw #1]"); 3431 3432 // 32-bit gather first-fault load in scalar-plus-vector vform with 32-bit 3433 // unpacked unscaled offset. 
3434 COMPARE(ldff1b(z18.VnS(), p6.Zeroing(), SVEMemOperand(x27, z24.VnS(), UXTW)), 3435 "ldff1b {z18.s}, p6/z, [x27, z24.s, uxtw]"); 3436 COMPARE(ldff1h(z28.VnS(), p6.Zeroing(), SVEMemOperand(x1, z30.VnS(), UXTW)), 3437 "ldff1h {z28.s}, p6/z, [x1, z30.s, uxtw]"); 3438 COMPARE(ldff1w(z12.VnS(), p3.Zeroing(), SVEMemOperand(x25, z27.VnS(), SXTW)), 3439 "ldff1w {z12.s}, p3/z, [x25, z27.s, sxtw]"); 3440 COMPARE(ldff1sb(z15.VnS(), p5.Zeroing(), SVEMemOperand(x5, z14.VnS(), SXTW)), 3441 "ldff1sb {z15.s}, p5/z, [x5, z14.s, sxtw]"); 3442 COMPARE(ldff1sh(z18.VnS(), p4.Zeroing(), SVEMemOperand(x25, z25.VnS(), SXTW)), 3443 "ldff1sh {z18.s}, p4/z, [x25, z25.s, sxtw]"); 3444 3445 // 32-bit gather first-fault load in scalar-plus-vector vform with 32-bit 3446 // scaled offset. 3447 COMPARE(ldff1h(z25.VnS(), 3448 p3.Zeroing(), 3449 SVEMemOperand(x17, z15.VnS(), SXTW, 1)), 3450 "ldff1h {z25.s}, p3/z, [x17, z15.s, sxtw #1]"); 3451 COMPARE(ldff1w(z5.VnS(), 3452 p4.Zeroing(), 3453 SVEMemOperand(x23, z31.VnS(), UXTW, 2)), 3454 "ldff1w {z5.s}, p4/z, [x23, z31.s, uxtw #2]"); 3455 COMPARE(ldff1sh(z10.VnS(), 3456 p0.Zeroing(), 3457 SVEMemOperand(x19, z15.VnS(), UXTW, 1)), 3458 "ldff1sh {z10.s}, p0/z, [x19, z15.s, uxtw #1]"); 3459 3460 // Load and broadcast data to vector. 
3461 COMPARE(ld1rb(z2.VnH(), p0.Zeroing(), SVEMemOperand(x30, 0)), 3462 "ld1rb {z2.h}, p0/z, [x30]"); 3463 COMPARE(ld1rb(z14.VnS(), p2.Zeroing(), SVEMemOperand(x11, 63)), 3464 "ld1rb {z14.s}, p2/z, [x11, #63]"); 3465 COMPARE(ld1rb(z27.VnD(), p1.Zeroing(), SVEMemOperand(x29, 2)), 3466 "ld1rb {z27.d}, p1/z, [x29, #2]"); 3467 COMPARE(ld1rb(z0.VnB(), p3.Zeroing(), SVEMemOperand(sp, 59)), 3468 "ld1rb {z0.b}, p3/z, [sp, #59]"); 3469 COMPARE(ld1rh(z19.VnH(), p5.Zeroing(), SVEMemOperand(x1, 0)), 3470 "ld1rh {z19.h}, p5/z, [x1]"); 3471 COMPARE(ld1rh(z4.VnS(), p7.Zeroing(), SVEMemOperand(x29, 126)), 3472 "ld1rh {z4.s}, p7/z, [x29, #126]"); 3473 COMPARE(ld1rh(z24.VnD(), p0.Zeroing(), SVEMemOperand(sp, 78)), 3474 "ld1rh {z24.d}, p0/z, [sp, #78]"); 3475 COMPARE(ld1rw(z19.VnS(), p5.Zeroing(), SVEMemOperand(x4, 252)), 3476 "ld1rw {z19.s}, p5/z, [x4, #252]"); 3477 COMPARE(ld1rw(z13.VnD(), p3.Zeroing(), SVEMemOperand(x2, 100)), 3478 "ld1rw {z13.d}, p3/z, [x2, #100]"); 3479 COMPARE(ld1rd(z19.VnD(), p7.Zeroing(), SVEMemOperand(x14, 504)), 3480 "ld1rd {z19.d}, p7/z, [x14, #504]"); 3481 COMPARE(ld1rsb(z16.VnH(), p1.Zeroing(), SVEMemOperand(x29, 0)), 3482 "ld1rsb {z16.h}, p1/z, [x29]"); 3483 COMPARE(ld1rsb(z8.VnS(), p6.Zeroing(), SVEMemOperand(sp, 33)), 3484 "ld1rsb {z8.s}, p6/z, [sp, #33]"); 3485 COMPARE(ld1rsb(z25.VnD(), p2.Zeroing(), SVEMemOperand(x18, 63)), 3486 "ld1rsb {z25.d}, p2/z, [x18, #63]"); 3487 COMPARE(ld1rsh(z11.VnS(), p5.Zeroing(), SVEMemOperand(x14, 2)), 3488 "ld1rsh {z11.s}, p5/z, [x14, #2]"); 3489 COMPARE(ld1rsh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x19, 124)), 3490 "ld1rsh {z28.d}, p1/z, [x19, #124]"); 3491 COMPARE(ld1rsw(z23.VnD(), p4.Zeroing(), SVEMemOperand(x10, 8)), 3492 "ld1rsw {z23.d}, p4/z, [x10, #8]"); 3493 3494 CLEANUP(); 3495} 3496 3497TEST(sve_mem_32bit_gather_and_unsized_contiguous_macro) { 3498 SETUP(); 3499 3500 COMPARE_MACRO(Ld1rb(z2.VnB(), p0.Zeroing(), SVEMemOperand(x30, 100)), 3501 "add x16, x30, #0x64 (100)\n" 3502 "ld1rb {z2.b}, p0/z, [x16]"); 
3503 COMPARE_MACRO(Ld1rh(z4.VnH(), p1.Zeroing(), SVEMemOperand(x21, 201)), 3504 "add x16, x21, #0xc9 (201)\n" 3505 "ld1rh {z4.h}, p1/z, [x16]"); 3506 COMPARE_MACRO(Ld1rw(z6.VnS(), p2.Zeroing(), SVEMemOperand(x14, 512)), 3507 "add x16, x14, #0x200 (512)\n" 3508 "ld1rw {z6.s}, p2/z, [x16]"); 3509 COMPARE_MACRO(Ld1rd(z8.VnD(), p3.Zeroing(), SVEMemOperand(x3, 1024)), 3510 "add x16, x3, #0x400 (1024)\n" 3511 "ld1rd {z8.d}, p3/z, [x16]"); 3512 COMPARE_MACRO(Ld1rsb(z10.VnH(), p4.Zeroing(), SVEMemOperand(sp, -100)), 3513 "sub x16, sp, #0x64 (100)\n" 3514 "ld1rsb {z10.h}, p4/z, [x16]"); 3515 COMPARE_MACRO(Ld1rsh(z12.VnS(), p5.Zeroing(), SVEMemOperand(x30, -255)), 3516 "sub x16, x30, #0xff (255)\n" 3517 "ld1rsh {z12.s}, p5/z, [x16]"); 3518 COMPARE_MACRO(Ld1rsw(z14.VnD(), p6.Zeroing(), SVEMemOperand(x1, -1024)), 3519 "sub x16, x1, #0x400 (1024)\n" 3520 "ld1rsw {z14.d}, p6/z, [x16]"); 3521} 3522 3523TEST(sve_mem_64bit_gather_vector_plus_immediate) { 3524 SETUP(); 3525 3526 COMPARE(ld1b(z2.VnD(), p2.Zeroing(), SVEMemOperand(z12.VnD(), 31)), 3527 "ld1b {z2.d}, p2/z, [z12.d, #31]"); 3528 COMPARE(ld1h(z30.VnD(), p7.Zeroing(), SVEMemOperand(z28.VnD(), 10)), 3529 "ld1h {z30.d}, p7/z, [z28.d, #10]"); 3530 COMPARE(ld1w(z10.VnD(), p5.Zeroing(), SVEMemOperand(z4.VnD(), 124)), 3531 "ld1w {z10.d}, p5/z, [z4.d, #124]"); 3532 COMPARE(ld1d(z13.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 248)), 3533 "ld1d {z13.d}, p3/z, [z19.d, #248]"); 3534 COMPARE(ld1sb(z16.VnD(), p7.Zeroing(), SVEMemOperand(z31.VnD())), 3535 "ld1sb {z16.d}, p7/z, [z31.d]"); 3536 COMPARE(ld1sh(z20.VnD(), p2.Zeroing(), SVEMemOperand(z2.VnD(), 62)), 3537 "ld1sh {z20.d}, p2/z, [z2.d, #62]"); 3538 COMPARE(ld1sw(z2.VnD(), p7.Zeroing(), SVEMemOperand(z25.VnD())), 3539 "ld1sw {z2.d}, p7/z, [z25.d]"); 3540 COMPARE(ldff1b(z24.VnD(), p5.Zeroing(), SVEMemOperand(z8.VnD(), 0)), 3541 "ldff1b {z24.d}, p5/z, [z8.d]"); 3542 COMPARE(ldff1h(z9.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD())), 3543 "ldff1h {z9.d}, p3/z, [z19.d]"); 3544 
COMPARE(ldff1w(z26.VnD(), p6.Zeroing(), SVEMemOperand(z15.VnD(), 4)), 3545 "ldff1w {z26.d}, p6/z, [z15.d, #4]"); 3546 COMPARE(ldff1d(z19.VnD(), p1.Zeroing(), SVEMemOperand(z14.VnD())), 3547 "ldff1d {z19.d}, p1/z, [z14.d]"); 3548 COMPARE(ldff1sb(z26.VnD(), p5.Zeroing(), SVEMemOperand(z14.VnD(), 21)), 3549 "ldff1sb {z26.d}, p5/z, [z14.d, #21]"); 3550 COMPARE(ldff1sh(z6.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 42)), 3551 "ldff1sh {z6.d}, p3/z, [z19.d, #42]"); 3552 COMPARE(ldff1sw(z19.VnD(), p7.Zeroing(), SVEMemOperand(z14.VnD(), 84)), 3553 "ldff1sw {z19.d}, p7/z, [z14.d, #84]"); 3554 3555 CLEANUP(); 3556} 3557 3558TEST(sve_mem_64bit_gather_vector_plus_immediate_macro) { 3559 SETUP(); 3560 3561 // Simple cases. 3562 COMPARE_MACRO(Ld1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(z31.VnD(), 31)), 3563 "ld1b {z18.d}, p6/z, [z31.d, #31]"); 3564 COMPARE_MACRO(Ld1h(z5.VnD(), p3.Zeroing(), SVEMemOperand(z18.VnD(), 10)), 3565 "ld1h {z5.d}, p3/z, [z18.d, #10]"); 3566 COMPARE_MACRO(Ld1w(z0.VnD(), p6.Zeroing(), SVEMemOperand(z22.VnD(), 124)), 3567 "ld1w {z0.d}, p6/z, [z22.d, #124]"); 3568 COMPARE_MACRO(Ld1d(z18.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 248)), 3569 "ld1d {z18.d}, p3/z, [z19.d, #248]"); 3570 COMPARE_MACRO(Ld1sb(z18.VnD(), p6.Zeroing(), SVEMemOperand(z17.VnD())), 3571 "ld1sb {z18.d}, p6/z, [z17.d]"); 3572 COMPARE_MACRO(Ld1sh(z14.VnD(), p3.Zeroing(), SVEMemOperand(z11.VnD(), 62)), 3573 "ld1sh {z14.d}, p3/z, [z11.d, #62]"); 3574 COMPARE_MACRO(Ld1sw(z18.VnD(), p5.Zeroing(), SVEMemOperand(z14.VnD())), 3575 "ld1sw {z18.d}, p5/z, [z14.d]"); 3576 COMPARE_MACRO(Ldff1b(z20.VnD(), p5.Zeroing(), SVEMemOperand(z17.VnD(), 0)), 3577 "ldff1b {z20.d}, p5/z, [z17.d]"); 3578 COMPARE_MACRO(Ldff1h(z20.VnD(), p5.Zeroing(), SVEMemOperand(z16.VnD())), 3579 "ldff1h {z20.d}, p5/z, [z16.d]"); 3580 COMPARE_MACRO(Ldff1w(z1.VnD(), p6.Zeroing(), SVEMemOperand(z16.VnD(), 4)), 3581 "ldff1w {z1.d}, p6/z, [z16.d, #4]"); 3582 COMPARE_MACRO(Ldff1d(z16.VnD(), p1.Zeroing(), SVEMemOperand(z3.VnD())), 
3583 "ldff1d {z16.d}, p1/z, [z3.d]"); 3584 COMPARE_MACRO(Ldff1sb(z26.VnD(), p7.Zeroing(), SVEMemOperand(z3.VnD(), 21)), 3585 "ldff1sb {z26.d}, p7/z, [z3.d, #21]"); 3586 COMPARE_MACRO(Ldff1sh(z1.VnD(), p7.Zeroing(), SVEMemOperand(z9.VnD(), 42)), 3587 "ldff1sh {z1.d}, p7/z, [z9.d, #42]"); 3588 COMPARE_MACRO(Ldff1sw(z19.VnD(), p4.Zeroing(), SVEMemOperand(z3.VnD(), 84)), 3589 "ldff1sw {z19.d}, p4/z, [z3.d, #84]"); 3590 3591 // Unencodable cases use a scalar-plus-vector form. 3592 COMPARE_MACRO(Ld1b(z23.VnD(), p6.Zeroing(), SVEMemOperand(z16.VnD(), 32)), 3593 "mov x16, #0x20\n" 3594 "ld1b {z23.d}, p6/z, [x16, z16.d]"); 3595 COMPARE_MACRO(Ld1h(z10.VnD(), p6.Zeroing(), SVEMemOperand(z11.VnD(), -2)), 3596 "mov x16, #0xfffffffffffffffe\n" 3597 "ld1h {z10.d}, p6/z, [x16, z11.d]"); 3598 COMPARE_MACRO(Ld1w(z14.VnD(), p3.Zeroing(), SVEMemOperand(z11.VnD(), 42)), 3599 "mov x16, #0x2a\n" 3600 "ld1w {z14.d}, p3/z, [x16, z11.d]"); 3601 COMPARE_MACRO(Ld1d(z10.VnD(), p4.Zeroing(), SVEMemOperand(z3.VnD(), 256)), 3602 "mov x16, #0x100\n" 3603 "ld1d {z10.d}, p4/z, [x16, z3.d]"); 3604 COMPARE_MACRO(Ld1sb(z14.VnD(), p2.Zeroing(), SVEMemOperand(z11.VnD(), -1)), 3605 "mov x16, #0xffffffffffffffff\n" 3606 "ld1sb {z14.d}, p2/z, [x16, z11.d]"); 3607 COMPARE_MACRO(Ld1sh(z20.VnD(), p7.Zeroing(), SVEMemOperand(z12.VnD(), 64)), 3608 "mov x16, #0x40\n" 3609 "ld1sh {z20.d}, p7/z, [x16, z12.d]"); 3610 COMPARE_MACRO(Ld1sw(z15.VnD(), p6.Zeroing(), SVEMemOperand(z18.VnD(), 42)), 3611 "mov x16, #0x2a\n" 3612 "ld1sw {z15.d}, p6/z, [x16, z18.d]"); 3613 COMPARE_MACRO(Ldff1b(z15.VnD(), p0.Zeroing(), SVEMemOperand(z0.VnD(), 32)), 3614 "mov x16, #0x20\n" 3615 "ldff1b {z15.d}, p0/z, [x16, z0.d]"); 3616 COMPARE_MACRO(Ldff1h(z23.VnD(), p3.Zeroing(), SVEMemOperand(z31.VnD(), -2)), 3617 "mov x16, #0xfffffffffffffffe\n" 3618 "ldff1h {z23.d}, p3/z, [x16, z31.d]"); 3619 COMPARE_MACRO(Ldff1w(z28.VnD(), p3.Zeroing(), SVEMemOperand(z17.VnD(), 42)), 3620 "mov x16, #0x2a\n" 3621 "ldff1w {z28.d}, p3/z, [x16, z17.d]"); 3622 
COMPARE_MACRO(Ldff1d(z18.VnD(), p3.Zeroing(), SVEMemOperand(z13.VnD(), 256)), 3623 "mov x16, #0x100\n" 3624 "ldff1d {z18.d}, p3/z, [x16, z13.d]"); 3625 COMPARE_MACRO(Ldff1sb(z31.VnD(), p7.Zeroing(), SVEMemOperand(z3.VnD(), -1)), 3626 "mov x16, #0xffffffffffffffff\n" 3627 "ldff1sb {z31.d}, p7/z, [x16, z3.d]"); 3628 COMPARE_MACRO(Ldff1sh(z13.VnD(), p0.Zeroing(), SVEMemOperand(z15.VnD(), 64)), 3629 "mov x16, #0x40\n" 3630 "ldff1sh {z13.d}, p0/z, [x16, z15.d]"); 3631 COMPARE_MACRO(Ldff1sw(z30.VnD(), p7.Zeroing(), SVEMemOperand(z10.VnD(), 42)), 3632 "mov x16, #0x2a\n" 3633 "ldff1sw {z30.d}, p7/z, [x16, z10.d]"); 3634 3635 CLEANUP(); 3636} 3637 3638TEST(sve_mem_64bit_gather_scalar_plus_vector) { 3639 SETUP(); 3640 3641 COMPARE(ld1b(z30.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD())), 3642 "ld1b {z30.d}, p6/z, [sp, z24.d]"); 3643 COMPARE(ld1d(z18.VnD(), p5.Zeroing(), SVEMemOperand(x11, z11.VnD())), 3644 "ld1d {z18.d}, p5/z, [x11, z11.d]"); 3645 COMPARE(ld1h(z2.VnD(), p3.Zeroing(), SVEMemOperand(x16, z18.VnD())), 3646 "ld1h {z2.d}, p3/z, [x16, z18.d]"); 3647 COMPARE(ld1sb(z11.VnD(), p3.Zeroing(), SVEMemOperand(x24, z21.VnD())), 3648 "ld1sb {z11.d}, p3/z, [x24, z21.d]"); 3649 COMPARE(ld1sh(z7.VnD(), p7.Zeroing(), SVEMemOperand(x28, z23.VnD())), 3650 "ld1sh {z7.d}, p7/z, [x28, z23.d]"); 3651 COMPARE(ld1sw(z29.VnD(), p7.Zeroing(), SVEMemOperand(x27, z4.VnD())), 3652 "ld1sw {z29.d}, p7/z, [x27, z4.d]"); 3653 COMPARE(ld1w(z19.VnD(), p1.Zeroing(), SVEMemOperand(x27, z4.VnD())), 3654 "ld1w {z19.d}, p1/z, [x27, z4.d]"); 3655 3656 COMPARE(ld1d(z20.VnD(), p3.Zeroing(), SVEMemOperand(x3, z15.VnD(), LSL, 3)), 3657 "ld1d {z20.d}, p3/z, [x3, z15.d, lsl #3]"); 3658 COMPARE(ld1h(z24.VnD(), p4.Zeroing(), SVEMemOperand(x6, z11.VnD(), LSL, 1)), 3659 "ld1h {z24.d}, p4/z, [x6, z11.d, lsl #1]"); 3660 COMPARE(ld1sh(z22.VnD(), p6.Zeroing(), SVEMemOperand(x7, z31.VnD(), LSL, 1)), 3661 "ld1sh {z22.d}, p6/z, [x7, z31.d, lsl #1]"); 3662 COMPARE(ld1sw(z9.VnD(), p0.Zeroing(), SVEMemOperand(x2, 
z27.VnD(), LSL, 2)), 3663 "ld1sw {z9.d}, p0/z, [x2, z27.d, lsl #2]"); 3664 COMPARE(ld1w(z9.VnD(), p2.Zeroing(), SVEMemOperand(x0, z0.VnD(), LSL, 2)), 3665 "ld1w {z9.d}, p2/z, [x0, z0.d, lsl #2]"); 3666 3667 COMPARE(ld1b(z19.VnD(), p5.Zeroing(), SVEMemOperand(x21, z29.VnD(), UXTW)), 3668 "ld1b {z19.d}, p5/z, [x21, z29.d, uxtw]"); 3669 COMPARE(ld1d(z9.VnD(), p5.Zeroing(), SVEMemOperand(x5, z21.VnD(), SXTW)), 3670 "ld1d {z9.d}, p5/z, [x5, z21.d, sxtw]"); 3671 COMPARE(ld1h(z26.VnD(), p3.Zeroing(), SVEMemOperand(x1, z10.VnD(), UXTW)), 3672 "ld1h {z26.d}, p3/z, [x1, z10.d, uxtw]"); 3673 COMPARE(ld1sb(z4.VnD(), p1.Zeroing(), SVEMemOperand(x24, z15.VnD(), SXTW)), 3674 "ld1sb {z4.d}, p1/z, [x24, z15.d, sxtw]"); 3675 COMPARE(ld1sh(z9.VnD(), p1.Zeroing(), SVEMemOperand(x0, z12.VnD(), UXTW)), 3676 "ld1sh {z9.d}, p1/z, [x0, z12.d, uxtw]"); 3677 COMPARE(ld1sw(z19.VnD(), p2.Zeroing(), SVEMemOperand(x19, z16.VnD(), SXTW)), 3678 "ld1sw {z19.d}, p2/z, [x19, z16.d, sxtw]"); 3679 COMPARE(ld1w(z13.VnD(), p3.Zeroing(), SVEMemOperand(x8, z10.VnD(), UXTW)), 3680 "ld1w {z13.d}, p3/z, [x8, z10.d, uxtw]"); 3681 3682 COMPARE(ld1d(z25.VnD(), p3.Zeroing(), SVEMemOperand(x14, z0.VnD(), UXTW, 3)), 3683 "ld1d {z25.d}, p3/z, [x14, z0.d, uxtw #3]"); 3684 COMPARE(ld1h(z21.VnD(), p5.Zeroing(), SVEMemOperand(x13, z8.VnD(), SXTW, 1)), 3685 "ld1h {z21.d}, p5/z, [x13, z8.d, sxtw #1]"); 3686 COMPARE(ld1sh(z29.VnD(), p0.Zeroing(), SVEMemOperand(x9, z10.VnD(), UXTW, 1)), 3687 "ld1sh {z29.d}, p0/z, [x9, z10.d, uxtw #1]"); 3688 COMPARE(ld1sw(z5.VnD(), p2.Zeroing(), SVEMemOperand(x1, z23.VnD(), SXTW, 2)), 3689 "ld1sw {z5.d}, p2/z, [x1, z23.d, sxtw #2]"); 3690 COMPARE(ld1w(z21.VnD(), p1.Zeroing(), SVEMemOperand(x7, z8.VnD(), UXTW, 2)), 3691 "ld1w {z21.d}, p1/z, [x7, z8.d, uxtw #2]"); 3692 3693 CLEANUP(); 3694} 3695 3696TEST(sve_mem_prefetch) { 3697 SETUP(); 3698 3699 // Test every encodable prefetch operation. 
3700 const char* expected[] = {" pldl1keep", 3701 " pldl1strm", 3702 " pldl2keep", 3703 " pldl2strm", 3704 " pldl3keep", 3705 " pldl3strm", 3706 " pstl1keep", 3707 " pstl1strm", 3708 " pstl2keep", 3709 " pstl2strm", 3710 " pstl3keep", 3711 " pstl3strm"}; 3712 3713 const PrefetchOperation kSVEPrfOperations[] = {PLDL1KEEP, 3714 PLDL1STRM, 3715 PLDL2KEEP, 3716 PLDL2STRM, 3717 PLDL3KEEP, 3718 PLDL3STRM, 3719 PSTL1KEEP, 3720 PSTL1STRM, 3721 PSTL2KEEP, 3722 PSTL2STRM, 3723 PSTL3KEEP, 3724 PSTL3STRM}; 3725 3726 VIXL_STATIC_ASSERT(ArrayLength(expected) == ArrayLength(kSVEPrfOperations)); 3727 3728 3729#define VIXL_DISAM_PREFETCH_TEST(INSN, NAME, SH) \ 3730 do { \ 3731 for (size_t i = 0; i < ArrayLength(kSVEPrfOperations); i++) { \ 3732 PrefetchOperation op = kSVEPrfOperations[i]; \ 3733 std::string str(NAME); \ 3734 str.append(expected[i]); \ 3735 /* Vector plus immediate */ \ 3736 COMPARE_PREFIX(INSN(op, p6, SVEMemOperand(z30.VnS(), 31)), str.c_str()); \ 3737 COMPARE_PREFIX(INSN(op, p5, SVEMemOperand(z29.VnD(), 17)), str.c_str()); \ 3738 /* Scalar plus immediate */ \ 3739 COMPARE_PREFIX(INSN(op, p4, SVEMemOperand(x11, -32, SVE_MUL_VL)), \ 3740 str.c_str()); \ 3741 COMPARE_PREFIX(INSN(op, p4, SVEMemOperand(sp, 31, SVE_MUL_VL)), \ 3742 str.c_str()); \ 3743 /* Scalar plus vector */ \ 3744 COMPARE_PREFIX(INSN(op, p3, SVEMemOperand(x24, z22.VnS(), UXTW, SH)), \ 3745 str.c_str()); \ 3746 COMPARE_PREFIX(INSN(op, p2, SVEMemOperand(x24, z22.VnD(), SXTW, SH)), \ 3747 str.c_str()); \ 3748 COMPARE_PREFIX(INSN(op, p1, SVEMemOperand(x4, z2.VnD(), LSL, SH)), \ 3749 str.c_str()); \ 3750 /* Scalar plus scalar */ \ 3751 COMPARE_PREFIX(INSN(op, p1, SVEMemOperand(x8, x29, LSL, SH)), \ 3752 str.c_str()); \ 3753 COMPARE_PREFIX(INSN(op, p0, SVEMemOperand(sp, x6, LSL, SH)), \ 3754 str.c_str()); \ 3755 } \ 3756 } while (0) 3757 3758 VIXL_DISAM_PREFETCH_TEST(prfh, "prfh", 1); 3759 VIXL_DISAM_PREFETCH_TEST(prfw, "prfw", 2); 3760 VIXL_DISAM_PREFETCH_TEST(prfd, "prfd", 3); 3761#undef 
VIXL_DISAM_PREFETCH_TEST 3762 3763 COMPARE(prfb(PLDL1KEEP, p5, SVEMemOperand(z30.VnS(), 0)), 3764 "prfb pldl1keep, p5, [z30.s]"); 3765 COMPARE(prfb(PLDL1STRM, p5, SVEMemOperand(x28, -11, SVE_MUL_VL)), 3766 "prfb pldl1strm, p5, [x28, #-11, mul vl]"); 3767 COMPARE(prfb(PLDL2KEEP, p6, SVEMemOperand(x30, x29)), 3768 "prfb pldl2keep, p6, [x30, x29]"); 3769 COMPARE(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnD())), 3770 "prfb pldl2strm, p6, [x7, z12.d]"); 3771 COMPARE(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnS(), UXTW)), 3772 "prfb pldl2strm, p6, [x7, z12.s, uxtw]"); 3773 COMPARE(prfd(PLDL3KEEP, p5, SVEMemOperand(z11.VnD(), 9)), 3774 "prfd pldl3keep, p5, [z11.d, #9]"); 3775 COMPARE(prfd(PLDL3STRM, p3, SVEMemOperand(x0, 0, SVE_MUL_VL)), 3776 "prfd pldl3strm, p3, [x0]"); 3777 COMPARE(prfd(PSTL1KEEP, p7, SVEMemOperand(x5, x5, LSL, 3)), 3778 "prfd pstl1keep, p7, [x5, x5, lsl #3]"); 3779 COMPARE(prfd(PSTL1STRM, p1, SVEMemOperand(x19, z18.VnS(), SXTW, 3)), 3780 "prfd pstl1strm, p1, [x19, z18.s, sxtw #3]"); 3781 COMPARE(prfh(PSTL2KEEP, p6, SVEMemOperand(z0.VnS(), 31)), 3782 "prfh pstl2keep, p6, [z0.s, #31]"); 3783 COMPARE(prfh(PSTL2STRM, p4, SVEMemOperand(x17, -3, SVE_MUL_VL)), 3784 "prfh pstl2strm, p4, [x17, #-3, mul vl]"); 3785 COMPARE(prfh(PSTL3KEEP, p3, SVEMemOperand(x0, x0, LSL, 1)), 3786 "prfh pstl3keep, p3, [x0, x0, lsl #1]"); 3787 COMPARE(prfh(PSTL3STRM, p4, SVEMemOperand(x20, z0.VnD(), LSL, 1)), 3788 "prfh pstl3strm, p4, [x20, z0.d, lsl #1]"); 3789 COMPARE(prfw(PLDL1KEEP, p3, SVEMemOperand(z23.VnD(), 5)), 3790 "prfw pldl1keep, p3, [z23.d, #5]"); 3791 COMPARE(prfw(PLDL1STRM, p1, SVEMemOperand(x4, 31, SVE_MUL_VL)), 3792 "prfw pldl1strm, p1, [x4, #31, mul vl]"); 3793 COMPARE(prfw(PLDL2KEEP, p2, SVEMemOperand(x22, x22, LSL, 2)), 3794 "prfw pldl2keep, p2, [x22, x22, lsl #2]"); 3795 COMPARE(prfw(PLDL2STRM, p1, SVEMemOperand(x2, z6.VnS(), SXTW, 2)), 3796 "prfw pldl2strm, p1, [x2, z6.s, sxtw #2]"); 3797 3798 CLEANUP(); 3799} 3800 
3801TEST(sve_mem_64bit_ff_gather_scalar_plus_vector) { 3802 SETUP(); 3803 3804 // 64-bit unscaled offset. 3805 COMPARE(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(x27, z24.VnD())), 3806 "ldff1b {z18.d}, p6/z, [x27, z24.d]"); 3807 COMPARE(ldff1h(z28.VnD(), p6.Zeroing(), SVEMemOperand(x1, z30.VnD())), 3808 "ldff1h {z28.d}, p6/z, [x1, z30.d]"); 3809 COMPARE(ldff1w(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())), 3810 "ldff1w {z12.d}, p3/z, [x25, z27.d]"); 3811 COMPARE(ldff1d(z23.VnD(), p5.Zeroing(), SVEMemOperand(x29, z31.VnD())), 3812 "ldff1d {z23.d}, p5/z, [x29, z31.d]"); 3813 COMPARE(ldff1sb(z15.VnD(), p5.Zeroing(), SVEMemOperand(x5, z14.VnD())), 3814 "ldff1sb {z15.d}, p5/z, [x5, z14.d]"); 3815 COMPARE(ldff1sh(z18.VnD(), p4.Zeroing(), SVEMemOperand(x25, z25.VnD())), 3816 "ldff1sh {z18.d}, p4/z, [x25, z25.d]"); 3817 COMPARE(ldff1sw(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())), 3818 "ldff1sw {z12.d}, p3/z, [x25, z27.d]"); 3819 3820 // 64-bit scaled offset. 3821 COMPARE(ldff1h(z25.VnD(), 3822 p3.Zeroing(), 3823 SVEMemOperand(x17, z15.VnD(), LSL, 1)), 3824 "ldff1h {z25.d}, p3/z, [x17, z15.d, lsl #1]"); 3825 COMPARE(ldff1w(z5.VnD(), p4.Zeroing(), SVEMemOperand(x23, z31.VnD(), LSL, 2)), 3826 "ldff1w {z5.d}, p4/z, [x23, z31.d, lsl #2]"); 3827 COMPARE(ldff1d(z2.VnD(), p0.Zeroing(), SVEMemOperand(sp, z7.VnD(), LSL, 3)), 3828 "ldff1d {z2.d}, p0/z, [sp, z7.d, lsl #3]"); 3829 COMPARE(ldff1sh(z10.VnD(), 3830 p0.Zeroing(), 3831 SVEMemOperand(x19, z15.VnD(), LSL, 1)), 3832 "ldff1sh {z10.d}, p0/z, [x19, z15.d, lsl #1]"); 3833 COMPARE(ldff1sw(z5.VnD(), 3834 p4.Zeroing(), 3835 SVEMemOperand(x23, z31.VnD(), LSL, 2)), 3836 "ldff1sw {z5.d}, p4/z, [x23, z31.d, lsl #2]"); 3837 3838 // 32-bit unpacked unscaled offset 3839 COMPARE(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD(), UXTW)), 3840 "ldff1b {z18.d}, p6/z, [sp, z24.d, uxtw]"); 3841 COMPARE(ldff1h(z20.VnD(), p5.Zeroing(), SVEMemOperand(x7, z14.VnD(), SXTW)), 3842 "ldff1h {z20.d}, p5/z, [x7, z14.d, 
sxtw]"); 3843 COMPARE(ldff1w(z22.VnD(), p4.Zeroing(), SVEMemOperand(x17, z4.VnD(), UXTW)), 3844 "ldff1w {z22.d}, p4/z, [x17, z4.d, uxtw]"); 3845 COMPARE(ldff1d(z24.VnD(), p3.Zeroing(), SVEMemOperand(x3, z24.VnD(), SXTW)), 3846 "ldff1d {z24.d}, p3/z, [x3, z24.d, sxtw]"); 3847 COMPARE(ldff1sb(z26.VnD(), p2.Zeroing(), SVEMemOperand(x13, z14.VnD(), UXTW)), 3848 "ldff1sb {z26.d}, p2/z, [x13, z14.d, uxtw]"); 3849 COMPARE(ldff1sh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x23, z4.VnD(), SXTW)), 3850 "ldff1sh {z28.d}, p1/z, [x23, z4.d, sxtw]"); 3851 COMPARE(ldff1sw(z30.VnD(), p0.Zeroing(), SVEMemOperand(x8, z24.VnD(), UXTW)), 3852 "ldff1sw {z30.d}, p0/z, [x8, z24.d, uxtw]"); 3853 3854 // 32-bit unpacked scaled offset 3855 COMPARE(ldff1h(z4.VnD(), p5.Zeroing(), SVEMemOperand(x7, z1.VnD(), SXTW, 1)), 3856 "ldff1h {z4.d}, p5/z, [x7, z1.d, sxtw #1]"); 3857 COMPARE(ldff1w(z5.VnD(), 3858 p4.Zeroing(), 3859 SVEMemOperand(x17, z11.VnD(), UXTW, 2)), 3860 "ldff1w {z5.d}, p4/z, [x17, z11.d, uxtw #2]"); 3861 COMPARE(ldff1d(z6.VnD(), p3.Zeroing(), SVEMemOperand(x3, z31.VnD(), SXTW, 3)), 3862 "ldff1d {z6.d}, p3/z, [x3, z31.d, sxtw #3]"); 3863 COMPARE(ldff1sh(z7.VnD(), 3864 p1.Zeroing(), 3865 SVEMemOperand(x23, z7.VnD(), UXTW, 1)), 3866 "ldff1sh {z7.d}, p1/z, [x23, z7.d, uxtw #1]"); 3867 COMPARE(ldff1sw(z8.VnD(), 3868 p0.Zeroing(), 3869 SVEMemOperand(x8, z17.VnD(), SXTW, 2)), 3870 "ldff1sw {z8.d}, p0/z, [x8, z17.d, sxtw #2]"); 3871 3872 CLEANUP(); 3873} 3874 3875TEST(sve_ld2_scalar_plus_immediate) { 3876 SETUP(); 3877 3878 COMPARE(ld2b(z31.VnB(), z0.VnB(), p6.Zeroing(), SVEMemOperand(x19)), 3879 "ld2b {z31.b, z0.b}, p6/z, [x19]"); 3880 COMPARE(ld2b(z31.VnB(), 3881 z0.VnB(), 3882 p6.Zeroing(), 3883 SVEMemOperand(x19, 14, SVE_MUL_VL)), 3884 "ld2b {z31.b, z0.b}, p6/z, [x19, #14, mul vl]"); 3885 COMPARE(ld2b(z15.VnB(), 3886 z16.VnB(), 3887 p6.Zeroing(), 3888 SVEMemOperand(x19, -16, SVE_MUL_VL)), 3889 "ld2b {z15.b, z16.b}, p6/z, [x19, #-16, mul vl]"); 3890 3891 COMPARE(ld2h(z15.VnH(), z16.VnH(), 
p6.Zeroing(), SVEMemOperand(x19)), 3892 "ld2h {z15.h, z16.h}, p6/z, [x19]"); 3893 COMPARE(ld2h(z15.VnH(), 3894 z16.VnH(), 3895 p0.Zeroing(), 3896 SVEMemOperand(x19, 14, SVE_MUL_VL)), 3897 "ld2h {z15.h, z16.h}, p0/z, [x19, #14, mul vl]"); 3898 COMPARE(ld2h(z15.VnH(), 3899 z16.VnH(), 3900 p0.Zeroing(), 3901 SVEMemOperand(x19, -16, SVE_MUL_VL)), 3902 "ld2h {z15.h, z16.h}, p0/z, [x19, #-16, mul vl]"); 3903 3904 COMPARE(ld2w(z0.VnS(), z1.VnS(), p0.Zeroing(), SVEMemOperand(x19)), 3905 "ld2w {z0.s, z1.s}, p0/z, [x19]"); 3906 COMPARE(ld2w(z0.VnS(), 3907 z1.VnS(), 3908 p0.Zeroing(), 3909 SVEMemOperand(x19, 14, SVE_MUL_VL)), 3910 "ld2w {z0.s, z1.s}, p0/z, [x19, #14, mul vl]"); 3911 COMPARE(ld2w(z0.VnS(), 3912 z1.VnS(), 3913 p7.Zeroing(), 3914 SVEMemOperand(x19, -16, SVE_MUL_VL)), 3915 "ld2w {z0.s, z1.s}, p7/z, [x19, #-16, mul vl]"); 3916 3917 COMPARE(ld2d(z0.VnD(), z1.VnD(), p7.Zeroing(), SVEMemOperand(x19)), 3918 "ld2d {z0.d, z1.d}, p7/z, [x19]"); 3919 COMPARE(ld2d(z31.VnD(), 3920 z0.VnD(), 3921 p7.Zeroing(), 3922 SVEMemOperand(x19, 14, SVE_MUL_VL)), 3923 "ld2d {z31.d, z0.d}, p7/z, [x19, #14, mul vl]"); 3924 COMPARE(ld2d(z31.VnD(), 3925 z0.VnD(), 3926 p7.Zeroing(), 3927 SVEMemOperand(x19, -16, SVE_MUL_VL)), 3928 "ld2d {z31.d, z0.d}, p7/z, [x19, #-16, mul vl]"); 3929 3930 CLEANUP(); 3931} 3932 3933TEST(sve_ld3_scalar_plus_immediate) { 3934 SETUP(); 3935 3936 COMPARE(ld3b(z30.VnB(), 3937 z31.VnB(), 3938 z0.VnB(), 3939 p7.Zeroing(), 3940 SVEMemOperand(x19)), 3941 "ld3b {z30.b, z31.b, z0.b}, p7/z, [x19]"); 3942 COMPARE(ld3b(z30.VnB(), 3943 z31.VnB(), 3944 z0.VnB(), 3945 p6.Zeroing(), 3946 SVEMemOperand(x19, 21, SVE_MUL_VL)), 3947 "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #21, mul vl]"); 3948 COMPARE(ld3b(z30.VnB(), 3949 z31.VnB(), 3950 z0.VnB(), 3951 p6.Zeroing(), 3952 SVEMemOperand(x19, -24, SVE_MUL_VL)), 3953 "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #-24, mul vl]"); 3954 3955 COMPARE(ld3h(z15.VnH(), 3956 z16.VnH(), 3957 z17.VnH(), 3958 p6.Zeroing(), 3959 SVEMemOperand(x19)), 
3960 "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19]"); 3961 COMPARE(ld3h(z15.VnH(), 3962 z16.VnH(), 3963 z17.VnH(), 3964 p6.Zeroing(), 3965 SVEMemOperand(x19, 21, SVE_MUL_VL)), 3966 "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19, #21, mul vl]"); 3967 COMPARE(ld3h(z15.VnH(), 3968 z16.VnH(), 3969 z17.VnH(), 3970 p0.Zeroing(), 3971 SVEMemOperand(x19, -24, SVE_MUL_VL)), 3972 "ld3h {z15.h, z16.h, z17.h}, p0/z, [x19, #-24, mul vl]"); 3973 3974 COMPARE(ld3w(z15.VnS(), 3975 z16.VnS(), 3976 z17.VnS(), 3977 p0.Zeroing(), 3978 SVEMemOperand(x19)), 3979 "ld3w {z15.s, z16.s, z17.s}, p0/z, [x19]"); 3980 COMPARE(ld3w(z0.VnS(), 3981 z1.VnS(), 3982 z2.VnS(), 3983 p0.Zeroing(), 3984 SVEMemOperand(x19, 21, SVE_MUL_VL)), 3985 "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #21, mul vl]"); 3986 COMPARE(ld3w(z0.VnS(), 3987 z1.VnS(), 3988 z2.VnS(), 3989 p0.Zeroing(), 3990 SVEMemOperand(x19, -24, SVE_MUL_VL)), 3991 "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #-24, mul vl]"); 3992 3993 COMPARE(ld3d(z0.VnD(), z1.VnD(), z2.VnD(), p7.Zeroing(), SVEMemOperand(x19)), 3994 "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19]"); 3995 COMPARE(ld3d(z0.VnD(), 3996 z1.VnD(), 3997 z2.VnD(), 3998 p7.Zeroing(), 3999 SVEMemOperand(x19, 21, SVE_MUL_VL)), 4000 "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19, #21, mul vl]"); 4001 COMPARE(ld3d(z30.VnD(), 4002 z31.VnD(), 4003 z0.VnD(), 4004 p7.Zeroing(), 4005 SVEMemOperand(x19, -24, SVE_MUL_VL)), 4006 "ld3d {z30.d, z31.d, z0.d}, p7/z, [x19, #-24, mul vl]"); 4007 4008 CLEANUP(); 4009} 4010 4011TEST(sve_ld4_scalar_plus_immediate) { 4012 SETUP(); 4013 4014 COMPARE(ld4b(z31.VnB(), 4015 z0.VnB(), 4016 z1.VnB(), 4017 z2.VnB(), 4018 p7.Zeroing(), 4019 SVEMemOperand(x19)), 4020 "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19]"); 4021 COMPARE(ld4b(z31.VnB(), 4022 z0.VnB(), 4023 z1.VnB(), 4024 z2.VnB(), 4025 p7.Zeroing(), 4026 SVEMemOperand(x19, 28, SVE_MUL_VL)), 4027 "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19, #28, mul vl]"); 4028 COMPARE(ld4b(z31.VnB(), 4029 z0.VnB(), 4030 z1.VnB(), 4031 z2.VnB(), 4032 p6.Zeroing(), 4033 
SVEMemOperand(x19, -32, SVE_MUL_VL)), 4034 "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, [x19, #-32, mul vl]"); 4035 4036 COMPARE(ld4h(z31.VnH(), 4037 z0.VnH(), 4038 z1.VnH(), 4039 z2.VnH(), 4040 p6.Zeroing(), 4041 SVEMemOperand(x19)), 4042 "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, [x19]"); 4043 COMPARE(ld4h(z15.VnH(), 4044 z16.VnH(), 4045 z17.VnH(), 4046 z18.VnH(), 4047 p6.Zeroing(), 4048 SVEMemOperand(x19, 28, SVE_MUL_VL)), 4049 "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, " 4050 "[x19, #28, mul vl]"); 4051 COMPARE(ld4h(z15.VnH(), 4052 z16.VnH(), 4053 z17.VnH(), 4054 z18.VnH(), 4055 p6.Zeroing(), 4056 SVEMemOperand(x19, -32, SVE_MUL_VL)), 4057 "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, " 4058 "[x19, #-32, mul vl]"); 4059 4060 COMPARE(ld4w(z15.VnS(), 4061 z16.VnS(), 4062 z17.VnS(), 4063 z18.VnS(), 4064 p0.Zeroing(), 4065 SVEMemOperand(x19)), 4066 "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, [x19]"); 4067 COMPARE(ld4w(z15.VnS(), 4068 z16.VnS(), 4069 z17.VnS(), 4070 z18.VnS(), 4071 p0.Zeroing(), 4072 SVEMemOperand(x19, 28, SVE_MUL_VL)), 4073 "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, " 4074 "[x19, #28, mul vl]"); 4075 COMPARE(ld4w(z0.VnS(), 4076 z1.VnS(), 4077 z2.VnS(), 4078 z3.VnS(), 4079 p0.Zeroing(), 4080 SVEMemOperand(x19, -32, SVE_MUL_VL)), 4081 "ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [x19, #-32, mul vl]"); 4082 4083 COMPARE(ld4d(z0.VnD(), 4084 z1.VnD(), 4085 z2.VnD(), 4086 z3.VnD(), 4087 p0.Zeroing(), 4088 SVEMemOperand(x19)), 4089 "ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [x19]"); 4090 COMPARE(ld4d(z0.VnD(), 4091 z1.VnD(), 4092 z2.VnD(), 4093 z3.VnD(), 4094 p7.Zeroing(), 4095 SVEMemOperand(x19, 28, SVE_MUL_VL)), 4096 "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #28, mul vl]"); 4097 COMPARE(ld4d(z0.VnD(), 4098 z1.VnD(), 4099 z2.VnD(), 4100 z3.VnD(), 4101 p7.Zeroing(), 4102 SVEMemOperand(x19, -32, SVE_MUL_VL)), 4103 "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #-32, mul vl]"); 4104 4105 CLEANUP(); 4106} 4107 4108TEST(sve_ld2_scalar_plus_scalar) { 4109 SETUP(); 4110 4111 
COMPARE(ld2b(z25.VnB(), z26.VnB(), p1.Zeroing(), SVEMemOperand(x20, x19)), 4112 "ld2b {z25.b, z26.b}, p1/z, [x20, x19]"); 4113 COMPARE(ld2b(z25.VnB(), z26.VnB(), p1.Zeroing(), SVEMemOperand(sp, x19)), 4114 "ld2b {z25.b, z26.b}, p1/z, [sp, x19]"); 4115 COMPARE(ld2b(z31.VnB(), z0.VnB(), p1.Zeroing(), SVEMemOperand(sp, x19)), 4116 "ld2b {z31.b, z0.b}, p1/z, [sp, x19]"); 4117 4118 COMPARE(ld2h(z31.VnH(), 4119 z0.VnH(), 4120 p1.Zeroing(), 4121 SVEMemOperand(x20, x19, LSL, 1)), 4122 "ld2h {z31.h, z0.h}, p1/z, [x20, x19, lsl #1]"); 4123 COMPARE(ld2h(z31.VnH(), 4124 z0.VnH(), 4125 p7.Zeroing(), 4126 SVEMemOperand(sp, x19, LSL, 1)), 4127 "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); 4128 COMPARE(ld2h(z31.VnH(), 4129 z0.VnH(), 4130 p7.Zeroing(), 4131 SVEMemOperand(sp, x19, LSL, 1)), 4132 "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); 4133 4134 COMPARE(ld2w(z16.VnS(), 4135 z17.VnS(), 4136 p7.Zeroing(), 4137 SVEMemOperand(x20, x19, LSL, 2)), 4138 "ld2w {z16.s, z17.s}, p7/z, [x20, x19, lsl #2]"); 4139 COMPARE(ld2w(z16.VnS(), 4140 z17.VnS(), 4141 p7.Zeroing(), 4142 SVEMemOperand(sp, x19, LSL, 2)), 4143 "ld2w {z16.s, z17.s}, p7/z, [sp, x19, lsl #2]"); 4144 COMPARE(ld2w(z16.VnS(), 4145 z17.VnS(), 4146 p0.Zeroing(), 4147 SVEMemOperand(sp, x19, LSL, 2)), 4148 "ld2w {z16.s, z17.s}, p0/z, [sp, x19, lsl #2]"); 4149 4150 COMPARE(ld2d(z16.VnD(), 4151 z17.VnD(), 4152 p0.Zeroing(), 4153 SVEMemOperand(x20, x19, LSL, 3)), 4154 "ld2d {z16.d, z17.d}, p0/z, [x20, x19, lsl #3]"); 4155 COMPARE(ld2d(z25.VnD(), 4156 z26.VnD(), 4157 p0.Zeroing(), 4158 SVEMemOperand(sp, x19, LSL, 3)), 4159 "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]"); 4160 COMPARE(ld2d(z25.VnD(), 4161 z26.VnD(), 4162 p0.Zeroing(), 4163 SVEMemOperand(sp, x19, LSL, 3)), 4164 "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]"); 4165 4166 CLEANUP(); 4167} 4168 4169TEST(sve_ld3_scalar_plus_scalar) { 4170 SETUP(); 4171 4172 COMPARE(ld3b(z25.VnB(), 4173 z26.VnB(), 4174 z27.VnB(), 4175 p1.Zeroing(), 4176 SVEMemOperand(x20, x19)), 4177 
"ld3b {z25.b, z26.b, z27.b}, p1/z, [x20, x19]"); 4178 COMPARE(ld3b(z25.VnB(), 4179 z26.VnB(), 4180 z27.VnB(), 4181 p1.Zeroing(), 4182 SVEMemOperand(sp, x19)), 4183 "ld3b {z25.b, z26.b, z27.b}, p1/z, [sp, x19]"); 4184 COMPARE(ld3b(z30.VnB(), 4185 z31.VnB(), 4186 z0.VnB(), 4187 p1.Zeroing(), 4188 SVEMemOperand(sp, x19)), 4189 "ld3b {z30.b, z31.b, z0.b}, p1/z, [sp, x19]"); 4190 4191 COMPARE(ld3h(z30.VnH(), 4192 z31.VnH(), 4193 z0.VnH(), 4194 p1.Zeroing(), 4195 SVEMemOperand(x20, x19, LSL, 1)), 4196 "ld3h {z30.h, z31.h, z0.h}, p1/z, [x20, x19, lsl #1]"); 4197 COMPARE(ld3h(z30.VnH(), 4198 z31.VnH(), 4199 z0.VnH(), 4200 p7.Zeroing(), 4201 SVEMemOperand(sp, x19, LSL, 1)), 4202 "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); 4203 COMPARE(ld3h(z30.VnH(), 4204 z31.VnH(), 4205 z0.VnH(), 4206 p7.Zeroing(), 4207 SVEMemOperand(sp, x19, LSL, 1)), 4208 "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]"); 4209 4210 COMPARE(ld3w(z16.VnS(), 4211 z17.VnS(), 4212 z18.VnS(), 4213 p7.Zeroing(), 4214 SVEMemOperand(x20, x19, LSL, 2)), 4215 "ld3w {z16.s, z17.s, z18.s}, p7/z, [x20, x19, lsl #2]"); 4216 COMPARE(ld3w(z16.VnS(), 4217 z17.VnS(), 4218 z18.VnS(), 4219 p7.Zeroing(), 4220 SVEMemOperand(sp, x19, LSL, 2)), 4221 "ld3w {z16.s, z17.s, z18.s}, p7/z, [sp, x19, lsl #2]"); 4222 COMPARE(ld3w(z16.VnS(), 4223 z17.VnS(), 4224 z18.VnS(), 4225 p0.Zeroing(), 4226 SVEMemOperand(sp, x19, LSL, 2)), 4227 "ld3w {z16.s, z17.s, z18.s}, p0/z, [sp, x19, lsl #2]"); 4228 4229 COMPARE(ld3d(z16.VnD(), 4230 z17.VnD(), 4231 z18.VnD(), 4232 p0.Zeroing(), 4233 SVEMemOperand(x20, x19, LSL, 3)), 4234 "ld3d {z16.d, z17.d, z18.d}, p0/z, [x20, x19, lsl #3]"); 4235 COMPARE(ld3d(z25.VnD(), 4236 z26.VnD(), 4237 z27.VnD(), 4238 p0.Zeroing(), 4239 SVEMemOperand(sp, x19, LSL, 3)), 4240 "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]"); 4241 COMPARE(ld3d(z25.VnD(), 4242 z26.VnD(), 4243 z27.VnD(), 4244 p0.Zeroing(), 4245 SVEMemOperand(sp, x19, LSL, 3)), 4246 "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]"); 
4247 4248 CLEANUP(); 4249} 4250 4251TEST(sve_ld4_scalar_plus_scalar) { 4252 SETUP(); 4253 4254 COMPARE(ld4b(z25.VnB(), 4255 z26.VnB(), 4256 z27.VnB(), 4257 z28.VnB(), 4258 p0.Zeroing(), 4259 SVEMemOperand(x20, x19)), 4260 "ld4b {z25.b, z26.b, z27.b, z28.b}, p0/z, [x20, x19]"); 4261 COMPARE(ld4b(z25.VnB(), 4262 z26.VnB(), 4263 z27.VnB(), 4264 z28.VnB(), 4265 p1.Zeroing(), 4266 SVEMemOperand(sp, x19)), 4267 "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]"); 4268 COMPARE(ld4b(z25.VnB(), 4269 z26.VnB(), 4270 z27.VnB(), 4271 z28.VnB(), 4272 p1.Zeroing(), 4273 SVEMemOperand(sp, x19)), 4274 "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]"); 4275 4276 COMPARE(ld4h(z31.VnH(), 4277 z0.VnH(), 4278 z1.VnH(), 4279 z2.VnH(), 4280 p1.Zeroing(), 4281 SVEMemOperand(x20, x19, LSL, 1)), 4282 "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [x20, x19, lsl #1]"); 4283 COMPARE(ld4h(z31.VnH(), 4284 z0.VnH(), 4285 z1.VnH(), 4286 z2.VnH(), 4287 p1.Zeroing(), 4288 SVEMemOperand(sp, x19, LSL, 1)), 4289 "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [sp, x19, lsl #1]"); 4290 COMPARE(ld4h(z31.VnH(), 4291 z0.VnH(), 4292 z1.VnH(), 4293 z2.VnH(), 4294 p7.Zeroing(), 4295 SVEMemOperand(sp, x19, LSL, 1)), 4296 "ld4h {z31.h, z0.h, z1.h, z2.h}, p7/z, [sp, x19, lsl #1]"); 4297 4298 COMPARE(ld4w(z31.VnS(), 4299 z0.VnS(), 4300 z1.VnS(), 4301 z2.VnS(), 4302 p7.Zeroing(), 4303 SVEMemOperand(x20, x19, LSL, 2)), 4304 "ld4w {z31.s, z0.s, z1.s, z2.s}, p7/z, [x20, x19, lsl #2]"); 4305 COMPARE(ld4w(z16.VnS(), 4306 z17.VnS(), 4307 z18.VnS(), 4308 z19.VnS(), 4309 p7.Zeroing(), 4310 SVEMemOperand(sp, x19, LSL, 2)), 4311 "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, " 4312 "[sp, x19, lsl #2]"); 4313 COMPARE(ld4w(z16.VnS(), 4314 z17.VnS(), 4315 z18.VnS(), 4316 z19.VnS(), 4317 p7.Zeroing(), 4318 SVEMemOperand(sp, x19, LSL, 2)), 4319 "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, " 4320 "[sp, x19, lsl #2]"); 4321 4322 COMPARE(ld4d(z16.VnD(), 4323 z17.VnD(), 4324 z18.VnD(), 4325 z19.VnD(), 4326 p0.Zeroing(), 4327 SVEMemOperand(x20, x19, 
LSL, 3)), 4328 "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, " 4329 "[x20, x19, lsl #3]"); 4330 COMPARE(ld4d(z16.VnD(), 4331 z17.VnD(), 4332 z18.VnD(), 4333 z19.VnD(), 4334 p0.Zeroing(), 4335 SVEMemOperand(sp, x19, LSL, 3)), 4336 "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, " 4337 "[sp, x19, lsl #3]"); 4338 COMPARE(ld4d(z25.VnD(), 4339 z26.VnD(), 4340 z27.VnD(), 4341 z28.VnD(), 4342 p0.Zeroing(), 4343 SVEMemOperand(sp, x19, LSL, 3)), 4344 "ld4d {z25.d, z26.d, z27.d, z28.d}, p0/z, " 4345 "[sp, x19, lsl #3]"); 4346 4347 CLEANUP(); 4348} 4349 4350TEST(sve_ff_contiguous) { 4351 SETUP(); 4352 4353 COMPARE(ldff1b(z24.VnB(), p1.Zeroing(), SVEMemOperand(x21)), 4354 "ldff1b {z24.b}, p1/z, [x21]"); 4355 COMPARE(ldff1b(z22.VnH(), p5.Zeroing(), SVEMemOperand(x5, x28)), 4356 "ldff1b {z22.h}, p5/z, [x5, x28]"); 4357 COMPARE(ldff1b(z2.VnS(), p5.Zeroing(), SVEMemOperand(sp, x11)), 4358 "ldff1b {z2.s}, p5/z, [sp, x11]"); 4359 COMPARE(ldff1b(z12.VnD(), p3.Zeroing(), SVEMemOperand(x26, xzr)), 4360 "ldff1b {z12.d}, p3/z, [x26]"); 4361 COMPARE(ldff1h(z21.VnH(), p3.Zeroing(), SVEMemOperand(x27)), 4362 "ldff1h {z21.h}, p3/z, [x27]"); 4363 COMPARE(ldff1h(z11.VnS(), p6.Zeroing(), SVEMemOperand(sp, x15, LSL, 1)), 4364 "ldff1h {z11.s}, p6/z, [sp, x15, lsl #1]"); 4365 COMPARE(ldff1h(z6.VnD(), p7.Zeroing(), SVEMemOperand(x8, xzr, LSL, 1)), 4366 "ldff1h {z6.d}, p7/z, [x8]"); 4367 COMPARE(ldff1w(z11.VnS(), p7.Zeroing(), SVEMemOperand(sp)), 4368 "ldff1w {z11.s}, p7/z, [sp]"); 4369 COMPARE(ldff1w(z6.VnD(), p6.Zeroing(), SVEMemOperand(x5, x0, LSL, 2)), 4370 "ldff1w {z6.d}, p6/z, [x5, x0, lsl #2]"); 4371 COMPARE(ldff1d(z0.VnD(), p3.Zeroing(), SVEMemOperand(x15, x1, LSL, 3)), 4372 "ldff1d {z0.d}, p3/z, [x15, x1, lsl #3]"); 4373 4374 COMPARE(ldff1sb(z31.VnH(), p4.Zeroing(), SVEMemOperand(x10, x25)), 4375 "ldff1sb {z31.h}, p4/z, [x10, x25]"); 4376 COMPARE(ldff1sb(z25.VnS(), p7.Zeroing(), SVEMemOperand(sp, x20)), 4377 "ldff1sb {z25.s}, p7/z, [sp, x20]"); 4378 COMPARE(ldff1sb(z20.VnD(), p3.Zeroing(), 
SVEMemOperand(x19, xzr)), 4379 "ldff1sb {z20.d}, p3/z, [x19]"); 4380 COMPARE(ldff1sh(z18.VnS(), p3.Zeroing(), SVEMemOperand(sp, x0, LSL, 1)), 4381 "ldff1sh {z18.s}, p3/z, [sp, x0, lsl #1]"); 4382 COMPARE(ldff1sh(z30.VnD(), p1.Zeroing(), SVEMemOperand(x28, xzr, LSL, 1)), 4383 "ldff1sh {z30.d}, p1/z, [x28]"); 4384 COMPARE(ldff1sw(z3.VnD(), p4.Zeroing(), SVEMemOperand(x22, x18, LSL, 2)), 4385 "ldff1sw {z3.d}, p4/z, [x22, x18, lsl #2]"); 4386 4387 CLEANUP(); 4388} 4389 4390TEST(sve_mem_contiguous_load) { 4391 SETUP(); 4392 4393 COMPARE(ld1rqb(z3.VnB(), p2.Zeroing(), SVEMemOperand(x22, x18)), 4394 "ld1rqb {z3.b}, p2/z, [x22, x18]"); 4395 COMPARE(ld1rqd(z6.VnD(), p0.Zeroing(), SVEMemOperand(x18, x9, LSL, 3)), 4396 "ld1rqd {z6.d}, p0/z, [x18, x9, lsl #3]"); 4397 COMPARE(ld1rqh(z1.VnH(), p7.Zeroing(), SVEMemOperand(x9, x6, LSL, 1)), 4398 "ld1rqh {z1.h}, p7/z, [x9, x6, lsl #1]"); 4399 COMPARE(ld1rqw(z12.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)), 4400 "ld1rqw {z12.s}, p4/z, [sp, xzr, lsl #2]"); 4401 COMPARE(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 0)), 4402 "ld1rqb {z18.b}, p2/z, [x18]"); 4403 COMPARE(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 16)), 4404 "ld1rqb {z18.b}, p2/z, [x18, #16]"); 4405 COMPARE(ld1rqd(z11.VnD(), p1.Zeroing(), SVEMemOperand(x23, -16)), 4406 "ld1rqd {z11.d}, p1/z, [x23, #-16]"); 4407 COMPARE(ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)), 4408 "ld1rqh {z11.h}, p1/z, [x0, #112]"); 4409 COMPARE(ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)), 4410 "ld1rqw {z22.s}, p3/z, [sp, #-128]"); 4411 4412 COMPARE_MACRO(Ld1rqb(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)), 4413 "ld1rqb {z0.b}, p0/z, [x0, x1]"); 4414 COMPARE_MACRO(Ld1rqh(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)), 4415 "ld1rqh {z0.h}, p0/z, [x0, x1, lsl #1]"); 4416 COMPARE_MACRO(Ld1rqw(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)), 4417 "ld1rqw {z0.s}, p0/z, [x0, x1, lsl #2]"); 4418 COMPARE_MACRO(Ld1rqd(z0.VnD(), p0.Zeroing(), 
SVEMemOperand(x0, x1, LSL, 3)), 4419 "ld1rqd {z0.d}, p0/z, [x0, x1, lsl #3]"); 4420 COMPARE_MACRO(Ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)), 4421 "ld1rqh {z11.h}, p1/z, [x0, #112]"); 4422 COMPARE_MACRO(Ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)), 4423 "ld1rqw {z22.s}, p3/z, [sp, #-128]"); 4424 4425 COMPARE_MACRO(Ld1rqb(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, 2222)), 4426 "add x16, x0, #0x8ae (2222)\n" 4427 "ld1rqb {z0.b}, p0/z, [x16]"); 4428 COMPARE_MACRO(Ld1rqw(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1)), 4429 "add x16, x0, x1\n" 4430 "ld1rqw {z0.s}, p0/z, [x16]"); 4431 COMPARE_MACRO(Ld1rqd(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)), 4432 "add x16, x0, x1, lsl #1\n" 4433 "ld1rqd {z0.d}, p0/z, [x16]"); 4434 4435 COMPARE(ldnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)), 4436 "ldnt1b {z21.b}, p5/z, [x1, x23]"); 4437 COMPARE(ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(x23, x6, LSL, 3)), 4438 "ldnt1d {z10.d}, p0/z, [x23, x6, lsl #3]"); 4439 COMPARE(ldnt1h(z30.VnH(), p4.Zeroing(), SVEMemOperand(x6, x11, LSL, 1)), 4440 "ldnt1h {z30.h}, p4/z, [x6, x11, lsl #1]"); 4441 COMPARE(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)), 4442 "ldnt1w {z0.s}, p4/z, [x11, x1, lsl #2]"); 4443 COMPARE(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)), 4444 "ldnt1w {z0.s}, p4/z, [sp, xzr, lsl #2]"); 4445 4446 COMPARE_MACRO(Ldnt1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)), 4447 "ldnt1b {z0.b}, p0/z, [x0, x1]"); 4448 COMPARE_MACRO(Ldnt1h(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)), 4449 "ldnt1h {z0.h}, p0/z, [x0, x1, lsl #1]"); 4450 COMPARE_MACRO(Ldnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)), 4451 "ldnt1w {z0.s}, p0/z, [x0, x1, lsl #2]"); 4452 COMPARE_MACRO(Ldnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)), 4453 "ldnt1d {z0.d}, p0/z, [x0, x1, lsl #3]"); 4454 4455 COMPARE_MACRO(Ldnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)), 4456 "add x16, x0, x1, lsl 
#3\n" 4457 "ldnt1w {z0.s}, p0/z, [x16]"); 4458 COMPARE_MACRO(Ldnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 4)), 4459 "add x16, x0, x1, lsl #4\n" 4460 "ldnt1d {z0.d}, p0/z, [x16]"); 4461 4462 COMPARE(ldnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)), 4463 "ldnt1b {z1.b}, p3/z, [x11]"); 4464 COMPARE(ldnt1b(z2.VnB(), p2.Zeroing(), SVEMemOperand(x12, -8, SVE_MUL_VL)), 4465 "ldnt1b {z2.b}, p2/z, [x12, #-8, mul vl]"); 4466 COMPARE(ldnt1d(z2.VnD(), p7.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)), 4467 "ldnt1d {z2.d}, p7/z, [x13, #-2, mul vl]"); 4468 COMPARE(ldnt1h(z26.VnH(), p4.Zeroing(), SVEMemOperand(x16, 3, SVE_MUL_VL)), 4469 "ldnt1h {z26.h}, p4/z, [x16, #3, mul vl]"); 4470 COMPARE(ldnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), 4471 "ldnt1w {z17.s}, p4/z, [x15, #7, mul vl]"); 4472 COMPARE(ldnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(sp, 7, SVE_MUL_VL)), 4473 "ldnt1w {z17.s}, p4/z, [sp, #7, mul vl]"); 4474 4475 COMPARE_MACRO(Ldnt1b(z2.VnB(), 4476 p0.Zeroing(), 4477 SVEMemOperand(x10, 42, SVE_MUL_VL)), 4478 "mov x16, #0x2a\n" 4479 "rdvl x17, #1\n" 4480 "madd x16, x16, x17, x10\n" 4481 "ldnt1b {z2.b}, p0/z, [x16]"); 4482 COMPARE_MACRO(Ldnt1h(z3.VnH(), 4483 p1.Zeroing(), 4484 SVEMemOperand(x11, 31, SVE_MUL_VL)), 4485 "addvl x16, x11, #31\n" 4486 "ldnt1h {z3.h}, p1/z, [x16]"); 4487 COMPARE_MACRO(Ldnt1w(z4.VnS(), 4488 p2.Zeroing(), 4489 SVEMemOperand(x12, -35, SVE_MUL_VL)), 4490 "mov x16, #0xffffffffffffffdd\n" 4491 "rdvl x17, #1\n" 4492 "madd x16, x16, x17, x12\n" 4493 "ldnt1w {z4.s}, p2/z, [x16]"); 4494 COMPARE_MACRO(Ldnt1d(z5.VnD(), p3.Zeroing(), SVEMemOperand(x13, 3)), 4495 "add x16, x13, #0x3 (3)\n" 4496 "ldnt1d {z5.d}, p3/z, [x16]"); 4497 4498 COMPARE(ldnf1b(z1.VnH(), p0.Zeroing(), SVEMemOperand(x25, -8, SVE_MUL_VL)), 4499 "ldnf1b {z1.h}, p0/z, [x25, #-8, mul vl]"); 4500 COMPARE(ldnf1b(z0.VnS(), p0.Zeroing(), SVEMemOperand(x2, 7, SVE_MUL_VL)), 4501 "ldnf1b {z0.s}, p0/z, [x2, #7, mul vl]"); 4502 COMPARE(ldnf1b(z31.VnD(), 
p6.Zeroing(), SVEMemOperand(x0, -7, SVE_MUL_VL)), 4503 "ldnf1b {z31.d}, p6/z, [x0, #-7, mul vl]"); 4504 COMPARE(ldnf1b(z25.VnB(), p1.Zeroing(), SVEMemOperand(x5, 6, SVE_MUL_VL)), 4505 "ldnf1b {z25.b}, p1/z, [x5, #6, mul vl]"); 4506 COMPARE(ldnf1d(z25.VnD(), p0.Zeroing(), SVEMemOperand(x11, -6, SVE_MUL_VL)), 4507 "ldnf1d {z25.d}, p0/z, [x11, #-6, mul vl]"); 4508 COMPARE(ldnf1h(z22.VnH(), p4.Zeroing(), SVEMemOperand(x7, 5, SVE_MUL_VL)), 4509 "ldnf1h {z22.h}, p4/z, [x7, #5, mul vl]"); 4510 COMPARE(ldnf1h(z7.VnS(), p2.Zeroing(), SVEMemOperand(x1, -5, SVE_MUL_VL)), 4511 "ldnf1h {z7.s}, p2/z, [x1, #-5, mul vl]"); 4512 COMPARE(ldnf1h(z5.VnD(), p3.Zeroing(), SVEMemOperand(x29, 4, SVE_MUL_VL)), 4513 "ldnf1h {z5.d}, p3/z, [x29, #4, mul vl]"); 4514 COMPARE(ldnf1sb(z12.VnH(), p5.Zeroing(), SVEMemOperand(x27, -4, SVE_MUL_VL)), 4515 "ldnf1sb {z12.h}, p5/z, [x27, #-4, mul vl]"); 4516 COMPARE(ldnf1sb(z10.VnS(), p2.Zeroing(), SVEMemOperand(x13, 3, SVE_MUL_VL)), 4517 "ldnf1sb {z10.s}, p2/z, [x13, #3, mul vl]"); 4518 COMPARE(ldnf1sb(z25.VnD(), p6.Zeroing(), SVEMemOperand(x26, -3, SVE_MUL_VL)), 4519 "ldnf1sb {z25.d}, p6/z, [x26, #-3, mul vl]"); 4520 COMPARE(ldnf1sh(z3.VnS(), p5.Zeroing(), SVEMemOperand(x1, 2, SVE_MUL_VL)), 4521 "ldnf1sh {z3.s}, p5/z, [x1, #2, mul vl]"); 4522 COMPARE(ldnf1sh(z8.VnD(), p6.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)), 4523 "ldnf1sh {z8.d}, p6/z, [x13, #-2, mul vl]"); 4524 COMPARE(ldnf1sw(z5.VnD(), p6.Zeroing(), SVEMemOperand(x2, 1, SVE_MUL_VL)), 4525 "ldnf1sw {z5.d}, p6/z, [x2, #1, mul vl]"); 4526 COMPARE(ldnf1w(z11.VnS(), p3.Zeroing(), SVEMemOperand(sp, -1, SVE_MUL_VL)), 4527 "ldnf1w {z11.s}, p3/z, [sp, #-1, mul vl]"); 4528 COMPARE(ldnf1w(z10.VnD(), p6.Zeroing(), SVEMemOperand(x12)), 4529 "ldnf1w {z10.d}, p6/z, [x12]"); 4530 4531 CLEANUP(); 4532} 4533 4534TEST(sve_mem_contiguous_store) { 4535 SETUP(); 4536 4537 COMPARE(stnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)), 4538 "stnt1b {z21.b}, p5, [x1, x23]"); 4539 COMPARE(stnt1d(z10.VnD(), 
p0.Zeroing(), SVEMemOperand(x23, x6, LSL, 3)), 4540 "stnt1d {z10.d}, p0, [x23, x6, lsl #3]"); 4541 COMPARE(stnt1h(z30.VnH(), p4.Zeroing(), SVEMemOperand(x6, x11, LSL, 1)), 4542 "stnt1h {z30.h}, p4, [x6, x11, lsl #1]"); 4543 COMPARE(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)), 4544 "stnt1w {z0.s}, p4, [x11, x1, lsl #2]"); 4545 COMPARE(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)), 4546 "stnt1w {z0.s}, p4, [sp, xzr, lsl #2]"); 4547 4548 COMPARE(stnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)), 4549 "stnt1b {z1.b}, p3, [x11]"); 4550 COMPARE(stnt1b(z2.VnB(), p2.Zeroing(), SVEMemOperand(x12, -8, SVE_MUL_VL)), 4551 "stnt1b {z2.b}, p2, [x12, #-8, mul vl]"); 4552 COMPARE(stnt1d(z2.VnD(), p7.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)), 4553 "stnt1d {z2.d}, p7, [x13, #-2, mul vl]"); 4554 COMPARE(stnt1h(z26.VnH(), p4.Zeroing(), SVEMemOperand(x16, 3, SVE_MUL_VL)), 4555 "stnt1h {z26.h}, p4, [x16, #3, mul vl]"); 4556 COMPARE(stnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), 4557 "stnt1w {z17.s}, p4, [x15, #7, mul vl]"); 4558 COMPARE(stnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(sp, 7, SVE_MUL_VL)), 4559 "stnt1w {z17.s}, p4, [sp, #7, mul vl]"); 4560 4561 COMPARE_MACRO(Stnt1b(z2.VnB(), 4562 p0.Zeroing(), 4563 SVEMemOperand(x10, 42, SVE_MUL_VL)), 4564 "mov x16, #0x2a\n" 4565 "rdvl x17, #1\n" 4566 "madd x16, x16, x17, x10\n" 4567 "stnt1b {z2.b}, p0, [x16]"); 4568 COMPARE_MACRO(Stnt1h(z3.VnH(), 4569 p1.Zeroing(), 4570 SVEMemOperand(x11, 31, SVE_MUL_VL)), 4571 "addvl x16, x11, #31\n" 4572 "stnt1h {z3.h}, p1, [x16]"); 4573 COMPARE_MACRO(Stnt1w(z4.VnS(), 4574 p2.Zeroing(), 4575 SVEMemOperand(x12, -35, SVE_MUL_VL)), 4576 "mov x16, #0xffffffffffffffdd\n" 4577 "rdvl x17, #1\n" 4578 "madd x16, x16, x17, x12\n" 4579 "stnt1w {z4.s}, p2, [x16]"); 4580 4581 COMPARE_MACRO(Stnt1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)), 4582 "stnt1b {z0.b}, p0, [x0, x1]"); 4583 COMPARE_MACRO(Stnt1h(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 
1)), 4584 "stnt1h {z0.h}, p0, [x0, x1, lsl #1]"); 4585 COMPARE_MACRO(Stnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)), 4586 "stnt1w {z0.s}, p0, [x0, x1, lsl #2]"); 4587 COMPARE_MACRO(Stnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)), 4588 "stnt1d {z0.d}, p0, [x0, x1, lsl #3]"); 4589 4590 CLEANUP(); 4591} 4592 4593TEST(sve_load_broadcast_octo) { 4594 SETUP(); 4595 4596 COMPARE_MACRO(Ld1rob(z3.VnB(), p1.Zeroing(), SVEMemOperand(x0, x1)), 4597 "ld1rob {z3.b}, p1/z, [x0, x1]"); 4598 COMPARE_MACRO(Ld1roh(z6.VnH(), p4.Zeroing(), SVEMemOperand(sp, x31, LSL, 1)), 4599 "ld1roh {z6.h}, p4/z, [sp]"); 4600 COMPARE_MACRO(Ld1roh(z6.VnH(), p4.Zeroing(), SVEMemOperand(sp, x30, LSL, 1)), 4601 "ld1roh {z6.h}, p4/z, [sp, x30, lsl #1]"); 4602 COMPARE_MACRO(Ld1row(z2.VnS(), p6.Zeroing(), SVEMemOperand(x30, x30, LSL, 2)), 4603 "ld1row {z2.s}, p6/z, [x30, x30, lsl #2]"); 4604 COMPARE_MACRO(Ld1rod(z30.VnD(), p7.Zeroing(), SVEMemOperand(x21, x5, LSL, 3)), 4605 "ld1rod {z30.d}, p7/z, [x21, x5, lsl #3]"); 4606 COMPARE_MACRO(Ld1rob(z9.VnB(), p0.Zeroing(), SVEMemOperand(sp, 32)), 4607 "ld1rob {z9.b}, p0/z, [sp, #32]"); 4608 COMPARE_MACRO(Ld1roh(z19.VnH(), p3.Zeroing(), SVEMemOperand(x4)), 4609 "ld1roh {z19.h}, p3/z, [x4]"); 4610 COMPARE_MACRO(Ld1row(z21.VnS(), p3.Zeroing(), SVEMemOperand(x11, 224)), 4611 "ld1row {z21.s}, p3/z, [x11, #224]"); 4612 COMPARE_MACRO(Ld1rod(z0.VnD(), p2.Zeroing(), SVEMemOperand(x16, -256)), 4613 "ld1rod {z0.d}, p2/z, [x16, #-256]"); 4614 4615 CLEANUP(); 4616} 4617 4618TEST(sve_ldr_str_simple) { 4619 SETUP(); 4620 4621 COMPARE(str(p14, SVEMemOperand(x0)), "str p14, [x0]"); 4622 COMPARE(str(z14, SVEMemOperand(sp)), "str z14, [sp]"); 4623 COMPARE(ldr(p4, SVEMemOperand(x0)), "ldr p4, [x0]"); 4624 COMPARE(ldr(z4, SVEMemOperand(sp)), "ldr z4, [sp]"); 4625 COMPARE(str(p15, SVEMemOperand(sp, -256, SVE_MUL_VL)), 4626 "str p15, [sp, #-256, mul vl]"); 4627 COMPARE(str(z16, SVEMemOperand(x13, 255, SVE_MUL_VL)), 4628 "str z16, [x13, #255, mul vl]"); 4629 
COMPARE(ldr(p5, SVEMemOperand(sp, -42, SVE_MUL_VL)), 4630 "ldr p5, [sp, #-42, mul vl]"); 4631 COMPARE(ldr(z6, SVEMemOperand(x28, 42, SVE_MUL_VL)), 4632 "ldr z6, [x28, #42, mul vl]"); 4633 4634 COMPARE_MACRO(Str(p14, SVEMemOperand(x0)), "str p14, [x0]"); 4635 COMPARE_MACRO(Str(z14, SVEMemOperand(sp)), "str z14, [sp]"); 4636 COMPARE_MACRO(Ldr(p4, SVEMemOperand(x0)), "ldr p4, [x0]"); 4637 COMPARE_MACRO(Ldr(z4, SVEMemOperand(sp)), "ldr z4, [sp]"); 4638 COMPARE_MACRO(Str(p15, SVEMemOperand(sp, -256, SVE_MUL_VL)), 4639 "str p15, [sp, #-256, mul vl]"); 4640 COMPARE_MACRO(Str(z16, SVEMemOperand(x13, 255, SVE_MUL_VL)), 4641 "str z16, [x13, #255, mul vl]"); 4642 COMPARE_MACRO(Ldr(p5, SVEMemOperand(sp, -42, SVE_MUL_VL)), 4643 "ldr p5, [sp, #-42, mul vl]"); 4644 COMPARE_MACRO(Ldr(z6, SVEMemOperand(x28, 42, SVE_MUL_VL)), 4645 "ldr z6, [x28, #42, mul vl]"); 4646 4647 COMPARE_MACRO(Ldr(z6, SVEMemOperand(x28, 42, SVE_MUL_VL)), 4648 "ldr z6, [x28, #42, mul vl]"); 4649 4650 // IsEquivalentToScalar 4651 COMPARE_MACRO(Str(p0, SVEMemOperand(x0, xzr)), "str p0, [x0]"); 4652 COMPARE_MACRO(Ldr(p1, SVEMemOperand(sp, xzr)), "ldr p1, [sp]"); 4653 COMPARE_MACRO(Str(z2, SVEMemOperand(x12, xzr)), "str z2, [x12]"); 4654 COMPARE_MACRO(Ldr(z3, SVEMemOperand(x7, xzr)), "ldr z3, [x7]"); 4655 4656 // Other cases fall back on Adr. We test Adr separately, so here we just test 4657 // sequences that stress scratch register allocation. 
4658 COMPARE_MACRO(Str(p4, SVEMemOperand(x5, 4242, SVE_MUL_VL)), 4659 "mov x16, #0x1092\n" 4660 "rdvl x17, #1\n" 4661 "mul x16, x16, x17\n" 4662 "add x16, x5, x16, asr #3\n" 4663 "str p4, [x16]"); 4664 COMPARE_MACRO(Ldr(p6, SVEMemOperand(sp, 4242, SVE_MUL_VL)), 4665 "mov x16, #0x1092\n" 4666 "rdvl x17, #1\n" 4667 "mul x16, x16, x17\n" 4668 "asr x16, x16, #3\n" 4669 "add x16, sp, x16\n" 4670 "ldr p6, [x16]"); 4671 COMPARE_MACRO(Str(z7, SVEMemOperand(sp, 4242, SVE_MUL_VL)), 4672 "mov x16, #0x1092\n" 4673 "rdvl x17, #1\n" 4674 "mul x16, x16, x17\n" 4675 "add x16, sp, x16\n" 4676 "str z7, [x16]"); 4677 COMPARE_MACRO(Ldr(z8, SVEMemOperand(x9, 4242, SVE_MUL_VL)), 4678 "mov x16, #0x1092\n" 4679 "rdvl x17, #1\n" 4680 "madd x16, x16, x17, x9\n" 4681 "ldr z8, [x16]"); 4682 4683 CLEANUP(); 4684} 4685 4686TEST(sve_ld1_st1) { 4687 SETUP(); 4688 4689 COMPARE(st1b(z11.VnB(), p0, SVEMemOperand(x22)), "st1b {z11.b}, p0, [x22]"); 4690 COMPARE(st1b(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)), 4691 "st1b {z15.h}, p1, [x15, #7, mul vl]"); 4692 COMPARE(st1b(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), 4693 "st1b {z19.s}, p2, [sp, #-8, mul vl]"); 4694 COMPARE(st1b(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), 4695 "st1b {z23.d}, p3, [x1]"); 4696 COMPARE(st1b(z2.VnB(), p4, SVEMemOperand(x1, x2)), 4697 "st1b {z2.b}, p4, [x1, x2]"); 4698 COMPARE(st1b(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 0)), 4699 "st1b {z31.d}, p7, [x9, x9]"); 4700 COMPARE(st1b(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)), 4701 "st1b {z3.s}, p0, [z14.s, #30]"); 4702 COMPARE(st1b(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 31)), 4703 "st1b {z14.d}, p4, [z3.d, #31]"); 4704 COMPARE(st1b(z15.VnD(), p5, SVEMemOperand(x0, z5.VnD())), 4705 "st1b {z15.d}, p5, [x0, z5.d]"); 4706 COMPARE(st1b(z15.VnS(), p5, SVEMemOperand(sp, z2.VnS(), UXTW)), 4707 "st1b {z15.s}, p5, [sp, z2.s, uxtw]"); 4708 COMPARE(st1b(z15.VnD(), p5, SVEMemOperand(x0, z25.VnD(), SXTW)), 4709 "st1b {z15.d}, p5, [x0, z25.d, sxtw]"); 4710 4711 
COMPARE(st1h(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)), 4712 "st1h {z15.h}, p1, [x15, #7, mul vl]"); 4713 COMPARE(st1h(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), 4714 "st1h {z19.s}, p2, [sp, #-8, mul vl]"); 4715 COMPARE(st1h(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), 4716 "st1h {z23.d}, p3, [x1]"); 4717 COMPARE(st1h(z2.VnH(), p4, SVEMemOperand(x1, x2, LSL, 1)), 4718 "st1h {z2.h}, p4, [x1, x2, lsl #1]"); 4719 COMPARE(st1h(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 1)), 4720 "st1h {z31.d}, p7, [x9, x9, lsl #1]"); 4721 COMPARE(st1h(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)), 4722 "st1h {z3.s}, p0, [z14.s, #30]"); 4723 COMPARE(st1h(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 62)), 4724 "st1h {z14.d}, p4, [z3.d, #62]"); 4725 COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD())), 4726 "st1h {z15.d}, p6, [sp, z6.d]"); 4727 COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD(), LSL, 1)), 4728 "st1h {z15.d}, p6, [sp, z6.d, lsl #1]"); 4729 COMPARE(st1h(z15.VnS(), p3, SVEMemOperand(x25, z3.VnS(), SXTW)), 4730 "st1h {z15.s}, p3, [x25, z3.s, sxtw]"); 4731 COMPARE(st1h(z15.VnS(), p6, SVEMemOperand(x7, z15.VnS(), SXTW, 1)), 4732 "st1h {z15.s}, p6, [x7, z15.s, sxtw #1]"); 4733 COMPARE(st1h(z17.VnD(), p3, SVEMemOperand(sp, z26.VnD(), SXTW)), 4734 "st1h {z17.d}, p3, [sp, z26.d, sxtw]"); 4735 COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(x13, z9.VnD(), UXTW, 1)), 4736 "st1h {z15.d}, p6, [x13, z9.d, uxtw #1]"); 4737 4738 COMPARE(st1w(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), 4739 "st1w {z19.s}, p2, [sp, #-8, mul vl]"); 4740 COMPARE(st1w(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), 4741 "st1w {z23.d}, p3, [x1]"); 4742 COMPARE(st1w(z2.VnS(), p4, SVEMemOperand(x1, x2, LSL, 2)), 4743 "st1w {z2.s}, p4, [x1, x2, lsl #2]"); 4744 COMPARE(st1w(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 2)), 4745 "st1w {z31.d}, p7, [x9, x9, lsl #2]"); 4746 COMPARE(st1w(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 32)), 4747 "st1w {z3.s}, p0, [z14.s, #32]"); 4748 COMPARE(st1w(z14.VnD(), 
p4, SVEMemOperand(z3.VnD(), 124)), 4749 "st1w {z14.d}, p4, [z3.d, #124]"); 4750 COMPARE(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD())), 4751 "st1w {z17.d}, p2, [x30, z5.d]"); 4752 COMPARE(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD(), LSL, 2)), 4753 "st1w {z17.d}, p2, [x30, z5.d, lsl #2]"); 4754 COMPARE(st1w(z15.VnS(), p7, SVEMemOperand(x26, z4.VnS(), UXTW)), 4755 "st1w {z15.s}, p7, [x26, z4.s, uxtw]"); 4756 COMPARE(st1w(z15.VnS(), p4, SVEMemOperand(x8, z16.VnS(), UXTW, 2)), 4757 "st1w {z15.s}, p4, [x8, z16.s, uxtw #2]"); 4758 COMPARE(st1w(z19.VnD(), p7, SVEMemOperand(x1, z27.VnD(), UXTW)), 4759 "st1w {z19.d}, p7, [x1, z27.d, uxtw]"); 4760 COMPARE(st1w(z15.VnD(), p4, SVEMemOperand(sp, z10.VnD(), SXTW, 2)), 4761 "st1w {z15.d}, p4, [sp, z10.d, sxtw #2]"); 4762 4763 COMPARE(st1d(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), 4764 "st1d {z23.d}, p3, [x1]"); 4765 COMPARE(st1d(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 3)), 4766 "st1d {z31.d}, p7, [x9, x9, lsl #3]"); 4767 COMPARE(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 32)), 4768 "st1d {z14.d}, p4, [z3.d, #32]"); 4769 COMPARE(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 248)), 4770 "st1d {z14.d}, p4, [z3.d, #248]"); 4771 COMPARE(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD())), 4772 "st1d {z19.d}, p2, [x29, z22.d]"); 4773 COMPARE(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD(), LSL, 3)), 4774 "st1d {z19.d}, p2, [x29, z22.d, lsl #3]"); 4775 COMPARE(st1d(z21.VnD(), p1, SVEMemOperand(x2, z28.VnD(), SXTW)), 4776 "st1d {z21.d}, p1, [x2, z28.d, sxtw]"); 4777 COMPARE(st1d(z15.VnD(), p2, SVEMemOperand(x14, z11.VnD(), UXTW, 3)), 4778 "st1d {z15.d}, p2, [x14, z11.d, uxtw #3]"); 4779 4780 COMPARE(ld1b(z11.VnB(), p0.Zeroing(), SVEMemOperand(x22)), 4781 "ld1b {z11.b}, p0/z, [x22]"); 4782 COMPARE(ld1b(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), 4783 "ld1b {z15.h}, p1/z, [x15, #7, mul vl]"); 4784 COMPARE(ld1b(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), 4785 "ld1b {z19.s}, p2/z, [sp, #-8, 
mul vl]"); 4786 COMPARE(ld1b(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4787 "ld1b {z23.d}, p3/z, [x1]"); 4788 COMPARE(ld1b(z2.VnB(), p4.Zeroing(), SVEMemOperand(x1, x2)), 4789 "ld1b {z2.b}, p4/z, [x1, x2]"); 4790 COMPARE(ld1b(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)), 4791 "ld1b {z31.d}, p7/z, [x9, x9]"); 4792 4793 COMPARE(ld1h(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), 4794 "ld1h {z15.h}, p1/z, [x15, #7, mul vl]"); 4795 COMPARE(ld1h(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), 4796 "ld1h {z19.s}, p2/z, [sp, #-8, mul vl]"); 4797 COMPARE(ld1h(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4798 "ld1h {z23.d}, p3/z, [x1]"); 4799 COMPARE(ld1h(z2.VnH(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 1)), 4800 "ld1h {z2.h}, p4/z, [x1, x2, lsl #1]"); 4801 COMPARE(ld1h(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)), 4802 "ld1h {z31.d}, p7/z, [x9, x9, lsl #1]"); 4803 4804 COMPARE(ld1w(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), 4805 "ld1w {z19.s}, p2/z, [sp, #-8, mul vl]"); 4806 COMPARE(ld1w(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4807 "ld1w {z23.d}, p3/z, [x1]"); 4808 COMPARE(ld1w(z2.VnS(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 2)), 4809 "ld1w {z2.s}, p4/z, [x1, x2, lsl #2]"); 4810 COMPARE(ld1w(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)), 4811 "ld1w {z31.d}, p7/z, [x9, x9, lsl #2]"); 4812 4813 COMPARE(ld1d(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4814 "ld1d {z23.d}, p3/z, [x1]"); 4815 COMPARE(ld1d(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 3)), 4816 "ld1d {z31.d}, p7/z, [x9, x9, lsl #3]"); 4817 4818 COMPARE(ld1sb(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)), 4819 "ld1sb {z15.h}, p1/z, [x15, #7, mul vl]"); 4820 COMPARE(ld1sb(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), 4821 "ld1sb {z19.s}, p2/z, [sp, #-8, mul vl]"); 4822 COMPARE(ld1d(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, 
SVE_MUL_VL)), 4823 "ld1d {z23.d}, p3/z, [x1]"); 4824 COMPARE(ld1sb(z5.VnH(), p1.Zeroing(), SVEMemOperand(x15, x1, LSL, 0)), 4825 "ld1sb {z5.h}, p1/z, [x15, x1]"); 4826 COMPARE(ld1sb(z9.VnS(), p2.Zeroing(), SVEMemOperand(x29, x3, LSL, 0)), 4827 "ld1sb {z9.s}, p2/z, [x29, x3]"); 4828 COMPARE(ld1sb(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)), 4829 "ld1sb {z31.d}, p7/z, [x9, x9]"); 4830 4831 COMPARE(ld1sh(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)), 4832 "ld1sh {z19.s}, p2/z, [sp, #-8, mul vl]"); 4833 COMPARE(ld1sh(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4834 "ld1sh {z23.d}, p3/z, [x1]"); 4835 COMPARE(ld1sh(z11.VnS(), p4.Zeroing(), SVEMemOperand(x22, x10, LSL, 1)), 4836 "ld1sh {z11.s}, p4/z, [x22, x10, lsl #1]"); 4837 COMPARE(ld1sh(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)), 4838 "ld1sh {z31.d}, p7/z, [x9, x9, lsl #1]"); 4839 4840 COMPARE(ld1sw(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4841 "ld1sw {z23.d}, p3/z, [x1]"); 4842 COMPARE(ld1sw(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)), 4843 "ld1sw {z31.d}, p7/z, [x9, x9, lsl #2]"); 4844 4845 CLEANUP(); 4846} 4847 4848TEST(sve_ld1_st1_macro) { 4849 SETUP(); 4850 4851 // Pass-through cases. 
4852 COMPARE_MACRO(St1b(z11.VnB(), p0, SVEMemOperand(x22)), 4853 "st1b {z11.b}, p0, [x22]"); 4854 COMPARE_MACRO(St1b(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)), 4855 "st1b {z15.h}, p1, [x15, #7, mul vl]"); 4856 COMPARE_MACRO(St1b(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), 4857 "st1b {z19.s}, p2, [sp, #-8, mul vl]"); 4858 COMPARE_MACRO(St1b(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), 4859 "st1b {z23.d}, p3, [x1]"); 4860 COMPARE_MACRO(St1b(z2.VnB(), p4, SVEMemOperand(x1, x2)), 4861 "st1b {z2.b}, p4, [x1, x2]"); 4862 COMPARE_MACRO(St1b(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 0)), 4863 "st1b {z31.d}, p7, [x9, x9]"); 4864 COMPARE_MACRO(St1b(z3.VnS(), p6, SVEMemOperand(z4.VnS(), 22)), 4865 "st1b {z3.s}, p6, [z4.s, #22]"); 4866 4867 COMPARE_MACRO(St1h(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)), 4868 "st1h {z15.h}, p1, [x15, #7, mul vl]"); 4869 COMPARE_MACRO(St1h(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), 4870 "st1h {z19.s}, p2, [sp, #-8, mul vl]"); 4871 COMPARE_MACRO(St1h(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), 4872 "st1h {z23.d}, p3, [x1]"); 4873 COMPARE_MACRO(St1h(z2.VnH(), p4, SVEMemOperand(x1, x2, LSL, 1)), 4874 "st1h {z2.h}, p4, [x1, x2, lsl #1]"); 4875 COMPARE_MACRO(St1h(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 1)), 4876 "st1h {z31.d}, p7, [x9, x9, lsl #1]"); 4877 COMPARE_MACRO(St1h(z3.VnD(), p5, SVEMemOperand(z0.VnD())), 4878 "st1h {z3.d}, p5, [z0.d]"); 4879 4880 COMPARE_MACRO(St1w(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)), 4881 "st1w {z19.s}, p2, [sp, #-8, mul vl]"); 4882 COMPARE_MACRO(St1w(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), 4883 "st1w {z23.d}, p3, [x1]"); 4884 COMPARE_MACRO(St1w(z2.VnS(), p4, SVEMemOperand(x1, x2, LSL, 2)), 4885 "st1w {z2.s}, p4, [x1, x2, lsl #2]"); 4886 COMPARE_MACRO(St1w(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 2)), 4887 "st1w {z31.d}, p7, [x9, x9, lsl #2]"); 4888 COMPARE_MACRO(St1w(z12.VnS(), p2, SVEMemOperand(z13.VnS(), 124)), 4889 "st1w {z12.s}, p2, [z13.s, #124]"); 
4890 4891 COMPARE_MACRO(St1d(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)), 4892 "st1d {z23.d}, p3, [x1]"); 4893 COMPARE_MACRO(St1d(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 3)), 4894 "st1d {z31.d}, p7, [x9, x9, lsl #3]"); 4895 COMPARE_MACRO(St1d(z13.VnD(), p3, SVEMemOperand(z12.VnD(), 248)), 4896 "st1d {z13.d}, p3, [z12.d, #248]"); 4897 4898 // SVEMemOperand synthesis. 4899 // Check that the MacroAssembler falls back on `CalculateSVEAddress` at the 4900 // boundary conditions. We test this helper independently. 4901 COMPARE_MACRO(St1b(z10.VnB(), p7, SVEMemOperand(x0, 8, SVE_MUL_VL)), 4902 "addvl x16, x0, #8\n" 4903 "st1b {z10.b}, p7, [x16]"); 4904 COMPARE_MACRO(St1h(z11.VnS(), p5, SVEMemOperand(sp, -9, SVE_MUL_VL)), 4905 "mov x16, #0xffffffffffffffdc\n" 4906 "rdvl x17, #1\n" 4907 "mul x16, x16, x17\n" 4908 "asr x16, x16, #3\n" 4909 "add x16, sp, x16\n" 4910 "st1h {z11.s}, p5, [x16]"); 4911 COMPARE_MACRO(St1w(z22.VnS(), p3, SVEMemOperand(sp, 42)), 4912 "add x16, sp, #0x2a (42)\n" 4913 "st1w {z22.s}, p3, [x16]"); 4914 COMPARE_MACRO(St1d(z22.VnD(), p1, SVEMemOperand(x3, x4)), 4915 "add x16, x3, x4\n" 4916 "st1d {z22.d}, p1, [x16]"); 4917 COMPARE_MACRO(St1b(z30.VnD(), p0, SVEMemOperand(x9, xzr)), 4918 "st1b {z30.d}, p0, [x9]"); 4919 4920// TODO: Fix these - they need scatter-store-to-scalar-plus-vector support. 
4921#if 0 4922 COMPARE_MACRO(St1b(z1.VnD(), p6, SVEMemOperand(z0.VnD(), 32)), "mov x16, #0x20\n" "st1b {z1.d}, p6/z, [x16, z0.d]"); 4923 COMPARE_MACRO(St1h(z1.VnS(), p6, SVEMemOperand(z0.VnS(), -1)), "mov x16, #0xffffffffffffffff\n" "st1h {z1.s}, p6/z, [x16, z0.s]"); 4924#endif 4925 4926 COMPARE_MACRO(Ld1b(z11.VnB(), p0.Zeroing(), SVEMemOperand(x22)), 4927 "ld1b {z11.b}, p0/z, [x22]"); 4928 COMPARE_MACRO(Ld1b(z15.VnH(), 4929 p1.Zeroing(), 4930 SVEMemOperand(x15, 7, SVE_MUL_VL)), 4931 "ld1b {z15.h}, p1/z, [x15, #7, mul vl]"); 4932 COMPARE_MACRO(Ld1b(z19.VnS(), 4933 p2.Zeroing(), 4934 SVEMemOperand(sp, -8, SVE_MUL_VL)), 4935 "ld1b {z19.s}, p2/z, [sp, #-8, mul vl]"); 4936 COMPARE_MACRO(Ld1b(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4937 "ld1b {z23.d}, p3/z, [x1]"); 4938 COMPARE_MACRO(Ld1b(z2.VnB(), p4.Zeroing(), SVEMemOperand(x1, x2)), 4939 "ld1b {z2.b}, p4/z, [x1, x2]"); 4940 COMPARE_MACRO(Ld1b(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)), 4941 "ld1b {z31.d}, p7/z, [x9, x9]"); 4942 4943 COMPARE_MACRO(Ld1h(z15.VnH(), 4944 p1.Zeroing(), 4945 SVEMemOperand(x15, 7, SVE_MUL_VL)), 4946 "ld1h {z15.h}, p1/z, [x15, #7, mul vl]"); 4947 COMPARE_MACRO(Ld1h(z19.VnS(), 4948 p2.Zeroing(), 4949 SVEMemOperand(sp, -8, SVE_MUL_VL)), 4950 "ld1h {z19.s}, p2/z, [sp, #-8, mul vl]"); 4951 COMPARE_MACRO(Ld1h(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4952 "ld1h {z23.d}, p3/z, [x1]"); 4953 COMPARE_MACRO(Ld1h(z2.VnH(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 1)), 4954 "ld1h {z2.h}, p4/z, [x1, x2, lsl #1]"); 4955 COMPARE_MACRO(Ld1h(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)), 4956 "ld1h {z31.d}, p7/z, [x9, x9, lsl #1]"); 4957 4958 COMPARE_MACRO(Ld1w(z19.VnS(), 4959 p2.Zeroing(), 4960 SVEMemOperand(sp, -8, SVE_MUL_VL)), 4961 "ld1w {z19.s}, p2/z, [sp, #-8, mul vl]"); 4962 COMPARE_MACRO(Ld1w(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4963 "ld1w {z23.d}, p3/z, [x1]"); 4964 COMPARE_MACRO(Ld1w(z2.VnS(), p4.Zeroing(), 
SVEMemOperand(x1, x2, LSL, 2)), 4965 "ld1w {z2.s}, p4/z, [x1, x2, lsl #2]"); 4966 COMPARE_MACRO(Ld1w(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)), 4967 "ld1w {z31.d}, p7/z, [x9, x9, lsl #2]"); 4968 4969 COMPARE_MACRO(Ld1d(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)), 4970 "ld1d {z23.d}, p3/z, [x1]"); 4971 COMPARE_MACRO(Ld1d(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 3)), 4972 "ld1d {z31.d}, p7/z, [x9, x9, lsl #3]"); 4973 4974 // SVEMemOperand synthesis. 4975 // Check that the MacroAssembler falls back on `CalculateSVEAddress` at the 4976 // boundary conditions. We test this helper independently. 4977 COMPARE_MACRO(Ld1b(z10.VnB(), p7.Zeroing(), SVEMemOperand(x0, 8, SVE_MUL_VL)), 4978 "addvl x16, x0, #8\n" 4979 "ld1b {z10.b}, p7/z, [x16]"); 4980 COMPARE_MACRO(Ld1h(z11.VnS(), 4981 p5.Zeroing(), 4982 SVEMemOperand(sp, -9, SVE_MUL_VL)), 4983 "mov x16, #0xffffffffffffffdc\n" 4984 "rdvl x17, #1\n" 4985 "mul x16, x16, x17\n" 4986 "asr x16, x16, #3\n" 4987 "add x16, sp, x16\n" 4988 "ld1h {z11.s}, p5/z, [x16]"); 4989 COMPARE_MACRO(Ld1w(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, 42)), 4990 "add x16, sp, #0x2a (42)\n" 4991 "ld1w {z22.s}, p3/z, [x16]"); 4992 COMPARE_MACRO(Ld1d(z22.VnD(), p1.Zeroing(), SVEMemOperand(x3, x4)), 4993 "add x16, x3, x4\n" 4994 "ld1d {z22.d}, p1/z, [x16]"); 4995 COMPARE_MACRO(Ld1b(z30.VnD(), p0.Zeroing(), SVEMemOperand(x9, xzr)), 4996 "ld1b {z30.d}, p0/z, [x9]"); 4997 CLEANUP(); 4998} 4999 5000TEST(sve_st2_scalar_plus_immediate) { 5001 SETUP(); 5002 5003 COMPARE(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19)), 5004 "st2b {z31.b, z0.b}, p6, [x19]"); 5005 COMPARE(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19, 14, SVE_MUL_VL)), 5006 "st2b {z31.b, z0.b}, p6, [x19, #14, mul vl]"); 5007 COMPARE(st2b(z15.VnB(), z16.VnB(), p6, SVEMemOperand(x19, -16, SVE_MUL_VL)), 5008 "st2b {z15.b, z16.b}, p6, [x19, #-16, mul vl]"); 5009 5010 COMPARE(st2h(z15.VnH(), z16.VnH(), p6, SVEMemOperand(x19)), 5011 "st2h {z15.h, z16.h}, p6, 
[x19]"); 5012 COMPARE(st2h(z15.VnH(), z16.VnH(), p0, SVEMemOperand(x19, 14, SVE_MUL_VL)), 5013 "st2h {z15.h, z16.h}, p0, [x19, #14, mul vl]"); 5014 COMPARE(st2h(z15.VnH(), z16.VnH(), p0, SVEMemOperand(x19, -16, SVE_MUL_VL)), 5015 "st2h {z15.h, z16.h}, p0, [x19, #-16, mul vl]"); 5016 5017 COMPARE(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19)), 5018 "st2w {z0.s, z1.s}, p0, [x19]"); 5019 COMPARE(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19, 14, SVE_MUL_VL)), 5020 "st2w {z0.s, z1.s}, p0, [x19, #14, mul vl]"); 5021 COMPARE(st2w(z0.VnS(), z1.VnS(), p7, SVEMemOperand(x19, -16, SVE_MUL_VL)), 5022 "st2w {z0.s, z1.s}, p7, [x19, #-16, mul vl]"); 5023 5024 COMPARE(st2d(z0.VnD(), z1.VnD(), p7, SVEMemOperand(x19)), 5025 "st2d {z0.d, z1.d}, p7, [x19]"); 5026 COMPARE(st2d(z31.VnD(), z0.VnD(), p7, SVEMemOperand(x19, 14, SVE_MUL_VL)), 5027 "st2d {z31.d, z0.d}, p7, [x19, #14, mul vl]"); 5028 COMPARE(st2d(z31.VnD(), z0.VnD(), p7, SVEMemOperand(x19, -16, SVE_MUL_VL)), 5029 "st2d {z31.d, z0.d}, p7, [x19, #-16, mul vl]"); 5030 5031 CLEANUP(); 5032} 5033 5034TEST(sve_st3_scalar_plus_immediate) { 5035 SETUP(); 5036 5037 COMPARE(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p7, SVEMemOperand(x19)), 5038 "st3b {z30.b, z31.b, z0.b}, p7, [x19]"); 5039 COMPARE(st3b(z30.VnB(), 5040 z31.VnB(), 5041 z0.VnB(), 5042 p6, 5043 SVEMemOperand(x19, 21, SVE_MUL_VL)), 5044 "st3b {z30.b, z31.b, z0.b}, p6, [x19, #21, mul vl]"); 5045 COMPARE(st3b(z30.VnB(), 5046 z31.VnB(), 5047 z0.VnB(), 5048 p6, 5049 SVEMemOperand(x19, -24, SVE_MUL_VL)), 5050 "st3b {z30.b, z31.b, z0.b}, p6, [x19, #-24, mul vl]"); 5051 5052 COMPARE(st3h(z15.VnH(), z16.VnH(), z17.VnH(), p6, SVEMemOperand(x19)), 5053 "st3h {z15.h, z16.h, z17.h}, p6, [x19]"); 5054 COMPARE(st3h(z15.VnH(), 5055 z16.VnH(), 5056 z17.VnH(), 5057 p6, 5058 SVEMemOperand(x19, 21, SVE_MUL_VL)), 5059 "st3h {z15.h, z16.h, z17.h}, p6, [x19, #21, mul vl]"); 5060 COMPARE(st3h(z15.VnH(), 5061 z16.VnH(), 5062 z17.VnH(), 5063 p0, 5064 SVEMemOperand(x19, -24, SVE_MUL_VL)), 5065 "st3h 
{z15.h, z16.h, z17.h}, p0, [x19, #-24, mul vl]"); 5066 5067 COMPARE(st3w(z15.VnS(), z16.VnS(), z17.VnS(), p0, SVEMemOperand(x19)), 5068 "st3w {z15.s, z16.s, z17.s}, p0, [x19]"); 5069 COMPARE(st3w(z0.VnS(), 5070 z1.VnS(), 5071 z2.VnS(), 5072 p0, 5073 SVEMemOperand(x19, 21, SVE_MUL_VL)), 5074 "st3w {z0.s, z1.s, z2.s}, p0, [x19, #21, mul vl]"); 5075 COMPARE(st3w(z0.VnS(), 5076 z1.VnS(), 5077 z2.VnS(), 5078 p0, 5079 SVEMemOperand(x19, -24, SVE_MUL_VL)), 5080 "st3w {z0.s, z1.s, z2.s}, p0, [x19, #-24, mul vl]"); 5081 5082 COMPARE(st3d(z0.VnD(), z1.VnD(), z2.VnD(), p7, SVEMemOperand(x19)), 5083 "st3d {z0.d, z1.d, z2.d}, p7, [x19]"); 5084 COMPARE(st3d(z0.VnD(), 5085 z1.VnD(), 5086 z2.VnD(), 5087 p7, 5088 SVEMemOperand(x19, 21, SVE_MUL_VL)), 5089 "st3d {z0.d, z1.d, z2.d}, p7, [x19, #21, mul vl]"); 5090 COMPARE(st3d(z30.VnD(), 5091 z31.VnD(), 5092 z0.VnD(), 5093 p7, 5094 SVEMemOperand(x19, -24, SVE_MUL_VL)), 5095 "st3d {z30.d, z31.d, z0.d}, p7, [x19, #-24, mul vl]"); 5096 5097 CLEANUP(); 5098} 5099 5100TEST(sve_st4_scalar_plus_immediate) { 5101 SETUP(); 5102 5103 COMPARE(st4b(z31.VnB(), z0.VnB(), z1.VnB(), z2.VnB(), p7, SVEMemOperand(x19)), 5104 "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19]"); 5105 COMPARE(st4b(z31.VnB(), 5106 z0.VnB(), 5107 z1.VnB(), 5108 z2.VnB(), 5109 p7, 5110 SVEMemOperand(x19, 28, SVE_MUL_VL)), 5111 "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19, #28, mul vl]"); 5112 COMPARE(st4b(z31.VnB(), 5113 z0.VnB(), 5114 z1.VnB(), 5115 z2.VnB(), 5116 p6, 5117 SVEMemOperand(x19, -32, SVE_MUL_VL)), 5118 "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x19, #-32, mul vl]"); 5119 5120 COMPARE(st4h(z31.VnH(), z0.VnH(), z1.VnH(), z2.VnH(), p6, SVEMemOperand(x19)), 5121 "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x19]"); 5122 COMPARE(st4h(z15.VnH(), 5123 z16.VnH(), 5124 z17.VnH(), 5125 z18.VnH(), 5126 p6, 5127 SVEMemOperand(x19, 28, SVE_MUL_VL)), 5128 "st4h {z15.h, z16.h, z17.h, z18.h}, p6, [x19, #28, mul vl]"); 5129 COMPARE(st4h(z15.VnH(), 5130 z16.VnH(), 5131 z17.VnH(), 5132 z18.VnH(), 5133 
p6, 5134 SVEMemOperand(x19, -32, SVE_MUL_VL)), 5135 "st4h {z15.h, z16.h, z17.h, z18.h}, p6, " 5136 "[x19, #-32, mul vl]"); 5137 5138 COMPARE(st4w(z15.VnS(), 5139 z16.VnS(), 5140 z17.VnS(), 5141 z18.VnS(), 5142 p0, 5143 SVEMemOperand(x19)), 5144 "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19]"); 5145 COMPARE(st4w(z15.VnS(), 5146 z16.VnS(), 5147 z17.VnS(), 5148 z18.VnS(), 5149 p0, 5150 SVEMemOperand(x19, 28, SVE_MUL_VL)), 5151 "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19, #28, mul vl]"); 5152 COMPARE(st4w(z0.VnS(), 5153 z1.VnS(), 5154 z2.VnS(), 5155 z3.VnS(), 5156 p0, 5157 SVEMemOperand(x19, -32, SVE_MUL_VL)), 5158 "st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x19, #-32, mul vl]"); 5159 5160 COMPARE(st4d(z0.VnD(), z1.VnD(), z2.VnD(), z3.VnD(), p0, SVEMemOperand(x19)), 5161 "st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x19]"); 5162 COMPARE(st4d(z0.VnD(), 5163 z1.VnD(), 5164 z2.VnD(), 5165 z3.VnD(), 5166 p7, 5167 SVEMemOperand(x19, 28, SVE_MUL_VL)), 5168 "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #28, mul vl]"); 5169 COMPARE(st4d(z0.VnD(), 5170 z1.VnD(), 5171 z2.VnD(), 5172 z3.VnD(), 5173 p7, 5174 SVEMemOperand(x19, -32, SVE_MUL_VL)), 5175 "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #-32, mul vl]"); 5176 5177 CLEANUP(); 5178} 5179 5180TEST(sve_st2_scalar_plus_scalar) { 5181 SETUP(); 5182 5183 COMPARE(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(x20, x19)), 5184 "st2b {z25.b, z26.b}, p1, [x20, x19]"); 5185 COMPARE(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(sp, x19)), 5186 "st2b {z25.b, z26.b}, p1, [sp, x19]"); 5187 COMPARE(st2b(z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)), 5188 "st2b {z31.b, z0.b}, p1, [sp, x19]"); 5189 5190 COMPARE(st2h(z31.VnH(), z0.VnH(), p1, SVEMemOperand(x20, x19, LSL, 1)), 5191 "st2h {z31.h, z0.h}, p1, [x20, x19, lsl #1]"); 5192 COMPARE(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)), 5193 "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]"); 5194 COMPARE(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)), 5195 "st2h {z31.h, z0.h}, p7, [sp, x19, 
lsl #1]"); 5196 5197 COMPARE(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(x20, x19, LSL, 2)), 5198 "st2w {z16.s, z17.s}, p7, [x20, x19, lsl #2]"); 5199 COMPARE(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(sp, x19, LSL, 2)), 5200 "st2w {z16.s, z17.s}, p7, [sp, x19, lsl #2]"); 5201 COMPARE(st2w(z16.VnS(), z17.VnS(), p0, SVEMemOperand(sp, x19, LSL, 2)), 5202 "st2w {z16.s, z17.s}, p0, [sp, x19, lsl #2]"); 5203 5204 COMPARE(st2d(z16.VnD(), z17.VnD(), p0, SVEMemOperand(x20, x19, LSL, 3)), 5205 "st2d {z16.d, z17.d}, p0, [x20, x19, lsl #3]"); 5206 COMPARE(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)), 5207 "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]"); 5208 COMPARE(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)), 5209 "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]"); 5210 5211 CLEANUP(); 5212} 5213 5214TEST(sve_st3_scalar_plus_scalar) { 5215 SETUP(); 5216 5217 COMPARE(st3b(z25.VnB(), z26.VnB(), z27.VnB(), p1, SVEMemOperand(x20, x19)), 5218 "st3b {z25.b, z26.b, z27.b}, p1, [x20, x19]"); 5219 COMPARE(st3b(z25.VnB(), z26.VnB(), z27.VnB(), p1, SVEMemOperand(sp, x19)), 5220 "st3b {z25.b, z26.b, z27.b}, p1, [sp, x19]"); 5221 COMPARE(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)), 5222 "st3b {z30.b, z31.b, z0.b}, p1, [sp, x19]"); 5223 5224 COMPARE(st3h(z30.VnH(), 5225 z31.VnH(), 5226 z0.VnH(), 5227 p1, 5228 SVEMemOperand(x20, x19, LSL, 1)), 5229 "st3h {z30.h, z31.h, z0.h}, p1, [x20, x19, lsl #1]"); 5230 COMPARE(st3h(z30.VnH(), 5231 z31.VnH(), 5232 z0.VnH(), 5233 p7, 5234 SVEMemOperand(sp, x19, LSL, 1)), 5235 "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]"); 5236 COMPARE(st3h(z30.VnH(), 5237 z31.VnH(), 5238 z0.VnH(), 5239 p7, 5240 SVEMemOperand(sp, x19, LSL, 1)), 5241 "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]"); 5242 5243 COMPARE(st3w(z16.VnS(), 5244 z17.VnS(), 5245 z18.VnS(), 5246 p7, 5247 SVEMemOperand(x20, x19, LSL, 2)), 5248 "st3w {z16.s, z17.s, z18.s}, p7, [x20, x19, lsl #2]"); 5249 COMPARE(st3w(z16.VnS(), 5250 
z17.VnS(), 5251 z18.VnS(), 5252 p7, 5253 SVEMemOperand(sp, x19, LSL, 2)), 5254 "st3w {z16.s, z17.s, z18.s}, p7, [sp, x19, lsl #2]"); 5255 COMPARE(st3w(z16.VnS(), 5256 z17.VnS(), 5257 z18.VnS(), 5258 p0, 5259 SVEMemOperand(sp, x19, LSL, 2)), 5260 "st3w {z16.s, z17.s, z18.s}, p0, [sp, x19, lsl #2]"); 5261 5262 COMPARE(st3d(z16.VnD(), 5263 z17.VnD(), 5264 z18.VnD(), 5265 p0, 5266 SVEMemOperand(x20, x19, LSL, 3)), 5267 "st3d {z16.d, z17.d, z18.d}, p0, [x20, x19, lsl #3]"); 5268 COMPARE(st3d(z25.VnD(), 5269 z26.VnD(), 5270 z27.VnD(), 5271 p0, 5272 SVEMemOperand(sp, x19, LSL, 3)), 5273 "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]"); 5274 COMPARE(st3d(z25.VnD(), 5275 z26.VnD(), 5276 z27.VnD(), 5277 p0, 5278 SVEMemOperand(sp, x19, LSL, 3)), 5279 "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]"); 5280 5281 CLEANUP(); 5282} 5283 5284TEST(sve_st4_scalar_plus_scalar) { 5285 SETUP(); 5286 5287 COMPARE(st4b(z25.VnB(), 5288 z26.VnB(), 5289 z27.VnB(), 5290 z28.VnB(), 5291 p0, 5292 SVEMemOperand(x20, x19)), 5293 "st4b {z25.b, z26.b, z27.b, z28.b}, p0, [x20, x19]"); 5294 COMPARE(st4b(z25.VnB(), 5295 z26.VnB(), 5296 z27.VnB(), 5297 z28.VnB(), 5298 p1, 5299 SVEMemOperand(sp, x19)), 5300 "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]"); 5301 COMPARE(st4b(z25.VnB(), 5302 z26.VnB(), 5303 z27.VnB(), 5304 z28.VnB(), 5305 p1, 5306 SVEMemOperand(sp, x19)), 5307 "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]"); 5308 5309 COMPARE(st4h(z31.VnH(), 5310 z0.VnH(), 5311 z1.VnH(), 5312 z2.VnH(), 5313 p1, 5314 SVEMemOperand(x20, x19, LSL, 1)), 5315 "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [x20, x19, lsl #1]"); 5316 COMPARE(st4h(z31.VnH(), 5317 z0.VnH(), 5318 z1.VnH(), 5319 z2.VnH(), 5320 p1, 5321 SVEMemOperand(sp, x19, LSL, 1)), 5322 "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [sp, x19, lsl #1]"); 5323 COMPARE(st4h(z31.VnH(), 5324 z0.VnH(), 5325 z1.VnH(), 5326 z2.VnH(), 5327 p7, 5328 SVEMemOperand(sp, x19, LSL, 1)), 5329 "st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, x19, lsl #1]"); 5330 5331 
COMPARE(st4w(z31.VnS(), 5332 z0.VnS(), 5333 z1.VnS(), 5334 z2.VnS(), 5335 p7, 5336 SVEMemOperand(x20, x19, LSL, 2)), 5337 "st4w {z31.s, z0.s, z1.s, z2.s}, p7, [x20, x19, lsl #2]"); 5338 COMPARE(st4w(z16.VnS(), 5339 z17.VnS(), 5340 z18.VnS(), 5341 z19.VnS(), 5342 p7, 5343 SVEMemOperand(sp, x19, LSL, 2)), 5344 "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]"); 5345 COMPARE(st4w(z16.VnS(), 5346 z17.VnS(), 5347 z18.VnS(), 5348 z19.VnS(), 5349 p7, 5350 SVEMemOperand(sp, x19, LSL, 2)), 5351 "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]"); 5352 5353 COMPARE(st4d(z16.VnD(), 5354 z17.VnD(), 5355 z18.VnD(), 5356 z19.VnD(), 5357 p0, 5358 SVEMemOperand(x20, x19, LSL, 3)), 5359 "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [x20, x19, lsl #3]"); 5360 COMPARE(st4d(z16.VnD(), 5361 z17.VnD(), 5362 z18.VnD(), 5363 z19.VnD(), 5364 p0, 5365 SVEMemOperand(sp, x19, LSL, 3)), 5366 "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [sp, x19, lsl #3]"); 5367 COMPARE(st4d(z25.VnD(), 5368 z26.VnD(), 5369 z27.VnD(), 5370 z28.VnD(), 5371 p0, 5372 SVEMemOperand(sp, x19, LSL, 3)), 5373 "st4d {z25.d, z26.d, z27.d, z28.d}, p0, [sp, x19, lsl #3]"); 5374 5375 CLEANUP(); 5376} 5377 5378TEST(sve_mul_index) { 5379 SETUP(); 5380 5381 COMPARE(sdot(z17.VnD(), z21.VnH(), z15.VnH(), 0), 5382 "sdot z17.d, z21.h, z15.h[0]"); 5383 COMPARE(sdot(z28.VnS(), z9.VnB(), z7.VnB(), 1), "sdot z28.s, z9.b, z7.b[1]"); 5384 COMPARE(udot(z26.VnD(), z15.VnH(), z1.VnH(), 1), 5385 "udot z26.d, z15.h, z1.h[1]"); 5386 COMPARE(udot(z23.VnS(), z24.VnB(), z5.VnB(), 3), 5387 "udot z23.s, z24.b, z5.b[3]"); 5388 5389 CLEANUP(); 5390} 5391 5392TEST(sve_mul_index_macro) { 5393 SETUP(); 5394 5395 COMPARE_MACRO(Sdot(z0.VnS(), z0.VnS(), z2.VnB(), z4.VnB(), 0), 5396 "sdot z0.s, z2.b, z4.b[0]"); 5397 COMPARE_MACRO(Sdot(z3.VnD(), z4.VnD(), z3.VnH(), z5.VnH(), 1), 5398 "movprfx z31, z4\n" 5399 "sdot z31.d, z3.h, z5.h[1]\n" 5400 "mov z3.d, z31.d"); 5401 COMPARE_MACRO(Sdot(z4.VnS(), z5.VnS(), z6.VnB(), z4.VnB(), 2), 5402 "movprfx z31, 
z5\n" 5403 "sdot z31.s, z6.b, z4.b[2]\n" 5404 "mov z4.d, z31.d"); 5405 COMPARE_MACRO(Sdot(z6.VnD(), z7.VnD(), z8.VnH(), z9.VnH(), 0), 5406 "movprfx z6, z7\n" 5407 "sdot z6.d, z8.h, z9.h[0]"); 5408 COMPARE_MACRO(Sdot(z5.VnD(), z5.VnD(), z5.VnH(), z5.VnH(), 1), 5409 "sdot z5.d, z5.h, z5.h[1]"); 5410 5411 COMPARE_MACRO(Udot(z0.VnD(), z0.VnD(), z2.VnH(), z4.VnH(), 1), 5412 "udot z0.d, z2.h, z4.h[1]"); 5413 COMPARE_MACRO(Udot(z3.VnS(), z4.VnS(), z3.VnB(), z5.VnB(), 3), 5414 "movprfx z31, z4\n" 5415 "udot z31.s, z3.b, z5.b[3]\n" 5416 "mov z3.d, z31.d"); 5417 COMPARE_MACRO(Udot(z4.VnD(), z5.VnD(), z6.VnH(), z4.VnH(), 0), 5418 "movprfx z31, z5\n" 5419 "udot z31.d, z6.h, z4.h[0]\n" 5420 "mov z4.d, z31.d"); 5421 COMPARE_MACRO(Udot(z9.VnS(), z8.VnS(), z7.VnB(), z6.VnB(), 2), 5422 "movprfx z9, z8\n" 5423 "udot z9.s, z7.b, z6.b[2]"); 5424 COMPARE_MACRO(Udot(z5.VnS(), z5.VnS(), z5.VnB(), z5.VnB(), 1), 5425 "udot z5.s, z5.b, z5.b[1]"); 5426 CLEANUP(); 5427} 5428 5429TEST(sve_partition_break) { 5430 SETUP(); 5431 5432 COMPARE(brkas(p8.VnB(), p5.Zeroing(), p4.VnB()), "brkas p8.b, p5/z, p4.b"); 5433 COMPARE(brka(p11.VnB(), p7.Zeroing(), p15.VnB()), "brka p11.b, p7/z, p15.b"); 5434 COMPARE(brka(p12.VnB(), p8.Merging(), p13.VnB()), "brka p12.b, p8/m, p13.b"); 5435 COMPARE(brkbs(p6.VnB(), p9.Zeroing(), p14.VnB()), "brkbs p6.b, p9/z, p14.b"); 5436 COMPARE(brkb(p11.VnB(), p6.Zeroing(), p4.VnB()), "brkb p11.b, p6/z, p4.b"); 5437 COMPARE(brkb(p12.VnB(), p7.Merging(), p5.VnB()), "brkb p12.b, p7/m, p5.b"); 5438 COMPARE(brkns(p2.VnB(), p11.Zeroing(), p0.VnB(), p2.VnB()), 5439 "brkns p2.b, p11/z, p0.b, p2.b"); 5440 COMPARE(brkn(p4.VnB(), p3.Zeroing(), p1.VnB(), p4.VnB()), 5441 "brkn p4.b, p3/z, p1.b, p4.b"); 5442 5443 COMPARE_MACRO(Brkns(p3.VnB(), p10.Zeroing(), p2.VnB(), p5.VnB()), 5444 "mov p3.b, p5.b\n" 5445 "brkns p3.b, p10/z, p2.b, p3.b"); 5446 COMPARE_MACRO(Brkn(p5.VnB(), p4.Zeroing(), p3.VnB(), p7.VnB()), 5447 "mov p5.b, p7.b\n" 5448 "brkn p5.b, p4/z, p3.b, p5.b"); 5449 5450 CLEANUP(); 
5451} 5452 5453TEST(sve_permute_predicate) { 5454 SETUP(); 5455 5456 COMPARE(rev(p15.VnB(), p6.VnB()), "rev p15.b, p6.b"); 5457 COMPARE(rev(p15.VnH(), p6.VnH()), "rev p15.h, p6.h"); 5458 COMPARE(rev(p15.VnS(), p6.VnS()), "rev p15.s, p6.s"); 5459 COMPARE(rev(p15.VnD(), p6.VnD()), "rev p15.d, p6.d"); 5460 COMPARE(trn1(p13.VnB(), p15.VnB(), p12.VnB()), "trn1 p13.b, p15.b, p12.b"); 5461 COMPARE(trn1(p13.VnH(), p15.VnH(), p12.VnH()), "trn1 p13.h, p15.h, p12.h"); 5462 COMPARE(trn1(p13.VnS(), p15.VnS(), p12.VnS()), "trn1 p13.s, p15.s, p12.s"); 5463 COMPARE(trn1(p13.VnD(), p15.VnD(), p12.VnD()), "trn1 p13.d, p15.d, p12.d"); 5464 COMPARE(trn2(p5.VnB(), p5.VnB(), p6.VnB()), "trn2 p5.b, p5.b, p6.b"); 5465 COMPARE(trn2(p5.VnH(), p5.VnH(), p6.VnH()), "trn2 p5.h, p5.h, p6.h"); 5466 COMPARE(trn2(p5.VnS(), p5.VnS(), p6.VnS()), "trn2 p5.s, p5.s, p6.s"); 5467 COMPARE(trn2(p5.VnD(), p5.VnD(), p6.VnD()), "trn2 p5.d, p5.d, p6.d"); 5468 COMPARE(uzp1(p14.VnB(), p4.VnB(), p14.VnB()), "uzp1 p14.b, p4.b, p14.b"); 5469 COMPARE(uzp1(p14.VnH(), p4.VnH(), p14.VnH()), "uzp1 p14.h, p4.h, p14.h"); 5470 COMPARE(uzp1(p14.VnS(), p4.VnS(), p14.VnS()), "uzp1 p14.s, p4.s, p14.s"); 5471 COMPARE(uzp1(p14.VnD(), p4.VnD(), p14.VnD()), "uzp1 p14.d, p4.d, p14.d"); 5472 COMPARE(uzp2(p6.VnB(), p11.VnB(), p2.VnB()), "uzp2 p6.b, p11.b, p2.b"); 5473 COMPARE(uzp2(p6.VnH(), p11.VnH(), p2.VnH()), "uzp2 p6.h, p11.h, p2.h"); 5474 COMPARE(uzp2(p6.VnS(), p11.VnS(), p2.VnS()), "uzp2 p6.s, p11.s, p2.s"); 5475 COMPARE(uzp2(p6.VnD(), p11.VnD(), p2.VnD()), "uzp2 p6.d, p11.d, p2.d"); 5476 COMPARE(zip1(p13.VnB(), p4.VnB(), p12.VnB()), "zip1 p13.b, p4.b, p12.b"); 5477 COMPARE(zip1(p13.VnH(), p4.VnH(), p12.VnH()), "zip1 p13.h, p4.h, p12.h"); 5478 COMPARE(zip1(p13.VnS(), p4.VnS(), p12.VnS()), "zip1 p13.s, p4.s, p12.s"); 5479 COMPARE(zip1(p13.VnD(), p4.VnD(), p12.VnD()), "zip1 p13.d, p4.d, p12.d"); 5480 COMPARE(zip2(p1.VnB(), p15.VnB(), p2.VnB()), "zip2 p1.b, p15.b, p2.b"); 5481 COMPARE(zip2(p1.VnH(), p15.VnH(), p2.VnH()), "zip2 
p1.h, p15.h, p2.h"); 5482 COMPARE(zip2(p1.VnS(), p15.VnS(), p2.VnS()), "zip2 p1.s, p15.s, p2.s"); 5483 COMPARE(zip2(p1.VnD(), p15.VnD(), p2.VnD()), "zip2 p1.d, p15.d, p2.d"); 5484 COMPARE(punpkhi(p12.VnH(), p6.VnB()), "punpkhi p12.h, p6.b"); 5485 COMPARE(punpklo(p4.VnH(), p14.VnB()), "punpklo p4.h, p14.b"); 5486 5487 CLEANUP(); 5488} 5489 5490TEST(sve_permute_vector_extract) { 5491 SETUP(); 5492 5493 COMPARE_MACRO(Ext(z10.VnB(), z10.VnB(), z2.VnB(), 0), 5494 "ext z10.b, z10.b, z2.b, #0"); 5495 COMPARE_MACRO(Ext(z10.VnB(), z10.VnB(), z2.VnB(), 1), 5496 "ext z10.b, z10.b, z2.b, #1"); 5497 COMPARE_MACRO(Ext(z2.VnB(), z2.VnB(), z10.VnB(), 254), 5498 "ext z2.b, z2.b, z10.b, #254"); 5499 COMPARE_MACRO(Ext(z2.VnB(), z2.VnB(), z10.VnB(), 255), 5500 "ext z2.b, z2.b, z10.b, #255"); 5501 5502 CLEANUP(); 5503} 5504 5505TEST(sve_permute_vector_interleaving) { 5506 SETUP(); 5507 5508 COMPARE(trn1(z25.VnB(), z31.VnB(), z17.VnB()), "trn1 z25.b, z31.b, z17.b"); 5509 COMPARE(trn1(z25.VnH(), z31.VnH(), z17.VnH()), "trn1 z25.h, z31.h, z17.h"); 5510 COMPARE(trn1(z25.VnS(), z31.VnS(), z17.VnS()), "trn1 z25.s, z31.s, z17.s"); 5511 COMPARE(trn1(z25.VnD(), z31.VnD(), z17.VnD()), "trn1 z25.d, z31.d, z17.d"); 5512 COMPARE(trn2(z23.VnB(), z19.VnB(), z5.VnB()), "trn2 z23.b, z19.b, z5.b"); 5513 COMPARE(trn2(z23.VnH(), z19.VnH(), z5.VnH()), "trn2 z23.h, z19.h, z5.h"); 5514 COMPARE(trn2(z23.VnS(), z19.VnS(), z5.VnS()), "trn2 z23.s, z19.s, z5.s"); 5515 COMPARE(trn2(z23.VnD(), z19.VnD(), z5.VnD()), "trn2 z23.d, z19.d, z5.d"); 5516 COMPARE(uzp1(z3.VnB(), z27.VnB(), z10.VnB()), "uzp1 z3.b, z27.b, z10.b"); 5517 COMPARE(uzp1(z3.VnH(), z27.VnH(), z10.VnH()), "uzp1 z3.h, z27.h, z10.h"); 5518 COMPARE(uzp1(z3.VnS(), z27.VnS(), z10.VnS()), "uzp1 z3.s, z27.s, z10.s"); 5519 COMPARE(uzp1(z3.VnD(), z27.VnD(), z10.VnD()), "uzp1 z3.d, z27.d, z10.d"); 5520 COMPARE(uzp2(z22.VnB(), z26.VnB(), z15.VnB()), "uzp2 z22.b, z26.b, z15.b"); 5521 COMPARE(uzp2(z22.VnH(), z26.VnH(), z15.VnH()), "uzp2 z22.h, z26.h, z15.h"); 
5522 COMPARE(uzp2(z22.VnS(), z26.VnS(), z15.VnS()), "uzp2 z22.s, z26.s, z15.s"); 5523 COMPARE(uzp2(z22.VnD(), z26.VnD(), z15.VnD()), "uzp2 z22.d, z26.d, z15.d"); 5524 COMPARE(zip1(z31.VnB(), z2.VnB(), z20.VnB()), "zip1 z31.b, z2.b, z20.b"); 5525 COMPARE(zip1(z31.VnH(), z2.VnH(), z20.VnH()), "zip1 z31.h, z2.h, z20.h"); 5526 COMPARE(zip1(z31.VnS(), z2.VnS(), z20.VnS()), "zip1 z31.s, z2.s, z20.s"); 5527 COMPARE(zip1(z31.VnD(), z2.VnD(), z20.VnD()), "zip1 z31.d, z2.d, z20.d"); 5528 COMPARE(zip2(z15.VnB(), z23.VnB(), z12.VnB()), "zip2 z15.b, z23.b, z12.b"); 5529 COMPARE(zip2(z15.VnH(), z23.VnH(), z12.VnH()), "zip2 z15.h, z23.h, z12.h"); 5530 COMPARE(zip2(z15.VnS(), z23.VnS(), z12.VnS()), "zip2 z15.s, z23.s, z12.s"); 5531 COMPARE(zip2(z15.VnD(), z23.VnD(), z12.VnD()), "zip2 z15.d, z23.d, z12.d"); 5532 5533 CLEANUP(); 5534} 5535 5536TEST(sve_cpy_reg) { 5537 SETUP(); 5538 5539 COMPARE(cpy(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp"); 5540 COMPARE(cpy(z2.VnH(), p6.Merging(), w3), "mov z2.h, p6/m, w3"); 5541 COMPARE(cpy(z3.VnS(), p7.Merging(), x5), "mov z3.s, p7/m, w5"); 5542 COMPARE(cpy(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30"); 5543 COMPARE(cpy(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp"); 5544 5545 COMPARE(cpy(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23"); 5546 COMPARE(cpy(z27.VnH(), p3.Merging(), h23), "mov z27.h, p3/m, h23"); 5547 COMPARE(cpy(z27.VnS(), p3.Merging(), s23), "mov z27.s, p3/m, s23"); 5548 COMPARE(cpy(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23"); 5549 5550 COMPARE(mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp"); 5551 COMPARE(mov(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30"); 5552 COMPARE(mov(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp"); 5553 COMPARE(mov(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23"); 5554 COMPARE(mov(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23"); 5555 5556 COMPARE_MACRO(Mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp"); 5557 COMPARE_MACRO(Mov(z4.VnD(), 
p7.Merging(), x30), "mov z4.d, p7/m, x30"); 5558 COMPARE_MACRO(Mov(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp"); 5559 COMPARE_MACRO(Mov(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23"); 5560 COMPARE_MACRO(Mov(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23"); 5561 5562 CLEANUP(); 5563} 5564 5565TEST(sve_permute_vector_predicated) { 5566 SETUP(); 5567 5568 COMPARE(compact(z13.VnS(), p7, z1.VnS()), "compact z13.s, p7, z1.s"); 5569 COMPARE(compact(z13.VnD(), p7, z1.VnD()), "compact z13.d, p7, z1.d"); 5570 COMPARE(splice(z7.VnB(), p6, z7.VnB(), z2.VnB()), 5571 "splice z7.b, p6, z7.b, z2.b"); 5572 COMPARE(splice(z7.VnH(), p6, z7.VnH(), z2.VnH()), 5573 "splice z7.h, p6, z7.h, z2.h"); 5574 COMPARE(splice(z7.VnS(), p6, z7.VnS(), z2.VnS()), 5575 "splice z7.s, p6, z7.s, z2.s"); 5576 COMPARE(splice(z7.VnD(), p6, z7.VnD(), z2.VnD()), 5577 "splice z7.d, p6, z7.d, z2.d"); 5578 5579 COMPARE_MACRO(Splice(z0.VnB(), p1, z2.VnB(), z4.VnB()), 5580 "movprfx z0, z2\n" 5581 "splice z0.b, p1, z0.b, z4.b"); 5582 COMPARE_MACRO(Splice(z0.VnH(), p1, z2.VnH(), z0.VnH()), 5583 "movprfx z31, z2\n" 5584 "splice z31.h, p1, z31.h, z0.h\n" 5585 "mov z0.d, z31.d"); 5586 5587 COMPARE(clasta(z4.VnB(), p2, z4.VnB(), z12.VnB()), 5588 "clasta z4.b, p2, z4.b, z12.b"); 5589 COMPARE(clasta(z4.VnH(), p2, z4.VnH(), z12.VnH()), 5590 "clasta z4.h, p2, z4.h, z12.h"); 5591 COMPARE(clasta(z4.VnS(), p2, z4.VnS(), z12.VnS()), 5592 "clasta z4.s, p2, z4.s, z12.s"); 5593 COMPARE(clasta(z4.VnD(), p2, z4.VnD(), z12.VnD()), 5594 "clasta z4.d, p2, z4.d, z12.d"); 5595 COMPARE(clastb(z29.VnB(), p7, z29.VnB(), z26.VnB()), 5596 "clastb z29.b, p7, z29.b, z26.b"); 5597 COMPARE(clastb(z29.VnH(), p7, z29.VnH(), z26.VnH()), 5598 "clastb z29.h, p7, z29.h, z26.h"); 5599 COMPARE(clastb(z29.VnS(), p7, z29.VnS(), z26.VnS()), 5600 "clastb z29.s, p7, z29.s, z26.s"); 5601 COMPARE(clastb(z29.VnD(), p7, z29.VnD(), z26.VnD()), 5602 "clastb z29.d, p7, z29.d, z26.d"); 5603 5604 COMPARE_MACRO(Clasta(z5.VnD(), p2, z4.VnD(), 
z12.VnD()), 5605 "movprfx z5, z4\n" 5606 "clasta z5.d, p2, z5.d, z12.d"); 5607 COMPARE_MACRO(Clastb(z30.VnD(), p7, z29.VnD(), z26.VnD()), 5608 "movprfx z30, z29\n" 5609 "clastb z30.d, p7, z30.d, z26.d"); 5610 COMPARE_MACRO(Clasta(z9.VnH(), p3, z8.VnH(), z9.VnH()), 5611 "movprfx z31, z8\n" 5612 "clasta z31.h, p3, z31.h, z9.h\n" 5613 "mov z9.d, z31.d"); 5614 COMPARE_MACRO(Clastb(z1.VnS(), p1, z1.VnS(), z1.VnS()), 5615 "clastb z1.s, p1, z1.s, z1.s"); 5616 5617 COMPARE(clasta(w6, p0, w6, z13.VnB()), "clasta w6, p0, w6, z13.b"); 5618 COMPARE(clasta(w6, p0, w6, z13.VnH()), "clasta w6, p0, w6, z13.h"); 5619 COMPARE(clasta(w6, p0, w6, z13.VnS()), "clasta w6, p0, w6, z13.s"); 5620 COMPARE(clasta(x6, p0, x6, z13.VnD()), "clasta x6, p0, x6, z13.d"); 5621 COMPARE(clastb(w21, p2, w21, z27.VnB()), "clastb w21, p2, w21, z27.b"); 5622 COMPARE(clastb(w21, p2, w21, z27.VnH()), "clastb w21, p2, w21, z27.h"); 5623 COMPARE(clastb(w21, p2, w21, z27.VnS()), "clastb w21, p2, w21, z27.s"); 5624 COMPARE(clastb(x21, p2, x21, z27.VnD()), "clastb x21, p2, x21, z27.d"); 5625 5626 COMPARE(clasta(b8, p6, b8, z7.VnB()), "clasta b8, p6, b8, z7.b"); 5627 COMPARE(clasta(h8, p6, h8, z7.VnH()), "clasta h8, p6, h8, z7.h"); 5628 COMPARE(clasta(s8, p6, s8, z7.VnS()), "clasta s8, p6, s8, z7.s"); 5629 COMPARE(clasta(d8, p6, d8, z7.VnD()), "clasta d8, p6, d8, z7.d"); 5630 COMPARE(clastb(b17, p0, b17, z19.VnB()), "clastb b17, p0, b17, z19.b"); 5631 COMPARE(clastb(h17, p0, h17, z19.VnH()), "clastb h17, p0, h17, z19.h"); 5632 COMPARE(clastb(s17, p0, s17, z19.VnS()), "clastb s17, p0, s17, z19.s"); 5633 COMPARE(clastb(d17, p0, d17, z19.VnD()), "clastb d17, p0, d17, z19.d"); 5634 5635 COMPARE(lasta(w15, p3, z3.VnB()), "lasta w15, p3, z3.b"); 5636 COMPARE(lasta(w15, p3, z3.VnH()), "lasta w15, p3, z3.h"); 5637 COMPARE(lasta(w15, p3, z3.VnS()), "lasta w15, p3, z3.s"); 5638 COMPARE(lasta(x15, p3, z3.VnD()), "lasta x15, p3, z3.d"); 5639 COMPARE(lasta(b30, p4, z24.VnB()), "lasta b30, p4, z24.b"); 5640 COMPARE(lasta(h30, 
p4, z24.VnH()), "lasta h30, p4, z24.h"); 5641 COMPARE(lasta(s30, p4, z24.VnS()), "lasta s30, p4, z24.s"); 5642 COMPARE(lasta(d30, p4, z24.VnD()), "lasta d30, p4, z24.d"); 5643 5644 COMPARE(lastb(w9, p2, z16.VnB()), "lastb w9, p2, z16.b"); 5645 COMPARE(lastb(w9, p2, z16.VnH()), "lastb w9, p2, z16.h"); 5646 COMPARE(lastb(w9, p2, z16.VnS()), "lastb w9, p2, z16.s"); 5647 COMPARE(lastb(x9, p2, z16.VnD()), "lastb x9, p2, z16.d"); 5648 COMPARE(lastb(b14, p5, z2.VnB()), "lastb b14, p5, z2.b"); 5649 COMPARE(lastb(h14, p5, z2.VnH()), "lastb h14, p5, z2.h"); 5650 COMPARE(lastb(s14, p5, z2.VnS()), "lastb s14, p5, z2.s"); 5651 COMPARE(lastb(d14, p5, z2.VnD()), "lastb d14, p5, z2.d"); 5652 5653 CLEANUP(); 5654} 5655 5656TEST(sve_reverse) { 5657 SETUP(); 5658 5659 COMPARE_MACRO(Rbit(z22.VnB(), p2.Merging(), z24.VnB()), 5660 "rbit z22.b, p2/m, z24.b"); 5661 COMPARE_MACRO(Rbit(z22.VnH(), p2.Merging(), z24.VnH()), 5662 "rbit z22.h, p2/m, z24.h"); 5663 COMPARE_MACRO(Rbit(z22.VnS(), p2.Merging(), z24.VnS()), 5664 "rbit z22.s, p2/m, z24.s"); 5665 COMPARE_MACRO(Rbit(z22.VnD(), p2.Merging(), z24.VnD()), 5666 "rbit z22.d, p2/m, z24.d"); 5667 COMPARE_MACRO(Revb(z14.VnH(), p7.Merging(), z25.VnH()), 5668 "revb z14.h, p7/m, z25.h"); 5669 COMPARE_MACRO(Revb(z14.VnS(), p7.Merging(), z25.VnS()), 5670 "revb z14.s, p7/m, z25.s"); 5671 COMPARE_MACRO(Revb(z14.VnD(), p7.Merging(), z25.VnD()), 5672 "revb z14.d, p7/m, z25.d"); 5673 COMPARE_MACRO(Revh(z16.VnS(), p2.Merging(), z4.VnS()), 5674 "revh z16.s, p2/m, z4.s"); 5675 COMPARE_MACRO(Revh(z16.VnD(), p2.Merging(), z4.VnD()), 5676 "revh z16.d, p2/m, z4.d"); 5677 COMPARE_MACRO(Revw(z26.VnD(), p5.Merging(), z10.VnD()), 5678 "revw z26.d, p5/m, z10.d"); 5679 5680 CLEANUP(); 5681} 5682 5683TEST(sve_permute_vector_unpredicated) { 5684 SETUP(); 5685 5686 COMPARE(dup(z4.VnB(), w7), "mov z4.b, w7"); 5687 COMPARE(dup(z5.VnH(), w6), "mov z5.h, w6"); 5688 COMPARE(dup(z6.VnS(), sp), "mov z6.s, wsp"); 5689 COMPARE(dup(z7.VnD(), x4), "mov z7.d, x4"); 5690 
COMPARE(dup(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]"); 5691 COMPARE(dup(z26.VnH(), z12.VnH(), 0), "mov z26.h, h12"); 5692 5693 COMPARE(mov(z4.VnB(), w7), "mov z4.b, w7"); 5694 COMPARE(mov(z5.VnH(), w6), "mov z5.h, w6"); 5695 COMPARE(mov(z6.VnS(), sp), "mov z6.s, wsp"); 5696 COMPARE(mov(z7.VnD(), x4), "mov z7.d, x4"); 5697 COMPARE(mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]"); 5698 COMPARE(mov(z0.VnS(), s1), "mov z0.s, s1"); 5699 5700 COMPARE_MACRO(Mov(z7.VnD(), x4), "mov z7.d, x4"); 5701 COMPARE_MACRO(Mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]"); 5702 COMPARE_MACRO(Mov(z2.VnB(), b13), "mov z2.b, b13"); 5703 COMPARE_MACRO(Mov(z31.VnQ(), q31), "mov z31.q, q31"); 5704 5705 // Test dup with reserved tsz field. 5706 COMPARE_PREFIX(dci(0x05202000), "unimplemented"); 5707 5708 COMPARE(insr(z15.VnB(), w13), "insr z15.b, w13"); 5709 COMPARE(insr(z16.VnH(), w14), "insr z16.h, w14"); 5710 COMPARE(insr(z17.VnS(), w15), "insr z17.s, w15"); 5711 COMPARE(insr(z18.VnD(), x16), "insr z18.d, x16"); 5712 COMPARE(insr(z5.VnB(), b3), "insr z5.b, b3"); 5713 COMPARE(insr(z6.VnH(), h15), "insr z6.h, h15"); 5714 COMPARE(insr(z7.VnS(), s22), "insr z7.s, s22"); 5715 COMPARE(insr(z8.VnD(), d30), "insr z8.d, d30"); 5716 COMPARE(rev(z13.VnB(), z10.VnB()), "rev z13.b, z10.b"); 5717 COMPARE(rev(z14.VnH(), z10.VnH()), "rev z14.h, z10.h"); 5718 COMPARE(rev(z15.VnS(), z10.VnS()), "rev z15.s, z10.s"); 5719 COMPARE(rev(z16.VnD(), z10.VnD()), "rev z16.d, z10.d"); 5720 COMPARE(sunpkhi(z10.VnH(), z11.VnB()), "sunpkhi z10.h, z11.b"); 5721 COMPARE(sunpkhi(z11.VnS(), z11.VnH()), "sunpkhi z11.s, z11.h"); 5722 COMPARE(sunpkhi(z12.VnD(), z11.VnS()), "sunpkhi z12.d, z11.s"); 5723 COMPARE(sunpklo(z20.VnH(), z12.VnB()), "sunpklo z20.h, z12.b"); 5724 COMPARE(sunpklo(z21.VnS(), z12.VnH()), "sunpklo z21.s, z12.h"); 5725 COMPARE(sunpklo(z22.VnD(), z12.VnS()), "sunpklo z22.d, z12.s"); 5726 COMPARE(uunpkhi(z17.VnH(), z14.VnB()), "uunpkhi z17.h, z14.b"); 5727 COMPARE(uunpkhi(z18.VnS(), z14.VnH()), 
"uunpkhi z18.s, z14.h"); 5728 COMPARE(uunpkhi(z19.VnD(), z14.VnS()), "uunpkhi z19.d, z14.s"); 5729 COMPARE(uunpklo(z27.VnH(), z6.VnB()), "uunpklo z27.h, z6.b"); 5730 COMPARE(uunpklo(z28.VnS(), z6.VnH()), "uunpklo z28.s, z6.h"); 5731 COMPARE(uunpklo(z29.VnD(), z6.VnS()), "uunpklo z29.d, z6.s"); 5732 COMPARE(tbl(z24.VnB(), z29.VnB(), z0.VnB()), "tbl z24.b, {z29.b}, z0.b"); 5733 COMPARE(tbl(z25.VnH(), z29.VnH(), z1.VnH()), "tbl z25.h, {z29.h}, z1.h"); 5734 COMPARE(tbl(z26.VnS(), z29.VnS(), z2.VnS()), "tbl z26.s, {z29.s}, z2.s"); 5735 COMPARE(tbl(z27.VnD(), z29.VnD(), z3.VnD()), "tbl z27.d, {z29.d}, z3.d"); 5736 5737 CLEANUP(); 5738} 5739 5740TEST(sve_predicate_count) { 5741 SETUP(); 5742 5743 COMPARE(cntp(x9, p1, p0.VnB()), "cntp x9, p1, p0.b"); 5744 COMPARE(cntp(x10, p12, p1.VnH()), "cntp x10, p12, p1.h"); 5745 COMPARE(cntp(x11, p13, p14.VnS()), "cntp x11, p13, p14.s"); 5746 COMPARE(cntp(x12, p4, p15.VnD()), "cntp x12, p4, p15.d"); 5747 5748 COMPARE_MACRO(Cntp(x0, p1, p2.VnB()), "cntp x0, p1, p2.b"); 5749 COMPARE_MACRO(Cntp(w10, p11, p12.VnH()), "cntp x10, p11, p12.h"); 5750 5751 CLEANUP(); 5752} 5753 5754TEST(sve_predicate_logical_op) { 5755 SETUP(); 5756 5757 COMPARE(ands(p13.VnB(), p9.Zeroing(), p5.VnB(), p15.VnB()), 5758 "ands p13.b, p9/z, p5.b, p15.b"); 5759 COMPARE(and_(p9.VnB(), p3.Zeroing(), p0.VnB(), p14.VnB()), 5760 "and p9.b, p3/z, p0.b, p14.b"); 5761 COMPARE(bics(p8.VnB(), p5.Zeroing(), p3.VnB(), p1.VnB()), 5762 "bics p8.b, p5/z, p3.b, p1.b"); 5763 COMPARE(bic(p5.VnB(), p5.Zeroing(), p9.VnB(), p9.VnB()), 5764 "bic p5.b, p5/z, p9.b, p9.b"); 5765 COMPARE(eors(p11.VnB(), p1.Zeroing(), p1.VnB(), p2.VnB()), 5766 "eors p11.b, p1/z, p1.b, p2.b"); 5767 COMPARE(eor(p8.VnB(), p6.Zeroing(), p1.VnB(), p11.VnB()), 5768 "eor p8.b, p6/z, p1.b, p11.b"); 5769 COMPARE(nands(p13.VnB(), p0.Zeroing(), p9.VnB(), p4.VnB()), 5770 "nands p13.b, p0/z, p9.b, p4.b"); 5771 COMPARE(nand(p7.VnB(), p7.Zeroing(), p15.VnB(), p2.VnB()), 5772 "nand p7.b, p7/z, p15.b, p2.b"); 5773 
COMPARE(nors(p8.VnB(), p8.Zeroing(), p12.VnB(), p11.VnB()), 5774 "nors p8.b, p8/z, p12.b, p11.b"); 5775 COMPARE(nor(p3.VnB(), p6.Zeroing(), p15.VnB(), p12.VnB()), 5776 "nor p3.b, p6/z, p15.b, p12.b"); 5777 COMPARE(orns(p10.VnB(), p11.Zeroing(), p0.VnB(), p15.VnB()), 5778 "orns p10.b, p11/z, p0.b, p15.b"); 5779 COMPARE(orn(p0.VnB(), p1.Zeroing(), p7.VnB(), p4.VnB()), 5780 "orn p0.b, p1/z, p7.b, p4.b"); 5781 COMPARE(orrs(p14.VnB(), p6.Zeroing(), p1.VnB(), p5.VnB()), 5782 "orrs p14.b, p6/z, p1.b, p5.b"); 5783 COMPARE(orr(p13.VnB(), p7.Zeroing(), p10.VnB(), p4.VnB()), 5784 "orr p13.b, p7/z, p10.b, p4.b"); 5785 COMPARE(sel(p9.VnB(), p15, p15.VnB(), p7.VnB()), 5786 "sel p9.b, p15, p15.b, p7.b"); 5787 5788 // Aliases. 5789 COMPARE(eor(p7.VnB(), p6.Zeroing(), p1.VnB(), p6.VnB()), 5790 "not p7.b, p6/z, p1.b"); 5791 COMPARE(not_(p7.VnB(), p6.Zeroing(), p1.VnB()), "not p7.b, p6/z, p1.b"); 5792 COMPARE(eors(p6.VnB(), p5.Zeroing(), p2.VnB(), p5.VnB()), 5793 "nots p6.b, p5/z, p2.b"); 5794 COMPARE(nots(p6.VnB(), p5.Zeroing(), p2.VnB()), "nots p6.b, p5/z, p2.b"); 5795 COMPARE(ands(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()), 5796 "movs p5.b, p4/z, p3.b"); 5797 COMPARE(movs(p5.VnB(), p4.Zeroing(), p3.VnB()), "movs p5.b, p4/z, p3.b"); 5798 COMPARE(and_(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()), 5799 "mov p5.b, p4/z, p3.b"); 5800 COMPARE(mov(p5.VnB(), p4.Zeroing(), p3.VnB()), "mov p5.b, p4/z, p3.b"); 5801 COMPARE(orrs(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()), "movs p4.b, p3.b"); 5802 COMPARE(movs(p4.VnB(), p3.VnB()), "movs p4.b, p3.b"); 5803 COMPARE(orr(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()), "mov p4.b, p3.b"); 5804 COMPARE(mov(p4.VnB(), p3.VnB()), "mov p4.b, p3.b"); 5805 COMPARE(sel(p3.VnB(), p2, p4.VnB(), p3.VnB()), "mov p3.b, p2/m, p4.b"); 5806 COMPARE(mov(p3.VnB(), p2.Merging(), p4.VnB()), "mov p3.b, p2/m, p4.b"); 5807 5808 COMPARE_MACRO(Not(p7.VnB(), p6.Zeroing(), p1.VnB()), "not p7.b, p6/z, p1.b"); 5809 COMPARE_MACRO(Nots(p6.VnB(), p5.Zeroing(), p2.VnB()), 5810 "nots 
p6.b, p5/z, p2.b"); 5811 COMPARE_MACRO(Movs(p5.VnB(), p4.Zeroing(), p3.VnB()), 5812 "movs p5.b, p4/z, p3.b"); 5813 COMPARE_MACRO(Mov(p5.VnB(), p4.Zeroing(), p3.VnB()), "mov p5.b, p4/z, p3.b"); 5814 COMPARE_MACRO(Movs(p4.VnB(), p3.VnB()), "movs p4.b, p3.b"); 5815 COMPARE_MACRO(Mov(p4, p3), "mov p4.b, p3.b"); 5816 COMPARE_MACRO(Mov(p3.VnB(), p2.Merging(), p4.VnB()), "mov p3.b, p2/m, p4.b"); 5817 5818 CLEANUP(); 5819} 5820 5821TEST(sve_predicate_first_active) { 5822 SETUP(); 5823 5824 COMPARE(pfirst(p0.VnB(), p7, p0.VnB()), "pfirst p0.b, p7, p0.b"); 5825 COMPARE(pfirst(p7.VnB(), p0, p7.VnB()), "pfirst p7.b, p0, p7.b"); 5826 5827 COMPARE_MACRO(Pfirst(p1.VnB(), p2, p1.VnB()), "pfirst p1.b, p2, p1.b"); 5828 COMPARE_MACRO(Pfirst(p3.VnB(), p4, p5.VnB()), 5829 "mov p3.b, p5.b\n" 5830 "pfirst p3.b, p4, p3.b"); 5831 5832 { 5833 UseScratchRegisterScope temps(&masm); 5834 temps.Include(p7, p15); 5835 COMPARE_MACRO(Pfirst(p6.VnB(), p6, p0.VnB()), 5836 "mov p15.b, p6.b\n" 5837 "mov p6.b, p0.b\n" 5838 "pfirst p6.b, p15, p6.b"); 5839 } 5840 5841 CLEANUP(); 5842} 5843 5844TEST(sve_predicate_next_active) { 5845 SETUP(); 5846 5847 COMPARE(pnext(p0.VnB(), p8, p0.VnB()), "pnext p0.b, p8, p0.b"); 5848 COMPARE(pnext(p1.VnH(), p9, p1.VnH()), "pnext p1.h, p9, p1.h"); 5849 COMPARE(pnext(p2.VnS(), p10, p2.VnS()), "pnext p2.s, p10, p2.s"); 5850 COMPARE(pnext(p3.VnD(), p11, p3.VnD()), "pnext p3.d, p11, p3.d"); 5851 5852 COMPARE(pnext(p12.VnB(), p4, p12.VnB()), "pnext p12.b, p4, p12.b"); 5853 COMPARE(pnext(p13.VnH(), p5, p13.VnH()), "pnext p13.h, p5, p13.h"); 5854 COMPARE(pnext(p14.VnS(), p6, p14.VnS()), "pnext p14.s, p6, p14.s"); 5855 COMPARE(pnext(p15.VnD(), p7, p15.VnD()), "pnext p15.d, p7, p15.d"); 5856 5857 COMPARE_MACRO(Pnext(p5.VnB(), p9, p5.VnB()), "pnext p5.b, p9, p5.b"); 5858 COMPARE_MACRO(Pnext(p6.VnH(), p8, p6.VnH()), "pnext p6.h, p8, p6.h"); 5859 COMPARE_MACRO(Pnext(p7.VnS(), p5, p7.VnS()), "pnext p7.s, p5, p7.s"); 5860 COMPARE_MACRO(Pnext(p8.VnD(), p6, p8.VnD()), "pnext p8.d, p6, 
p8.d"); 5861 5862 COMPARE_MACRO(Pnext(p6.VnB(), p4, p5.VnB()), 5863 "mov p6.b, p5.b\n" 5864 "pnext p6.b, p4, p6.b"); 5865 COMPARE_MACRO(Pnext(p7.VnH(), p3, p8.VnH()), 5866 "mov p7.b, p8.b\n" 5867 "pnext p7.h, p3, p7.h"); 5868 COMPARE_MACRO(Pnext(p8.VnS(), p2, p9.VnS()), 5869 "mov p8.b, p9.b\n" 5870 "pnext p8.s, p2, p8.s"); 5871 COMPARE_MACRO(Pnext(p9.VnD(), p1, p10.VnD()), 5872 "mov p9.b, p10.b\n" 5873 "pnext p9.d, p1, p9.d"); 5874 5875 { 5876 UseScratchRegisterScope temps(&masm); 5877 temps.Include(p7, p8); 5878 COMPARE_MACRO(Pnext(p6.VnB(), p6, p0.VnB()), 5879 "mov p8.b, p6.b\n" 5880 "mov p6.b, p0.b\n" 5881 "pnext p6.b, p8, p6.b"); 5882 COMPARE_MACRO(Pnext(p7.VnH(), p7, p1.VnH()), 5883 "mov p8.b, p7.b\n" 5884 "mov p7.b, p1.b\n" 5885 "pnext p7.h, p8, p7.h"); 5886 COMPARE_MACRO(Pnext(p10.VnS(), p10, p2.VnS()), 5887 "mov p8.b, p10.b\n" 5888 "mov p10.b, p2.b\n" 5889 "pnext p10.s, p8, p10.s"); 5890 COMPARE_MACRO(Pnext(p11.VnD(), p11, p3.VnD()), 5891 "mov p8.b, p11.b\n" 5892 "mov p11.b, p3.b\n" 5893 "pnext p11.d, p8, p11.d"); 5894 } 5895 5896 CLEANUP(); 5897} 5898 5899TEST(sve_predicate_initialize) { 5900 SETUP(); 5901 5902 // Basic forms. 
5903 COMPARE(ptrue(p0.VnB(), SVE_POW2), "ptrue p0.b, pow2"); 5904 COMPARE(ptrue(p1.VnH(), SVE_VL1), "ptrue p1.h, vl1"); 5905 COMPARE(ptrue(p2.VnS(), SVE_VL8), "ptrue p2.s, vl8"); 5906 COMPARE(ptrue(p3.VnD(), SVE_VL16), "ptrue p3.d, vl16"); 5907 COMPARE(ptrue(p4.VnB(), SVE_VL256), "ptrue p4.b, vl256"); 5908 COMPARE(ptrue(p5.VnH(), SVE_MUL3), "ptrue p5.h, mul3"); 5909 COMPARE(ptrue(p6.VnS(), SVE_MUL4), "ptrue p6.s, mul4"); 5910 COMPARE(ptrue(p7.VnD(), SVE_ALL), "ptrue p7.d"); 5911 5912 COMPARE(ptrues(p8.VnB(), SVE_ALL), "ptrues p8.b"); 5913 COMPARE(ptrues(p9.VnH(), SVE_MUL4), "ptrues p9.h, mul4"); 5914 COMPARE(ptrues(p10.VnS(), SVE_MUL3), "ptrues p10.s, mul3"); 5915 COMPARE(ptrues(p11.VnD(), SVE_VL256), "ptrues p11.d, vl256"); 5916 COMPARE(ptrues(p12.VnB(), SVE_VL16), "ptrues p12.b, vl16"); 5917 COMPARE(ptrues(p13.VnH(), SVE_VL8), "ptrues p13.h, vl8"); 5918 COMPARE(ptrues(p14.VnS(), SVE_VL1), "ptrues p14.s, vl1"); 5919 COMPARE(ptrues(p15.VnD(), SVE_POW2), "ptrues p15.d, pow2"); 5920 5921 // The Assembler supports arbitrary immediates. 5922 COMPARE(ptrue(p7.VnS(), 0xd), "ptrue p7.s, vl256"); 5923 COMPARE(ptrue(p8.VnD(), 0xe), "ptrue p8.d, #0xe"); 5924 COMPARE(ptrue(p9.VnB(), 0x15), "ptrue p9.b, #0x15"); 5925 COMPARE(ptrue(p10.VnH(), 0x19), "ptrue p10.h, #0x19"); 5926 COMPARE(ptrue(p11.VnS(), 0x1a), "ptrue p11.s, #0x1a"); 5927 COMPARE(ptrue(p12.VnD(), 0x1c), "ptrue p12.d, #0x1c"); 5928 COMPARE(ptrue(p13.VnB(), 0x1d), "ptrue p13.b, mul4"); 5929 5930 COMPARE(ptrues(p14.VnS(), 0xd), "ptrues p14.s, vl256"); 5931 COMPARE(ptrues(p15.VnD(), 0xe), "ptrues p15.d, #0xe"); 5932 COMPARE(ptrues(p0.VnB(), 0x15), "ptrues p0.b, #0x15"); 5933 COMPARE(ptrues(p1.VnH(), 0x19), "ptrues p1.h, #0x19"); 5934 COMPARE(ptrues(p2.VnS(), 0x1a), "ptrues p2.s, #0x1a"); 5935 COMPARE(ptrues(p3.VnD(), 0x1c), "ptrues p3.d, #0x1c"); 5936 COMPARE(ptrues(p4.VnB(), 0x1d), "ptrues p4.b, mul4"); 5937 5938 // SVE_ALL is the default. 
5939 COMPARE(ptrue(p15.VnS()), "ptrue p15.s"); 5940 COMPARE(ptrues(p0.VnS()), "ptrues p0.s"); 5941 5942 // The MacroAssembler provides a `FlagsUpdate` argument. 5943 COMPARE_MACRO(Ptrue(p0.VnB(), SVE_MUL3), "ptrue p0.b, mul3"); 5944 COMPARE_MACRO(Ptrues(p1.VnH(), SVE_MUL4), "ptrues p1.h, mul4"); 5945 COMPARE_MACRO(Ptrue(p2.VnS(), SVE_VL32, LeaveFlags), "ptrue p2.s, vl32"); 5946 COMPARE_MACRO(Ptrue(p3.VnD(), SVE_VL64, SetFlags), "ptrues p3.d, vl64"); 5947} 5948 5949TEST(sve_pfalse) { 5950 SETUP(); 5951 5952 COMPARE(pfalse(p0.VnB()), "pfalse p0.b"); 5953 COMPARE(pfalse(p15.VnB()), "pfalse p15.b"); 5954 5955 COMPARE_MACRO(Pfalse(p1.VnB()), "pfalse p1.b"); 5956 COMPARE_MACRO(Pfalse(p4.VnH()), "pfalse p4.b"); 5957 COMPARE_MACRO(Pfalse(p9.VnS()), "pfalse p9.b"); 5958 COMPARE_MACRO(Pfalse(p14.VnD()), "pfalse p14.b"); 5959} 5960 5961TEST(sve_ptest) { 5962 SETUP(); 5963 5964 COMPARE(ptest(p15, p0.VnB()), "ptest p15, p0.b"); 5965 COMPARE(ptest(p0, p15.VnB()), "ptest p0, p15.b"); 5966 COMPARE(ptest(p6, p6.VnB()), "ptest p6, p6.b"); 5967 5968 COMPARE_MACRO(Ptest(p0, p1.VnB()), "ptest p0, p1.b"); 5969} 5970 5971TEST(sve_lane_size_relaxing) { 5972 SETUP(); 5973 5974 COMPARE_MACRO(Rdffr(p3), "rdffr p3.b"); 5975 COMPARE_MACRO(Rdffr(p8.VnB()), "rdffr p8.b"); 5976 5977 COMPARE_MACRO(Wrffr(p9), "wrffr p9.b"); 5978 COMPARE_MACRO(Wrffr(p8.VnB()), "wrffr p8.b"); 5979 5980 COMPARE_MACRO(And(z10, z1, z15), "and z10.d, z1.d, z15.d"); 5981 COMPARE_MACRO(Bic(z11, z2, z16), "bic z11.d, z2.d, z16.d"); 5982 COMPARE_MACRO(Eor(z12, z3, z17), "eor z12.d, z3.d, z17.d"); 5983 COMPARE_MACRO(Orr(z13, z4, z18), "orr z13.d, z4.d, z18.d"); 5984 5985 COMPARE_MACRO(And(z10.VnD(), z1.VnD(), z15.VnD()), "and z10.d, z1.d, z15.d"); 5986 COMPARE_MACRO(Bic(z11.VnS(), z2.VnS(), z16.VnS()), "bic z11.d, z2.d, z16.d"); 5987 COMPARE_MACRO(Eor(z12.VnH(), z3.VnH(), z17.VnH()), "eor z12.d, z3.d, z17.d"); 5988 COMPARE_MACRO(Orr(z13.VnB(), z4.VnB(), z18.VnB()), "orr z13.d, z4.d, z18.d"); 5989 5990 
COMPARE_MACRO(Pfalse(p1), "pfalse p1.b"); 5991 5992 CLEANUP(); 5993} 5994 5995TEST(sve_read_ffr) { 5996 SETUP(); 5997 5998 COMPARE(rdffr(p13.VnB()), "rdffr p13.b"); 5999 COMPARE(rdffrs(p14.VnB(), p9.Zeroing()), "rdffrs p14.b, p9/z"); 6000 COMPARE(rdffr(p5.VnB(), p14.Zeroing()), "rdffr p5.b, p14/z"); 6001 6002 CLEANUP(); 6003} 6004 6005TEST(sve_propagate_break) { 6006 SETUP(); 6007 6008 COMPARE(brkpas(p12.VnB(), p0.Zeroing(), p12.VnB(), p11.VnB()), 6009 "brkpas p12.b, p0/z, p12.b, p11.b"); 6010 COMPARE(brkpa(p1.VnB(), p2.Zeroing(), p13.VnB(), p8.VnB()), 6011 "brkpa p1.b, p2/z, p13.b, p8.b"); 6012 COMPARE(brkpbs(p14.VnB(), p1.Zeroing(), p8.VnB(), p3.VnB()), 6013 "brkpbs p14.b, p1/z, p8.b, p3.b"); 6014 COMPARE(brkpb(p2.VnB(), p5.Zeroing(), p0.VnB(), p14.VnB()), 6015 "brkpb p2.b, p5/z, p0.b, p14.b"); 6016 6017 CLEANUP(); 6018} 6019 6020TEST(sve_stack_allocation) { 6021 SETUP(); 6022 6023 COMPARE(rdvl(x26, 0), "rdvl x26, #0"); 6024 COMPARE(rdvl(x27, 31), "rdvl x27, #31"); 6025 COMPARE(rdvl(x28, -32), "rdvl x28, #-32"); 6026 COMPARE(rdvl(xzr, 9), "rdvl xzr, #9"); 6027 6028 COMPARE(addvl(x6, x20, 0), "addvl x6, x20, #0"); 6029 COMPARE(addvl(x7, x21, 31), "addvl x7, x21, #31"); 6030 COMPARE(addvl(x8, x22, -32), "addvl x8, x22, #-32"); 6031 COMPARE(addvl(sp, x1, 5), "addvl sp, x1, #5"); 6032 COMPARE(addvl(x9, sp, -16), "addvl x9, sp, #-16"); 6033 6034 COMPARE(addpl(x20, x6, 0), "addpl x20, x6, #0"); 6035 COMPARE(addpl(x21, x7, 31), "addpl x21, x7, #31"); 6036 COMPARE(addpl(x22, x8, -32), "addpl x22, x8, #-32"); 6037 COMPARE(addpl(sp, x1, 5), "addpl sp, x1, #5"); 6038 COMPARE(addpl(x9, sp, -16), "addpl x9, sp, #-16"); 6039 6040 CLEANUP(); 6041} 6042 6043TEST(sve_rdvl_macro) { 6044 SETUP(); 6045 6046 // Encodable cases use rdvl directly. 6047 COMPARE_MACRO(Rdvl(x0, 3), "rdvl x0, #3"); 6048 COMPARE_MACRO(Rdvl(x0, 31), "rdvl x0, #31"); 6049 COMPARE_MACRO(Rdvl(x0, -32), "rdvl x0, #-32"); 6050 6051 // Unencodable cases fall back on `xn + (VL * multiplier)`. 
6052 COMPARE_MACRO(Rdvl(x2, 0x1234), 6053 "mov x2, #0x1234\n" 6054 "rdvl x16, #1\n" 6055 "mul x2, x2, x16"); 6056 6057 CLEANUP(); 6058} 6059 6060TEST(sve_addvl_macro) { 6061 SETUP(); 6062 6063 // Encodable cases use addvl directly. 6064 COMPARE_MACRO(Addvl(sp, sp, -3), "addvl sp, sp, #-3"); 6065 COMPARE_MACRO(Addvl(x0, x1, 8), "addvl x0, x1, #8"); 6066 6067 // If xn is xzr, `Addvl` behaves like `Rdvl`. 6068 COMPARE_MACRO(Addvl(x7, xzr, 8), "rdvl x7, #8"); 6069 6070 // Unencodable cases fall back on `xn + (VL * multiplier)`. 6071 COMPARE_MACRO(Addvl(x7, x8, 42), 6072 "mov x7, #0x2a\n" 6073 "rdvl x16, #1\n" 6074 "madd x7, x7, x16, x8"); 6075 COMPARE_MACRO(Addvl(x10, x10, 42), 6076 "mov x16, #0x2a\n" 6077 "rdvl x17, #1\n" 6078 "madd x10, x16, x17, x10"); 6079 COMPARE_MACRO(Addvl(x10, sp, 42), 6080 "mov x10, #0x2a\n" 6081 "rdvl x16, #1\n" 6082 "mul x10, x10, x16\n" 6083 "add x10, sp, x10"); 6084 COMPARE_MACRO(Addvl(sp, x10, 42), 6085 "mov x16, #0x2a\n" 6086 "rdvl x17, #1\n" 6087 "mul x16, x16, x17\n" 6088 "add sp, x10, x16"); 6089 6090 CLEANUP(); 6091} 6092 6093TEST(sve_rdpl_macro) { 6094 SETUP(); 6095 6096 // There is no `rdpl` instruction. `Rdpl` is implemented as `Addpl` (with 6097 // xzr). However, since `addpl` operates on the stack pointer, some special 6098 // cases exist. 6099 6100 // If the multiplier is a multiple of 8, `Rdpl` will pass through to `Rdvl`. 6101 COMPARE_MACRO(Rdpl(x0, 0), "rdvl x0, #0"); 6102 COMPARE_MACRO(Rdpl(x0, 8), "rdvl x0, #1"); 6103 6104 // If the multiplier is encodable with `addpl`, we use that with an 6105 // explicitly-zeroed register. 6106 COMPARE_MACRO(Rdpl(x1, 7), 6107 "mov x1, #0x0\n" 6108 "addpl x1, x1, #7"); 6109 COMPARE_MACRO(Rdpl(x1, -31), 6110 "mov x1, #0x0\n" 6111 "addpl x1, x1, #-31"); 6112 6113 // All other cases use `Rdvl`, and scale the result. 
6114 COMPARE_MACRO(Rdpl(x2, 37), 6115 "mov x2, #0x25\n" 6116 "rdvl x16, #1\n" 6117 "mul x2, x2, x16\n" 6118 "asr x2, x2, #3"); 6119 6120 CLEANUP(); 6121} 6122 6123TEST(sve_addpl_macro) { 6124 SETUP(); 6125 6126 // Encodable cases use addpl directly. 6127 COMPARE_MACRO(Addpl(x22, x22, -3), "addpl x22, x22, #-3"); 6128 COMPARE_MACRO(Addpl(x10, x11, 8), "addpl x10, x11, #8"); 6129 COMPARE_MACRO(Addpl(x7, sp, 31), "addpl x7, sp, #31"); 6130 6131 // Otherwise, if the multiplier is a multiple of 8, `Addpl` will pass through 6132 // to `Addvl`. 6133 COMPARE_MACRO(Addpl(sp, x0, 48), "addvl sp, x0, #6"); 6134 COMPARE_MACRO(Addpl(x2, sp, -48), "addvl x2, sp, #-6"); 6135 6136 // If xn is xzr, `Addpl` behaves like `Rdpl`. 6137 COMPARE_MACRO(Addpl(x7, xzr, 8), "rdvl x7, #1"); 6138 COMPARE_MACRO(Addpl(x29, xzr, 13), 6139 "mov x29, #0x0\n" 6140 "addpl x29, x29, #13"); 6141 6142 // All other cases use `Rdvl`, and scale the result before adding it to `xn`. 6143 // Where possible, the scaling `asr` is merged with the `add`. 6144 COMPARE_MACRO(Addpl(x7, x8, 123), 6145 "mov x7, #0x7b\n" 6146 "rdvl x16, #1\n" 6147 "mul x7, x7, x16\n" 6148 "add x7, x8, x7, asr #3"); 6149 COMPARE_MACRO(Addpl(x9, x9, 122), 6150 "mov x16, #0x7a\n" 6151 "rdvl x17, #1\n" 6152 "mul x16, x16, x17\n" 6153 "add x9, x9, x16, asr #3"); 6154 // If the stack pointer is used, the `asr` and `add` must be separate. 
6155 COMPARE_MACRO(Addpl(sp, x0, 33), 6156 "mov x16, #0x21\n" 6157 "rdvl x17, #1\n" 6158 "mul x16, x16, x17\n" 6159 "asr x16, x16, #3\n" 6160 "add sp, x0, x16"); 6161 6162 CLEANUP(); 6163} 6164 6165TEST(sve_vector_select) { 6166 SETUP(); 6167 6168 COMPARE_MACRO(Sel(z13.VnB(), p3, z3.VnB(), z25.VnB()), 6169 "sel z13.b, p3, z3.b, z25.b"); 6170 COMPARE_MACRO(Sel(z13.VnH(), p3, z3.VnH(), z25.VnH()), 6171 "sel z13.h, p3, z3.h, z25.h"); 6172 COMPARE_MACRO(Sel(z13.VnS(), p3, z3.VnS(), z25.VnS()), 6173 "sel z13.s, p3, z3.s, z25.s"); 6174 COMPARE_MACRO(Sel(z13.VnD(), p3, z3.VnD(), z25.VnD()), 6175 "sel z13.d, p3, z3.d, z25.d"); 6176 6177 // Check sel() where zd == zm disassemble as predicated mov. 6178 COMPARE_MACRO(Sel(z1.VnB(), p4.Merging(), z30.VnB(), z1.VnB()), 6179 "mov z1.b, p4/m, z30.b"); 6180 COMPARE_MACRO(Sel(z1.VnH(), p4.Merging(), z30.VnH(), z1.VnH()), 6181 "mov z1.h, p4/m, z30.h"); 6182 COMPARE_MACRO(Sel(z1.VnS(), p4.Merging(), z30.VnS(), z1.VnS()), 6183 "mov z1.s, p4/m, z30.s"); 6184 COMPARE_MACRO(Sel(z1.VnD(), p4.Merging(), z30.VnD(), z1.VnD()), 6185 "mov z1.d, p4/m, z30.d"); 6186 6187 // Check predicated mov() directly. 
6188 COMPARE_MACRO(Mov(z2.VnB(), p10.Merging(), z22.VnB()), 6189 "mov z2.b, p10/m, z22.b"); 6190 COMPARE_MACRO(Mov(z2.VnH(), p10.Merging(), z22.VnH()), 6191 "mov z2.h, p10/m, z22.h"); 6192 COMPARE_MACRO(Mov(z2.VnS(), p10.Merging(), z22.VnS()), 6193 "mov z2.s, p10/m, z22.s"); 6194 COMPARE_MACRO(Mov(z2.VnD(), p10.Merging(), z22.VnD()), 6195 "mov z2.d, p10/m, z22.d"); 6196 6197 CLEANUP(); 6198} 6199 6200TEST(sve_write_ffr) { 6201 SETUP(); 6202 6203 COMPARE_PREFIX(setffr(), "setffr"); 6204 COMPARE(wrffr(p9.VnB()), "wrffr p9.b"); 6205 6206 CLEANUP(); 6207} 6208 6209TEST(sve2_match_nmatch) { 6210 SETUP(); 6211 6212 COMPARE(match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB()), 6213 "match p15.b, p1/z, z18.b, z5.b"); 6214 COMPARE(match(p15.VnH(), p1.Zeroing(), z18.VnH(), z5.VnH()), 6215 "match p15.h, p1/z, z18.h, z5.h"); 6216 COMPARE(nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB()), 6217 "nmatch p1.b, p1/z, z20.b, z17.b"); 6218 COMPARE(nmatch(p1.VnH(), p1.Zeroing(), z20.VnH(), z17.VnH()), 6219 "nmatch p1.h, p1/z, z20.h, z17.h"); 6220 6221 CLEANUP(); 6222} 6223 6224TEST(sve2_saba_uaba) { 6225 SETUP(); 6226 6227 COMPARE(saba(z13.VnB(), z2.VnB(), z31.VnB()), "saba z13.b, z2.b, z31.b"); 6228 COMPARE(saba(z13.VnD(), z2.VnD(), z31.VnD()), "saba z13.d, z2.d, z31.d"); 6229 COMPARE(saba(z13.VnH(), z2.VnH(), z31.VnH()), "saba z13.h, z2.h, z31.h"); 6230 COMPARE(saba(z13.VnS(), z2.VnS(), z31.VnS()), "saba z13.s, z2.s, z31.s"); 6231 COMPARE(uaba(z23.VnB(), z22.VnB(), z20.VnB()), "uaba z23.b, z22.b, z20.b"); 6232 COMPARE(uaba(z23.VnD(), z22.VnD(), z20.VnD()), "uaba z23.d, z22.d, z20.d"); 6233 COMPARE(uaba(z23.VnH(), z22.VnH(), z20.VnH()), "uaba z23.h, z22.h, z20.h"); 6234 COMPARE(uaba(z23.VnS(), z22.VnS(), z20.VnS()), "uaba z23.s, z22.s, z20.s"); 6235 6236 COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z3.VnB(), z30.VnB()), 6237 "saba z12.b, z3.b, z30.b"); 6238 COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z3.VnB(), z12.VnB()), 6239 "saba z12.b, z3.b, z12.b"); 6240 COMPARE_MACRO(Saba(z12.VnB(), 
z12.VnB(), z12.VnB(), z30.VnB()), 6241 "saba z12.b, z12.b, z30.b"); 6242 COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z12.VnB(), z12.VnB()), ""); 6243 COMPARE_MACRO(Saba(z12.VnB(), z13.VnB(), z3.VnB(), z30.VnB()), 6244 "movprfx z12, z13\n" 6245 "saba z12.b, z3.b, z30.b"); 6246 COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z30.VnB()), 6247 "movprfx z12, z3\n" 6248 "saba z12.b, z3.b, z30.b"); 6249 COMPARE_MACRO(Saba(z12.VnB(), z30.VnB(), z3.VnB(), z30.VnB()), 6250 "movprfx z12, z30\n" 6251 "saba z12.b, z3.b, z30.b"); 6252 COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z3.VnB()), 6253 "mov z12.d, z3.d"); 6254 COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z12.VnB(), z3.VnB()), 6255 "mov z31.d, z12.d\n" 6256 "movprfx z12, z3\n" 6257 "saba z12.b, z31.b, z3.b"); 6258 COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z12.VnB()), 6259 "mov z31.d, z12.d\n" 6260 "movprfx z12, z3\n" 6261 "saba z12.b, z3.b, z31.b"); 6262 6263 COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z3.VnB(), z30.VnB()), 6264 "uaba z12.b, z3.b, z30.b"); 6265 COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z3.VnB(), z12.VnB()), 6266 "uaba z12.b, z3.b, z12.b"); 6267 COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z12.VnB(), z30.VnB()), 6268 "uaba z12.b, z12.b, z30.b"); 6269 COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z12.VnB(), z12.VnB()), ""); 6270 COMPARE_MACRO(Uaba(z12.VnB(), z13.VnB(), z3.VnB(), z30.VnB()), 6271 "movprfx z12, z13\n" 6272 "uaba z12.b, z3.b, z30.b"); 6273 COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z30.VnB()), 6274 "movprfx z12, z3\n" 6275 "uaba z12.b, z3.b, z30.b"); 6276 COMPARE_MACRO(Uaba(z12.VnB(), z30.VnB(), z3.VnB(), z30.VnB()), 6277 "movprfx z12, z30\n" 6278 "uaba z12.b, z3.b, z30.b"); 6279 COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z3.VnB()), 6280 "mov z12.d, z3.d"); 6281 COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z12.VnB(), z3.VnB()), 6282 "mov z31.d, z12.d\n" 6283 "movprfx z12, z3\n" 6284 "uaba z12.b, z31.b, z3.b"); 6285 COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z12.VnB()), 6286 "mov 
z31.d, z12.d\n" 6287 "movprfx z12, z3\n" 6288 "uaba z12.b, z3.b, z31.b"); 6289 6290 CLEANUP(); 6291} 6292 6293TEST(sve2_halving_arithmetic) { 6294 SETUP(); 6295 6296 COMPARE(shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()), 6297 "shadd z20.b, p3/m, z20.b, z7.b"); 6298 COMPARE(shadd(z20.VnD(), p3.Merging(), z20.VnD(), z7.VnD()), 6299 "shadd z20.d, p3/m, z20.d, z7.d"); 6300 COMPARE(shadd(z20.VnH(), p3.Merging(), z20.VnH(), z7.VnH()), 6301 "shadd z20.h, p3/m, z20.h, z7.h"); 6302 COMPARE(shadd(z20.VnS(), p3.Merging(), z20.VnS(), z7.VnS()), 6303 "shadd z20.s, p3/m, z20.s, z7.s"); 6304 COMPARE(shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()), 6305 "shsub z21.b, p0/m, z21.b, z0.b"); 6306 COMPARE(shsub(z21.VnD(), p0.Merging(), z21.VnD(), z0.VnD()), 6307 "shsub z21.d, p0/m, z21.d, z0.d"); 6308 COMPARE(shsub(z21.VnH(), p0.Merging(), z21.VnH(), z0.VnH()), 6309 "shsub z21.h, p0/m, z21.h, z0.h"); 6310 COMPARE(shsub(z21.VnS(), p0.Merging(), z21.VnS(), z0.VnS()), 6311 "shsub z21.s, p0/m, z21.s, z0.s"); 6312 COMPARE(shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()), 6313 "shsubr z1.b, p0/m, z1.b, z2.b"); 6314 COMPARE(shsubr(z1.VnD(), p0.Merging(), z1.VnD(), z2.VnD()), 6315 "shsubr z1.d, p0/m, z1.d, z2.d"); 6316 COMPARE(shsubr(z1.VnH(), p0.Merging(), z1.VnH(), z2.VnH()), 6317 "shsubr z1.h, p0/m, z1.h, z2.h"); 6318 COMPARE(shsubr(z1.VnS(), p0.Merging(), z1.VnS(), z2.VnS()), 6319 "shsubr z1.s, p0/m, z1.s, z2.s"); 6320 COMPARE(srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()), 6321 "srhadd z23.b, p4/m, z23.b, z14.b"); 6322 COMPARE(srhadd(z23.VnD(), p4.Merging(), z23.VnD(), z14.VnD()), 6323 "srhadd z23.d, p4/m, z23.d, z14.d"); 6324 COMPARE(srhadd(z23.VnH(), p4.Merging(), z23.VnH(), z14.VnH()), 6325 "srhadd z23.h, p4/m, z23.h, z14.h"); 6326 COMPARE(srhadd(z23.VnS(), p4.Merging(), z23.VnS(), z14.VnS()), 6327 "srhadd z23.s, p4/m, z23.s, z14.s"); 6328 6329 COMPARE(uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()), 6330 "uhadd z21.b, p2/m, z21.b, z19.b"); 6331 
COMPARE(uhadd(z21.VnD(), p2.Merging(), z21.VnD(), z19.VnD()),
          "uhadd z21.d, p2/m, z21.d, z19.d");
  COMPARE(uhadd(z21.VnH(), p2.Merging(), z21.VnH(), z19.VnH()),
          "uhadd z21.h, p2/m, z21.h, z19.h");
  COMPARE(uhadd(z21.VnS(), p2.Merging(), z21.VnS(), z19.VnS()),
          "uhadd z21.s, p2/m, z21.s, z19.s");
  COMPARE(uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()),
          "uhsub z1.b, p4/m, z1.b, z9.b");
  COMPARE(uhsub(z1.VnD(), p4.Merging(), z1.VnD(), z9.VnD()),
          "uhsub z1.d, p4/m, z1.d, z9.d");
  COMPARE(uhsub(z1.VnH(), p4.Merging(), z1.VnH(), z9.VnH()),
          "uhsub z1.h, p4/m, z1.h, z9.h");
  COMPARE(uhsub(z1.VnS(), p4.Merging(), z1.VnS(), z9.VnS()),
          "uhsub z1.s, p4/m, z1.s, z9.s");
  COMPARE(uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()),
          "uhsubr z18.b, p0/m, z18.b, z1.b");
  COMPARE(uhsubr(z18.VnD(), p0.Merging(), z18.VnD(), z1.VnD()),
          "uhsubr z18.d, p0/m, z18.d, z1.d");
  COMPARE(uhsubr(z18.VnH(), p0.Merging(), z18.VnH(), z1.VnH()),
          "uhsubr z18.h, p0/m, z18.h, z1.h");
  COMPARE(uhsubr(z18.VnS(), p0.Merging(), z18.VnS(), z1.VnS()),
          "uhsubr z18.s, p0/m, z18.s, z1.s");
  COMPARE(urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()),
          "urhadd z29.b, p4/m, z29.b, z10.b");
  COMPARE(urhadd(z29.VnD(), p4.Merging(), z29.VnD(), z10.VnD()),
          "urhadd z29.d, p4/m, z29.d, z10.d");
  COMPARE(urhadd(z29.VnH(), p4.Merging(), z29.VnH(), z10.VnH()),
          "urhadd z29.h, p4/m, z29.h, z10.h");
  COMPARE(urhadd(z29.VnS(), p4.Merging(), z29.VnS(), z10.VnS()),
          "urhadd z29.s, p4/m, z29.s, z10.s");

  // Macro forms of the halving adds. A distinct destination is prefixed with
  // a predicated movprfx; when zd aliases the second source, the expected
  // output has the operands swapped instead.
  COMPARE_MACRO(Shadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
                "movprfx z0.b, p0/m, z1.b\n"
                "shadd z0.b, p0/m, z0.b, z2.b");
  COMPARE_MACRO(Shadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
                "shadd z0.b, p0/m, z0.b, z1.b");
  COMPARE_MACRO(Srhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
                "movprfx z0.b, p0/m, z1.b\n"
                "srhadd z0.b, p0/m, z0.b, z2.b");
6370 COMPARE_MACRO(Srhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), 6371 "srhadd z0.b, p0/m, z0.b, z1.b"); 6372 COMPARE_MACRO(Uhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()), 6373 "movprfx z0.b, p0/m, z1.b\n" 6374 "uhadd z0.b, p0/m, z0.b, z2.b"); 6375 COMPARE_MACRO(Uhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), 6376 "uhadd z0.b, p0/m, z0.b, z1.b"); 6377 COMPARE_MACRO(Urhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()), 6378 "movprfx z0.b, p0/m, z1.b\n" 6379 "urhadd z0.b, p0/m, z0.b, z2.b"); 6380 COMPARE_MACRO(Urhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), 6381 "urhadd z0.b, p0/m, z0.b, z1.b"); 6382 6383 COMPARE_MACRO(Shsub(z0.VnB(), p0.Merging(), z0.VnB(), z1.VnB()), 6384 "shsub z0.b, p0/m, z0.b, z1.b"); 6385 COMPARE_MACRO(Shsub(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), 6386 "shsubr z0.b, p0/m, z0.b, z1.b"); 6387 COMPARE_MACRO(Uhsub(z0.VnB(), p0.Merging(), z0.VnB(), z1.VnB()), 6388 "uhsub z0.b, p0/m, z0.b, z1.b"); 6389 COMPARE_MACRO(Uhsub(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()), 6390 "uhsubr z0.b, p0/m, z0.b, z1.b"); 6391 6392 CLEANUP(); 6393} 6394 6395TEST(sve2_sra) { 6396 SETUP(); 6397 6398 COMPARE(ssra(z0.VnB(), z8.VnB(), 1), "ssra z0.b, z8.b, #1"); 6399 COMPARE(ssra(z0.VnB(), z8.VnB(), 2), "ssra z0.b, z8.b, #2"); 6400 COMPARE(ssra(z0.VnB(), z8.VnB(), 5), "ssra z0.b, z8.b, #5"); 6401 COMPARE(ssra(z0.VnB(), z8.VnB(), 8), "ssra z0.b, z8.b, #8"); 6402 COMPARE(ssra(z0.VnH(), z8.VnH(), 1), "ssra z0.h, z8.h, #1"); 6403 COMPARE(ssra(z0.VnH(), z8.VnH(), 16), "ssra z0.h, z8.h, #16"); 6404 COMPARE(ssra(z0.VnS(), z8.VnS(), 1), "ssra z0.s, z8.s, #1"); 6405 COMPARE(ssra(z0.VnS(), z8.VnS(), 31), "ssra z0.s, z8.s, #31"); 6406 COMPARE(ssra(z0.VnD(), z8.VnD(), 1), "ssra z0.d, z8.d, #1"); 6407 COMPARE(ssra(z0.VnD(), z8.VnD(), 64), "ssra z0.d, z8.d, #64"); 6408 6409 COMPARE(srsra(z0.VnB(), z8.VnB(), 1), "srsra z0.b, z8.b, #1"); 6410 COMPARE(srsra(z0.VnB(), z8.VnB(), 2), "srsra z0.b, z8.b, #2"); 6411 COMPARE(srsra(z0.VnB(), z8.VnB(), 5), "srsra z0.b, z8.b, #5"); 
6412 COMPARE(srsra(z0.VnB(), z8.VnB(), 8), "srsra z0.b, z8.b, #8"); 6413 COMPARE(srsra(z0.VnH(), z8.VnH(), 1), "srsra z0.h, z8.h, #1"); 6414 COMPARE(srsra(z0.VnH(), z8.VnH(), 16), "srsra z0.h, z8.h, #16"); 6415 COMPARE(srsra(z0.VnS(), z8.VnS(), 1), "srsra z0.s, z8.s, #1"); 6416 COMPARE(srsra(z0.VnS(), z8.VnS(), 31), "srsra z0.s, z8.s, #31"); 6417 COMPARE(srsra(z0.VnD(), z8.VnD(), 1), "srsra z0.d, z8.d, #1"); 6418 COMPARE(srsra(z0.VnD(), z8.VnD(), 64), "srsra z0.d, z8.d, #64"); 6419 6420 COMPARE(usra(z0.VnB(), z8.VnB(), 1), "usra z0.b, z8.b, #1"); 6421 COMPARE(usra(z0.VnB(), z8.VnB(), 2), "usra z0.b, z8.b, #2"); 6422 COMPARE(usra(z0.VnB(), z8.VnB(), 5), "usra z0.b, z8.b, #5"); 6423 COMPARE(usra(z0.VnB(), z8.VnB(), 8), "usra z0.b, z8.b, #8"); 6424 COMPARE(usra(z0.VnH(), z8.VnH(), 1), "usra z0.h, z8.h, #1"); 6425 COMPARE(usra(z0.VnH(), z8.VnH(), 16), "usra z0.h, z8.h, #16"); 6426 COMPARE(usra(z0.VnS(), z8.VnS(), 1), "usra z0.s, z8.s, #1"); 6427 COMPARE(usra(z0.VnS(), z8.VnS(), 31), "usra z0.s, z8.s, #31"); 6428 COMPARE(usra(z0.VnD(), z8.VnD(), 1), "usra z0.d, z8.d, #1"); 6429 COMPARE(usra(z0.VnD(), z8.VnD(), 64), "usra z0.d, z8.d, #64"); 6430 6431 COMPARE(ursra(z0.VnB(), z8.VnB(), 1), "ursra z0.b, z8.b, #1"); 6432 COMPARE(ursra(z0.VnB(), z8.VnB(), 2), "ursra z0.b, z8.b, #2"); 6433 COMPARE(ursra(z0.VnB(), z8.VnB(), 5), "ursra z0.b, z8.b, #5"); 6434 COMPARE(ursra(z0.VnB(), z8.VnB(), 8), "ursra z0.b, z8.b, #8"); 6435 COMPARE(ursra(z0.VnH(), z8.VnH(), 1), "ursra z0.h, z8.h, #1"); 6436 COMPARE(ursra(z0.VnH(), z8.VnH(), 16), "ursra z0.h, z8.h, #16"); 6437 COMPARE(ursra(z0.VnS(), z8.VnS(), 1), "ursra z0.s, z8.s, #1"); 6438 COMPARE(ursra(z0.VnS(), z8.VnS(), 31), "ursra z0.s, z8.s, #31"); 6439 COMPARE(ursra(z0.VnD(), z8.VnD(), 1), "ursra z0.d, z8.d, #1"); 6440 COMPARE(ursra(z0.VnD(), z8.VnD(), 64), "ursra z0.d, z8.d, #64"); 6441 6442 COMPARE_MACRO(Ssra(z0.VnB(), z1.VnB(), z2.VnB(), 2), 6443 "movprfx z0, z1\n" 6444 "ssra z0.b, z2.b, #2"); 6445 COMPARE_MACRO(Ssra(z0.VnB(), 
z0.VnB(), z2.VnB(), 2), "ssra z0.b, z2.b, #2"); 6446 COMPARE_MACRO(Ssra(z0.VnB(), z1.VnB(), z1.VnB(), 2), 6447 "movprfx z0, z1\n" 6448 "ssra z0.b, z1.b, #2"); 6449 COMPARE_MACRO(Ssra(z2.VnB(), z1.VnB(), z2.VnB(), 2), 6450 "mov z31.d, z2.d\n" 6451 "movprfx z2, z1\n" 6452 "ssra z2.b, z31.b, #2"); 6453 COMPARE_MACRO(Ssra(z0.VnB(), z0.VnB(), z0.VnB(), 2), "ssra z0.b, z0.b, #2"); 6454 6455 COMPARE_MACRO(Srsra(z2.VnB(), z1.VnB(), z2.VnB(), 2), 6456 "mov z31.d, z2.d\n" 6457 "movprfx z2, z1\n" 6458 "srsra z2.b, z31.b, #2"); 6459 COMPARE_MACRO(Usra(z2.VnB(), z1.VnB(), z2.VnB(), 2), 6460 "mov z31.d, z2.d\n" 6461 "movprfx z2, z1\n" 6462 "usra z2.b, z31.b, #2"); 6463 COMPARE_MACRO(Ursra(z2.VnB(), z1.VnB(), z2.VnB(), 2), 6464 "mov z31.d, z2.d\n" 6465 "movprfx z2, z1\n" 6466 "ursra z2.b, z31.b, #2"); 6467 CLEANUP(); 6468} 6469 6470TEST(sve2_sri_sli) { 6471 SETUP(); 6472 6473 COMPARE(sri(z6.VnB(), z9.VnB(), 1), "sri z6.b, z9.b, #1"); 6474 COMPARE(sri(z6.VnB(), z9.VnB(), 2), "sri z6.b, z9.b, #2"); 6475 COMPARE(sri(z6.VnB(), z9.VnB(), 5), "sri z6.b, z9.b, #5"); 6476 COMPARE(sri(z6.VnB(), z9.VnB(), 8), "sri z6.b, z9.b, #8"); 6477 COMPARE(sri(z6.VnH(), z9.VnH(), 1), "sri z6.h, z9.h, #1"); 6478 COMPARE(sri(z6.VnH(), z9.VnH(), 16), "sri z6.h, z9.h, #16"); 6479 COMPARE(sri(z6.VnS(), z9.VnS(), 1), "sri z6.s, z9.s, #1"); 6480 COMPARE(sri(z6.VnS(), z9.VnS(), 31), "sri z6.s, z9.s, #31"); 6481 COMPARE(sri(z6.VnD(), z9.VnD(), 1), "sri z6.d, z9.d, #1"); 6482 COMPARE(sri(z6.VnD(), z9.VnD(), 64), "sri z6.d, z9.d, #64"); 6483 6484 COMPARE(sli(z29.VnB(), z7.VnB(), 0), "sli z29.b, z7.b, #0"); 6485 COMPARE(sli(z29.VnB(), z7.VnB(), 2), "sli z29.b, z7.b, #2"); 6486 COMPARE(sli(z29.VnB(), z7.VnB(), 5), "sli z29.b, z7.b, #5"); 6487 COMPARE(sli(z29.VnB(), z7.VnB(), 7), "sli z29.b, z7.b, #7"); 6488 COMPARE(sli(z29.VnH(), z7.VnH(), 0), "sli z29.h, z7.h, #0"); 6489 COMPARE(sli(z29.VnH(), z7.VnH(), 15), "sli z29.h, z7.h, #15"); 6490 COMPARE(sli(z29.VnS(), z7.VnS(), 0), "sli z29.s, z7.s, #0"); 6491 
COMPARE(sli(z29.VnS(), z7.VnS(), 31), "sli z29.s, z7.s, #31"); 6492 COMPARE(sli(z29.VnD(), z7.VnD(), 0), "sli z29.d, z7.d, #0"); 6493 COMPARE(sli(z29.VnD(), z7.VnD(), 63), "sli z29.d, z7.d, #63"); 6494 6495 CLEANUP(); 6496} 6497 6498TEST(sve2_shift_imm) { 6499 SETUP(); 6500 6501 COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0), 6502 "sqshl z0.b, p5/m, z0.b, #0"); 6503 COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 2), 6504 "sqshl z0.b, p5/m, z0.b, #2"); 6505 COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 5), 6506 "sqshl z0.b, p5/m, z0.b, #5"); 6507 COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 7), 6508 "sqshl z0.b, p5/m, z0.b, #7"); 6509 COMPARE(sqshl(z0.VnH(), p5.Merging(), z0.VnH(), 0), 6510 "sqshl z0.h, p5/m, z0.h, #0"); 6511 COMPARE(sqshl(z0.VnH(), p5.Merging(), z0.VnH(), 15), 6512 "sqshl z0.h, p5/m, z0.h, #15"); 6513 COMPARE(sqshl(z0.VnS(), p5.Merging(), z0.VnS(), 0), 6514 "sqshl z0.s, p5/m, z0.s, #0"); 6515 COMPARE(sqshl(z0.VnS(), p5.Merging(), z0.VnS(), 31), 6516 "sqshl z0.s, p5/m, z0.s, #31"); 6517 COMPARE(sqshl(z0.VnD(), p5.Merging(), z0.VnD(), 0), 6518 "sqshl z0.d, p5/m, z0.d, #0"); 6519 COMPARE(sqshl(z0.VnD(), p5.Merging(), z0.VnD(), 63), 6520 "sqshl z0.d, p5/m, z0.d, #63"); 6521 6522 COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0), 6523 "sqshlu z10.b, p1/m, z10.b, #0"); 6524 COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 2), 6525 "sqshlu z10.b, p1/m, z10.b, #2"); 6526 COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 5), 6527 "sqshlu z10.b, p1/m, z10.b, #5"); 6528 COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 7), 6529 "sqshlu z10.b, p1/m, z10.b, #7"); 6530 COMPARE(sqshlu(z10.VnH(), p1.Merging(), z10.VnH(), 0), 6531 "sqshlu z10.h, p1/m, z10.h, #0"); 6532 COMPARE(sqshlu(z10.VnH(), p1.Merging(), z10.VnH(), 15), 6533 "sqshlu z10.h, p1/m, z10.h, #15"); 6534 COMPARE(sqshlu(z10.VnS(), p1.Merging(), z10.VnS(), 0), 6535 "sqshlu z10.s, p1/m, z10.s, #0"); 6536 COMPARE(sqshlu(z10.VnS(), p1.Merging(), z10.VnS(), 31), 6537 "sqshlu z10.s, p1/m, 
z10.s, #31"); 6538 COMPARE(sqshlu(z10.VnD(), p1.Merging(), z10.VnD(), 0), 6539 "sqshlu z10.d, p1/m, z10.d, #0"); 6540 COMPARE(sqshlu(z10.VnD(), p1.Merging(), z10.VnD(), 63), 6541 "sqshlu z10.d, p1/m, z10.d, #63"); 6542 6543 COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1), 6544 "srshr z12.b, p0/m, z12.b, #1"); 6545 COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 2), 6546 "srshr z12.b, p0/m, z12.b, #2"); 6547 COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 5), 6548 "srshr z12.b, p0/m, z12.b, #5"); 6549 COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 8), 6550 "srshr z12.b, p0/m, z12.b, #8"); 6551 COMPARE(srshr(z12.VnH(), p0.Merging(), z12.VnH(), 1), 6552 "srshr z12.h, p0/m, z12.h, #1"); 6553 COMPARE(srshr(z12.VnH(), p0.Merging(), z12.VnH(), 16), 6554 "srshr z12.h, p0/m, z12.h, #16"); 6555 COMPARE(srshr(z12.VnS(), p0.Merging(), z12.VnS(), 1), 6556 "srshr z12.s, p0/m, z12.s, #1"); 6557 COMPARE(srshr(z12.VnS(), p0.Merging(), z12.VnS(), 32), 6558 "srshr z12.s, p0/m, z12.s, #32"); 6559 COMPARE(srshr(z12.VnD(), p0.Merging(), z12.VnD(), 1), 6560 "srshr z12.d, p0/m, z12.d, #1"); 6561 COMPARE(srshr(z12.VnD(), p0.Merging(), z12.VnD(), 64), 6562 "srshr z12.d, p0/m, z12.d, #64"); 6563 6564 COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0), 6565 "uqshl z29.b, p7/m, z29.b, #0"); 6566 COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 2), 6567 "uqshl z29.b, p7/m, z29.b, #2"); 6568 COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 5), 6569 "uqshl z29.b, p7/m, z29.b, #5"); 6570 COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 7), 6571 "uqshl z29.b, p7/m, z29.b, #7"); 6572 COMPARE(uqshl(z29.VnH(), p7.Merging(), z29.VnH(), 0), 6573 "uqshl z29.h, p7/m, z29.h, #0"); 6574 COMPARE(uqshl(z29.VnH(), p7.Merging(), z29.VnH(), 15), 6575 "uqshl z29.h, p7/m, z29.h, #15"); 6576 COMPARE(uqshl(z29.VnS(), p7.Merging(), z29.VnS(), 0), 6577 "uqshl z29.s, p7/m, z29.s, #0"); 6578 COMPARE(uqshl(z29.VnS(), p7.Merging(), z29.VnS(), 31), 6579 "uqshl z29.s, p7/m, z29.s, #31"); 6580 
COMPARE(uqshl(z29.VnD(), p7.Merging(), z29.VnD(), 0), 6581 "uqshl z29.d, p7/m, z29.d, #0"); 6582 COMPARE(uqshl(z29.VnD(), p7.Merging(), z29.VnD(), 63), 6583 "uqshl z29.d, p7/m, z29.d, #63"); 6584 6585 COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1), 6586 "urshr z31.b, p2/m, z31.b, #1"); 6587 COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 2), 6588 "urshr z31.b, p2/m, z31.b, #2"); 6589 COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 5), 6590 "urshr z31.b, p2/m, z31.b, #5"); 6591 COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 8), 6592 "urshr z31.b, p2/m, z31.b, #8"); 6593 COMPARE(urshr(z31.VnH(), p2.Merging(), z31.VnH(), 1), 6594 "urshr z31.h, p2/m, z31.h, #1"); 6595 COMPARE(urshr(z31.VnH(), p2.Merging(), z31.VnH(), 16), 6596 "urshr z31.h, p2/m, z31.h, #16"); 6597 COMPARE(urshr(z31.VnS(), p2.Merging(), z31.VnS(), 1), 6598 "urshr z31.s, p2/m, z31.s, #1"); 6599 COMPARE(urshr(z31.VnS(), p2.Merging(), z31.VnS(), 32), 6600 "urshr z31.s, p2/m, z31.s, #32"); 6601 COMPARE(urshr(z31.VnD(), p2.Merging(), z31.VnD(), 1), 6602 "urshr z31.d, p2/m, z31.d, #1"); 6603 COMPARE(urshr(z31.VnD(), p2.Merging(), z31.VnD(), 64), 6604 "urshr z31.d, p2/m, z31.d, #64"); 6605 6606 CLEANUP(); 6607} 6608 6609TEST(sve2_shift_sat) { 6610 SETUP(); 6611 6612 COMPARE(srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB()), 6613 "srshl z31.b, p7/m, z31.b, z3.b"); 6614 COMPARE(srshl(z31.VnD(), p7.Merging(), z31.VnD(), z3.VnD()), 6615 "srshl z31.d, p7/m, z31.d, z3.d"); 6616 COMPARE(srshl(z31.VnH(), p7.Merging(), z31.VnH(), z3.VnH()), 6617 "srshl z31.h, p7/m, z31.h, z3.h"); 6618 COMPARE(srshl(z31.VnS(), p7.Merging(), z31.VnS(), z3.VnS()), 6619 "srshl z31.s, p7/m, z31.s, z3.s"); 6620 6621 COMPARE(srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()), 6622 "srshlr z16.b, p7/m, z16.b, z29.b"); 6623 COMPARE(srshlr(z16.VnD(), p7.Merging(), z16.VnD(), z29.VnD()), 6624 "srshlr z16.d, p7/m, z16.d, z29.d"); 6625 COMPARE(srshlr(z16.VnH(), p7.Merging(), z16.VnH(), z29.VnH()), 6626 "srshlr z16.h, p7/m, z16.h, 
z29.h"); 6627 COMPARE(srshlr(z16.VnS(), p7.Merging(), z16.VnS(), z29.VnS()), 6628 "srshlr z16.s, p7/m, z16.s, z29.s"); 6629 6630 COMPARE(urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()), 6631 "urshl z15.b, p2/m, z15.b, z3.b"); 6632 COMPARE(urshl(z15.VnD(), p2.Merging(), z15.VnD(), z3.VnD()), 6633 "urshl z15.d, p2/m, z15.d, z3.d"); 6634 COMPARE(urshl(z15.VnH(), p2.Merging(), z15.VnH(), z3.VnH()), 6635 "urshl z15.h, p2/m, z15.h, z3.h"); 6636 COMPARE(urshl(z15.VnS(), p2.Merging(), z15.VnS(), z3.VnS()), 6637 "urshl z15.s, p2/m, z15.s, z3.s"); 6638 6639 COMPARE(urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()), 6640 "urshlr z27.b, p1/m, z27.b, z30.b"); 6641 COMPARE(urshlr(z27.VnD(), p1.Merging(), z27.VnD(), z30.VnD()), 6642 "urshlr z27.d, p1/m, z27.d, z30.d"); 6643 COMPARE(urshlr(z27.VnH(), p1.Merging(), z27.VnH(), z30.VnH()), 6644 "urshlr z27.h, p1/m, z27.h, z30.h"); 6645 COMPARE(urshlr(z27.VnS(), p1.Merging(), z27.VnS(), z30.VnS()), 6646 "urshlr z27.s, p1/m, z27.s, z30.s"); 6647 6648 COMPARE(sqshl(z22.VnB(), p4.Merging(), z22.VnB(), z21.VnB()), 6649 "sqshl z22.b, p4/m, z22.b, z21.b"); 6650 COMPARE(sqshl(z22.VnD(), p4.Merging(), z22.VnD(), z21.VnD()), 6651 "sqshl z22.d, p4/m, z22.d, z21.d"); 6652 COMPARE(sqshl(z22.VnH(), p4.Merging(), z22.VnH(), z21.VnH()), 6653 "sqshl z22.h, p4/m, z22.h, z21.h"); 6654 COMPARE(sqshl(z22.VnS(), p4.Merging(), z22.VnS(), z21.VnS()), 6655 "sqshl z22.s, p4/m, z22.s, z21.s"); 6656 6657 COMPARE(sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()), 6658 "sqshlr z7.b, p3/m, z7.b, z5.b"); 6659 COMPARE(sqshlr(z7.VnD(), p3.Merging(), z7.VnD(), z5.VnD()), 6660 "sqshlr z7.d, p3/m, z7.d, z5.d"); 6661 COMPARE(sqshlr(z7.VnH(), p3.Merging(), z7.VnH(), z5.VnH()), 6662 "sqshlr z7.h, p3/m, z7.h, z5.h"); 6663 COMPARE(sqshlr(z7.VnS(), p3.Merging(), z7.VnS(), z5.VnS()), 6664 "sqshlr z7.s, p3/m, z7.s, z5.s"); 6665 6666 COMPARE(uqshl(z10.VnB(), p0.Merging(), z10.VnB(), z21.VnB()), 6667 "uqshl z10.b, p0/m, z10.b, z21.b"); 6668 COMPARE(uqshl(z10.VnD(), 
p0.Merging(), z10.VnD(), z21.VnD()), 6669 "uqshl z10.d, p0/m, z10.d, z21.d"); 6670 COMPARE(uqshl(z10.VnH(), p0.Merging(), z10.VnH(), z21.VnH()), 6671 "uqshl z10.h, p0/m, z10.h, z21.h"); 6672 COMPARE(uqshl(z10.VnS(), p0.Merging(), z10.VnS(), z21.VnS()), 6673 "uqshl z10.s, p0/m, z10.s, z21.s"); 6674 6675 COMPARE(uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB()), 6676 "uqshlr z12.b, p1/m, z12.b, z12.b"); 6677 COMPARE(uqshlr(z12.VnD(), p1.Merging(), z12.VnD(), z12.VnD()), 6678 "uqshlr z12.d, p1/m, z12.d, z12.d"); 6679 COMPARE(uqshlr(z12.VnH(), p1.Merging(), z12.VnH(), z12.VnH()), 6680 "uqshlr z12.h, p1/m, z12.h, z12.h"); 6681 COMPARE(uqshlr(z12.VnS(), p1.Merging(), z12.VnS(), z12.VnS()), 6682 "uqshlr z12.s, p1/m, z12.s, z12.s"); 6683 6684 COMPARE(sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()), 6685 "sqrshl z31.b, p5/m, z31.b, z27.b"); 6686 COMPARE(sqrshl(z31.VnD(), p5.Merging(), z31.VnD(), z27.VnD()), 6687 "sqrshl z31.d, p5/m, z31.d, z27.d"); 6688 COMPARE(sqrshl(z31.VnH(), p5.Merging(), z31.VnH(), z27.VnH()), 6689 "sqrshl z31.h, p5/m, z31.h, z27.h"); 6690 COMPARE(sqrshl(z31.VnS(), p5.Merging(), z31.VnS(), z27.VnS()), 6691 "sqrshl z31.s, p5/m, z31.s, z27.s"); 6692 6693 COMPARE(sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()), 6694 "sqrshlr z25.b, p6/m, z25.b, z7.b"); 6695 COMPARE(sqrshlr(z25.VnD(), p6.Merging(), z25.VnD(), z7.VnD()), 6696 "sqrshlr z25.d, p6/m, z25.d, z7.d"); 6697 COMPARE(sqrshlr(z25.VnH(), p6.Merging(), z25.VnH(), z7.VnH()), 6698 "sqrshlr z25.h, p6/m, z25.h, z7.h"); 6699 COMPARE(sqrshlr(z25.VnS(), p6.Merging(), z25.VnS(), z7.VnS()), 6700 "sqrshlr z25.s, p6/m, z25.s, z7.s"); 6701 6702 COMPARE(uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()), 6703 "uqrshl z20.b, p1/m, z20.b, z30.b"); 6704 COMPARE(uqrshl(z20.VnD(), p1.Merging(), z20.VnD(), z30.VnD()), 6705 "uqrshl z20.d, p1/m, z20.d, z30.d"); 6706 COMPARE(uqrshl(z20.VnH(), p1.Merging(), z20.VnH(), z30.VnH()), 6707 "uqrshl z20.h, p1/m, z20.h, z30.h"); 6708 COMPARE(uqrshl(z20.VnS(), 
p1.Merging(), z20.VnS(), z30.VnS()), 6709 "uqrshl z20.s, p1/m, z20.s, z30.s"); 6710 6711 COMPARE(uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()), 6712 "uqrshlr z8.b, p5/m, z8.b, z9.b"); 6713 COMPARE(uqrshlr(z8.VnD(), p5.Merging(), z8.VnD(), z9.VnD()), 6714 "uqrshlr z8.d, p5/m, z8.d, z9.d"); 6715 COMPARE(uqrshlr(z8.VnH(), p5.Merging(), z8.VnH(), z9.VnH()), 6716 "uqrshlr z8.h, p5/m, z8.h, z9.h"); 6717 COMPARE(uqrshlr(z8.VnS(), p5.Merging(), z8.VnS(), z9.VnS()), 6718 "uqrshlr z8.s, p5/m, z8.s, z9.s"); 6719 6720 CLEANUP(); 6721} 6722 6723TEST(sve2_sat_arith) { 6724 SETUP(); 6725 6726 COMPARE(sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), 6727 "sqadd z28.b, p0/m, z28.b, z3.b"); 6728 COMPARE(sqadd(z28.VnD(), p0.Merging(), z28.VnD(), z3.VnD()), 6729 "sqadd z28.d, p0/m, z28.d, z3.d"); 6730 COMPARE(sqadd(z28.VnH(), p0.Merging(), z28.VnH(), z3.VnH()), 6731 "sqadd z28.h, p0/m, z28.h, z3.h"); 6732 COMPARE(sqadd(z28.VnS(), p0.Merging(), z28.VnS(), z3.VnS()), 6733 "sqadd z28.s, p0/m, z28.s, z3.s"); 6734 COMPARE(sqsub(z6.VnB(), p0.Merging(), z6.VnB(), z12.VnB()), 6735 "sqsub z6.b, p0/m, z6.b, z12.b"); 6736 COMPARE(sqsub(z6.VnD(), p0.Merging(), z6.VnD(), z12.VnD()), 6737 "sqsub z6.d, p0/m, z6.d, z12.d"); 6738 COMPARE(sqsub(z6.VnH(), p0.Merging(), z6.VnH(), z12.VnH()), 6739 "sqsub z6.h, p0/m, z6.h, z12.h"); 6740 COMPARE(sqsub(z6.VnS(), p0.Merging(), z6.VnS(), z12.VnS()), 6741 "sqsub z6.s, p0/m, z6.s, z12.s"); 6742 COMPARE(sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()), 6743 "sqsubr z16.b, p7/m, z16.b, z22.b"); 6744 COMPARE(sqsubr(z16.VnD(), p7.Merging(), z16.VnD(), z22.VnD()), 6745 "sqsubr z16.d, p7/m, z16.d, z22.d"); 6746 COMPARE(sqsubr(z16.VnH(), p7.Merging(), z16.VnH(), z22.VnH()), 6747 "sqsubr z16.h, p7/m, z16.h, z22.h"); 6748 COMPARE(sqsubr(z16.VnS(), p7.Merging(), z16.VnS(), z22.VnS()), 6749 "sqsubr z16.s, p7/m, z16.s, z22.s"); 6750 COMPARE(suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()), 6751 "suqadd z26.b, p2/m, z26.b, z28.b"); 6752 
COMPARE(suqadd(z26.VnD(), p2.Merging(), z26.VnD(), z28.VnD()), 6753 "suqadd z26.d, p2/m, z26.d, z28.d"); 6754 COMPARE(suqadd(z26.VnH(), p2.Merging(), z26.VnH(), z28.VnH()), 6755 "suqadd z26.h, p2/m, z26.h, z28.h"); 6756 COMPARE(suqadd(z26.VnS(), p2.Merging(), z26.VnS(), z28.VnS()), 6757 "suqadd z26.s, p2/m, z26.s, z28.s"); 6758 COMPARE(usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()), 6759 "usqadd z25.b, p4/m, z25.b, z6.b"); 6760 COMPARE(usqadd(z25.VnD(), p4.Merging(), z25.VnD(), z6.VnD()), 6761 "usqadd z25.d, p4/m, z25.d, z6.d"); 6762 COMPARE(usqadd(z25.VnH(), p4.Merging(), z25.VnH(), z6.VnH()), 6763 "usqadd z25.h, p4/m, z25.h, z6.h"); 6764 COMPARE(usqadd(z25.VnS(), p4.Merging(), z25.VnS(), z6.VnS()), 6765 "usqadd z25.s, p4/m, z25.s, z6.s"); 6766 COMPARE(uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), 6767 "uqadd z24.b, p7/m, z24.b, z1.b"); 6768 COMPARE(uqadd(z24.VnD(), p7.Merging(), z24.VnD(), z1.VnD()), 6769 "uqadd z24.d, p7/m, z24.d, z1.d"); 6770 COMPARE(uqadd(z24.VnH(), p7.Merging(), z24.VnH(), z1.VnH()), 6771 "uqadd z24.h, p7/m, z24.h, z1.h"); 6772 COMPARE(uqadd(z24.VnS(), p7.Merging(), z24.VnS(), z1.VnS()), 6773 "uqadd z24.s, p7/m, z24.s, z1.s"); 6774 COMPARE(uqsub(z10.VnB(), p3.Merging(), z10.VnB(), z1.VnB()), 6775 "uqsub z10.b, p3/m, z10.b, z1.b"); 6776 COMPARE(uqsub(z10.VnD(), p3.Merging(), z10.VnD(), z1.VnD()), 6777 "uqsub z10.d, p3/m, z10.d, z1.d"); 6778 COMPARE(uqsub(z10.VnH(), p3.Merging(), z10.VnH(), z1.VnH()), 6779 "uqsub z10.h, p3/m, z10.h, z1.h"); 6780 COMPARE(uqsub(z10.VnS(), p3.Merging(), z10.VnS(), z1.VnS()), 6781 "uqsub z10.s, p3/m, z10.s, z1.s"); 6782 COMPARE(uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()), 6783 "uqsubr z20.b, p0/m, z20.b, z6.b"); 6784 COMPARE(uqsubr(z20.VnD(), p0.Merging(), z20.VnD(), z6.VnD()), 6785 "uqsubr z20.d, p0/m, z20.d, z6.d"); 6786 COMPARE(uqsubr(z20.VnH(), p0.Merging(), z20.VnH(), z6.VnH()), 6787 "uqsubr z20.h, p0/m, z20.h, z6.h"); 6788 COMPARE(uqsubr(z20.VnS(), p0.Merging(), z20.VnS(), z6.VnS()), 
6789 "uqsubr z20.s, p0/m, z20.s, z6.s"); 6790 6791 COMPARE_MACRO(Sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), 6792 "sqadd z28.b, p0/m, z28.b, z28.b"); 6793 COMPARE_MACRO(Sqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), 6794 "movprfx z29.b, p0/m, z28.b\n" 6795 "sqadd z29.b, p0/m, z29.b, z3.b"); 6796 COMPARE_MACRO(Sqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), 6797 "sqadd z29.b, p0/m, z29.b, z28.b"); 6798 COMPARE_MACRO(Uqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), 6799 "uqadd z28.b, p0/m, z28.b, z28.b"); 6800 COMPARE_MACRO(Uqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), 6801 "movprfx z29.b, p0/m, z28.b\n" 6802 "uqadd z29.b, p0/m, z29.b, z3.b"); 6803 COMPARE_MACRO(Uqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), 6804 "uqadd z29.b, p0/m, z29.b, z28.b"); 6805 6806 COMPARE_MACRO(Sqsub(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), 6807 "sqsub z28.b, p0/m, z28.b, z28.b"); 6808 COMPARE_MACRO(Sqsub(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), 6809 "movprfx z29.b, p0/m, z28.b\n" 6810 "sqsub z29.b, p0/m, z29.b, z3.b"); 6811 COMPARE_MACRO(Sqsub(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), 6812 "sqsubr z29.b, p0/m, z29.b, z28.b"); 6813 COMPARE_MACRO(Uqsub(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), 6814 "uqsub z28.b, p0/m, z28.b, z28.b"); 6815 COMPARE_MACRO(Uqsub(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), 6816 "movprfx z29.b, p0/m, z28.b\n" 6817 "uqsub z29.b, p0/m, z29.b, z3.b"); 6818 COMPARE_MACRO(Uqsub(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), 6819 "uqsubr z29.b, p0/m, z29.b, z28.b"); 6820 6821 COMPARE_MACRO(Suqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), 6822 "suqadd z28.b, p0/m, z28.b, z28.b"); 6823 COMPARE_MACRO(Suqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), 6824 "movprfx z29.b, p0/m, z28.b\n" 6825 "suqadd z29.b, p0/m, z29.b, z3.b"); 6826 COMPARE_MACRO(Suqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), 6827 "mov z31.d, z29.d\n" 6828 "movprfx z29.b, p0/m, z28.b\n" 6829 "suqadd z29.b, p0/m, z29.b, 
z31.b"); 6830 COMPARE_MACRO(Usqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()), 6831 "usqadd z28.b, p0/m, z28.b, z28.b"); 6832 COMPARE_MACRO(Usqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()), 6833 "movprfx z29.b, p0/m, z28.b\n" 6834 "usqadd z29.b, p0/m, z29.b, z3.b"); 6835 COMPARE_MACRO(Usqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()), 6836 "mov z31.d, z29.d\n" 6837 "movprfx z29.b, p0/m, z28.b\n" 6838 "usqadd z29.b, p0/m, z29.b, z31.b"); 6839 6840 CLEANUP(); 6841} 6842 6843TEST(sve2_pair_arith) { 6844 SETUP(); 6845 6846 COMPARE(addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB()), 6847 "addp z3.b, p1/m, z3.b, z0.b"); 6848 COMPARE(addp(z3.VnD(), p1.Merging(), z3.VnD(), z0.VnD()), 6849 "addp z3.d, p1/m, z3.d, z0.d"); 6850 COMPARE(addp(z3.VnH(), p1.Merging(), z3.VnH(), z0.VnH()), 6851 "addp z3.h, p1/m, z3.h, z0.h"); 6852 COMPARE(addp(z3.VnS(), p1.Merging(), z3.VnS(), z0.VnS()), 6853 "addp z3.s, p1/m, z3.s, z0.s"); 6854 COMPARE(smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()), 6855 "smaxp z5.b, p4/m, z5.b, z10.b"); 6856 COMPARE(smaxp(z5.VnD(), p4.Merging(), z5.VnD(), z10.VnD()), 6857 "smaxp z5.d, p4/m, z5.d, z10.d"); 6858 COMPARE(smaxp(z5.VnH(), p4.Merging(), z5.VnH(), z10.VnH()), 6859 "smaxp z5.h, p4/m, z5.h, z10.h"); 6860 COMPARE(smaxp(z5.VnS(), p4.Merging(), z5.VnS(), z10.VnS()), 6861 "smaxp z5.s, p4/m, z5.s, z10.s"); 6862 COMPARE(sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()), 6863 "sminp z27.b, p3/m, z27.b, z1.b"); 6864 COMPARE(sminp(z27.VnD(), p3.Merging(), z27.VnD(), z1.VnD()), 6865 "sminp z27.d, p3/m, z27.d, z1.d"); 6866 COMPARE(sminp(z27.VnH(), p3.Merging(), z27.VnH(), z1.VnH()), 6867 "sminp z27.h, p3/m, z27.h, z1.h"); 6868 COMPARE(sminp(z27.VnS(), p3.Merging(), z27.VnS(), z1.VnS()), 6869 "sminp z27.s, p3/m, z27.s, z1.s"); 6870 COMPARE(umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()), 6871 "umaxp z7.b, p2/m, z7.b, z23.b"); 6872 COMPARE(umaxp(z7.VnD(), p2.Merging(), z7.VnD(), z23.VnD()), 6873 "umaxp z7.d, p2/m, z7.d, z23.d"); 6874 
COMPARE(umaxp(z7.VnH(), p2.Merging(), z7.VnH(), z23.VnH()), 6875 "umaxp z7.h, p2/m, z7.h, z23.h"); 6876 COMPARE(umaxp(z7.VnS(), p2.Merging(), z7.VnS(), z23.VnS()), 6877 "umaxp z7.s, p2/m, z7.s, z23.s"); 6878 COMPARE(uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()), 6879 "uminp z10.b, p0/m, z10.b, z22.b"); 6880 COMPARE(uminp(z10.VnD(), p0.Merging(), z10.VnD(), z22.VnD()), 6881 "uminp z10.d, p0/m, z10.d, z22.d"); 6882 COMPARE(uminp(z10.VnH(), p0.Merging(), z10.VnH(), z22.VnH()), 6883 "uminp z10.h, p0/m, z10.h, z22.h"); 6884 COMPARE(uminp(z10.VnS(), p0.Merging(), z10.VnS(), z22.VnS()), 6885 "uminp z10.s, p0/m, z10.s, z22.s"); 6886 6887 COMPARE_MACRO(Addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB()), 6888 "addp z3.b, p1/m, z3.b, z3.b"); 6889 COMPARE_MACRO(Addp(z4.VnB(), p1.Merging(), z3.VnB(), z3.VnB()), 6890 "movprfx z4.b, p1/m, z3.b\n" 6891 "addp z4.b, p1/m, z4.b, z3.b"); 6892 COMPARE_MACRO(Addp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), 6893 "mov z31.d, z4.d\n" 6894 "movprfx z4.b, p1/m, z3.b\n" 6895 "addp z4.b, p1/m, z4.b, z31.b"); 6896 COMPARE_MACRO(Smaxp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), 6897 "mov z31.d, z4.d\n" 6898 "movprfx z4.b, p1/m, z3.b\n" 6899 "smaxp z4.b, p1/m, z4.b, z31.b"); 6900 COMPARE_MACRO(Sminp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), 6901 "mov z31.d, z4.d\n" 6902 "movprfx z4.b, p1/m, z3.b\n" 6903 "sminp z4.b, p1/m, z4.b, z31.b"); 6904 COMPARE_MACRO(Umaxp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), 6905 "mov z31.d, z4.d\n" 6906 "movprfx z4.b, p1/m, z3.b\n" 6907 "umaxp z4.b, p1/m, z4.b, z31.b"); 6908 COMPARE_MACRO(Uminp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()), 6909 "mov z31.d, z4.d\n" 6910 "movprfx z4.b, p1/m, z3.b\n" 6911 "uminp z4.b, p1/m, z4.b, z31.b"); 6912 CLEANUP(); 6913} 6914 6915TEST(sve2_extract_narrow) { 6916 SETUP(); 6917 6918 COMPARE(sqxtnb(z2.VnB(), z0.VnH()), "sqxtnb z2.b, z0.h"); 6919 COMPARE(sqxtnb(z2.VnH(), z0.VnS()), "sqxtnb z2.h, z0.s"); 6920 COMPARE(sqxtnb(z2.VnS(), z0.VnD()), "sqxtnb z2.s, z0.d"); 6921 
COMPARE(sqxtnt(z31.VnB(), z18.VnH()), "sqxtnt z31.b, z18.h"); 6922 COMPARE(sqxtnt(z31.VnH(), z18.VnS()), "sqxtnt z31.h, z18.s"); 6923 COMPARE(sqxtnt(z31.VnS(), z18.VnD()), "sqxtnt z31.s, z18.d"); 6924 COMPARE(sqxtunb(z28.VnB(), z6.VnH()), "sqxtunb z28.b, z6.h"); 6925 COMPARE(sqxtunb(z28.VnH(), z6.VnS()), "sqxtunb z28.h, z6.s"); 6926 COMPARE(sqxtunb(z28.VnS(), z6.VnD()), "sqxtunb z28.s, z6.d"); 6927 COMPARE(sqxtunt(z14.VnB(), z31.VnH()), "sqxtunt z14.b, z31.h"); 6928 COMPARE(sqxtunt(z14.VnH(), z31.VnS()), "sqxtunt z14.h, z31.s"); 6929 COMPARE(sqxtunt(z14.VnS(), z31.VnD()), "sqxtunt z14.s, z31.d"); 6930 COMPARE(uqxtnb(z28.VnB(), z4.VnH()), "uqxtnb z28.b, z4.h"); 6931 COMPARE(uqxtnb(z28.VnH(), z4.VnS()), "uqxtnb z28.h, z4.s"); 6932 COMPARE(uqxtnb(z28.VnS(), z4.VnD()), "uqxtnb z28.s, z4.d"); 6933 COMPARE(uqxtnt(z19.VnB(), z7.VnH()), "uqxtnt z19.b, z7.h"); 6934 COMPARE(uqxtnt(z19.VnH(), z7.VnS()), "uqxtnt z19.h, z7.s"); 6935 COMPARE(uqxtnt(z19.VnS(), z7.VnD()), "uqxtnt z19.s, z7.d"); 6936 6937 CLEANUP(); 6938} 6939 6940TEST(sve2_eorbt_eortb) { 6941 SETUP(); 6942 6943 COMPARE(eorbt(z3.VnB(), z10.VnB(), z8.VnB()), "eorbt z3.b, z10.b, z8.b"); 6944 COMPARE(eorbt(z3.VnD(), z10.VnD(), z8.VnD()), "eorbt z3.d, z10.d, z8.d"); 6945 COMPARE(eorbt(z3.VnH(), z10.VnH(), z8.VnH()), "eorbt z3.h, z10.h, z8.h"); 6946 COMPARE(eorbt(z3.VnS(), z10.VnS(), z8.VnS()), "eorbt z3.s, z10.s, z8.s"); 6947 COMPARE(eortb(z21.VnB(), z21.VnB(), z15.VnB()), "eortb z21.b, z21.b, z15.b"); 6948 COMPARE(eortb(z21.VnD(), z21.VnD(), z15.VnD()), "eortb z21.d, z21.d, z15.d"); 6949 COMPARE(eortb(z21.VnH(), z21.VnH(), z15.VnH()), "eortb z21.h, z21.h, z15.h"); 6950 COMPARE(eortb(z21.VnS(), z21.VnS(), z15.VnS()), "eortb z21.s, z21.s, z15.s"); 6951 6952 CLEANUP(); 6953} 6954 6955TEST(sve2_saturating_multiply_add_high) { 6956 SETUP(); 6957 6958 COMPARE(sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()), 6959 "sqrdmlah z27.b, z28.b, z19.b"); 6960 COMPARE(sqrdmlah(z27.VnD(), z28.VnD(), z19.VnD()), 6961 "sqrdmlah z27.d, z28.d, 
z19.d"); 6962 COMPARE(sqrdmlah(z27.VnH(), z28.VnH(), z19.VnH()), 6963 "sqrdmlah z27.h, z28.h, z19.h"); 6964 COMPARE(sqrdmlah(z27.VnS(), z28.VnS(), z19.VnS()), 6965 "sqrdmlah z27.s, z28.s, z19.s"); 6966 COMPARE(sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()), 6967 "sqrdmlsh z11.b, z16.b, z31.b"); 6968 COMPARE(sqrdmlsh(z11.VnD(), z16.VnD(), z31.VnD()), 6969 "sqrdmlsh z11.d, z16.d, z31.d"); 6970 COMPARE(sqrdmlsh(z11.VnH(), z16.VnH(), z31.VnH()), 6971 "sqrdmlsh z11.h, z16.h, z31.h"); 6972 COMPARE(sqrdmlsh(z11.VnS(), z16.VnS(), z31.VnS()), 6973 "sqrdmlsh z11.s, z16.s, z31.s"); 6974 6975 COMPARE_MACRO(Sqrdmlah(z29.VnD(), z0.VnD(), z29.VnD(), z26.VnD()), 6976 "movprfx z31, z0\n" 6977 "sqrdmlah z31.d, z29.d, z26.d\n" 6978 "mov z29.d, z31.d"); 6979 COMPARE_MACRO(Sqrdmlah(z26.VnH(), z0.VnH(), z29.VnH(), z26.VnH()), 6980 "movprfx z31, z0\n" 6981 "sqrdmlah z31.h, z29.h, z26.h\n" 6982 "mov z26.d, z31.d"); 6983 COMPARE_MACRO(Sqrdmlsh(z23.VnS(), z31.VnS(), z26.VnS(), z29.VnS()), 6984 "movprfx z23, z31\n" 6985 "sqrdmlsh z23.s, z26.s, z29.s"); 6986 COMPARE_MACRO(Sqrdmlsh(z4.VnB(), z31.VnB(), z4.VnB(), z4.VnB()), 6987 "sqrdmlsh z31.b, z4.b, z4.b\n" 6988 "mov z4.d, z31.d"); 6989 6990 COMPARE(sqrdmlah(z10.VnD(), z30.VnD(), z11.VnD(), 1), 6991 "sqrdmlah z10.d, z30.d, z11.d[1]"); 6992 COMPARE(sqrdmlah(z11.VnH(), z8.VnH(), z3.VnH(), 7), 6993 "sqrdmlah z11.h, z8.h, z3.h[7]"); 6994 COMPARE(sqrdmlah(z21.VnS(), z29.VnS(), z7.VnS(), 3), 6995 "sqrdmlah z21.s, z29.s, z7.s[3]"); 6996 COMPARE(sqrdmlsh(z2.VnD(), z16.VnD(), z14.VnD(), 0), 6997 "sqrdmlsh z2.d, z16.d, z14.d[0]"); 6998 COMPARE(sqrdmlsh(z23.VnH(), z13.VnH(), z6.VnH(), 5), 6999 "sqrdmlsh z23.h, z13.h, z6.h[5]"); 7000 COMPARE(sqrdmlsh(z27.VnS(), z8.VnS(), z4.VnS(), 2), 7001 "sqrdmlsh z27.s, z8.s, z4.s[2]"); 7002 7003 COMPARE_MACRO(Sqrdmlah(z24.VnD(), z0.VnD(), z24.VnD(), z13.VnD(), 0), 7004 "movprfx z31, z0\n" 7005 "sqrdmlah z31.d, z24.d, z13.d[0]\n" 7006 "mov z24.d, z31.d"); 7007 COMPARE_MACRO(Sqrdmlah(z4.VnH(), z0.VnH(), z29.VnH(), z4.VnH(), 
6), 7008 "movprfx z31, z0\n" 7009 "sqrdmlah z31.h, z29.h, z4.h[6]\n" 7010 "mov z4.d, z31.d"); 7011 COMPARE_MACRO(Sqrdmlsh(z12.VnS(), z31.VnS(), z26.VnS(), z2.VnS(), 2), 7012 "movprfx z12, z31\n" 7013 "sqrdmlsh z12.s, z26.s, z2.s[2]"); 7014 COMPARE_MACRO(Sqrdmlsh(z0.VnD(), z31.VnD(), z0.VnD(), z0.VnD(), 1), 7015 "sqrdmlsh z31.d, z0.d, z0.d[1]\n" 7016 "mov z0.d, z31.d"); 7017 7018 CLEANUP(); 7019} 7020 7021TEST(sve2_integer_pairwise_add_accumulate_long) { 7022 SETUP(); 7023 7024 COMPARE(sadalp(z19.VnD(), p5.Merging(), z9.VnS()), 7025 "sadalp z19.d, p5/m, z9.s"); 7026 COMPARE(sadalp(z19.VnH(), p5.Merging(), z9.VnB()), 7027 "sadalp z19.h, p5/m, z9.b"); 7028 COMPARE(sadalp(z19.VnS(), p5.Merging(), z9.VnH()), 7029 "sadalp z19.s, p5/m, z9.h"); 7030 COMPARE(uadalp(z20.VnD(), p4.Merging(), z5.VnS()), 7031 "uadalp z20.d, p4/m, z5.s"); 7032 COMPARE(uadalp(z20.VnH(), p4.Merging(), z5.VnB()), 7033 "uadalp z20.h, p4/m, z5.b"); 7034 COMPARE(uadalp(z20.VnS(), p4.Merging(), z5.VnH()), 7035 "uadalp z20.s, p4/m, z5.h"); 7036 7037 CLEANUP(); 7038} 7039 7040TEST(sve2_integer_multiply_vectors_unpredicated) { 7041 SETUP(); 7042 7043 COMPARE(mul(z23.VnB(), z0.VnB(), z12.VnB()), "mul z23.b, z0.b, z12.b"); 7044 COMPARE(mul(z24.VnD(), z1.VnD(), z14.VnD()), "mul z24.d, z1.d, z14.d"); 7045 COMPARE(mul(z25.VnH(), z2.VnH(), z16.VnH()), "mul z25.h, z2.h, z16.h"); 7046 COMPARE(mul(z26.VnS(), z3.VnS(), z18.VnS()), "mul z26.s, z3.s, z18.s"); 7047 7048 COMPARE(pmul(z0.VnB(), z5.VnB(), z5.VnB()), "pmul z0.b, z5.b, z5.b"); 7049 7050 COMPARE(smulh(z11.VnB(), z9.VnB(), z1.VnB()), "smulh z11.b, z9.b, z1.b"); 7051 COMPARE(smulh(z21.VnD(), z19.VnD(), z16.VnD()), "smulh z21.d, z19.d, z16.d"); 7052 COMPARE(smulh(z11.VnH(), z9.VnH(), z1.VnH()), "smulh z11.h, z9.h, z1.h"); 7053 COMPARE(smulh(z21.VnS(), z19.VnS(), z16.VnS()), "smulh z21.s, z19.s, z16.s"); 7054 7055 COMPARE(umulh(z5.VnB(), z9.VnB(), z5.VnB()), "umulh z5.b, z9.b, z5.b"); 7056 COMPARE(umulh(z18.VnD(), z9.VnD(), z5.VnD()), "umulh z18.d, z9.d, 
z5.d"); 7057 COMPARE(umulh(z18.VnH(), z9.VnH(), z9.VnH()), "umulh z18.h, z9.h, z9.h"); 7058 COMPARE(umulh(z18.VnS(), z9.VnS(), z18.VnS()), "umulh z18.s, z9.s, z18.s"); 7059 7060 CLEANUP(); 7061} 7062 7063TEST(sve2_arith_interleaved_long) { 7064 SETUP(); 7065 7066 COMPARE(saddlbt(z15.VnD(), z6.VnS(), z18.VnS()), 7067 "saddlbt z15.d, z6.s, z18.s"); 7068 COMPARE(saddlbt(z15.VnH(), z6.VnB(), z18.VnB()), 7069 "saddlbt z15.h, z6.b, z18.b"); 7070 COMPARE(saddlbt(z15.VnS(), z6.VnH(), z18.VnH()), 7071 "saddlbt z15.s, z6.h, z18.h"); 7072 COMPARE(ssublbt(z6.VnD(), z28.VnS(), z12.VnS()), 7073 "ssublbt z6.d, z28.s, z12.s"); 7074 COMPARE(ssublbt(z6.VnH(), z28.VnB(), z12.VnB()), 7075 "ssublbt z6.h, z28.b, z12.b"); 7076 COMPARE(ssublbt(z6.VnS(), z28.VnH(), z12.VnH()), 7077 "ssublbt z6.s, z28.h, z12.h"); 7078 COMPARE(ssubltb(z11.VnD(), z18.VnS(), z19.VnS()), 7079 "ssubltb z11.d, z18.s, z19.s"); 7080 COMPARE(ssubltb(z11.VnH(), z18.VnB(), z19.VnB()), 7081 "ssubltb z11.h, z18.b, z19.b"); 7082 COMPARE(ssubltb(z11.VnS(), z18.VnH(), z19.VnH()), 7083 "ssubltb z11.s, z18.h, z19.h"); 7084 7085 CLEANUP(); 7086} 7087 7088TEST(sve2_int_unary_predicated) { 7089 SETUP(); 7090 7091 COMPARE_MACRO(Sqabs(z29.VnB(), p1.Merging(), z18.VnB()), 7092 "sqabs z29.b, p1/m, z18.b"); 7093 COMPARE_MACRO(Sqabs(z29.VnD(), p1.Merging(), z18.VnD()), 7094 "sqabs z29.d, p1/m, z18.d"); 7095 COMPARE_MACRO(Sqabs(z29.VnH(), p1.Merging(), z18.VnH()), 7096 "sqabs z29.h, p1/m, z18.h"); 7097 COMPARE_MACRO(Sqabs(z29.VnS(), p1.Merging(), z18.VnS()), 7098 "sqabs z29.s, p1/m, z18.s"); 7099 COMPARE_MACRO(Sqneg(z21.VnB(), p0.Merging(), z17.VnB()), 7100 "sqneg z21.b, p0/m, z17.b"); 7101 COMPARE_MACRO(Sqneg(z21.VnD(), p0.Merging(), z17.VnD()), 7102 "sqneg z21.d, p0/m, z17.d"); 7103 COMPARE_MACRO(Sqneg(z21.VnH(), p0.Merging(), z17.VnH()), 7104 "sqneg z21.h, p0/m, z17.h"); 7105 COMPARE_MACRO(Sqneg(z21.VnS(), p0.Merging(), z17.VnS()), 7106 "sqneg z21.s, p0/m, z17.s"); 7107 COMPARE_MACRO(Urecpe(z25.VnS(), p7.Merging(), z2.VnS()), 7108 
"urecpe z25.s, p7/m, z2.s"); 7109 COMPARE_MACRO(Ursqrte(z4.VnS(), p3.Merging(), z3.VnS()), 7110 "ursqrte z4.s, p3/m, z3.s"); 7111 7112 COMPARE_MACRO(Sqabs(z29.VnS(), p1.Zeroing(), z18.VnS()), 7113 "movprfx z29.s, p1/z, z29.s\n" 7114 "sqabs z29.s, p1/m, z18.s"); 7115 COMPARE_MACRO(Sqneg(z21.VnB(), p0.Zeroing(), z17.VnB()), 7116 "movprfx z21.b, p0/z, z21.b\n" 7117 "sqneg z21.b, p0/m, z17.b"); 7118 COMPARE_MACRO(Urecpe(z25.VnS(), p7.Zeroing(), z2.VnS()), 7119 "movprfx z25.s, p7/z, z25.s\n" 7120 "urecpe z25.s, p7/m, z2.s"); 7121 COMPARE_MACRO(Ursqrte(z4.VnS(), p3.Zeroing(), z3.VnS()), 7122 "movprfx z4.s, p3/z, z4.s\n" 7123 "ursqrte z4.s, p3/m, z3.s"); 7124 CLEANUP(); 7125} 7126 7127TEST(sve2_arith_long) { 7128 SETUP(); 7129 7130 COMPARE_MACRO(Sabdlb(z2.VnD(), z21.VnS(), z3.VnS()), 7131 "sabdlb z2.d, z21.s, z3.s"); 7132 COMPARE_MACRO(Sabdlb(z2.VnH(), z21.VnB(), z3.VnB()), 7133 "sabdlb z2.h, z21.b, z3.b"); 7134 COMPARE_MACRO(Sabdlb(z2.VnS(), z21.VnH(), z3.VnH()), 7135 "sabdlb z2.s, z21.h, z3.h"); 7136 COMPARE_MACRO(Sabdlt(z25.VnD(), z23.VnS(), z17.VnS()), 7137 "sabdlt z25.d, z23.s, z17.s"); 7138 COMPARE_MACRO(Sabdlt(z25.VnH(), z23.VnB(), z17.VnB()), 7139 "sabdlt z25.h, z23.b, z17.b"); 7140 COMPARE_MACRO(Sabdlt(z25.VnS(), z23.VnH(), z17.VnH()), 7141 "sabdlt z25.s, z23.h, z17.h"); 7142 COMPARE_MACRO(Saddlb(z24.VnD(), z30.VnS(), z16.VnS()), 7143 "saddlb z24.d, z30.s, z16.s"); 7144 COMPARE_MACRO(Saddlb(z24.VnH(), z30.VnB(), z16.VnB()), 7145 "saddlb z24.h, z30.b, z16.b"); 7146 COMPARE_MACRO(Saddlb(z24.VnS(), z30.VnH(), z16.VnH()), 7147 "saddlb z24.s, z30.h, z16.h"); 7148 COMPARE_MACRO(Saddlt(z21.VnD(), z29.VnS(), z31.VnS()), 7149 "saddlt z21.d, z29.s, z31.s"); 7150 COMPARE_MACRO(Saddlt(z21.VnH(), z29.VnB(), z31.VnB()), 7151 "saddlt z21.h, z29.b, z31.b"); 7152 COMPARE_MACRO(Saddlt(z21.VnS(), z29.VnH(), z31.VnH()), 7153 "saddlt z21.s, z29.h, z31.h"); 7154 COMPARE_MACRO(Ssublb(z4.VnD(), z23.VnS(), z7.VnS()), 7155 "ssublb z4.d, z23.s, z7.s"); 7156 COMPARE_MACRO(Ssublb(z4.VnH(), 
z23.VnB(), z7.VnB()), 7157 "ssublb z4.h, z23.b, z7.b"); 7158 COMPARE_MACRO(Ssublb(z4.VnS(), z23.VnH(), z7.VnH()), 7159 "ssublb z4.s, z23.h, z7.h"); 7160 COMPARE_MACRO(Ssublt(z12.VnD(), z13.VnS(), z6.VnS()), 7161 "ssublt z12.d, z13.s, z6.s"); 7162 COMPARE_MACRO(Ssublt(z12.VnH(), z13.VnB(), z6.VnB()), 7163 "ssublt z12.h, z13.b, z6.b"); 7164 COMPARE_MACRO(Ssublt(z12.VnS(), z13.VnH(), z6.VnH()), 7165 "ssublt z12.s, z13.h, z6.h"); 7166 COMPARE_MACRO(Uabdlb(z1.VnD(), z26.VnS(), z12.VnS()), 7167 "uabdlb z1.d, z26.s, z12.s"); 7168 COMPARE_MACRO(Uabdlb(z1.VnH(), z26.VnB(), z12.VnB()), 7169 "uabdlb z1.h, z26.b, z12.b"); 7170 COMPARE_MACRO(Uabdlb(z1.VnS(), z26.VnH(), z12.VnH()), 7171 "uabdlb z1.s, z26.h, z12.h"); 7172 COMPARE_MACRO(Uabdlt(z25.VnD(), z29.VnS(), z14.VnS()), 7173 "uabdlt z25.d, z29.s, z14.s"); 7174 COMPARE_MACRO(Uabdlt(z25.VnH(), z29.VnB(), z14.VnB()), 7175 "uabdlt z25.h, z29.b, z14.b"); 7176 COMPARE_MACRO(Uabdlt(z25.VnS(), z29.VnH(), z14.VnH()), 7177 "uabdlt z25.s, z29.h, z14.h"); 7178 COMPARE_MACRO(Uaddlb(z3.VnD(), z5.VnS(), z2.VnS()), 7179 "uaddlb z3.d, z5.s, z2.s"); 7180 COMPARE_MACRO(Uaddlb(z3.VnH(), z5.VnB(), z2.VnB()), 7181 "uaddlb z3.h, z5.b, z2.b"); 7182 COMPARE_MACRO(Uaddlb(z3.VnS(), z5.VnH(), z2.VnH()), 7183 "uaddlb z3.s, z5.h, z2.h"); 7184 COMPARE_MACRO(Uaddlt(z15.VnD(), z28.VnS(), z20.VnS()), 7185 "uaddlt z15.d, z28.s, z20.s"); 7186 COMPARE_MACRO(Uaddlt(z15.VnH(), z28.VnB(), z20.VnB()), 7187 "uaddlt z15.h, z28.b, z20.b"); 7188 COMPARE_MACRO(Uaddlt(z15.VnS(), z28.VnH(), z20.VnH()), 7189 "uaddlt z15.s, z28.h, z20.h"); 7190 COMPARE_MACRO(Usublb(z25.VnD(), z9.VnS(), z17.VnS()), 7191 "usublb z25.d, z9.s, z17.s"); 7192 COMPARE_MACRO(Usublb(z25.VnH(), z9.VnB(), z17.VnB()), 7193 "usublb z25.h, z9.b, z17.b"); 7194 COMPARE_MACRO(Usublb(z25.VnS(), z9.VnH(), z17.VnH()), 7195 "usublb z25.s, z9.h, z17.h"); 7196 COMPARE_MACRO(Usublt(z5.VnD(), z11.VnS(), z15.VnS()), 7197 "usublt z5.d, z11.s, z15.s"); 7198 COMPARE_MACRO(Usublt(z5.VnH(), z11.VnB(), z15.VnB()), 7199 
"usublt z5.h, z11.b, z15.b"); 7200 COMPARE_MACRO(Usublt(z5.VnS(), z11.VnH(), z15.VnH()), 7201 "usublt z5.s, z11.h, z15.h"); 7202 7203 CLEANUP(); 7204} 7205 7206TEST(sve2_arith_wide) { 7207 SETUP(); 7208 7209 COMPARE_MACRO(Saddwb(z12.VnD(), z8.VnD(), z8.VnS()), 7210 "saddwb z12.d, z8.d, z8.s"); 7211 COMPARE_MACRO(Saddwb(z12.VnH(), z8.VnH(), z8.VnB()), 7212 "saddwb z12.h, z8.h, z8.b"); 7213 COMPARE_MACRO(Saddwb(z12.VnS(), z8.VnS(), z8.VnH()), 7214 "saddwb z12.s, z8.s, z8.h"); 7215 COMPARE_MACRO(Saddwt(z24.VnD(), z0.VnD(), z3.VnS()), 7216 "saddwt z24.d, z0.d, z3.s"); 7217 COMPARE_MACRO(Saddwt(z24.VnH(), z0.VnH(), z3.VnB()), 7218 "saddwt z24.h, z0.h, z3.b"); 7219 COMPARE_MACRO(Saddwt(z24.VnS(), z0.VnS(), z3.VnH()), 7220 "saddwt z24.s, z0.s, z3.h"); 7221 COMPARE_MACRO(Ssubwb(z7.VnD(), z28.VnD(), z11.VnS()), 7222 "ssubwb z7.d, z28.d, z11.s"); 7223 COMPARE_MACRO(Ssubwb(z7.VnH(), z28.VnH(), z11.VnB()), 7224 "ssubwb z7.h, z28.h, z11.b"); 7225 COMPARE_MACRO(Ssubwb(z7.VnS(), z28.VnS(), z11.VnH()), 7226 "ssubwb z7.s, z28.s, z11.h"); 7227 COMPARE_MACRO(Ssubwt(z29.VnD(), z25.VnD(), z20.VnS()), 7228 "ssubwt z29.d, z25.d, z20.s"); 7229 COMPARE_MACRO(Ssubwt(z29.VnH(), z25.VnH(), z20.VnB()), 7230 "ssubwt z29.h, z25.h, z20.b"); 7231 COMPARE_MACRO(Ssubwt(z29.VnS(), z25.VnS(), z20.VnH()), 7232 "ssubwt z29.s, z25.s, z20.h"); 7233 COMPARE_MACRO(Uaddwb(z31.VnD(), z8.VnD(), z25.VnS()), 7234 "uaddwb z31.d, z8.d, z25.s"); 7235 COMPARE_MACRO(Uaddwb(z31.VnH(), z8.VnH(), z25.VnB()), 7236 "uaddwb z31.h, z8.h, z25.b"); 7237 COMPARE_MACRO(Uaddwb(z31.VnS(), z8.VnS(), z25.VnH()), 7238 "uaddwb z31.s, z8.s, z25.h"); 7239 COMPARE_MACRO(Uaddwt(z17.VnD(), z15.VnD(), z2.VnS()), 7240 "uaddwt z17.d, z15.d, z2.s"); 7241 COMPARE_MACRO(Uaddwt(z17.VnH(), z15.VnH(), z2.VnB()), 7242 "uaddwt z17.h, z15.h, z2.b"); 7243 COMPARE_MACRO(Uaddwt(z17.VnS(), z15.VnS(), z2.VnH()), 7244 "uaddwt z17.s, z15.s, z2.h"); 7245 COMPARE_MACRO(Usubwb(z10.VnD(), z13.VnD(), z20.VnS()), 7246 "usubwb z10.d, z13.d, z20.s"); 7247 
COMPARE_MACRO(Usubwb(z10.VnH(), z13.VnH(), z20.VnB()), 7248 "usubwb z10.h, z13.h, z20.b"); 7249 COMPARE_MACRO(Usubwb(z10.VnS(), z13.VnS(), z20.VnH()), 7250 "usubwb z10.s, z13.s, z20.h"); 7251 COMPARE_MACRO(Usubwt(z15.VnD(), z8.VnD(), z23.VnS()), 7252 "usubwt z15.d, z8.d, z23.s"); 7253 COMPARE_MACRO(Usubwt(z15.VnH(), z8.VnH(), z23.VnB()), 7254 "usubwt z15.h, z8.h, z23.b"); 7255 COMPARE_MACRO(Usubwt(z15.VnS(), z8.VnS(), z23.VnH()), 7256 "usubwt z15.s, z8.s, z23.h"); 7257 7258 CLEANUP(); 7259} 7260 7261TEST(sve2_shift_long) { 7262 SETUP(); 7263 7264 COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 0), "sshllb z2.h, z20.b, #0"); 7265 COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 1), "sshllb z2.h, z20.b, #1"); 7266 COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 5), "sshllb z2.h, z20.b, #5"); 7267 COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 7), "sshllb z2.h, z20.b, #7"); 7268 COMPARE_MACRO(Sshllb(z2.VnS(), z20.VnH(), 0), "sshllb z2.s, z20.h, #0"); 7269 COMPARE_MACRO(Sshllb(z2.VnS(), z20.VnH(), 15), "sshllb z2.s, z20.h, #15"); 7270 COMPARE_MACRO(Sshllb(z2.VnD(), z20.VnS(), 0), "sshllb z2.d, z20.s, #0"); 7271 COMPARE_MACRO(Sshllb(z2.VnD(), z20.VnS(), 31), "sshllb z2.d, z20.s, #31"); 7272 COMPARE_MACRO(Sshllt(z27.VnH(), z8.VnB(), 0), "sshllt z27.h, z8.b, #0"); 7273 COMPARE_MACRO(Sshllt(z27.VnH(), z8.VnB(), 7), "sshllt z27.h, z8.b, #7"); 7274 COMPARE_MACRO(Sshllt(z27.VnS(), z8.VnH(), 0), "sshllt z27.s, z8.h, #0"); 7275 COMPARE_MACRO(Sshllt(z27.VnS(), z8.VnH(), 15), "sshllt z27.s, z8.h, #15"); 7276 COMPARE_MACRO(Sshllt(z27.VnD(), z8.VnS(), 0), "sshllt z27.d, z8.s, #0"); 7277 COMPARE_MACRO(Sshllt(z27.VnD(), z8.VnS(), 31), "sshllt z27.d, z8.s, #31"); 7278 COMPARE_MACRO(Ushllb(z8.VnH(), z31.VnB(), 0), "ushllb z8.h, z31.b, #0"); 7279 COMPARE_MACRO(Ushllb(z8.VnH(), z31.VnB(), 7), "ushllb z8.h, z31.b, #7"); 7280 COMPARE_MACRO(Ushllb(z8.VnS(), z31.VnH(), 0), "ushllb z8.s, z31.h, #0"); 7281 COMPARE_MACRO(Ushllb(z8.VnS(), z31.VnH(), 15), "ushllb z8.s, z31.h, #15"); 7282 COMPARE_MACRO(Ushllb(z8.VnD(), 
z31.VnS(), 0), "ushllb z8.d, z31.s, #0"); 7283 COMPARE_MACRO(Ushllb(z8.VnD(), z31.VnS(), 31), "ushllb z8.d, z31.s, #31"); 7284 COMPARE_MACRO(Ushllt(z3.VnH(), z21.VnB(), 0), "ushllt z3.h, z21.b, #0"); 7285 COMPARE_MACRO(Ushllt(z3.VnH(), z21.VnB(), 7), "ushllt z3.h, z21.b, #7"); 7286 COMPARE_MACRO(Ushllt(z3.VnS(), z21.VnH(), 0), "ushllt z3.s, z21.h, #0"); 7287 COMPARE_MACRO(Ushllt(z3.VnS(), z21.VnH(), 15), "ushllt z3.s, z21.h, #15"); 7288 COMPARE_MACRO(Ushllt(z3.VnD(), z21.VnS(), 0), "ushllt z3.d, z21.s, #0"); 7289 COMPARE_MACRO(Ushllt(z3.VnD(), z21.VnS(), 31), "ushllt z3.d, z21.s, #31"); 7290 7291 CLEANUP(); 7292} 7293 7294TEST(sve2_shift_narrow) { 7295 SETUP(); 7296 7297 COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 1), "shrnb z7.b, z4.h, #1"); 7298 COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 2), "shrnb z7.b, z4.h, #2"); 7299 COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 5), "shrnb z7.b, z4.h, #5"); 7300 COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 8), "shrnb z7.b, z4.h, #8"); 7301 COMPARE_MACRO(Shrnb(z7.VnH(), z4.VnS(), 1), "shrnb z7.h, z4.s, #1"); 7302 COMPARE_MACRO(Shrnb(z7.VnH(), z4.VnS(), 16), "shrnb z7.h, z4.s, #16"); 7303 COMPARE_MACRO(Shrnb(z7.VnS(), z4.VnD(), 1), "shrnb z7.s, z4.d, #1"); 7304 COMPARE_MACRO(Shrnb(z7.VnS(), z4.VnD(), 32), "shrnb z7.s, z4.d, #32"); 7305 COMPARE_MACRO(Shrnt(z21.VnB(), z29.VnH(), 1), "shrnt z21.b, z29.h, #1"); 7306 COMPARE_MACRO(Shrnt(z21.VnB(), z29.VnH(), 8), "shrnt z21.b, z29.h, #8"); 7307 COMPARE_MACRO(Shrnt(z21.VnH(), z29.VnS(), 1), "shrnt z21.h, z29.s, #1"); 7308 COMPARE_MACRO(Shrnt(z21.VnH(), z29.VnS(), 16), "shrnt z21.h, z29.s, #16"); 7309 COMPARE_MACRO(Shrnt(z21.VnS(), z29.VnD(), 1), "shrnt z21.s, z29.d, #1"); 7310 COMPARE_MACRO(Shrnt(z21.VnS(), z29.VnD(), 32), "shrnt z21.s, z29.d, #32"); 7311 7312 COMPARE_MACRO(Rshrnb(z5.VnB(), z1.VnH(), 1), "rshrnb z5.b, z1.h, #1"); 7313 COMPARE_MACRO(Rshrnb(z5.VnB(), z1.VnH(), 8), "rshrnb z5.b, z1.h, #8"); 7314 COMPARE_MACRO(Rshrnb(z5.VnH(), z1.VnS(), 16), "rshrnb z5.h, z1.s, #16"); 7315 
COMPARE_MACRO(Rshrnb(z5.VnS(), z1.VnD(), 32), "rshrnb z5.s, z1.d, #32"); 7316 COMPARE_MACRO(Rshrnt(z5.VnB(), z1.VnH(), 8), "rshrnt z5.b, z1.h, #8"); 7317 COMPARE_MACRO(Rshrnt(z5.VnH(), z1.VnS(), 16), "rshrnt z5.h, z1.s, #16"); 7318 COMPARE_MACRO(Rshrnt(z5.VnS(), z1.VnD(), 32), "rshrnt z5.s, z1.d, #32"); 7319 7320 COMPARE_MACRO(Sqrshrnb(z1.VnB(), z1.VnH(), 1), "sqrshrnb z1.b, z1.h, #1"); 7321 COMPARE_MACRO(Sqrshrnb(z1.VnB(), z1.VnH(), 8), "sqrshrnb z1.b, z1.h, #8"); 7322 COMPARE_MACRO(Sqrshrnb(z1.VnH(), z1.VnS(), 16), "sqrshrnb z1.h, z1.s, #16"); 7323 COMPARE_MACRO(Sqrshrnb(z1.VnS(), z1.VnD(), 32), "sqrshrnb z1.s, z1.d, #32"); 7324 COMPARE_MACRO(Sqrshrnt(z24.VnB(), z19.VnH(), 8), "sqrshrnt z24.b, z19.h, #8"); 7325 COMPARE_MACRO(Sqrshrnt(z24.VnH(), z19.VnS(), 16), 7326 "sqrshrnt z24.h, z19.s, #16"); 7327 COMPARE_MACRO(Sqrshrnt(z24.VnS(), z19.VnD(), 32), 7328 "sqrshrnt z24.s, z19.d, #32"); 7329 7330 COMPARE_MACRO(Sqshrnb(z25.VnB(), z1.VnH(), 1), "sqshrnb z25.b, z1.h, #1"); 7331 COMPARE_MACRO(Sqshrnb(z25.VnB(), z1.VnH(), 8), "sqshrnb z25.b, z1.h, #8"); 7332 COMPARE_MACRO(Sqshrnb(z25.VnH(), z1.VnS(), 16), "sqshrnb z25.h, z1.s, #16"); 7333 COMPARE_MACRO(Sqshrnb(z25.VnS(), z1.VnD(), 32), "sqshrnb z25.s, z1.d, #32"); 7334 COMPARE_MACRO(Sqshrnt(z0.VnB(), z25.VnH(), 8), "sqshrnt z0.b, z25.h, #8"); 7335 COMPARE_MACRO(Sqshrnt(z0.VnH(), z25.VnS(), 16), "sqshrnt z0.h, z25.s, #16"); 7336 COMPARE_MACRO(Sqshrnt(z0.VnS(), z25.VnD(), 32), "sqshrnt z0.s, z25.d, #32"); 7337 7338 COMPARE_MACRO(Uqrshrnb(z30.VnB(), z25.VnH(), 1), "uqrshrnb z30.b, z25.h, #1"); 7339 COMPARE_MACRO(Uqrshrnb(z30.VnB(), z25.VnH(), 8), "uqrshrnb z30.b, z25.h, #8"); 7340 COMPARE_MACRO(Uqrshrnb(z30.VnH(), z25.VnS(), 16), 7341 "uqrshrnb z30.h, z25.s, #16"); 7342 COMPARE_MACRO(Uqrshrnb(z30.VnS(), z25.VnD(), 32), 7343 "uqrshrnb z30.s, z25.d, #32"); 7344 COMPARE_MACRO(Uqrshrnt(z3.VnB(), z25.VnH(), 8), "uqrshrnt z3.b, z25.h, #8"); 7345 COMPARE_MACRO(Uqrshrnt(z3.VnH(), z25.VnS(), 16), "uqrshrnt z3.h, z25.s, #16"); 7346 
COMPARE_MACRO(Uqrshrnt(z3.VnS(), z25.VnD(), 32), "uqrshrnt z3.s, z25.d, #32"); 7347 7348 COMPARE_MACRO(Uqshrnb(z17.VnB(), z4.VnH(), 1), "uqshrnb z17.b, z4.h, #1"); 7349 COMPARE_MACRO(Uqshrnb(z17.VnB(), z4.VnH(), 8), "uqshrnb z17.b, z4.h, #8"); 7350 COMPARE_MACRO(Uqshrnb(z17.VnH(), z4.VnS(), 16), "uqshrnb z17.h, z4.s, #16"); 7351 COMPARE_MACRO(Uqshrnb(z17.VnS(), z4.VnD(), 32), "uqshrnb z17.s, z4.d, #32"); 7352 COMPARE_MACRO(Uqshrnt(z28.VnB(), z18.VnH(), 8), "uqshrnt z28.b, z18.h, #8"); 7353 COMPARE_MACRO(Uqshrnt(z28.VnH(), z18.VnS(), 16), "uqshrnt z28.h, z18.s, #16"); 7354 COMPARE_MACRO(Uqshrnt(z28.VnS(), z18.VnD(), 32), "uqshrnt z28.s, z18.d, #32"); 7355 7356 COMPARE_MACRO(Sqrshrunb(z23.VnB(), z28.VnH(), 1), 7357 "sqrshrunb z23.b, z28.h, #1"); 7358 COMPARE_MACRO(Sqrshrunb(z23.VnB(), z28.VnH(), 8), 7359 "sqrshrunb z23.b, z28.h, #8"); 7360 COMPARE_MACRO(Sqrshrunb(z23.VnH(), z28.VnS(), 16), 7361 "sqrshrunb z23.h, z28.s, #16"); 7362 COMPARE_MACRO(Sqrshrunb(z23.VnS(), z28.VnD(), 32), 7363 "sqrshrunb z23.s, z28.d, #32"); 7364 COMPARE_MACRO(Sqrshrunt(z9.VnB(), z15.VnH(), 8), "sqrshrunt z9.b, z15.h, #8"); 7365 COMPARE_MACRO(Sqrshrunt(z9.VnH(), z15.VnS(), 16), 7366 "sqrshrunt z9.h, z15.s, #16"); 7367 COMPARE_MACRO(Sqrshrunt(z9.VnS(), z15.VnD(), 32), 7368 "sqrshrunt z9.s, z15.d, #32"); 7369 7370 COMPARE_MACRO(Sqshrunb(z25.VnB(), z10.VnH(), 1), "sqshrunb z25.b, z10.h, #1"); 7371 COMPARE_MACRO(Sqshrunb(z25.VnB(), z10.VnH(), 8), "sqshrunb z25.b, z10.h, #8"); 7372 COMPARE_MACRO(Sqshrunb(z25.VnH(), z10.VnS(), 16), 7373 "sqshrunb z25.h, z10.s, #16"); 7374 COMPARE_MACRO(Sqshrunb(z25.VnS(), z10.VnD(), 32), 7375 "sqshrunb z25.s, z10.d, #32"); 7376 COMPARE_MACRO(Sqshrunt(z20.VnB(), z3.VnH(), 8), "sqshrunt z20.b, z3.h, #8"); 7377 COMPARE_MACRO(Sqshrunt(z20.VnH(), z3.VnS(), 16), "sqshrunt z20.h, z3.s, #16"); 7378 COMPARE_MACRO(Sqshrunt(z20.VnS(), z3.VnD(), 32), "sqshrunt z20.s, z3.d, #32"); 7379 7380 CLEANUP(); 7381} 7382 7383TEST(sve2_aba_long) { 7384 SETUP(); 7385 7386 
COMPARE(sabalb(z13.VnD(), z20.VnS(), z26.VnS()), 7387 "sabalb z13.d, z20.s, z26.s"); 7388 COMPARE(sabalb(z13.VnH(), z20.VnB(), z26.VnB()), 7389 "sabalb z13.h, z20.b, z26.b"); 7390 COMPARE(sabalb(z13.VnS(), z20.VnH(), z26.VnH()), 7391 "sabalb z13.s, z20.h, z26.h"); 7392 COMPARE(sabalt(z14.VnD(), z19.VnS(), z10.VnS()), 7393 "sabalt z14.d, z19.s, z10.s"); 7394 COMPARE(sabalt(z14.VnH(), z19.VnB(), z10.VnB()), 7395 "sabalt z14.h, z19.b, z10.b"); 7396 COMPARE(sabalt(z14.VnS(), z19.VnH(), z10.VnH()), 7397 "sabalt z14.s, z19.h, z10.h"); 7398 COMPARE(uabalb(z11.VnD(), z25.VnS(), z11.VnS()), 7399 "uabalb z11.d, z25.s, z11.s"); 7400 COMPARE(uabalb(z11.VnH(), z25.VnB(), z11.VnB()), 7401 "uabalb z11.h, z25.b, z11.b"); 7402 COMPARE(uabalb(z11.VnS(), z25.VnH(), z11.VnH()), 7403 "uabalb z11.s, z25.h, z11.h"); 7404 COMPARE(uabalt(z4.VnD(), z2.VnS(), z31.VnS()), "uabalt z4.d, z2.s, z31.s"); 7405 COMPARE(uabalt(z4.VnH(), z2.VnB(), z31.VnB()), "uabalt z4.h, z2.b, z31.b"); 7406 COMPARE(uabalt(z4.VnS(), z2.VnH(), z31.VnH()), "uabalt z4.s, z2.h, z31.h"); 7407 7408 COMPARE_MACRO(Sabalb(z12.VnH(), z12.VnH(), z3.VnB(), z30.VnB()), 7409 "sabalb z12.h, z3.b, z30.b"); 7410 COMPARE_MACRO(Sabalt(z12.VnH(), z12.VnH(), z3.VnB(), z12.VnB()), 7411 "sabalt z12.h, z3.b, z12.b"); 7412 COMPARE_MACRO(Sabalb(z12.VnH(), z12.VnH(), z12.VnB(), z30.VnB()), 7413 "sabalb z12.h, z12.b, z30.b"); 7414 COMPARE_MACRO(Sabalt(z12.VnH(), z12.VnH(), z12.VnB(), z12.VnB()), ""); 7415 COMPARE_MACRO(Sabalb(z12.VnH(), z13.VnH(), z3.VnB(), z30.VnB()), 7416 "movprfx z12, z13\n" 7417 "sabalb z12.h, z3.b, z30.b"); 7418 COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z30.VnB()), 7419 "movprfx z12, z3\n" 7420 "sabalt z12.h, z3.b, z30.b"); 7421 COMPARE_MACRO(Sabalb(z12.VnH(), z30.VnH(), z3.VnB(), z30.VnB()), 7422 "movprfx z12, z30\n" 7423 "sabalb z12.h, z3.b, z30.b"); 7424 COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z3.VnB()), 7425 "mov z12.d, z3.d"); 7426 COMPARE_MACRO(Sabalb(z12.VnH(), z3.VnH(), z12.VnB(), z3.VnB()), 
7427 "mov z31.d, z12.d\n" 7428 "movprfx z12, z3\n" 7429 "sabalb z12.h, z31.b, z3.b"); 7430 COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z12.VnB()), 7431 "mov z31.d, z12.d\n" 7432 "movprfx z12, z3\n" 7433 "sabalt z12.h, z3.b, z31.b"); 7434 7435 COMPARE_MACRO(Uabalt(z12.VnH(), z12.VnH(), z3.VnB(), z30.VnB()), 7436 "uabalt z12.h, z3.b, z30.b"); 7437 COMPARE_MACRO(Uabalb(z12.VnH(), z12.VnH(), z3.VnB(), z12.VnB()), 7438 "uabalb z12.h, z3.b, z12.b"); 7439 COMPARE_MACRO(Uabalt(z12.VnH(), z12.VnH(), z12.VnB(), z30.VnB()), 7440 "uabalt z12.h, z12.b, z30.b"); 7441 COMPARE_MACRO(Uabalb(z12.VnH(), z12.VnH(), z12.VnB(), z12.VnB()), ""); 7442 COMPARE_MACRO(Uabalt(z12.VnH(), z13.VnH(), z3.VnB(), z30.VnB()), 7443 "movprfx z12, z13\n" 7444 "uabalt z12.h, z3.b, z30.b"); 7445 COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z30.VnB()), 7446 "movprfx z12, z3\n" 7447 "uabalb z12.h, z3.b, z30.b"); 7448 COMPARE_MACRO(Uabalt(z12.VnH(), z30.VnH(), z3.VnB(), z30.VnB()), 7449 "movprfx z12, z30\n" 7450 "uabalt z12.h, z3.b, z30.b"); 7451 COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z3.VnB()), 7452 "mov z12.d, z3.d"); 7453 COMPARE_MACRO(Uabalt(z12.VnH(), z3.VnH(), z12.VnB(), z3.VnB()), 7454 "mov z31.d, z12.d\n" 7455 "movprfx z12, z3\n" 7456 "uabalt z12.h, z31.b, z3.b"); 7457 COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z12.VnB()), 7458 "mov z31.d, z12.d\n" 7459 "movprfx z12, z3\n" 7460 "uabalb z12.h, z3.b, z31.b"); 7461 CLEANUP(); 7462} 7463 7464TEST(sve2_add_sub_carry) { 7465 SETUP(); 7466 7467 COMPARE(adclb(z25.VnS(), z17.VnS(), z24.VnS()), "adclb z25.s, z17.s, z24.s"); 7468 COMPARE(adclb(z25.VnD(), z17.VnD(), z24.VnD()), "adclb z25.d, z17.d, z24.d"); 7469 COMPARE(adclt(z0.VnS(), z2.VnS(), z15.VnS()), "adclt z0.s, z2.s, z15.s"); 7470 COMPARE(adclt(z0.VnD(), z2.VnD(), z15.VnD()), "adclt z0.d, z2.d, z15.d"); 7471 COMPARE(sbclb(z17.VnS(), z10.VnS(), z8.VnS()), "sbclb z17.s, z10.s, z8.s"); 7472 COMPARE(sbclb(z17.VnD(), z10.VnD(), z8.VnD()), "sbclb z17.d, z10.d, z8.d"); 
7473 COMPARE(sbclt(z20.VnS(), z0.VnS(), z13.VnS()), "sbclt z20.s, z0.s, z13.s"); 7474 COMPARE(sbclt(z20.VnD(), z0.VnD(), z13.VnD()), "sbclt z20.d, z0.d, z13.d"); 7475 7476 COMPARE_MACRO(Adclb(z25.VnS(), z25.VnS(), z17.VnS(), z24.VnS()), 7477 "adclb z25.s, z17.s, z24.s"); 7478 COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z17.VnS(), z24.VnS()), 7479 "movprfx z25, z20\n" 7480 "adclb z25.s, z17.s, z24.s"); 7481 COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z25.VnS(), z24.VnS()), 7482 "movprfx z31, z20\n" 7483 "adclb z31.s, z25.s, z24.s\n" 7484 "mov z25.d, z31.d"); 7485 COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z24.VnS(), z25.VnS()), 7486 "movprfx z31, z20\n" 7487 "adclb z31.s, z24.s, z25.s\n" 7488 "mov z25.d, z31.d"); 7489 COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z25.VnS(), z25.VnS()), 7490 "movprfx z31, z20\n" 7491 "adclb z31.s, z25.s, z25.s\n" 7492 "mov z25.d, z31.d"); 7493 COMPARE_MACRO(Adclt(z25.VnS(), z20.VnS(), z25.VnS(), z25.VnS()), 7494 "movprfx z31, z20\n" 7495 "adclt z31.s, z25.s, z25.s\n" 7496 "mov z25.d, z31.d"); 7497 7498 COMPARE_MACRO(Sbclb(z30.VnS(), z30.VnS(), z7.VnS(), z29.VnS()), 7499 "sbclb z30.s, z7.s, z29.s"); 7500 COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z7.VnS(), z29.VnS()), 7501 "movprfx z30, z2\n" 7502 "sbclb z30.s, z7.s, z29.s"); 7503 COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z30.VnS(), z29.VnS()), 7504 "movprfx z31, z2\n" 7505 "sbclb z31.s, z30.s, z29.s\n" 7506 "mov z30.d, z31.d"); 7507 COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z29.VnS(), z30.VnS()), 7508 "movprfx z31, z2\n" 7509 "sbclb z31.s, z29.s, z30.s\n" 7510 "mov z30.d, z31.d"); 7511 COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z30.VnS(), z30.VnS()), 7512 "movprfx z31, z2\n" 7513 "sbclb z31.s, z30.s, z30.s\n" 7514 "mov z30.d, z31.d"); 7515 COMPARE_MACRO(Sbclt(z30.VnS(), z2.VnS(), z30.VnS(), z30.VnS()), 7516 "movprfx z31, z2\n" 7517 "sbclt z31.s, z30.s, z30.s\n" 7518 "mov z30.d, z31.d"); 7519 CLEANUP(); 7520} 7521 /* sve2_add_sub_high: pairs each Addhnb, Addhnt, Raddhnb, Raddhnt, Rsubhnb, Rsubhnt, Subhnb and Subhnt macro call with the disassembly text expected for it. Every mnemonic is listed with S, B and H destination lanes fed by D, H and S source lanes respectively. */ 7522TEST(sve2_add_sub_high) { 7523 SETUP(); 7524 7525 COMPARE_MACRO(Addhnb(z29.VnS(), 
z19.VnD(), z2.VnD()), 7526 "addhnb z29.s, z19.d, z2.d"); 7527 COMPARE_MACRO(Addhnb(z29.VnB(), z19.VnH(), z2.VnH()), 7528 "addhnb z29.b, z19.h, z2.h"); 7529 COMPARE_MACRO(Addhnb(z29.VnH(), z19.VnS(), z2.VnS()), 7530 "addhnb z29.h, z19.s, z2.s"); 7531 COMPARE_MACRO(Addhnt(z8.VnS(), z12.VnD(), z6.VnD()), 7532 "addhnt z8.s, z12.d, z6.d"); 7533 COMPARE_MACRO(Addhnt(z8.VnB(), z12.VnH(), z6.VnH()), 7534 "addhnt z8.b, z12.h, z6.h"); 7535 COMPARE_MACRO(Addhnt(z8.VnH(), z12.VnS(), z6.VnS()), 7536 "addhnt z8.h, z12.s, z6.s"); 7537 COMPARE_MACRO(Raddhnb(z0.VnS(), z11.VnD(), z10.VnD()), 7538 "raddhnb z0.s, z11.d, z10.d"); 7539 COMPARE_MACRO(Raddhnb(z0.VnB(), z11.VnH(), z10.VnH()), 7540 "raddhnb z0.b, z11.h, z10.h"); 7541 COMPARE_MACRO(Raddhnb(z0.VnH(), z11.VnS(), z10.VnS()), 7542 "raddhnb z0.h, z11.s, z10.s"); 7543 COMPARE_MACRO(Raddhnt(z23.VnS(), z27.VnD(), z9.VnD()), 7544 "raddhnt z23.s, z27.d, z9.d"); 7545 COMPARE_MACRO(Raddhnt(z23.VnB(), z27.VnH(), z9.VnH()), 7546 "raddhnt z23.b, z27.h, z9.h"); 7547 COMPARE_MACRO(Raddhnt(z23.VnH(), z27.VnS(), z9.VnS()), 7548 "raddhnt z23.h, z27.s, z9.s"); 7549 COMPARE_MACRO(Rsubhnb(z30.VnS(), z29.VnD(), z11.VnD()), 7550 "rsubhnb z30.s, z29.d, z11.d"); 7551 COMPARE_MACRO(Rsubhnb(z30.VnB(), z29.VnH(), z11.VnH()), 7552 "rsubhnb z30.b, z29.h, z11.h"); 7553 COMPARE_MACRO(Rsubhnb(z30.VnH(), z29.VnS(), z11.VnS()), 7554 "rsubhnb z30.h, z29.s, z11.s"); 7555 COMPARE_MACRO(Rsubhnt(z25.VnS(), z7.VnD(), z18.VnD()), 7556 "rsubhnt z25.s, z7.d, z18.d"); 7557 COMPARE_MACRO(Rsubhnt(z25.VnB(), z7.VnH(), z18.VnH()), 7558 "rsubhnt z25.b, z7.h, z18.h"); 7559 COMPARE_MACRO(Rsubhnt(z25.VnH(), z7.VnS(), z18.VnS()), 7560 "rsubhnt z25.h, z7.s, z18.s"); 7561 COMPARE_MACRO(Subhnb(z31.VnS(), z31.VnD(), z7.VnD()), 7562 "subhnb z31.s, z31.d, z7.d"); 7563 COMPARE_MACRO(Subhnb(z31.VnB(), z31.VnH(), z7.VnH()), 7564 "subhnb z31.b, z31.h, z7.h"); 7565 COMPARE_MACRO(Subhnb(z31.VnH(), z31.VnS(), z7.VnS()), 7566 "subhnb z31.h, z31.s, z7.s"); 7567 COMPARE_MACRO(Subhnt(z31.VnS(), 
z22.VnD(), z27.VnD()), 7568 "subhnt z31.s, z22.d, z27.d"); 7569 COMPARE_MACRO(Subhnt(z31.VnB(), z22.VnH(), z27.VnH()), 7570 "subhnt z31.b, z22.h, z27.h"); 7571 COMPARE_MACRO(Subhnt(z31.VnH(), z22.VnS(), z27.VnS()), 7572 "subhnt z31.h, z22.s, z27.s"); 7573 7574 CLEANUP(); 7575} 7576 /* sve2_complex_addition: expected text for Cadd and Sqcadd with rotations 90 and 270. The closing cases of each group cover a destination that differs from the first source (a movprfx is expected) and a destination that is also the second source (the expected text first copies it to z31). NOTE(review): Sqcadd has no S-lane #90 case although Cadd does - confirm whether that omission is intentional. */ 7577TEST(sve2_complex_addition) { 7578 SETUP(); 7579 7580 COMPARE_MACRO(Cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90), 7581 "cadd z5.b, z5.b, z12.b, #90"); 7582 COMPARE_MACRO(Cadd(z5.VnD(), z5.VnD(), z12.VnD(), 90), 7583 "cadd z5.d, z5.d, z12.d, #90"); 7584 COMPARE_MACRO(Cadd(z5.VnH(), z5.VnH(), z12.VnH(), 90), 7585 "cadd z5.h, z5.h, z12.h, #90"); 7586 COMPARE_MACRO(Cadd(z5.VnS(), z5.VnS(), z12.VnS(), 90), 7587 "cadd z5.s, z5.s, z12.s, #90"); 7588 COMPARE_MACRO(Cadd(z5.VnB(), z5.VnB(), z12.VnB(), 270), 7589 "cadd z5.b, z5.b, z12.b, #270"); 7590 COMPARE_MACRO(Cadd(z5.VnD(), z5.VnD(), z12.VnD(), 270), 7591 "cadd z5.d, z5.d, z12.d, #270"); 7592 COMPARE_MACRO(Cadd(z5.VnH(), z5.VnH(), z12.VnH(), 270), 7593 "cadd z5.h, z5.h, z12.h, #270"); 7594 COMPARE_MACRO(Cadd(z5.VnS(), z5.VnS(), z12.VnS(), 270), 7595 "cadd z5.s, z5.s, z12.s, #270"); 7596 COMPARE_MACRO(Cadd(z5.VnS(), z6.VnS(), z12.VnS(), 270), 7597 "movprfx z5, z6\n" 7598 "cadd z5.s, z5.s, z12.s, #270"); 7599 COMPARE_MACRO(Cadd(z5.VnS(), z6.VnS(), z5.VnS(), 270), 7600 "mov z31.d, z5.d\n" 7601 "movprfx z5, z6\n" 7602 "cadd z5.s, z5.s, z31.s, #270"); 7603 7604 COMPARE_MACRO(Sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90), 7605 "sqcadd z20.b, z20.b, z23.b, #90"); 7606 COMPARE_MACRO(Sqcadd(z20.VnD(), z20.VnD(), z23.VnD(), 90), 7607 "sqcadd z20.d, z20.d, z23.d, #90"); 7608 COMPARE_MACRO(Sqcadd(z20.VnH(), z20.VnH(), z23.VnH(), 90), 7609 "sqcadd z20.h, z20.h, z23.h, #90"); 7610 COMPARE_MACRO(Sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 270), 7611 "sqcadd z20.b, z20.b, z23.b, #270"); 7612 COMPARE_MACRO(Sqcadd(z20.VnD(), z20.VnD(), z23.VnD(), 270), 7613 "sqcadd z20.d, z20.d, z23.d, #270"); 7614 COMPARE_MACRO(Sqcadd(z20.VnH(), z20.VnH(), z23.VnH(), 270), 7615 
"sqcadd z20.h, z20.h, z23.h, #270"); 7616 COMPARE_MACRO(Sqcadd(z20.VnS(), z20.VnS(), z23.VnS(), 270), 7617 "sqcadd z20.s, z20.s, z23.s, #270"); 7618 COMPARE_MACRO(Sqcadd(z20.VnH(), z21.VnH(), z23.VnH(), 270), 7619 "movprfx z20, z21\n" 7620 "sqcadd z20.h, z20.h, z23.h, #270"); 7621 COMPARE_MACRO(Sqcadd(z20.VnH(), z21.VnH(), z20.VnH(), 270), 7622 "mov z31.d, z20.d\n" 7623 "movprfx z20, z21\n" 7624 "sqcadd z20.h, z20.h, z31.h, #270"); 7625 7626 CLEANUP(); 7627} 7628 /* sve2_bit_permute: expected text for Bdep, Bext and Bgrp, each at B, D, H and S lane sizes. */ 7629TEST(sve2_bit_permute) { 7630 SETUP(); 7631 7632 COMPARE_MACRO(Bdep(z18.VnB(), z10.VnB(), z0.VnB()), 7633 "bdep z18.b, z10.b, z0.b"); 7634 COMPARE_MACRO(Bdep(z18.VnD(), z10.VnD(), z0.VnD()), 7635 "bdep z18.d, z10.d, z0.d"); 7636 COMPARE_MACRO(Bdep(z18.VnH(), z10.VnH(), z0.VnH()), 7637 "bdep z18.h, z10.h, z0.h"); 7638 COMPARE_MACRO(Bdep(z18.VnS(), z10.VnS(), z0.VnS()), 7639 "bdep z18.s, z10.s, z0.s"); 7640 COMPARE_MACRO(Bext(z6.VnB(), z2.VnB(), z5.VnB()), "bext z6.b, z2.b, z5.b"); 7641 COMPARE_MACRO(Bext(z6.VnD(), z2.VnD(), z5.VnD()), "bext z6.d, z2.d, z5.d"); 7642 COMPARE_MACRO(Bext(z6.VnH(), z2.VnH(), z5.VnH()), "bext z6.h, z2.h, z5.h"); 7643 COMPARE_MACRO(Bext(z6.VnS(), z2.VnS(), z5.VnS()), "bext z6.s, z2.s, z5.s"); 7644 COMPARE_MACRO(Bgrp(z24.VnB(), z9.VnB(), z5.VnB()), "bgrp z24.b, z9.b, z5.b"); 7645 COMPARE_MACRO(Bgrp(z24.VnD(), z9.VnD(), z5.VnD()), "bgrp z24.d, z9.d, z5.d"); 7646 COMPARE_MACRO(Bgrp(z24.VnH(), z9.VnH(), z5.VnH()), "bgrp z24.h, z9.h, z5.h"); 7647 COMPARE_MACRO(Bgrp(z24.VnS(), z9.VnS(), z5.VnS()), "bgrp z24.s, z9.s, z5.s"); 7648 7649 CLEANUP(); 7650} 7651 /* sve2_integer_multiply_long_vector: expected text for the sqdmullb, sqdmullt, pmullb, pmullt, smullb, smullt, umullb and umullt assembler calls, including the indexed sqdmullb and sqdmullt forms. The Q-lane pmullb case is left commented out. */ 7652TEST(sve2_integer_multiply_long_vector) { 7653 SETUP(); 7654 7655 COMPARE(sqdmullb(z1.VnD(), z31.VnS(), z21.VnS()), 7656 "sqdmullb z1.d, z31.s, z21.s"); 7657 COMPARE(sqdmullb(z2.VnH(), z30.VnB(), z22.VnB()), 7658 "sqdmullb z2.h, z30.b, z22.b"); 7659 COMPARE(sqdmullb(z3.VnS(), z29.VnH(), z23.VnH()), 7660 "sqdmullb z3.s, z29.h, z23.h"); 7661 COMPARE(sqdmullb(z1.VnS(), z27.VnH(), z3.VnH(), 7), 7662 "sqdmullb z1.s, z27.h, 
z3.h[7]"); 7663 COMPARE(sqdmullb(z27.VnD(), z16.VnS(), z5.VnS(), 3), 7664 "sqdmullb z27.d, z16.s, z5.s[3]"); 7665 7666 COMPARE(sqdmullt(z2.VnD(), z1.VnS(), z5.VnS()), "sqdmullt z2.d, z1.s, z5.s"); 7667 COMPARE(sqdmullt(z12.VnH(), z11.VnB(), z15.VnB()), 7668 "sqdmullt z12.h, z11.b, z15.b"); 7669 COMPARE(sqdmullt(z20.VnS(), z21.VnH(), z25.VnH()), 7670 "sqdmullt z20.s, z21.h, z25.h"); 7671 COMPARE(sqdmullt(z23.VnS(), z28.VnH(), z2.VnH(), 0), 7672 "sqdmullt z23.s, z28.h, z2.h[0]"); 7673 COMPARE(sqdmullt(z7.VnD(), z4.VnS(), z0.VnS(), 0), 7674 "sqdmullt z7.d, z4.s, z0.s[0]"); 7675 7676 // Feature `SVEPmull128` is not supported. 7677 // COMPARE(pmullb(z12.VnQ(), z21.VnD(), z12.VnD()), 7678 // "pmullb z12.q, z21.d, z12.d"); 7679 COMPARE(pmullb(z12.VnH(), z21.VnB(), z12.VnB()), 7680 "pmullb z12.h, z21.b, z12.b"); 7681 COMPARE(pmullt(z31.VnD(), z30.VnS(), z26.VnS()), 7682 "pmullt z31.d, z30.s, z26.s"); 7683 7684 COMPARE(smullb(z10.VnD(), z4.VnS(), z4.VnS()), "smullb z10.d, z4.s, z4.s"); 7685 COMPARE(smullb(z11.VnH(), z14.VnB(), z14.VnB()), 7686 "smullb z11.h, z14.b, z14.b"); 7687 COMPARE(smullb(z12.VnS(), z24.VnH(), z24.VnH()), 7688 "smullb z12.s, z24.h, z24.h"); 7689 7690 COMPARE(smullt(z31.VnD(), z26.VnS(), z5.VnS()), "smullt z31.d, z26.s, z5.s"); 7691 COMPARE(smullt(z21.VnH(), z16.VnB(), z5.VnB()), "smullt z21.h, z16.b, z5.b"); 7692 COMPARE(smullt(z11.VnS(), z6.VnH(), z5.VnH()), "smullt z11.s, z6.h, z5.h"); 7693 7694 COMPARE(umullb(z12.VnD(), z5.VnS(), z2.VnS()), "umullb z12.d, z5.s, z2.s"); 7695 COMPARE(umullb(z12.VnH(), z15.VnB(), z12.VnB()), 7696 "umullb z12.h, z15.b, z12.b"); 7697 COMPARE(umullb(z12.VnS(), z25.VnH(), z22.VnH()), 7698 "umullb z12.s, z25.h, z22.h"); 7699 7700 COMPARE(umullt(z24.VnD(), z6.VnS(), z6.VnS()), "umullt z24.d, z6.s, z6.s"); 7701 COMPARE(umullt(z24.VnH(), z7.VnB(), z16.VnB()), "umullt z24.h, z7.b, z16.b"); 7702 COMPARE(umullt(z24.VnS(), z8.VnH(), z26.VnH()), "umullt z24.s, z8.h, z26.h"); 7703 7704 CLEANUP(); 7705} 7706 /* sve2_xar: expected text for Xar with shift 1 and with shift 8, 16, 32 or 64 on B, H, S and D lanes respectively. The closing cases cover a destination that is the second source (the expected text swaps the sources), a destination distinct from both sources (movprfx expected) and all three registers equal. */ 7707TEST(sve2_xar) { 7708 
SETUP(); 7709 7710 COMPARE_MACRO(Xar(z16.VnB(), z16.VnB(), z13.VnB(), 1), 7711 "xar z16.b, z16.b, z13.b, #1"); 7712 COMPARE_MACRO(Xar(z16.VnB(), z16.VnB(), z13.VnB(), 8), 7713 "xar z16.b, z16.b, z13.b, #8"); 7714 COMPARE_MACRO(Xar(z16.VnH(), z16.VnH(), z13.VnH(), 1), 7715 "xar z16.h, z16.h, z13.h, #1"); 7716 COMPARE_MACRO(Xar(z16.VnH(), z16.VnH(), z13.VnH(), 16), 7717 "xar z16.h, z16.h, z13.h, #16"); 7718 COMPARE_MACRO(Xar(z16.VnS(), z16.VnS(), z13.VnS(), 1), 7719 "xar z16.s, z16.s, z13.s, #1"); 7720 COMPARE_MACRO(Xar(z16.VnS(), z16.VnS(), z13.VnS(), 32), 7721 "xar z16.s, z16.s, z13.s, #32"); 7722 COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z13.VnD(), 1), 7723 "xar z16.d, z16.d, z13.d, #1"); 7724 COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z13.VnD(), 64), 7725 "xar z16.d, z16.d, z13.d, #64"); 7726 7727 COMPARE_MACRO(Xar(z16.VnD(), z13.VnD(), z16.VnD(), 64), 7728 "xar z16.d, z16.d, z13.d, #64"); 7729 COMPARE_MACRO(Xar(z16.VnD(), z13.VnD(), z12.VnD(), 64), 7730 "movprfx z16, z13\n" 7731 "xar z16.d, z16.d, z12.d, #64"); 7732 COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z16.VnD(), 64), 7733 "xar z16.d, z16.d, z16.d, #64"); 7734 7735 CLEANUP(); 7736} 7737 /* sve2_histogram: expected text for Histcnt (S and D lanes, zeroing predicate) and Histseg (B lanes). */ 7738TEST(sve2_histogram) { 7739 SETUP(); 7740 7741 COMPARE_MACRO(Histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS()), 7742 "histcnt z24.s, p6/z, z3.s, z10.s"); 7743 COMPARE_MACRO(Histcnt(z24.VnD(), p6.Zeroing(), z3.VnD(), z10.VnD()), 7744 "histcnt z24.d, p6/z, z3.d, z10.d"); 7745 COMPARE_MACRO(Histseg(z22.VnB(), z14.VnB(), z8.VnB()), 7746 "histseg z22.b, z14.b, z8.b"); 7747 7748 CLEANUP(); 7749} 7750 /* sve2_table: expected text for the Tbl form that takes two table registers, and for Tbx. NOTE(review): the first three Tbl cases pass z1 and z2 as the table registers but expect {z3, z4} in the text, whereas the z31, z0 case expects {z31, z0}. Since 3 == (1 | 2) and 31 == (31 | 0), the expectation may be pinning how the encoder combines the two register fields - confirm against the Tbl assembler before changing either side. */ 7751TEST(sve2_table) { 7752 SETUP(); 7753 7754 COMPARE_MACRO(Tbl(z17.VnB(), z1.VnB(), z2.VnB(), z22.VnB()), 7755 "tbl z17.b, {z3.b, z4.b}, z22.b"); 7756 COMPARE_MACRO(Tbl(z17.VnD(), z1.VnD(), z2.VnD(), z22.VnD()), 7757 "tbl z17.d, {z3.d, z4.d}, z22.d"); 7758 COMPARE_MACRO(Tbl(z17.VnH(), z1.VnH(), z2.VnH(), z22.VnH()), 7759 "tbl z17.h, {z3.h, z4.h}, z22.h"); 7760 COMPARE_MACRO(Tbl(z17.VnS(), z31.VnS(), z0.VnS(), z22.VnS()), 7761 "tbl 
z17.s, {z31.s, z0.s}, z22.s"); 7762 COMPARE_MACRO(Tbx(z22.VnB(), z15.VnB(), z19.VnB()), 7763 "tbx z22.b, z15.b, z19.b"); 7764 COMPARE_MACRO(Tbx(z22.VnD(), z15.VnD(), z19.VnD()), 7765 "tbx z22.d, z15.d, z19.d"); 7766 COMPARE_MACRO(Tbx(z22.VnH(), z15.VnH(), z19.VnH()), 7767 "tbx z22.h, z15.h, z19.h"); 7768 COMPARE_MACRO(Tbx(z22.VnS(), z15.VnS(), z19.VnS()), 7769 "tbx z22.s, z15.s, z19.s"); 7770 7771 CLEANUP(); 7772} 7773 /* sve2_cdot: expected text for Cdot in vector form (rotations 0, 90, 180 and 270) and in indexed form. Each form is followed by cases where the destination differs from the accumulator and may also be one of the multiplicands; their expected text uses movprfx and, when aliased, z31 as a temporary. */ 7774TEST(sve2_cdot) { 7775 SETUP(); 7776 7777 COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 0), 7778 "cdot z7.s, z4.b, z10.b, #0"); 7779 COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 0), 7780 "cdot z7.d, z4.h, z10.h, #0"); 7781 COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 90), 7782 "cdot z7.s, z4.b, z10.b, #90"); 7783 COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 90), 7784 "cdot z7.d, z4.h, z10.h, #90"); 7785 COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 180), 7786 "cdot z7.s, z4.b, z10.b, #180"); 7787 COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 180), 7788 "cdot z7.d, z4.h, z10.h, #180"); 7789 COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 270), 7790 "cdot z7.s, z4.b, z10.b, #270"); 7791 COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 270), 7792 "cdot z7.d, z4.h, z10.h, #270"); 7793 7794 COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0), 7795 "movprfx z0, z1\n" 7796 "cdot z0.s, z2.b, z3.b, #0"); 7797 COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 0), 7798 "mov z31.d, z0.d\n" 7799 "movprfx z0, z1\n" 7800 "cdot z0.s, z31.b, z3.b, #0"); 7801 COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 0), 7802 "mov z31.d, z0.d\n" 7803 "movprfx z0, z1\n" 7804 "cdot z0.s, z2.b, z31.b, #0"); 7805 COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 0), 7806 "mov z31.d, z0.d\n" 7807 "movprfx z0, z1\n" 7808 "cdot z0.s, z31.b, z31.b, #0"); 7809 7810 COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 0, 0), 7811 
"cdot z18.s, z26.b, z7.b[0], #0"); 7812 COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 1, 0), 7813 "cdot z18.s, z26.b, z7.b[1], #0"); 7814 COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 0), 7815 "cdot z18.s, z26.b, z7.b[2], #0"); 7816 COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 3, 0), 7817 "cdot z18.s, z26.b, z7.b[3], #0"); 7818 COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 90), 7819 "cdot z18.s, z26.b, z7.b[2], #90"); 7820 COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 180), 7821 "cdot z18.s, z26.b, z7.b[2], #180"); 7822 COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 270), 7823 "cdot z18.s, z26.b, z7.b[2], #270"); 7824 COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 0, 0), 7825 "cdot z5.d, z7.h, z1.h[0], #0"); 7826 COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 0), 7827 "cdot z5.d, z7.h, z1.h[1], #0"); 7828 COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 90), 7829 "cdot z5.d, z7.h, z1.h[1], #90"); 7830 COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 180), 7831 "cdot z5.d, z7.h, z1.h[1], #180"); 7832 COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 270), 7833 "cdot z5.d, z7.h, z1.h[1], #270"); 7834 7835 COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0, 0), 7836 "movprfx z0, z1\n" 7837 "cdot z0.s, z2.b, z3.b[0], #0"); 7838 COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 1, 90), 7839 "movprfx z31, z1\n" 7840 "cdot z31.s, z0.b, z3.b[1], #90\n" 7841 "mov z0.d, z31.d"); 7842 COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 2, 180), 7843 "movprfx z31, z1\n" 7844 "cdot z31.s, z2.b, z0.b[2], #180\n" 7845 "mov z0.d, z31.d"); 7846 COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 3, 270), 7847 "movprfx z31, z1\n" 7848 "cdot z31.s, z0.b, z0.b[3], #270\n" 7849 "mov z0.d, z31.d"); 7850 7851 CLEANUP(); 7852} 7853 /* sve2_ldnt1: expected text for the Ldnt1b, Ldnt1h, Ldnt1sb, Ldnt1sh, Ldnt1w, Ldnt1d and Ldnt1sw macros with a zeroing predicate and an SVEMemOperand built from a Z register and an X register; S-lane cases come first, then D-lane cases. */ 7854TEST(sve2_ldnt1) { 7855 SETUP(); 7856 7857 
COMPARE_MACRO(Ldnt1b(z24.VnS(), p4.Zeroing(), SVEMemOperand(z18.VnS(), x13)), 7858 "ldnt1b {z24.s}, p4/z, [z18.s, x13]"); 7859 COMPARE_MACRO(Ldnt1h(z3.VnS(), p4.Zeroing(), SVEMemOperand(z15.VnS(), x14)), 7860 "ldnt1h {z3.s}, p4/z, [z15.s, x14]"); 7861 COMPARE_MACRO(Ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11)), 7862 "ldnt1sb {z7.s}, p3/z, [z18.s, x11]"); 7863 COMPARE_MACRO(Ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19)), 7864 "ldnt1sh {z17.s}, p5/z, [z31.s, x19]"); 7865 COMPARE_MACRO(Ldnt1w(z18.VnS(), p5.Zeroing(), SVEMemOperand(z9.VnS(), x17)), 7866 "ldnt1w {z18.s}, p5/z, [z9.s, x17]"); 7867 7868 COMPARE_MACRO(Ldnt1b(z27.VnD(), p4.Zeroing(), SVEMemOperand(z27.VnD(), x24)), 7869 "ldnt1b {z27.d}, p4/z, [z27.d, x24]"); 7870 COMPARE_MACRO(Ldnt1d(z25.VnD(), p0.Zeroing(), SVEMemOperand(z10.VnD(), x0)), 7871 "ldnt1d {z25.d}, p0/z, [z10.d, x0]"); 7872 COMPARE_MACRO(Ldnt1h(z16.VnD(), p2.Zeroing(), SVEMemOperand(z10.VnD(), x9)), 7873 "ldnt1h {z16.d}, p2/z, [z10.d, x9]"); 7874 COMPARE_MACRO(Ldnt1sb(z25.VnD(), p0.Zeroing(), SVEMemOperand(z0.VnD(), x3)), 7875 "ldnt1sb {z25.d}, p0/z, [z0.d, x3]"); 7876 COMPARE_MACRO(Ldnt1sh(z4.VnD(), p1.Zeroing(), SVEMemOperand(z31.VnD(), x4)), 7877 "ldnt1sh {z4.d}, p1/z, [z31.d, x4]"); 7878 COMPARE_MACRO(Ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10)), 7879 "ldnt1sw {z3.d}, p7/z, [z1.d, x10]"); 7880 COMPARE_MACRO(Ldnt1w(z17.VnD(), p5.Zeroing(), SVEMemOperand(z8.VnD(), x12)), 7881 "ldnt1w {z17.d}, p5/z, [z8.d, x12]"); 7882 7883 CLEANUP(); 7884} 7885 /* sve2_stnt1: expected text for the Stnt1b, Stnt1d, Stnt1h and Stnt1w macros with an SVEMemOperand built from a Z register and an X register, on S and D lanes. */ 7886TEST(sve2_stnt1) { 7887 SETUP(); 7888 7889 COMPARE_MACRO(Stnt1b(z29.VnD(), p7, SVEMemOperand(z29.VnD(), x21)), 7890 "stnt1b {z29.d}, p7, [z29.d, x21]"); 7891 COMPARE_MACRO(Stnt1d(z19.VnD(), p4, SVEMemOperand(z3.VnD(), x16)), 7892 "stnt1d {z19.d}, p4, [z3.d, x16]"); 7893 COMPARE_MACRO(Stnt1h(z11.VnS(), p3, SVEMemOperand(z2.VnS(), x16)), 7894 "stnt1h {z11.s}, p3, [z2.s, x16]"); 7895 COMPARE_MACRO(Stnt1h(z3.VnD(), p3, SVEMemOperand(z10.VnD(), x16)), 
7896 "stnt1h {z3.d}, p3, [z10.d, x16]"); 7897 COMPARE_MACRO(Stnt1w(z11.VnS(), p4, SVEMemOperand(z14.VnS(), x15)), 7898 "stnt1w {z11.s}, p4, [z14.s, x15]"); 7899 COMPARE_MACRO(Stnt1w(z7.VnD(), p0, SVEMemOperand(z11.VnD(), x10)), 7900 "stnt1w {z7.d}, p0, [z11.d, x10]"); 7901 7902 CLEANUP(); 7903} 7904 /* sve2_bitwise_ternary: expected text for Bcax, Bsl, Bsl1n, Bsl2n, Eor3 and Nbsl on D lanes with the destination equal to the first source, then Nbsl cases where the destination differs from the first source (movprfx expected, through z31 when the destination is also another source). */ 7905TEST(sve2_bitwise_ternary) { 7906 SETUP(); 7907 7908 COMPARE_MACRO(Bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()), 7909 "bcax z6.d, z6.d, z12.d, z1.d"); 7910 COMPARE_MACRO(Bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()), 7911 "bsl z21.d, z21.d, z2.d, z2.d"); 7912 COMPARE_MACRO(Bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()), 7913 "bsl1n z18.d, z18.d, z8.d, z7.d"); 7914 COMPARE_MACRO(Bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()), 7915 "bsl2n z7.d, z7.d, z3.d, z19.d"); 7916 COMPARE_MACRO(Eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()), 7917 "eor3 z10.d, z10.d, z24.d, z23.d"); 7918 COMPARE_MACRO(Nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()), 7919 "nbsl z17.d, z17.d, z21.d, z27.d"); 7920 7921 COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z21.VnD(), z27.VnD()), 7922 "movprfx z17, z18\n" 7923 "nbsl z17.d, z17.d, z21.d, z27.d"); 7924 COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z17.VnD(), z27.VnD()), 7925 "movprfx z31, z18\n" 7926 "nbsl z31.d, z31.d, z17.d, z27.d\n" 7927 "mov z17.d, z31.d"); 7928 COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z21.VnD(), z17.VnD()), 7929 "movprfx z31, z18\n" 7930 "nbsl z31.d, z31.d, z21.d, z17.d\n" 7931 "mov z17.d, z31.d"); 7932 COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z17.VnD(), z17.VnD()), 7933 "movprfx z31, z18\n" 7934 "nbsl z31.d, z31.d, z17.d, z17.d\n" 7935 "mov z17.d, z31.d"); 7936 7937 CLEANUP(); 7938} 7939 /* sve2_int_compare_scalars: expected text for Whilege, Whilegt, Whilehi and Whilehs with W and X scalar register pairs, and for Whilerw and Whilewr with X registers, including xzr as an operand. */ 7940TEST(sve2_int_compare_scalars) { 7941 SETUP(); 7942 7943 COMPARE_MACRO(Whilege(p0.VnB(), w20, w29), "whilege p0.b, w20, w29"); 7944 COMPARE_MACRO(Whilege(p0.VnB(), x20, x29), "whilege p0.b, x20, x29"); 7945 COMPARE_MACRO(Whilege(p0.VnD(), w20, w29), "whilege p0.d, w20, w29"); 7946 COMPARE_MACRO(Whilege(p0.VnD(), x20, x29), "whilege p0.d, 
x20, x29"); 7947 COMPARE_MACRO(Whilege(p0.VnH(), w20, w29), "whilege p0.h, w20, w29"); 7948 COMPARE_MACRO(Whilege(p0.VnH(), x20, x29), "whilege p0.h, x20, x29"); 7949 COMPARE_MACRO(Whilege(p0.VnS(), w20, w29), "whilege p0.s, w20, w29"); 7950 COMPARE_MACRO(Whilege(p0.VnS(), x20, x29), "whilege p0.s, x20, x29"); 7951 COMPARE_MACRO(Whilegt(p11.VnB(), w24, w3), "whilegt p11.b, w24, w3"); 7952 COMPARE_MACRO(Whilegt(p11.VnD(), w24, w3), "whilegt p11.d, w24, w3"); 7953 COMPARE_MACRO(Whilegt(p11.VnH(), x24, x3), "whilegt p11.h, x24, x3"); 7954 COMPARE_MACRO(Whilegt(p11.VnS(), x24, x3), "whilegt p11.s, x24, x3"); 7955 COMPARE_MACRO(Whilehi(p2.VnB(), x20, x8), "whilehi p2.b, x20, x8"); 7956 COMPARE_MACRO(Whilehi(p2.VnD(), x20, x8), "whilehi p2.d, x20, x8"); 7957 COMPARE_MACRO(Whilehi(p2.VnH(), w20, w8), "whilehi p2.h, w20, w8"); 7958 COMPARE_MACRO(Whilehi(p2.VnS(), w20, w8), "whilehi p2.s, w20, w8"); 7959 COMPARE_MACRO(Whilehs(p4.VnB(), w22, w9), "whilehs p4.b, w22, w9"); 7960 COMPARE_MACRO(Whilehs(p4.VnD(), x22, x9), "whilehs p4.d, x22, x9"); 7961 COMPARE_MACRO(Whilehs(p4.VnH(), w22, w9), "whilehs p4.h, w22, w9"); 7962 COMPARE_MACRO(Whilehs(p4.VnS(), x22, x9), "whilehs p4.s, x22, x9"); 7963 7964 COMPARE_MACRO(Whilerw(p7.VnB(), x25, x27), "whilerw p7.b, x25, x27"); 7965 COMPARE_MACRO(Whilerw(p7.VnD(), x25, x28), "whilerw p7.d, x25, x28"); 7966 COMPARE_MACRO(Whilerw(p7.VnH(), x25, x29), "whilerw p7.h, x25, x29"); 7967 COMPARE_MACRO(Whilerw(p7.VnS(), x25, x30), "whilerw p7.s, x25, x30"); 7968 COMPARE_MACRO(Whilerw(p7.VnS(), x25, xzr), "whilerw p7.s, x25, xzr"); 7969 COMPARE_MACRO(Whilewr(p8.VnB(), x14, x14), "whilewr p8.b, x14, x14"); 7970 COMPARE_MACRO(Whilewr(p8.VnD(), x14, x13), "whilewr p8.d, x14, x13"); 7971 COMPARE_MACRO(Whilewr(p8.VnH(), x14, x12), "whilewr p8.h, x14, x12"); 7972 COMPARE_MACRO(Whilewr(p8.VnS(), x14, x11), "whilewr p8.s, x14, x11"); 7973 COMPARE_MACRO(Whilewr(p8.VnS(), xzr, x11), "whilewr p8.s, xzr, x11"); 7974 7975 CLEANUP(); 7976} 7977 
/* sve2_splice: expected text for Splice. The register-list form is expected when the destination differs from the first source, and the four-operand form when both are the same register. */ 7978TEST(sve2_splice) { 7979 SETUP(); 7980 7981 COMPARE_MACRO(Splice(z31.VnB(), p0, z21.VnB(), z22.VnB()), 7982 "splice z31.b, p0, {z21.b, z22.b}"); 7983 COMPARE_MACRO(Splice(z31.VnD(), p0, z21.VnD(), z22.VnD()), 7984 "splice z31.d, p0, {z21.d, z22.d}"); 7985 COMPARE_MACRO(Splice(z31.VnH(), p0, z21.VnH(), z22.VnH()), 7986 "splice z31.h, p0, {z21.h, z22.h}"); 7987 COMPARE_MACRO(Splice(z31.VnS(), p0, z31.VnS(), z0.VnS()), 7988 "splice z31.s, p0, z31.s, z0.s"); 7989 COMPARE_MACRO(Splice(z30.VnS(), p0, z31.VnS(), z0.VnS()), 7990 "splice z30.s, p0, {z31.s, z0.s}"); 7991 7992 CLEANUP(); 7993} 7994 /* sve2_mul_index: expected text for the indexed Mul macro on H, S and D lanes. */ 7995TEST(sve2_mul_index) { 7996 SETUP(); 7997 7998 COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z7.VnH(), 0), 7999 "mul z18.h, z5.h, z7.h[0]"); 8000 COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 2), 8001 "mul z18.h, z5.h, z2.h[2]"); 8002 COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 6), 8003 "mul z18.h, z5.h, z2.h[6]"); 8004 COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 7), 8005 "mul z18.h, z5.h, z2.h[7]"); 8006 COMPARE_MACRO(Mul(z8.VnS(), z15.VnS(), z7.VnS(), 0), 8007 "mul z8.s, z15.s, z7.s[0]"); 8008 COMPARE_MACRO(Mul(z8.VnS(), z15.VnS(), z0.VnS(), 3), 8009 "mul z8.s, z15.s, z0.s[3]"); 8010 COMPARE_MACRO(Mul(z8.VnD(), z15.VnD(), z15.VnD(), 0), 8011 "mul z8.d, z15.d, z15.d[0]"); 8012 COMPARE_MACRO(Mul(z8.VnD(), z15.VnD(), z0.VnD(), 1), 8013 "mul z8.d, z15.d, z0.d[1]"); 8014 8015 CLEANUP(); 8016} 8017 /* sve2_mla_mls_index: expected text for the indexed Mla and Mls macros. Each mnemonic is listed first with the destination equal to the accumulator, then with a separate accumulator (movprfx expected, through z31 when the destination is also a multiplicand). */ 8018TEST(sve2_mla_mls_index) { 8019 SETUP(); 8020 8021 COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z0.VnH(), 0), 8022 "mla z1.h, z9.h, z0.h[0]"); 8023 COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z1.VnH(), 2), 8024 "mla z1.h, z9.h, z1.h[2]"); 8025 COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z2.VnH(), 6), 8026 "mla z1.h, z9.h, z2.h[6]"); 8027 COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z3.VnH(), 7), 8028 "mla z1.h, z9.h, z3.h[7]"); 8029 COMPARE_MACRO(Mla(z10.VnS(), z10.VnS(), z22.VnS(), z7.VnS(), 0), 8030 "mla z10.s, z22.s, z7.s[0]"); 8031 
COMPARE_MACRO(Mla(z10.VnS(), z10.VnS(), z22.VnS(), z0.VnS(), 3), 8032 "mla z10.s, z22.s, z0.s[3]"); 8033 COMPARE_MACRO(Mla(z4.VnD(), z4.VnD(), z0.VnD(), z15.VnD(), 0), 8034 "mla z4.d, z0.d, z15.d[0]"); 8035 COMPARE_MACRO(Mla(z4.VnD(), z4.VnD(), z0.VnD(), z0.VnD(), 1), 8036 "mla z4.d, z0.d, z0.d[1]"); 8037 8038 COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z0.VnH(), z1.VnH(), 0), 8039 "movprfx z4, z5\n" 8040 "mla z4.h, z0.h, z1.h[0]"); 8041 COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z4.VnH(), z1.VnH(), 0), 8042 "movprfx z31, z5\n" 8043 "mla z31.h, z4.h, z1.h[0]\n" 8044 "mov z4.d, z31.d"); 8045 COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z0.VnH(), z4.VnH(), 0), 8046 "movprfx z31, z5\n" 8047 "mla z31.h, z0.h, z4.h[0]\n" 8048 "mov z4.d, z31.d"); 8049 COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z4.VnH(), z4.VnH(), 0), 8050 "movprfx z31, z5\n" 8051 "mla z31.h, z4.h, z4.h[0]\n" 8052 "mov z4.d, z31.d"); 8053 8054 COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z0.VnH(), 0), 8055 "mls z1.h, z9.h, z0.h[0]"); 8056 COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z1.VnH(), 2), 8057 "mls z1.h, z9.h, z1.h[2]"); 8058 COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z2.VnH(), 6), 8059 "mls z1.h, z9.h, z2.h[6]"); 8060 COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z3.VnH(), 7), 8061 "mls z1.h, z9.h, z3.h[7]"); 8062 COMPARE_MACRO(Mls(z10.VnS(), z10.VnS(), z22.VnS(), z7.VnS(), 0), 8063 "mls z10.s, z22.s, z7.s[0]"); 8064 COMPARE_MACRO(Mls(z10.VnS(), z10.VnS(), z22.VnS(), z0.VnS(), 3), 8065 "mls z10.s, z22.s, z0.s[3]"); 8066 COMPARE_MACRO(Mls(z4.VnD(), z4.VnD(), z0.VnD(), z15.VnD(), 0), 8067 "mls z4.d, z0.d, z15.d[0]"); 8068 COMPARE_MACRO(Mls(z4.VnD(), z4.VnD(), z0.VnD(), z0.VnD(), 1), 8069 "mls z4.d, z0.d, z0.d[1]"); 8070 8071 COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z0.VnS(), z1.VnS(), 0), 8072 "movprfx z4, z5\n" 8073 "mls z4.s, z0.s, z1.s[0]"); 8074 COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z4.VnS(), z1.VnS(), 0), 8075 "movprfx z31, z5\n" 8076 "mls z31.s, z4.s, z1.s[0]\n" 8077 "mov z4.d, z31.d"); 8078 
COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z0.VnS(), z4.VnS(), 0), 8079 "movprfx z31, z5\n" 8080 "mls z31.s, z0.s, z4.s[0]\n" 8081 "mov z4.d, z31.d"); 8082 COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z4.VnS(), z4.VnS(), 0), 8083 "movprfx z31, z5\n" 8084 "mls z31.s, z4.s, z4.s[0]\n" 8085 "mov z4.d, z31.d"); 8086 8087 CLEANUP(); 8088} 8089 /* sve2_mla_long: expected text for the Smlalb, Smlalt, Smlslb, Smlslt, Umlalb, Umlalt, Umlslb and Umlslt macros at D, H and S destination lanes with the destination equal to the accumulator, then Smlalt cases with a separate accumulator (movprfx expected, through z31 when the destination is also a multiplicand). */ 8090TEST(sve2_mla_long) { 8091 SETUP(); 8092 8093 COMPARE_MACRO(Smlalb(z1.VnD(), z1.VnD(), z3.VnS(), z23.VnS()), 8094 "smlalb z1.d, z3.s, z23.s"); 8095 COMPARE_MACRO(Smlalb(z1.VnH(), z1.VnH(), z3.VnB(), z23.VnB()), 8096 "smlalb z1.h, z3.b, z23.b"); 8097 COMPARE_MACRO(Smlalb(z1.VnS(), z1.VnS(), z3.VnH(), z23.VnH()), 8098 "smlalb z1.s, z3.h, z23.h"); 8099 COMPARE_MACRO(Smlalt(z31.VnD(), z31.VnD(), z24.VnS(), z29.VnS()), 8100 "smlalt z31.d, z24.s, z29.s"); 8101 COMPARE_MACRO(Smlalt(z31.VnH(), z31.VnH(), z24.VnB(), z29.VnB()), 8102 "smlalt z31.h, z24.b, z29.b"); 8103 COMPARE_MACRO(Smlalt(z31.VnS(), z31.VnS(), z24.VnH(), z29.VnH()), 8104 "smlalt z31.s, z24.h, z29.h"); 8105 COMPARE_MACRO(Smlslb(z5.VnD(), z5.VnD(), z26.VnS(), z27.VnS()), 8106 "smlslb z5.d, z26.s, z27.s"); 8107 COMPARE_MACRO(Smlslb(z5.VnH(), z5.VnH(), z26.VnB(), z27.VnB()), 8108 "smlslb z5.h, z26.b, z27.b"); 8109 COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z26.VnH(), z27.VnH()), 8110 "smlslb z5.s, z26.h, z27.h"); 8111 COMPARE_MACRO(Smlslt(z23.VnD(), z23.VnD(), z24.VnS(), z25.VnS()), 8112 "smlslt z23.d, z24.s, z25.s"); 8113 COMPARE_MACRO(Smlslt(z23.VnH(), z23.VnH(), z24.VnB(), z25.VnB()), 8114 "smlslt z23.h, z24.b, z25.b"); 8115 COMPARE_MACRO(Smlslt(z23.VnS(), z23.VnS(), z24.VnH(), z25.VnH()), 8116 "smlslt z23.s, z24.h, z25.h"); 8117 COMPARE_MACRO(Umlalb(z31.VnD(), z31.VnD(), z9.VnS(), z21.VnS()), 8118 "umlalb z31.d, z9.s, z21.s"); 8119 COMPARE_MACRO(Umlalb(z31.VnH(), z31.VnH(), z9.VnB(), z21.VnB()), 8120 "umlalb z31.h, z9.b, z21.b"); 8121 COMPARE_MACRO(Umlalb(z31.VnS(), z31.VnS(), z9.VnH(), z21.VnH()), 8122 "umlalb z31.s, z9.h, z21.h"); 8123 COMPARE_MACRO(Umlalt(z11.VnD(), z11.VnD(), 
z5.VnS(), z22.VnS()), 8124 "umlalt z11.d, z5.s, z22.s"); 8125 COMPARE_MACRO(Umlalt(z11.VnH(), z11.VnH(), z5.VnB(), z22.VnB()), 8126 "umlalt z11.h, z5.b, z22.b"); 8127 COMPARE_MACRO(Umlalt(z11.VnS(), z11.VnS(), z5.VnH(), z22.VnH()), 8128 "umlalt z11.s, z5.h, z22.h"); 8129 COMPARE_MACRO(Umlslb(z28.VnD(), z28.VnD(), z13.VnS(), z9.VnS()), 8130 "umlslb z28.d, z13.s, z9.s"); 8131 COMPARE_MACRO(Umlslb(z28.VnH(), z28.VnH(), z13.VnB(), z9.VnB()), 8132 "umlslb z28.h, z13.b, z9.b"); 8133 COMPARE_MACRO(Umlslb(z28.VnS(), z28.VnS(), z13.VnH(), z9.VnH()), 8134 "umlslb z28.s, z13.h, z9.h"); 8135 COMPARE_MACRO(Umlslt(z9.VnD(), z9.VnD(), z12.VnS(), z30.VnS()), 8136 "umlslt z9.d, z12.s, z30.s"); 8137 COMPARE_MACRO(Umlslt(z9.VnH(), z9.VnH(), z12.VnB(), z30.VnB()), 8138 "umlslt z9.h, z12.b, z30.b"); 8139 COMPARE_MACRO(Umlslt(z9.VnS(), z9.VnS(), z12.VnH(), z30.VnH()), 8140 "umlslt z9.s, z12.h, z30.h"); 8141 8142 COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z2.VnS(), z3.VnS()), 8143 "movprfx z0, z1\n" 8144 "smlalt z0.d, z2.s, z3.s"); 8145 COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z0.VnS(), z3.VnS()), 8146 "movprfx z31, z1\n" 8147 "smlalt z31.d, z0.s, z3.s\n" 8148 "mov z0.d, z31.d"); 8149 COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z2.VnS(), z0.VnS()), 8150 "movprfx z31, z1\n" 8151 "smlalt z31.d, z2.s, z0.s\n" 8152 "mov z0.d, z31.d"); 8153 COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z0.VnS(), z0.VnS()), 8154 "movprfx z31, z1\n" 8155 "smlalt z31.d, z0.s, z0.s\n" 8156 "mov z0.d, z31.d"); 8157 8158 CLEANUP(); 8159} 8160 /* sve2_complex_integer_multiply_add: expected text for sqrdcmlah and cmla in vector and indexed forms. The lowercase assembler calls go through COMPARE; the Sqrdcmlah and Cmla macros go through COMPARE_MACRO and include cases where the destination differs from the accumulator or is also a multiplicand. */ 8161TEST(sve2_complex_integer_multiply_add) { 8162 SETUP(); 8163 8164 COMPARE(sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0), 8165 "sqrdcmlah z31.b, z15.b, z20.b, #0"); 8166 COMPARE(sqrdcmlah(z31.VnD(), z15.VnD(), z20.VnD(), 90), 8167 "sqrdcmlah z31.d, z15.d, z20.d, #90"); 8168 COMPARE(sqrdcmlah(z31.VnH(), z15.VnH(), z20.VnH(), 180), 8169 "sqrdcmlah z31.h, z15.h, z20.h, #180"); 8170 COMPARE(sqrdcmlah(z31.VnS(), z15.VnS(), z20.VnS(), 270), 8171 "sqrdcmlah z31.s, z15.s, z20.s, #270"); 8172 
8173 COMPARE(sqrdcmlah(z14.VnS(), z11.VnS(), z8.VnS(), 1, 0), 8174 "sqrdcmlah z14.s, z11.s, z8.s[1], #0"); 8175 COMPARE(sqrdcmlah(z31.VnH(), z2.VnH(), z3.VnH(), 2, 180), 8176 "sqrdcmlah z31.h, z2.h, z3.h[2], #180"); 8177 8178 COMPARE_MACRO(Sqrdcmlah(z0.VnB(), z1.VnB(), z0.VnB(), z3.VnB(), 0), 8179 "mov z31.d, z0.d\n" 8180 "movprfx z0, z1\n" 8181 "sqrdcmlah z0.b, z31.b, z3.b, #0"); 8182 COMPARE_MACRO(Sqrdcmlah(z0.VnH(), z1.VnH(), z2.VnH(), z0.VnH(), 90), 8183 "mov z31.d, z0.d\n" 8184 "movprfx z0, z1\n" 8185 "sqrdcmlah z0.h, z2.h, z31.h, #90"); 8186 COMPARE_MACRO(Sqrdcmlah(z0.VnS(), z1.VnS(), z0.VnS(), z0.VnS(), 0, 180), 8187 "movprfx z31, z1\n" 8188 "sqrdcmlah z31.s, z0.s, z0.s[0], #180\n" 8189 "mov z0.d, z31.d"); 8190 COMPARE_MACRO(Sqrdcmlah(z5.VnH(), z1.VnH(), z2.VnH(), z5.VnH(), 3, 270), 8191 "movprfx z31, z1\n" 8192 "sqrdcmlah z31.h, z2.h, z5.h[3], #270\n" 8193 "mov z5.d, z31.d"); 8194 COMPARE_MACRO(Sqrdcmlah(z3.VnH(), z3.VnH(), z3.VnH(), z3.VnH(), 2, 90), 8195 "sqrdcmlah z3.h, z3.h, z3.h[2], #90"); 8196 8197 COMPARE(cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0), "cmla z19.b, z7.b, z2.b, #0"); 8198 COMPARE(cmla(z19.VnD(), z7.VnD(), z2.VnD(), 90), 8199 "cmla z19.d, z7.d, z2.d, #90"); 8200 COMPARE(cmla(z19.VnH(), z7.VnH(), z2.VnH(), 180), 8201 "cmla z19.h, z7.h, z2.h, #180"); 8202 COMPARE(cmla(z19.VnS(), z7.VnS(), z2.VnS(), 270), 8203 "cmla z19.s, z7.s, z2.s, #270"); 8204 8205 COMPARE_MACRO(Cmla(z0.VnB(), z1.VnB(), z0.VnB(), z3.VnB(), 0), 8206 "mov z31.d, z0.d\n" 8207 "movprfx z0, z1\n" 8208 "cmla z0.b, z31.b, z3.b, #0"); 8209 COMPARE_MACRO(Cmla(z0.VnH(), z1.VnH(), z2.VnH(), z0.VnH(), 90), 8210 "mov z31.d, z0.d\n" 8211 "movprfx z0, z1\n" 8212 "cmla z0.h, z2.h, z31.h, #90"); 8213 COMPARE_MACRO(Cmla(z0.VnS(), z1.VnS(), z0.VnS(), z0.VnS(), 180), 8214 "mov z31.d, z0.d\n" 8215 "movprfx z0, z1\n" 8216 "cmla z0.s, z31.s, z31.s, #180"); 8217 COMPARE_MACRO(Cmla(z0.VnD(), z1.VnD(), z2.VnD(), z0.VnD(), 270), 8218 "mov z31.d, z0.d\n" 8219 "movprfx z0, z1\n" 8220 "cmla z0.d, z2.d, 
z31.d, #270"); 8221 8222 COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z0.VnS(), 1, 0), 8223 "cmla z17.s, z29.s, z0.s[1], #0"); 8224 COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z1.VnS(), 0, 0), 8225 "cmla z17.s, z29.s, z1.s[0], #0"); 8226 COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z8.VnS(), 1, 90), 8227 "cmla z17.s, z29.s, z8.s[1], #90"); 8228 COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z15.VnS(), 0, 180), 8229 "cmla z17.s, z29.s, z15.s[0], #180"); 8230 COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z0.VnH(), 3, 0), 8231 "cmla z18.h, z22.h, z0.h[3], #0"); 8232 COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z1.VnH(), 2, 0), 8233 "cmla z18.h, z22.h, z1.h[2], #0"); 8234 COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z4.VnH(), 1, 270), 8235 "cmla z18.h, z22.h, z4.h[1], #270"); 8236 COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z7.VnH(), 0, 90), 8237 "cmla z18.h, z22.h, z7.h[0], #90"); 8238 8239 COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z22.VnH(), z7.VnH(), 0, 90), 8240 "movprfx z1, z19\n" 8241 "cmla z1.h, z22.h, z7.h[0], #90"); 8242 COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z1.VnH(), z7.VnH(), 0, 90), 8243 "movprfx z31, z19\n" 8244 "cmla z31.h, z1.h, z7.h[0], #90\n" 8245 "mov z1.d, z31.d"); 8246 COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z22.VnH(), z1.VnH(), 0, 90), 8247 "movprfx z31, z19\n" 8248 "cmla z31.h, z22.h, z1.h[0], #90\n" 8249 "mov z1.d, z31.d"); 8250 COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z1.VnH(), z1.VnH(), 0, 90), 8251 "movprfx z31, z19\n" 8252 "cmla z31.h, z1.h, z1.h[0], #90\n" 8253 "mov z1.d, z31.d"); 8254 8255 CLEANUP(); 8256} 8257 /* sve2_saturating_multiply_add_long: expected text for the sqdmlalb, sqdmlalt, sqdmlslb and sqdmlslt assembler calls in vector and indexed forms, then the matching Sqdmlalb, Sqdmlalt, Sqdmlslb and Sqdmlslt macros with a separate accumulator (movprfx expected, through z31 when the destination is also a multiplicand). */ 8258TEST(sve2_saturating_multiply_add_long) { 8259 SETUP(); 8260 8261 COMPARE(sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()), 8262 "sqdmlalb z6.d, z19.s, z25.s"); 8263 COMPARE(sqdmlalb(z6.VnH(), z19.VnB(), z25.VnB()), 8264 "sqdmlalb z6.h, z19.b, z25.b"); 8265 COMPARE(sqdmlalb(z6.VnS(), z19.VnH(), z25.VnH()), 8266 "sqdmlalb z6.s, z19.h, z25.h"); 8267 COMPARE(sqdmlalt(z11.VnD(), z0.VnS(), 
z10.VnS()), 8268 "sqdmlalt z11.d, z0.s, z10.s"); 8269 COMPARE(sqdmlalt(z11.VnH(), z0.VnB(), z10.VnB()), 8270 "sqdmlalt z11.h, z0.b, z10.b"); 8271 COMPARE(sqdmlalt(z11.VnS(), z0.VnH(), z10.VnH()), 8272 "sqdmlalt z11.s, z0.h, z10.h"); 8273 COMPARE(sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()), 8274 "sqdmlslb z16.d, z26.s, z25.s"); 8275 COMPARE(sqdmlslb(z16.VnH(), z26.VnB(), z25.VnB()), 8276 "sqdmlslb z16.h, z26.b, z25.b"); 8277 COMPARE(sqdmlslb(z16.VnS(), z26.VnH(), z25.VnH()), 8278 "sqdmlslb z16.s, z26.h, z25.h"); 8279 COMPARE(sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()), 8280 "sqdmlslt z21.d, z23.s, z9.s"); 8281 COMPARE(sqdmlslt(z21.VnH(), z23.VnB(), z9.VnB()), 8282 "sqdmlslt z21.h, z23.b, z9.b"); 8283 COMPARE(sqdmlslt(z21.VnS(), z23.VnH(), z9.VnH()), 8284 "sqdmlslt z21.s, z23.h, z9.h"); 8285 8286 COMPARE(sqdmlalb(z1.VnD(), z27.VnS(), z11.VnS(), 0), 8287 "sqdmlalb z1.d, z27.s, z11.s[0]"); 8288 COMPARE(sqdmlalb(z30.VnS(), z6.VnH(), z3.VnH(), 0), 8289 "sqdmlalb z30.s, z6.h, z3.h[0]"); 8290 COMPARE(sqdmlalt(z30.VnD(), z25.VnS(), z15.VnS(), 1), 8291 "sqdmlalt z30.d, z25.s, z15.s[1]"); 8292 COMPARE(sqdmlalt(z10.VnS(), z1.VnH(), z1.VnH(), 3), 8293 "sqdmlalt z10.s, z1.h, z1.h[3]"); 8294 COMPARE(sqdmlslb(z15.VnD(), z27.VnS(), z15.VnS(), 2), 8295 "sqdmlslb z15.d, z27.s, z15.s[2]"); 8296 COMPARE(sqdmlslb(z5.VnS(), z5.VnH(), z7.VnH(), 6), 8297 "sqdmlslb z5.s, z5.h, z7.h[6]"); 8298 COMPARE(sqdmlslt(z21.VnD(), z28.VnS(), z13.VnS(), 3), 8299 "sqdmlslt z21.d, z28.s, z13.s[3]"); 8300 COMPARE(sqdmlslt(z5.VnS(), z3.VnH(), z1.VnH(), 7), 8301 "sqdmlslt z5.s, z3.h, z1.h[7]"); 8302 8303 COMPARE_MACRO(Sqdmlalb(z6.VnD(), z16.VnD(), z19.VnS(), z25.VnS()), 8304 "movprfx z6, z16\n" 8305 "sqdmlalb z6.d, z19.s, z25.s"); 8306 COMPARE_MACRO(Sqdmlalt(z4.VnH(), z26.VnH(), z4.VnB(), z24.VnB()), 8307 "movprfx z31, z26\n" 8308 "sqdmlalt z31.h, z4.b, z24.b\n" 8309 "mov z4.d, z31.d"); 8310 COMPARE_MACRO(Sqdmlslb(z2.VnS(), z6.VnS(), z17.VnH(), z2.VnH()), 8311 "movprfx z31, z6\n" 8312 "sqdmlslb z31.s, z17.h, 
z2.h\n" 8313 "mov z2.d, z31.d"); 8314 COMPARE_MACRO(Sqdmlslt(z0.VnD(), z1.VnD(), z0.VnS(), z0.VnS()), 8315 "movprfx z31, z1\n" 8316 "sqdmlslt z31.d, z0.s, z0.s\n" 8317 "mov z0.d, z31.d"); 8318 8319 COMPARE_MACRO(Sqdmlalb(z6.VnD(), z16.VnD(), z9.VnS(), z15.VnS(), 0), 8320 "movprfx z6, z16\n" 8321 "sqdmlalb z6.d, z9.s, z15.s[0]"); 8322 COMPARE_MACRO(Sqdmlalt(z4.VnS(), z6.VnS(), z4.VnH(), z4.VnH(), 3), 8323 "movprfx z31, z6\n" 8324 "sqdmlalt z31.s, z4.h, z4.h[3]\n" 8325 "mov z4.d, z31.d"); 8326 COMPARE_MACRO(Sqdmlslb(z2.VnS(), z16.VnS(), z17.VnH(), z2.VnH(), 6), 8327 "movprfx z31, z16\n" 8328 "sqdmlslb z31.s, z17.h, z2.h[6]\n" 8329 "mov z2.d, z31.d"); 8330 COMPARE_MACRO(Sqdmlslt(z6.VnD(), z1.VnD(), z6.VnS(), z6.VnS(), 2), 8331 "movprfx z31, z1\n" 8332 "sqdmlslt z31.d, z6.s, z6.s[2]\n" 8333 "mov z6.d, z31.d"); 8334 8335 CLEANUP(); 8336} 8337 /* sve2_saturating_multiply_add_interleaved_long: expected text for the sqdmlalbt and sqdmlslbt assembler calls at D, H and S destination lanes, then the Sqdmlalbt and Sqdmlslbt macros with a separate accumulator. NOTE(review): the last case passes z31 as the accumulator and its expected text has no movprfx - presumably because z31 is also the temporary the other cases show; confirm against the macro. */ 8338TEST(sve2_saturating_multiply_add_interleaved_long) { 8339 SETUP(); 8340 8341 COMPARE(sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()), 8342 "sqdmlalbt z23.d, z29.s, z26.s"); 8343 COMPARE(sqdmlalbt(z23.VnH(), z29.VnB(), z26.VnB()), 8344 "sqdmlalbt z23.h, z29.b, z26.b"); 8345 COMPARE(sqdmlalbt(z23.VnS(), z29.VnH(), z26.VnH()), 8346 "sqdmlalbt z23.s, z29.h, z26.h"); 8347 COMPARE(sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()), 8348 "sqdmlslbt z26.d, z23.s, z4.s"); 8349 COMPARE(sqdmlslbt(z26.VnH(), z23.VnB(), z4.VnB()), 8350 "sqdmlslbt z26.h, z23.b, z4.b"); 8351 COMPARE(sqdmlslbt(z26.VnS(), z23.VnH(), z4.VnH()), 8352 "sqdmlslbt z26.s, z23.h, z4.h"); 8353 8354 COMPARE_MACRO(Sqdmlalbt(z29.VnD(), z0.VnD(), z29.VnS(), z26.VnS()), 8355 "movprfx z31, z0\n" 8356 "sqdmlalbt z31.d, z29.s, z26.s\n" 8357 "mov z29.d, z31.d"); 8358 COMPARE_MACRO(Sqdmlalbt(z26.VnH(), z0.VnH(), z29.VnB(), z26.VnB()), 8359 "movprfx z31, z0\n" 8360 "sqdmlalbt z31.h, z29.b, z26.b\n" 8361 "mov z26.d, z31.d"); 8362 COMPARE_MACRO(Sqdmlslbt(z23.VnS(), z31.VnS(), z26.VnH(), z29.VnH()), 8363 "movprfx z23, z31\n" 8364 "sqdmlslbt z23.s, z26.h, z29.h"); 8365 
COMPARE_MACRO(Sqdmlslbt(z4.VnD(), z31.VnD(), z4.VnS(), z4.VnS()), 8366 "sqdmlslbt z31.d, z4.s, z4.s\n" 8367 "mov z4.d, z31.d"); 8368 8369 CLEANUP(); 8370} 8371 /* sve2_floating_multiply_add_long_vector: expected text for the fmlalb, fmlalt, fmlslb and fmlslt assembler calls with an S-lane destination and H-lane sources. */ 8372TEST(sve2_floating_multiply_add_long_vector) { 8373 SETUP(); 8374 8375 COMPARE(fmlalb(z16.VnS(), z18.VnH(), z29.VnH()), 8376 "fmlalb z16.s, z18.h, z29.h"); 8377 COMPARE(fmlalb(z3.VnS(), z8.VnH(), z7.VnH()), "fmlalb z3.s, z8.h, z7.h"); 8378 COMPARE(fmlalt(z18.VnS(), z13.VnH(), z5.VnH()), "fmlalt z18.s, z13.h, z5.h"); 8379 COMPARE(fmlalt(z18.VnS(), z7.VnH(), z16.VnH()), "fmlalt z18.s, z7.h, z16.h"); 8380 COMPARE(fmlslb(z16.VnS(), z10.VnH(), z1.VnH()), "fmlslb z16.s, z10.h, z1.h"); 8381 COMPARE(fmlslb(z25.VnS(), z11.VnH(), z0.VnH()), "fmlslb z25.s, z11.h, z0.h"); 8382 COMPARE(fmlslt(z3.VnS(), z17.VnH(), z14.VnH()), "fmlslt z3.s, z17.h, z14.h"); 8383 COMPARE(fmlslt(z5.VnS(), z1.VnH(), z7.VnH()), "fmlslt z5.s, z1.h, z7.h"); 8384 8385 CLEANUP(); 8386} 8387 8388TEST(sve2_mla_long_index) { 8389 SETUP(); 8390 8391 COMPARE_MACRO(Smlalb(z11.VnD(), z11.VnD(), z29.VnS(), z0.VnS(), 3), 8392 "smlalb z11.d, z29.s, z0.s[3]"); 8393 COMPARE_MACRO(Smlalb(z18.VnS(), z18.VnS(), z17.VnH(), z0.VnH(), 7), 8394 "smlalb z18.s, z17.h, z0.h[7]"); 8395 COMPARE_MACRO(Smlalt(z10.VnD(), z10.VnD(), z30.VnS(), z15.VnS(), 0), 8396 "smlalt z10.d, z30.s, z15.s[0]"); 8397 COMPARE_MACRO(Smlalt(z23.VnS(), z23.VnS(), z31.VnH(), z7.VnH(), 0), 8398 "smlalt z23.s, z31.h, z7.h[0]"); 8399 COMPARE_MACRO(Smlslb(z12.VnD(), z12.VnD(), z23.VnS(), z3.VnS(), 1), 8400 "smlslb z12.d, z23.s, z3.s[1]"); 8401 COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z4.VnH(), z4.VnH(), 2), 8402 "smlslb z5.s, z4.h, z4.h[2]"); 8403 COMPARE_MACRO(Smlslt(z7.VnD(), z7.VnD(), z9.VnS(), z6.VnS(), 3), 8404 "smlslt z7.d, z9.s, z6.s[3]"); 8405 COMPARE_MACRO(Smlslt(z9.VnS(), z9.VnS(), z21.VnH(), z3.VnH(), 4), 8406 "smlslt z9.s, z21.h, z3.h[4]"); 8407 COMPARE_MACRO(Umlalb(z9.VnD(), z9.VnD(), z1.VnS(), z11.VnS(), 0), 8408 "umlalb z9.d, z1.s, z11.s[0]"); 8409 COMPARE_MACRO(Umlalb(z9.VnS(), 
z9.VnS(), z5.VnH(), z1.VnH(), 6), 8410 "umlalb z9.s, z5.h, z1.h[6]"); 8411 COMPARE_MACRO(Umlalt(z6.VnD(), z6.VnD(), z17.VnS(), z14.VnS(), 1), 8412 "umlalt z6.d, z17.s, z14.s[1]"); 8413 COMPARE_MACRO(Umlalt(z9.VnS(), z9.VnS(), z11.VnH(), z3.VnH(), 7), 8414 "umlalt z9.s, z11.h, z3.h[7]"); 8415 COMPARE_MACRO(Umlslb(z12.VnD(), z12.VnD(), z15.VnS(), z9.VnS(), 2), 8416 "umlslb z12.d, z15.s, z9.s[2]"); 8417 COMPARE_MACRO(Umlslb(z14.VnS(), z14.VnS(), z10.VnH(), z2.VnH(), 0), 8418 "umlslb z14.s, z10.h, z2.h[0]"); 8419 COMPARE_MACRO(Umlslt(z12.VnD(), z12.VnD(), z28.VnS(), z8.VnS(), 3), 8420 "umlslt z12.d, z28.s, z8.s[3]"); 8421 COMPARE_MACRO(Umlslt(z24.VnS(), z24.VnS(), z12.VnH(), z6.VnH(), 1), 8422 "umlslt z24.s, z12.h, z6.h[1]"); 8423 8424 COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z6.VnH(), 1), 8425 "movprfx z2, z23\n" 8426 "umlslt z2.s, z12.h, z6.h[1]"); 8427 COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z6.VnH(), 1), 8428 "movprfx z31, z23\n" 8429 "umlslt z31.s, z2.h, z6.h[1]\n" 8430 "mov z2.d, z31.d"); 8431 COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z2.VnH(), 1), 8432 "movprfx z31, z23\n" 8433 "umlslt z31.s, z12.h, z2.h[1]\n" 8434 "mov z2.d, z31.d"); 8435 COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z2.VnH(), 1), 8436 "movprfx z31, z23\n" 8437 "umlslt z31.s, z2.h, z2.h[1]\n" 8438 "mov z2.d, z31.d"); 8439 8440 CLEANUP(); 8441} 8442 8443TEST(sve2_mul_long_index) { 8444 SETUP(); 8445 8446 COMPARE_MACRO(Smullb(z13.VnS(), z31.VnH(), z0.VnH(), 0), 8447 "smullb z13.s, z31.h, z0.h[0]"); 8448 COMPARE_MACRO(Smullb(z8.VnD(), z22.VnS(), z0.VnS(), 0), 8449 "smullb z8.d, z22.s, z0.s[0]"); 8450 COMPARE_MACRO(Smullt(z14.VnS(), z30.VnH(), z7.VnH(), 7), 8451 "smullt z14.s, z30.h, z7.h[7]"); 8452 COMPARE_MACRO(Smullt(z22.VnD(), z28.VnS(), z15.VnS(), 3), 8453 "smullt z22.d, z28.s, z15.s[3]"); 8454 COMPARE_MACRO(Umullb(z24.VnD(), z20.VnS(), z5.VnS(), 1), 8455 "umullb z24.d, z20.s, z5.s[1]"); 8456 COMPARE_MACRO(Umullb(z28.VnS(), z19.VnH(), z3.VnH(), 4), 8457 
"umullb z28.s, z19.h, z3.h[4]"); 8458 COMPARE_MACRO(Umullt(z0.VnD(), z31.VnS(), z8.VnS(), 2), 8459 "umullt z0.d, z31.s, z8.s[2]"); 8460 COMPARE_MACRO(Umullt(z14.VnS(), z20.VnH(), z5.VnH(), 6), 8461 "umullt z14.s, z20.h, z5.h[6]"); 8462 8463 CLEANUP(); 8464} 8465 8466TEST(sve2_sat_double_mul_high) { 8467 SETUP(); 8468 8469 COMPARE_MACRO(Sqdmulh(z18.VnB(), z25.VnB(), z1.VnB()), 8470 "sqdmulh z18.b, z25.b, z1.b"); 8471 COMPARE_MACRO(Sqdmulh(z18.VnD(), z25.VnD(), z1.VnD()), 8472 "sqdmulh z18.d, z25.d, z1.d"); 8473 COMPARE_MACRO(Sqdmulh(z18.VnH(), z25.VnH(), z1.VnH()), 8474 "sqdmulh z18.h, z25.h, z1.h"); 8475 COMPARE_MACRO(Sqdmulh(z18.VnS(), z25.VnS(), z1.VnS()), 8476 "sqdmulh z18.s, z25.s, z1.s"); 8477 COMPARE_MACRO(Sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB()), 8478 "sqrdmulh z21.b, z21.b, z27.b"); 8479 COMPARE_MACRO(Sqrdmulh(z21.VnD(), z21.VnD(), z27.VnD()), 8480 "sqrdmulh z21.d, z21.d, z27.d"); 8481 COMPARE_MACRO(Sqrdmulh(z21.VnH(), z21.VnH(), z27.VnH()), 8482 "sqrdmulh z21.h, z21.h, z27.h"); 8483 COMPARE_MACRO(Sqrdmulh(z21.VnS(), z21.VnS(), z27.VnS()), 8484 "sqrdmulh z21.s, z21.s, z27.s"); 8485 8486 CLEANUP(); 8487} 8488 8489TEST(sve2_flogb) { 8490 SETUP(); 8491 8492 COMPARE_MACRO(Flogb(z15.VnH(), p0.Merging(), z3.VnH()), 8493 "flogb z15.h, p0/m, z3.h"); 8494 COMPARE_MACRO(Flogb(z15.VnS(), p0.Merging(), z3.VnS()), 8495 "flogb z15.s, p0/m, z3.s"); 8496 COMPARE_MACRO(Flogb(z15.VnD(), p0.Merging(), z3.VnD()), 8497 "flogb z15.d, p0/m, z3.d"); 8498 COMPARE_MACRO(Flogb(z15.VnD(), p0.Zeroing(), z3.VnD()), 8499 "movprfx z15.d, p0/z, z15.d\n" 8500 "flogb z15.d, p0/m, z3.d"); 8501 8502 CLEANUP(); 8503} 8504 8505TEST(sve2_fp_pair) { 8506 SETUP(); 8507 8508 COMPARE_MACRO(Faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()), 8509 "faddp z14.d, p1/m, z14.d, z26.d"); 8510 COMPARE_MACRO(Faddp(z14.VnH(), p1.Merging(), z14.VnH(), z26.VnH()), 8511 "faddp z14.h, p1/m, z14.h, z26.h"); 8512 COMPARE_MACRO(Faddp(z14.VnS(), p1.Merging(), z14.VnS(), z26.VnS()), 8513 "faddp z14.s, p1/m, z14.s, 
z26.s"); 8514 COMPARE_MACRO(Fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()), 8515 "fmaxnmp z2.d, p1/m, z2.d, z14.d"); 8516 COMPARE_MACRO(Fmaxnmp(z2.VnH(), p1.Merging(), z2.VnH(), z14.VnH()), 8517 "fmaxnmp z2.h, p1/m, z2.h, z14.h"); 8518 COMPARE_MACRO(Fmaxnmp(z2.VnS(), p1.Merging(), z2.VnS(), z14.VnS()), 8519 "fmaxnmp z2.s, p1/m, z2.s, z14.s"); 8520 COMPARE_MACRO(Fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()), 8521 "fmaxp z22.d, p1/m, z22.d, z3.d"); 8522 COMPARE_MACRO(Fmaxp(z22.VnH(), p1.Merging(), z22.VnH(), z3.VnH()), 8523 "fmaxp z22.h, p1/m, z22.h, z3.h"); 8524 COMPARE_MACRO(Fmaxp(z22.VnS(), p1.Merging(), z22.VnS(), z3.VnS()), 8525 "fmaxp z22.s, p1/m, z22.s, z3.s"); 8526 COMPARE_MACRO(Fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()), 8527 "fminnmp z1.d, p0/m, z1.d, z14.d"); 8528 COMPARE_MACRO(Fminnmp(z1.VnH(), p0.Merging(), z1.VnH(), z14.VnH()), 8529 "fminnmp z1.h, p0/m, z1.h, z14.h"); 8530 COMPARE_MACRO(Fminnmp(z1.VnS(), p0.Merging(), z1.VnS(), z14.VnS()), 8531 "fminnmp z1.s, p0/m, z1.s, z14.s"); 8532 COMPARE_MACRO(Fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()), 8533 "fminp z16.d, p3/m, z16.d, z11.d"); 8534 COMPARE_MACRO(Fminp(z16.VnH(), p3.Merging(), z16.VnH(), z11.VnH()), 8535 "fminp z16.h, p3/m, z16.h, z11.h"); 8536 COMPARE_MACRO(Fminp(z16.VnS(), p3.Merging(), z16.VnS(), z11.VnS()), 8537 "fminp z16.s, p3/m, z16.s, z11.s"); 8538 8539 COMPARE_MACRO(Faddp(z14.VnD(), p1.Merging(), z13.VnD(), z26.VnD()), 8540 "movprfx z14.d, p1/m, z13.d\n" 8541 "faddp z14.d, p1/m, z14.d, z26.d"); 8542 COMPARE_MACRO(Fmaxnmp(z2.VnD(), p1.Merging(), z3.VnD(), z2.VnD()), 8543 "mov z31.d, z2.d\n" 8544 "movprfx z2.d, p1/m, z3.d\n" 8545 "fmaxnmp z2.d, p1/m, z2.d, z31.d"); 8546 COMPARE_MACRO(Fmaxp(z22.VnH(), p1.Merging(), z23.VnH(), z3.VnH()), 8547 "movprfx z22.h, p1/m, z23.h\n" 8548 "fmaxp z22.h, p1/m, z22.h, z3.h"); 8549 COMPARE_MACRO(Fminnmp(z1.VnH(), p0.Merging(), z4.VnH(), z1.VnH()), 8550 "mov z31.d, z1.d\n" 8551 "movprfx z1.h, p0/m, z4.h\n" 8552 "fminnmp z1.h, 
p0/m, z1.h, z31.h"); 8553 COMPARE_MACRO(Fminp(z16.VnS(), p3.Merging(), z11.VnS(), z11.VnS()), 8554 "movprfx z16.s, p3/m, z11.s\n" 8555 "fminp z16.s, p3/m, z16.s, z11.s"); 8556 CLEANUP(); 8557} 8558 8559TEST(sve2_fmlal_fmlsl_index) { 8560 SETUP(); 8561 8562 COMPARE_MACRO(Fmlalb(z16.VnS(), z16.VnS(), z18.VnH(), z2.VnH(), 0), 8563 "fmlalb z16.s, z18.h, z2.h[0]"); 8564 COMPARE_MACRO(Fmlalb(z3.VnS(), z3.VnS(), z8.VnH(), z7.VnH(), 7), 8565 "fmlalb z3.s, z8.h, z7.h[7]"); 8566 COMPARE_MACRO(Fmlalt(z18.VnS(), z18.VnS(), z13.VnH(), z5.VnH(), 6), 8567 "fmlalt z18.s, z13.h, z5.h[6]"); 8568 COMPARE_MACRO(Fmlalt(z18.VnS(), z18.VnS(), z7.VnH(), z6.VnH(), 5), 8569 "fmlalt z18.s, z7.h, z6.h[5]"); 8570 COMPARE_MACRO(Fmlslb(z16.VnS(), z16.VnS(), z10.VnH(), z1.VnH(), 4), 8571 "fmlslb z16.s, z10.h, z1.h[4]"); 8572 COMPARE_MACRO(Fmlslb(z25.VnS(), z25.VnS(), z11.VnH(), z0.VnH(), 3), 8573 "fmlslb z25.s, z11.h, z0.h[3]"); 8574 COMPARE_MACRO(Fmlslt(z3.VnS(), z3.VnS(), z17.VnH(), z4.VnH(), 2), 8575 "fmlslt z3.s, z17.h, z4.h[2]"); 8576 COMPARE_MACRO(Fmlslt(z5.VnS(), z5.VnS(), z1.VnH(), z7.VnH(), 1), 8577 "fmlslt z5.s, z1.h, z7.h[1]"); 8578 8579 COMPARE_MACRO(Fmlalb(z5.VnS(), z4.VnS(), z1.VnH(), z7.VnH(), 1), 8580 "movprfx z5, z4\n" 8581 "fmlalb z5.s, z1.h, z7.h[1]"); 8582 COMPARE_MACRO(Fmlalt(z5.VnS(), z4.VnS(), z5.VnH(), z7.VnH(), 1), 8583 "movprfx z31, z4\n" 8584 "fmlalt z31.s, z5.h, z7.h[1]\n" 8585 "mov z5.d, z31.d"); 8586 COMPARE_MACRO(Fmlslb(z5.VnS(), z4.VnS(), z1.VnH(), z5.VnH(), 1), 8587 "movprfx z31, z4\n" 8588 "fmlslb z31.s, z1.h, z5.h[1]\n" 8589 "mov z5.d, z31.d"); 8590 COMPARE_MACRO(Fmlslt(z5.VnS(), z4.VnS(), z5.VnH(), z5.VnH(), 1), 8591 "movprfx z31, z4\n" 8592 "fmlslt z31.s, z5.h, z5.h[1]\n" 8593 "mov z5.d, z31.d"); 8594 CLEANUP(); 8595} 8596 8597TEST(sve2_fp_convert) { 8598 SETUP(); 8599 8600 COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Merging(), z0.VnD()), 8601 "fcvtx z14.s, p4/m, z0.d"); 8602 COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Zeroing(), z0.VnD()), 8603 "movprfx z14.d, p4/z, z14.d\n" 
8604 "fcvtx z14.s, p4/m, z0.d"); 8605 COMPARE_MACRO(Fcvtlt(z1.VnD(), p1.Merging(), z28.VnS()), 8606 "fcvtlt z1.d, p1/m, z28.s"); 8607 COMPARE_MACRO(Fcvtlt(z10.VnS(), p5.Merging(), z0.VnH()), 8608 "fcvtlt z10.s, p5/m, z0.h"); 8609 COMPARE_MACRO(Fcvtnt(z4.VnH(), p7.Merging(), z0.VnS()), 8610 "fcvtnt z4.h, p7/m, z0.s"); 8611 COMPARE_MACRO(Fcvtnt(z8.VnS(), p0.Merging(), z4.VnD()), 8612 "fcvtnt z8.s, p0/m, z4.d"); 8613 COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Merging(), z0.VnD()), 8614 "fcvtx z14.s, p4/m, z0.d"); 8615 COMPARE_MACRO(Fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD()), 8616 "fcvtxnt z27.s, p0/m, z17.d"); 8617 8618 CLEANUP(); 8619} 8620 8621TEST(sve2_sat_double_mul_high_index) { 8622 SETUP(); 8623 8624 COMPARE_MACRO(Sqdmulh(z11.VnH(), z20.VnH(), z7.VnH(), 1), 8625 "sqdmulh z11.h, z20.h, z7.h[1]"); 8626 COMPARE_MACRO(Sqdmulh(z11.VnH(), z20.VnH(), z2.VnH(), 7), 8627 "sqdmulh z11.h, z20.h, z2.h[7]"); 8628 COMPARE_MACRO(Sqdmulh(z8.VnS(), z4.VnS(), z7.VnS(), 1), 8629 "sqdmulh z8.s, z4.s, z7.s[1]"); 8630 COMPARE_MACRO(Sqdmulh(z8.VnS(), z4.VnS(), z3.VnS(), 3), 8631 "sqdmulh z8.s, z4.s, z3.s[3]"); 8632 COMPARE_MACRO(Sqdmulh(z6.VnD(), z13.VnD(), z0.VnD(), 1), 8633 "sqdmulh z6.d, z13.d, z0.d[1]"); 8634 COMPARE_MACRO(Sqdmulh(z6.VnD(), z13.VnD(), z15.VnD(), 0), 8635 "sqdmulh z6.d, z13.d, z15.d[0]"); 8636 8637 COMPARE_MACRO(Sqrdmulh(z3.VnH(), z29.VnH(), z7.VnH(), 2), 8638 "sqrdmulh z3.h, z29.h, z7.h[2]"); 8639 COMPARE_MACRO(Sqrdmulh(z3.VnH(), z29.VnH(), z3.VnH(), 7), 8640 "sqrdmulh z3.h, z29.h, z3.h[7]"); 8641 COMPARE_MACRO(Sqrdmulh(z19.VnS(), z15.VnS(), z7.VnS(), 0), 8642 "sqrdmulh z19.s, z15.s, z7.s[0]"); 8643 COMPARE_MACRO(Sqrdmulh(z19.VnS(), z15.VnS(), z2.VnS(), 3), 8644 "sqrdmulh z19.s, z15.s, z2.s[3]"); 8645 COMPARE_MACRO(Sqrdmulh(z29.VnD(), z13.VnD(), z0.VnD(), 1), 8646 "sqrdmulh z29.d, z13.d, z0.d[1]"); 8647 COMPARE_MACRO(Sqrdmulh(z29.VnD(), z13.VnD(), z15.VnD(), 0), 8648 "sqrdmulh z29.d, z13.d, z15.d[0]"); 8649 8650 CLEANUP(); 8651} 8652 8653TEST(sve2_extract) { 8654 
SETUP(); 8655 8656 COMPARE_MACRO(Ext(z0.VnB(), z1.VnB(), z2.VnB(), 2), 8657 "ext z0.b, {z1.b, z2.b}, #2"); 8658 COMPARE_MACRO(Ext(z0.VnB(), z31.VnB(), z0.VnB(), 255), 8659 "ext z0.b, {z31.b, z0.b}, #255"); 8660 COMPARE_MACRO(Ext(z0.VnB(), z31.VnB(), z0.VnB(), 0), 8661 "ext z0.b, {z31.b, z0.b}, #0"); 8662 8663 // Check destructive form is preferred over constructive. 8664 COMPARE_MACRO(Ext(z0.VnB(), z0.VnB(), z1.VnB(), 42), 8665 "ext z0.b, z0.b, z1.b, #42"); 8666 8667 CLEANUP(); 8668} 8669 8670TEST(sve_matmul) { 8671 SETUP(); 8672 8673 COMPARE_MACRO(Fmmla(z2.VnS(), z2.VnS(), z3.VnS(), z20.VnS()), 8674 "fmmla z2.s, z3.s, z20.s"); 8675 COMPARE_MACRO(Fmmla(z21.VnD(), z21.VnD(), z30.VnD(), z2.VnD()), 8676 "fmmla z21.d, z30.d, z2.d"); 8677 COMPARE_MACRO(Smmla(z31.VnS(), z31.VnS(), z7.VnB(), z19.VnB()), 8678 "smmla z31.s, z7.b, z19.b"); 8679 COMPARE_MACRO(Ummla(z0.VnS(), z0.VnS(), z1.VnB(), z2.VnB()), 8680 "ummla z0.s, z1.b, z2.b"); 8681 COMPARE_MACRO(Usmmla(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB()), 8682 "usmmla z30.s, z31.b, z4.b"); 8683 8684 COMPARE_MACRO(Fmmla(z0.VnS(), z1.VnS(), z2.VnS(), z3.VnS()), 8685 "movprfx z0, z1\n" 8686 "fmmla z0.s, z2.s, z3.s"); 8687 COMPARE_MACRO(Smmla(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB()), 8688 "movprfx z31, z1\n" 8689 "smmla z31.s, z0.b, z3.b\n" 8690 "mov z0.d, z31.d"); 8691 COMPARE_MACRO(Ummla(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB()), 8692 "movprfx z31, z1\n" 8693 "ummla z31.s, z2.b, z0.b\n" 8694 "mov z0.d, z31.d"); 8695 COMPARE_MACRO(Usmmla(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB()), 8696 "movprfx z31, z1\n" 8697 "usmmla z31.s, z0.b, z0.b\n" 8698 "mov z0.d, z31.d"); 8699 8700 CLEANUP(); 8701} 8702 8703TEST(sve_usdot_sudot) { 8704 SETUP(); 8705 8706 COMPARE_MACRO(Usdot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB()), 8707 "usdot z30.s, z31.b, z4.b"); 8708 COMPARE_MACRO(Usdot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB(), 0), 8709 "usdot z30.s, z31.b, z4.b[0]"); 8710 COMPARE_MACRO(Sudot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB(), 3), 8711 "sudot 
z30.s, z31.b, z4.b[3]"); 8712 8713 COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z28.VnB()), 8714 "movprfx z0, z30\n" 8715 "usdot z0.s, z29.b, z28.b"); 8716 COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z0.VnB()), 8717 "movprfx z31, z30\n" 8718 "usdot z31.s, z29.b, z0.b\n" 8719 "mov z0.d, z31.d"); 8720 COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z28.VnB()), 8721 "movprfx z31, z30\n" 8722 "usdot z31.s, z0.b, z28.b\n" 8723 "mov z0.d, z31.d"); 8724 COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z0.VnB()), 8725 "movprfx z31, z30\n" 8726 "usdot z31.s, z0.b, z0.b\n" 8727 "mov z0.d, z31.d"); 8728 COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z4.VnB(), 0), 8729 "movprfx z0, z30\n" 8730 "usdot z0.s, z29.b, z4.b[0]"); 8731 COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z4.VnB(), 0), 8732 "movprfx z31, z30\n" 8733 "usdot z31.s, z0.b, z4.b[0]\n" 8734 "mov z0.d, z31.d"); 8735 COMPARE_MACRO(Sudot(z0.VnS(), z30.VnS(), z29.VnB(), z0.VnB(), 0), 8736 "movprfx z31, z30\n" 8737 "sudot z31.s, z29.b, z0.b[0]\n" 8738 "mov z0.d, z31.d"); 8739 COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z0.VnB(), 0), 8740 "movprfx z31, z30\n" 8741 "usdot z31.s, z0.b, z0.b[0]\n" 8742 "mov z0.d, z31.d"); 8743 8744 CLEANUP(); 8745} 8746} // namespace aarch64 8747} // namespace vixl 8748