1// Copyright 2016, VIXL authors 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <regex>

#include "test-runner.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "test-utils-aarch64.h"

namespace vixl {
namespace aarch64 {

// Shorthand so that emitted instructions read like an assembly listing:
// `__ adc(w3, w4, w5);` expands to `masm->adc(w3, w4, w5);`. It therefore
// requires a variable named `masm` to be in scope at the point of use.
#define __ masm->
// Declare a test through TEST_ (not defined in this file), prefixing the
// given name with `TRACE_`.
#define TEST(name) TEST_(TRACE_##name)

// Build a path below test/test-trace-reference/ by string-literal
// concatenation; `name` must itself be a string literal.
#define REF(name) "test/test-trace-reference/" name

// Emit the general-purpose (integer) A64 test sequence into `masm`.
//
// The sequence is a flat listing that covers data-processing, conditional,
// system/barrier, load/store and branch instructions, mostly in both a
// W-register and an X-register form. Memory operands without writeback use x0
// as the base register; pre-/post-index operands use x1, except in the
// regression section, which uses sp.
//
// NOTE(review): REF() above points at test/test-trace-reference/, so the
// emitted order and operands are presumably matched against checked-in
// reference output -- confirm before adding, removing or reordering anything.
static void GenerateTestSequenceBase(MacroAssembler* masm) {
  // Open a scope covering every byte still free in the buffer. The size is
  // passed with kMaximumSize, i.e. as a limit rather than an exact byte count.
  ExactAssemblyScope guard(masm,
                           masm->GetBuffer()->GetRemainingBytes(),
                           ExactAssemblyScope::kMaximumSize);

  // Data-processing, conditional and system instructions (roughly
  // alphabetical by mnemonic).
  __ adc(w3, w4, w5);
  __ adc(x6, x7, x8);
  __ adcs(w9, w10, w11);
  __ adcs(x12, x13, x14);
  __ add(w15, w16, w17);
  __ add(x18, x19, x20);
  __ adds(w21, w22, w23);
  __ adds(x24, x25, x26);
  __ and_(w27, w28, w29);
  __ and_(x2, x3, x4);
  __ ands(w5, w6, w7);
  __ ands(x8, x9, x10);
  __ asr(w11, w12, 0);
  __ asr(x13, x14, 1);
  __ asrv(w15, w16, w17);
  __ asrv(x18, x19, x20);
  __ bfm(w21, w22, 5, 6);
  __ bfm(x23, x24, 7, 8);
  __ bic(w25, w26, w27);
  __ bic(x28, x29, x2);
  __ bics(w3, w4, w5);
  __ bics(x6, x7, x8);
  __ ccmn(w9, w10, NoFlag, al);
  __ ccmn(w9, w10, NoFlag, eq);
  __ ccmn(w9, w10, NoFlag, ne);
  __ ccmn(x11, x12, CFlag, al);
  __ ccmn(x11, x12, CFlag, cc);
  __ ccmn(x11, x12, CFlag, cs);
  __ ccmp(w13, w14, VFlag, al);
  __ ccmp(w13, w14, VFlag, hi);
  __ ccmp(w13, w14, VFlag, ls);
  __ ccmp(x15, x16, CVFlag, al);
  __ ccmp(x15, x16, CVFlag, eq);
  __ ccmp(x15, x16, CVFlag, ne);
  __ cinc(w17, w18, cc);
  __ cinc(w17, w18, cs);
  __ cinc(x19, x20, hi);
  __ cinc(x19, x20, ls);
  __ cinv(w21, w22, eq);
  __ cinv(w21, w22, ne);
  __ cinv(x23, x24, cc);
  __ cinv(x23, x24, cs);
  __ clrex();
  __ cls(w25, w26);
  __ cls(x27, x28);
  __ clz(w29, w2);
  __ clz(x3, x4);
  __ cmn(w5, w6);
  __ cmn(x7, x8);
  __ cmp(w9, w10);
  __ cmp(x11, x12);
  __ cneg(w13, w14, hi);
  __ cneg(w13, w14, ls);
  __ cneg(x15, x16, eq);
  __ cneg(x15, x16, ne);
  __ crc32b(w17, w18, w19);
  __ crc32cb(w20, w21, w22);
  __ crc32ch(w23, w24, w25);
  __ crc32cw(w26, w27, w28);
  __ crc32h(w4, w5, w6);
  __ crc32w(w7, w8, w9);
  __ csel(w13, w14, w15, cc);
  __ csel(w13, w14, w15, cs);
  __ csel(x16, x17, x18, hi);
  __ csel(x16, x17, x18, ls);
  __ cset(w19, eq);
  __ cset(w19, ne);
  __ cset(x20, cc);
  __ cset(x20, cs);
  __ csetm(w21, hi);
  __ csetm(w21, ls);
  __ csetm(x22, eq);
  __ csetm(x22, ne);
  __ csinc(w23, w24, w25, cc);
  __ csinc(w23, w24, w25, cs);
  __ csinc(x26, x27, x28, hi);
  __ csinc(x26, x27, x28, ls);
  __ csinv(w29, w2, w3, eq);
  __ csinv(w29, w2, w3, ne);
  __ csinv(x4, x5, x6, cc);
  __ csinv(x4, x5, x6, cs);
  __ csneg(w7, w8, w9, hi);
  __ csneg(w7, w8, w9, ls);
  __ csneg(x10, x11, x12, eq);
  __ csneg(x10, x11, x12, ne);
  __ dc(CVAC, x0);
  __ dmb(InnerShareable, BarrierAll);
  __ dsb(InnerShareable, BarrierAll);
  __ eon(w13, w14, w15);
  __ eon(x16, x17, x18);
  __ eor(w19, w20, w21);
  __ eor(x22, x23, x24);
  __ extr(w25, w26, w27, 9);
  __ extr(x28, x29, x2, 10);
  __ hint(NOP);
  __ ic(IVAU, x0);
  __ isb();

  // Loads. Forms without writeback address through x0; every PostIndex /
  // PreIndex form addresses through x1.
  __ ldar(w3, MemOperand(x0));
  __ ldar(x4, MemOperand(x0));
  __ ldarb(w5, MemOperand(x0));
  __ ldarb(x6, MemOperand(x0));
  __ ldarh(w7, MemOperand(x0));
  __ ldarh(x8, MemOperand(x0));
  __ ldaxp(w9, w10, MemOperand(x0));
  __ ldaxp(x11, x12, MemOperand(x0));
  __ ldaxr(w13, MemOperand(x0));
  __ ldaxr(x14, MemOperand(x0));
  __ ldaxrb(w15, MemOperand(x0));
  __ ldaxrb(x16, MemOperand(x0));
  __ ldaxrh(w17, MemOperand(x0));
  __ ldaxrh(x18, MemOperand(x0));
  __ ldnp(w19, w20, MemOperand(x0));
  __ ldnp(x21, x22, MemOperand(x0));
  __ ldp(w23, w24, MemOperand(x0));
  __ ldp(w23, w24, MemOperand(x1, 8, PostIndex));
  __ ldp(w23, w24, MemOperand(x1, 8, PreIndex));
  __ ldp(x25, x26, MemOperand(x0));
  __ ldp(x25, x26, MemOperand(x1, 16, PostIndex));
  __ ldp(x25, x26, MemOperand(x1, 16, PreIndex));
  __ ldpsw(x27, x28, MemOperand(x0));
  __ ldpsw(x27, x28, MemOperand(x1, 8, PostIndex));
  __ ldpsw(x27, x28, MemOperand(x1, 8, PreIndex));
  __ ldr(w29, MemOperand(x0));
  __ ldr(w29, MemOperand(x1, 4, PostIndex));
  __ ldr(w29, MemOperand(x1, 4, PreIndex));
  __ ldr(x2, MemOperand(x0));
  __ ldr(x2, MemOperand(x1, 8, PostIndex));
  __ ldr(x2, MemOperand(x1, 8, PreIndex));
  __ ldrb(w3, MemOperand(x0));
  __ ldrb(w3, MemOperand(x1, 1, PostIndex));
  __ ldrb(w3, MemOperand(x1, 1, PreIndex));
  __ ldrb(x4, MemOperand(x0));
  __ ldrb(x4, MemOperand(x1, 1, PostIndex));
  __ ldrb(x4, MemOperand(x1, 1, PreIndex));
  __ ldrh(w5, MemOperand(x0));
  __ ldrh(w5, MemOperand(x1, 2, PostIndex));
  __ ldrh(w5, MemOperand(x1, 2, PreIndex));
  __ ldrh(x6, MemOperand(x0));
  __ ldrh(x6, MemOperand(x1, 2, PostIndex));
  __ ldrh(x6, MemOperand(x1, 2, PreIndex));
  __ ldrsb(w7, MemOperand(x0));
  __ ldrsb(w7, MemOperand(x1, 1, PostIndex));
  __ ldrsb(w7, MemOperand(x1, 1, PreIndex));
  __ ldrsb(x8, MemOperand(x0));
  __ ldrsb(x8, MemOperand(x1, 1, PostIndex));
  __ ldrsb(x8, MemOperand(x1, 1, PreIndex));
  __ ldrsh(w9, MemOperand(x0));
  __ ldrsh(w9, MemOperand(x1, 2, PostIndex));
  __ ldrsh(w9, MemOperand(x1, 2, PreIndex));
  __ ldrsh(x10, MemOperand(x0));
  __ ldrsh(x10, MemOperand(x1, 2, PostIndex));
  __ ldrsh(x10, MemOperand(x1, 2, PreIndex));
  __ ldrsw(x11, MemOperand(x0));
  __ ldrsw(x11, MemOperand(x1, 4, PostIndex));
  __ ldrsw(x11, MemOperand(x1, 4, PreIndex));
  // The ldur* forms below use offsets that are not multiples of the access
  // size (for example 7 for a 4-byte load, 3 for a 2-byte load).
  __ ldur(w12, MemOperand(x0, 7));
  __ ldur(x13, MemOperand(x0, 15));
  __ ldurb(w14, MemOperand(x0, 1));
  __ ldurb(x15, MemOperand(x0, 1));
  __ ldurh(w16, MemOperand(x0, 3));
  __ ldurh(x17, MemOperand(x0, 3));
  __ ldursb(w18, MemOperand(x0, 1));
  __ ldursb(x19, MemOperand(x0, 1));
  __ ldursh(w20, MemOperand(x0, 3));
  __ ldursh(x21, MemOperand(x0, 3));
  __ ldursw(x22, MemOperand(x0, 7));
  __ ldxp(w23, w24, MemOperand(x0));
  __ ldxp(x25, x26, MemOperand(x0));
  __ ldxr(w27, MemOperand(x0));
  __ ldxr(x28, MemOperand(x0));
  __ ldxrb(w29, MemOperand(x0));
  __ ldxrb(x2, MemOperand(x0));
  __ ldxrh(w3, MemOperand(x0));
  __ ldxrh(x4, MemOperand(x0));

  // More data-processing instructions, continuing the alphabetical listing.
  __ lsl(w5, w6, 2);
  __ lsl(x7, x8, 3);
  __ lslv(w9, w10, w11);
  __ lslv(x12, x13, x14);
  __ lsr(w15, w16, 4);
  __ lsr(x17, x18, 5);
  __ lsrv(w19, w20, w21);
  __ lsrv(x22, x23, x24);
  __ madd(w25, w26, w27, w28);
  __ madd(x29, x2, x3, x4);
  __ mneg(w5, w6, w7);
  __ mneg(x8, x9, x10);
  __ mov(w11, w12);
  __ mov(x13, x14);
  __ movk(w15, 130);
  __ movk(x16, 131);
  __ movn(w17, 132);
  __ movn(x18, 133);
  __ movz(w19, 134);
  __ movz(x20, 135);
  __ msub(w22, w23, w24, w25);
  __ msub(x26, x27, x28, x29);
  __ mul(w2, w3, w4);
  __ mul(x5, x6, x7);
  __ mvn(w8, w9);
  __ mvn(x10, x11);
  __ neg(w12, w13);
  __ neg(x14, x15);
  __ negs(w16, w17);
  __ negs(x18, x19);
  __ ngc(w20, w21);
  __ ngc(x22, x23);
  __ ngcs(w24, w25);
  __ ngcs(x26, x27);
  __ nop();
  __ orn(w28, w29, w2);
  __ orn(x3, x4, x5);
  __ orr(w6, w7, w8);
  __ orr(x9, x10, x11);
  __ prfm(PLDL1KEEP, MemOperand(x0, 4));
  __ prfum(PLDL1KEEP, MemOperand(x0, 1));
  __ rbit(w12, w13);
  __ rbit(x14, x15);
  __ rev(w16, w17);
  __ rev(x18, x19);
  __ rev16(w20, w21);
  __ rev16(x22, x23);
  __ rev32(x24, x25);
  __ rorv(w26, w27, w28);
  __ rorv(x29, x2, x3);
  __ sbc(w4, w5, w6);
  __ sbc(x7, x8, x9);
  __ sbcs(w10, w11, w12);
  __ sbcs(x13, x14, x15);
  __ sbfiz(w16, w17, 2, 3);
  __ sbfiz(x18, x19, 4, 5);
  __ sbfx(w22, w23, 6, 7);
  __ sbfx(x24, x25, 8, 9);
  __ sdiv(w26, w27, w28);
  __ sdiv(x29, x2, x3);
  __ smulh(x12, x13, x14);

  // Stores, using the same base-register convention as the loads above
  // (x0 without writeback, x1 with PostIndex / PreIndex).
  __ stlr(w18, MemOperand(x0));
  __ stlr(x19, MemOperand(x0));
  __ stlrb(w20, MemOperand(x0));
  __ stlrb(x21, MemOperand(x0));
  __ stlrh(w22, MemOperand(x0));
  __ stlrh(x23, MemOperand(x0));
  __ stlxp(w24, w25, w26, MemOperand(x0));
  __ stlxp(x27, x28, x29, MemOperand(x0));
  __ stlxr(w2, w3, MemOperand(x0));
  __ stlxr(x4, x5, MemOperand(x0));
  __ stlxrb(w6, w7, MemOperand(x0));
  __ stlxrb(x8, x9, MemOperand(x0));
  __ stlxrh(w10, w11, MemOperand(x0));
  __ stlxrh(x12, x13, MemOperand(x0));
  __ stnp(w14, w15, MemOperand(x0));
  __ stnp(x16, x17, MemOperand(x0));
  __ stp(w18, w19, MemOperand(x0));
  __ stp(w18, w19, MemOperand(x1, 8, PostIndex));
  __ stp(w18, w19, MemOperand(x1, 8, PreIndex));
  __ stp(x20, x21, MemOperand(x0));
  __ stp(x20, x21, MemOperand(x1, 16, PostIndex));
  __ stp(x20, x21, MemOperand(x1, 16, PreIndex));
  __ str(w22, MemOperand(x0));
  __ str(w22, MemOperand(x1, 4, PostIndex));
  __ str(w22, MemOperand(x1, 4, PreIndex));
  __ str(x23, MemOperand(x0));
  __ str(x23, MemOperand(x1, 8, PostIndex));
  __ str(x23, MemOperand(x1, 8, PreIndex));
  __ strb(w24, MemOperand(x0));
  __ strb(w24, MemOperand(x1, 1, PostIndex));
  __ strb(w24, MemOperand(x1, 1, PreIndex));
  __ strb(x25, MemOperand(x0));
  __ strb(x25, MemOperand(x1, 1, PostIndex));
  __ strb(x25, MemOperand(x1, 1, PreIndex));
  __ strh(w26, MemOperand(x0));
  __ strh(w26, MemOperand(x1, 2, PostIndex));
  __ strh(w26, MemOperand(x1, 2, PreIndex));
  __ strh(x27, MemOperand(x0));
  __ strh(x27, MemOperand(x1, 2, PostIndex));
  __ strh(x27, MemOperand(x1, 2, PreIndex));
  __ stur(w28, MemOperand(x0, 7));
  __ stur(x29, MemOperand(x0, 15));
  __ sturb(w2, MemOperand(x0, 1));
  __ sturb(x3, MemOperand(x0, 1));
  __ sturh(w4, MemOperand(x0, 3));
  __ sturh(x5, MemOperand(x0, 3));
  __ stxp(w6, w7, w8, MemOperand(x0));
  __ stxp(x9, x10, x11, MemOperand(x0));
  __ stxr(w12, w13, MemOperand(x0));
  __ stxr(x14, x15, MemOperand(x0));
  __ stxrb(w16, w17, MemOperand(x0));
  __ stxrb(x18, x19, MemOperand(x0));
  __ stxrh(w20, w21, MemOperand(x0));
  __ stxrh(x22, x23, MemOperand(x0));

  // Remaining data-processing instructions.
  __ sub(w24, w25, w26);
  __ sub(x27, x28, x29);
  __ subs(w2, w3, w4);
  __ subs(x5, x6, x7);
  __ sxtb(w8, w9);
  __ sxtb(x10, x11);
  __ sxth(w12, w13);
  __ sxth(x14, x15);
  __ sxtw(w16, w17);
  __ sxtw(x18, x19);
  __ tst(w20, w21);
  __ tst(x22, x23);
  __ ubfiz(w24, w25, 10, 11);
  __ ubfiz(x26, x27, 12, 13);
  __ ubfm(w28, w29, 14, 15);
  __ ubfm(x2, x3, 1, 2);
  __ ubfx(w4, w5, 3, 4);
  __ ubfx(x6, x7, 5, 6);
  __ udiv(w8, w9, w10);
  __ udiv(x11, x12, x13);
  __ umulh(x22, x23, x24);
  __ uxtb(w28, w29);
  __ uxtb(x2, x3);
  __ uxth(w4, w5);
  __ uxth(x6, x7);
  __ uxtw(w8, w9);
  __ uxtw(x10, x11);

  // Regression tests.
  // Every instruction here has xzr as a transfer register and addresses
  // through sp. Each store pre-decrements sp by 16 and the following load
  // post-increments it by 16, so sp is back at its starting value after each
  // pair.
  __ stp(x10, xzr, MemOperand(sp, -16, PreIndex));
  __ ldp(x10, xzr, MemOperand(sp, 16, PostIndex));
  __ str(xzr, MemOperand(sp, -16, PreIndex));
  __ ldrsb(xzr, MemOperand(sp, 16, PostIndex));
  __ str(xzr, MemOperand(sp, -16, PreIndex));
  __ ldrsh(xzr, MemOperand(sp, 16, PostIndex));
  __ str(xzr, MemOperand(sp, -16, PreIndex));
  __ ldrsw(xzr, MemOperand(sp, 16, PostIndex));

  // Branch tests.
  {
    Label end;
    // Branch to the next instruction.
    __ b(&end);
    __ bind(&end);
  }
  {
    Label loop, end;
    // x3 - x3 == 0, so this leaves x3 at zero with the Z flag set.
    __ subs(x3, x3, x3);
    __ bind(&loop);
    // Not-taken branch (the first time).
    // Taken branch (the second time).
    __ b(&end, ne);
    // x3 is 0, so comparing it with 1 clears Z; the `ne` branch above is then
    // taken on the second pass and the loop exits.
    __ cmp(x3, 1);
    // Backwards branch.
    __ b(&loop);
    __ bind(&end);
  }
}


// Emit the scalar floating-point A64 test sequence into `masm`.
//
// The listing uses H, S and D scalar registers, plus W/X registers for the
// conversions and moves between the floating-point and general-purpose
// register files. Where fcvtzs / fcvtzu / scvtf / ucvtf take a trailing
// integer argument, it selects the fixed-point form (number of fractional
// bits).
//
// NOTE(review): as with the base sequence, the order and operands are
// presumably tied to reference output under test/test-trace-reference/ --
// confirm before changing them.
static void GenerateTestSequenceFP(MacroAssembler* masm) {
  // Open a scope covering every byte still free in the buffer. The size is
  // passed with kMaximumSize, i.e. as a limit rather than an exact byte count.
  ExactAssemblyScope guard(masm,
                           masm->GetBuffer()->GetRemainingBytes(),
                           ExactAssemblyScope::kMaximumSize);

  // Scalar floating point instructions.
  __ fabd(d13, d2, d19);
  __ fabd(s8, s10, s30);
  __ fabs(d1, d1);
  __ fabs(s25, s7);
  __ facge(d1, d23, d16);
  __ facge(s4, s17, s1);
  __ facgt(d2, d21, d24);
  __ facgt(s12, s26, s12);
  __ fadd(d13, d11, d22);
  __ fadd(s27, s19, s8);
  __ fccmp(d6, d10, NoFlag, hs);
  __ fccmp(s29, s20, NZVFlag, ne);
  __ fccmpe(d10, d2, NZCFlag, al);
  __ fccmpe(s3, s3, NZVFlag, pl);
  __ fcmeq(d19, d8, d10);
  __ fcmeq(d0, d18, 0.0);
  __ fcmeq(s1, s4, s30);
  __ fcmeq(s22, s29, 0.0);
  __ fcmge(d27, d18, d1);
  __ fcmge(d31, d28, 0.0);
  __ fcmge(s31, s19, s9);
  __ fcmge(s1, s25, 0.0);
  __ fcmgt(d18, d1, d15);
  __ fcmgt(d3, d31, 0.0);
  __ fcmgt(s11, s25, s2);
  __ fcmgt(s17, s16, 0.0);
  __ fcmle(d24, d17, 0.0);
  __ fcmle(s11, s8, 0.0);
  __ fcmlt(d5, d31, 0.0);
  __ fcmlt(s18, s23, 0.0);
  __ fcmp(d10, d24);
  __ fcmp(d13, 0.0);
  __ fcmp(s18, s6);
  __ fcmp(s16, 0.0);
  __ fcmpe(d9, d17);
  __ fcmpe(d29, 0.0);
  __ fcmpe(s16, s17);
  __ fcmpe(s22, 0.0);
  __ fcsel(d10, d14, d19, gt);
  __ fcsel(s22, s18, s2, ge);
  __ fcvt(d4, h24);
  __ fcvt(d11, s2);
  __ fcvt(h8, d9);
  __ fcvt(h12, s1);
  __ fcvt(s12, d31);
  __ fcvt(s27, h25);
  __ fcvtas(d28, d16);
  __ fcvtas(s3, s5);
  __ fcvtas(w18, d31);
  __ fcvtas(w29, s24);
  __ fcvtas(x9, d1);
  __ fcvtas(x30, s2);
  __ fcvtau(d14, d0);
  __ fcvtau(s31, s14);
  __ fcvtau(w16, d2);
  __ fcvtau(w18, s0);
  __ fcvtau(x26, d7);
  __ fcvtau(x25, s19);
  __ fcvtms(d30, d25);
  __ fcvtms(s12, s15);
  __ fcvtms(w9, d7);
  __ fcvtms(w19, s6);
  __ fcvtms(x6, d6);
  __ fcvtms(x22, s7);
  __ fcvtmu(d27, d0);
  __ fcvtmu(s8, s22);
  __ fcvtmu(w29, d19);
  __ fcvtmu(w26, s0);
  __ fcvtmu(x13, d5);
  __ fcvtmu(x5, s18);
  __ fcvtns(d30, d15);
  __ fcvtns(s10, s11);
  __ fcvtns(w21, d15);
  __ fcvtns(w18, s10);
  __ fcvtns(x8, d17);
  __ fcvtns(x17, s12);
  __ fcvtnu(d0, d21);
  __ fcvtnu(s6, s25);
  __ fcvtnu(w29, d11);
  __ fcvtnu(w25, s31);
  __ fcvtnu(x30, d11);
  __ fcvtnu(x27, s18);
  __ fcvtps(d11, d22);
  __ fcvtps(s29, s20);
  __ fcvtps(w15, d25);
  __ fcvtps(w16, s7);
  __ fcvtps(x13, d20);
  __ fcvtps(x3, s23);
  __ fcvtpu(d24, d1);
  __ fcvtpu(s14, s24);
  __ fcvtpu(w26, d29);
  __ fcvtpu(wzr, s26);
  __ fcvtpu(x27, d6);
  __ fcvtpu(x29, s14);
  __ fcvtxn(s12, d12);
  __ fcvtzs(d15, d0);
  __ fcvtzs(d13, d4, 42);
  __ fcvtzs(s8, s11);
  __ fcvtzs(s31, s6, 25);
  __ fcvtzs(w6, d9);
  __ fcvtzs(w25, d10, 20);
  __ fcvtzs(w9, s1);
  __ fcvtzs(w17, s29, 30);
  __ fcvtzs(x19, d2);
  __ fcvtzs(x22, d14, 1);
  __ fcvtzs(x14, s20);
  __ fcvtzs(x3, s30, 33);
  __ fcvtzu(d28, d15);
  __ fcvtzu(d0, d4, 3);
  __ fcvtzu(s2, s5);
  __ fcvtzu(s4, s0, 30);
  __ fcvtzu(w11, d4);
  __ fcvtzu(w7, d24, 32);
  __ fcvtzu(w18, s24);
  __ fcvtzu(w14, s27, 4);
  __ fcvtzu(x22, d11);
  __ fcvtzu(x8, d27, 52);
  __ fcvtzu(x7, s20);
  __ fcvtzu(x22, s7, 44);
  __ fdiv(d6, d14, d15);
  __ fdiv(s26, s5, s25);
  __ fmadd(d18, d26, d12, d30);
  __ fmadd(s13, s9, s28, s4);
  __ fmax(d12, d5, d5);
  __ fmax(s12, s28, s6);
  __ fmaxnm(d28, d4, d2);
  __ fmaxnm(s6, s10, s8);
  __ fmin(d20, d20, d18);
  __ fmin(s7, s13, s16);
  __ fminnm(d19, d14, d30);
  __ fminnm(s0, s1, s1);
  __ fmov(d13, d6);
  __ fmov(d2, x17);
  __ fmov(d8, -2.5000);
  __ fmov(s5, s3);
  __ fmov(s25, w20);
  __ fmov(s21, 2.8750f);
  __ fmov(w18, s24);
  __ fmov(x18, d2);
  __ fmsub(d20, d30, d3, d19);
  __ fmsub(s5, s19, s4, s12);
  __ fmul(d30, d27, d23);
  __ fmul(s25, s17, s15);
  __ fmulx(d4, d17, d1);
  __ fmulx(s14, s25, s4);
  __ fneg(d15, d0);
  __ fneg(s14, s15);
  __ fnmadd(d0, d16, d22, d31);
  __ fnmadd(s0, s18, s26, s18);
  __ fnmsub(d19, d12, d15, d21);
  __ fnmsub(s29, s0, s11, s26);
  __ fnmul(d31, d19, d1);
  __ fnmul(s18, s3, s17);
  __ frecpe(d7, d21);
  __ frecpe(s29, s17);
  __ frecps(d11, d26, d17);
  __ frecps(s18, s27, s1);
  __ frecpx(d15, d18);
  __ frecpx(s5, s10);
  __ frinta(d16, d30);
  __ frinta(s1, s22);
  __ frinti(d19, d29);
  __ frinti(s14, s21);
  __ frintm(d20, d30);
  __ frintm(s1, s16);
  __ frintn(d30, d1);
  __ frintn(s24, s10);
  __ frintp(d4, d20);
  __ frintp(s13, s3);
  __ frintx(d13, d20);
  __ frintx(s17, s7);
  __ frintz(d0, d8);
  __ frintz(s15, s29);
  __ frsqrte(d21, d10);
  __ frsqrte(s17, s25);
  __ frsqrts(d4, d29, d17);
  __ frsqrts(s14, s3, s24);
  __ fsqrt(d14, d17);
  __ fsqrt(s4, s14);
  __ fsub(d13, d19, d7);
  __ fsub(s3, s21, s27);
  __ scvtf(d31, d16);
  __ scvtf(d26, d31, 24);
  __ scvtf(d6, w16);
  __ scvtf(d5, w20, 6);
  __ scvtf(d16, x8);
  __ scvtf(d15, x8, 10);
  __ scvtf(s7, s4);
  __ scvtf(s8, s15, 14);
  __ scvtf(s29, w10);
  __ scvtf(s15, w21, 11);
  __ scvtf(s27, x26);
  __ scvtf(s26, x12, 38);
  __ ucvtf(d0, d9);
  __ ucvtf(d5, d22, 47);
  __ ucvtf(d30, w27);
  __ ucvtf(d3, w19, 1);
  __ ucvtf(d28, x21);
  __ ucvtf(d27, x30, 35);
  __ ucvtf(s11, s5);
  __ ucvtf(s0, s23, 14);
  __ ucvtf(s20, w19);
  __ ucvtf(s21, w22, 18);
  __ ucvtf(s6, x13);
  __ ucvtf(s7, x2, 21);
}


static void GenerateTestSequenceNEON(MacroAssembler* masm) {
  ExactAssemblyScope guard(masm,
                           masm->GetBuffer()->GetRemainingBytes(),
                           ExactAssemblyScope::kMaximumSize);

  // NEON integer instructions.
625 __ abs(d19, d0); 626 __ abs(v16.V16B(), v11.V16B()); 627 __ abs(v0.V2D(), v31.V2D()); 628 __ abs(v27.V2S(), v25.V2S()); 629 __ abs(v21.V4H(), v27.V4H()); 630 __ abs(v16.V4S(), v1.V4S()); 631 __ abs(v31.V8B(), v5.V8B()); 632 __ abs(v29.V8H(), v13.V8H()); 633 __ add(d10, d5, d17); 634 __ add(v31.V16B(), v15.V16B(), v23.V16B()); 635 __ add(v10.V2D(), v31.V2D(), v14.V2D()); 636 __ add(v15.V2S(), v14.V2S(), v19.V2S()); 637 __ add(v27.V4H(), v23.V4H(), v17.V4H()); 638 __ add(v25.V4S(), v28.V4S(), v29.V4S()); 639 __ add(v13.V8B(), v7.V8B(), v18.V8B()); 640 __ add(v4.V8H(), v2.V8H(), v1.V8H()); 641 __ addhn(v10.V2S(), v14.V2D(), v15.V2D()); 642 __ addhn(v10.V4H(), v30.V4S(), v26.V4S()); 643 __ addhn(v31.V8B(), v12.V8H(), v22.V8H()); 644 __ addhn2(v16.V16B(), v21.V8H(), v20.V8H()); 645 __ addhn2(v0.V4S(), v2.V2D(), v17.V2D()); 646 __ addhn2(v31.V8H(), v7.V4S(), v17.V4S()); 647 __ addp(d14, v19.V2D()); 648 __ addp(v3.V16B(), v8.V16B(), v28.V16B()); 649 __ addp(v8.V2D(), v5.V2D(), v17.V2D()); 650 __ addp(v22.V2S(), v30.V2S(), v26.V2S()); 651 __ addp(v29.V4H(), v24.V4H(), v14.V4H()); 652 __ addp(v30.V4S(), v26.V4S(), v24.V4S()); 653 __ addp(v12.V8B(), v26.V8B(), v7.V8B()); 654 __ addp(v17.V8H(), v8.V8H(), v12.V8H()); 655 __ addv(b27, v23.V16B()); 656 __ addv(b12, v20.V8B()); 657 __ addv(h27, v30.V4H()); 658 __ addv(h19, v14.V8H()); 659 __ addv(s14, v27.V4S()); 660 __ and_(v10.V16B(), v8.V16B(), v27.V16B()); 661 __ and_(v5.V8B(), v1.V8B(), v16.V8B()); 662 __ bic(v26.V16B(), v3.V16B(), v24.V16B()); 663 __ bic(v7.V2S(), 0xe4, 16); 664 __ bic(v28.V4H(), 0x23, 8); 665 __ bic(v29.V4S(), 0xac); 666 __ bic(v12.V8B(), v31.V8B(), v21.V8B()); 667 __ bic(v18.V8H(), 0x98); 668 __ bif(v12.V16B(), v26.V16B(), v8.V16B()); 669 __ bif(v2.V8B(), v23.V8B(), v27.V8B()); 670 __ bit(v8.V16B(), v3.V16B(), v13.V16B()); 671 __ bit(v5.V8B(), v5.V8B(), v23.V8B()); 672 __ bsl(v9.V16B(), v31.V16B(), v23.V16B()); 673 __ bsl(v14.V8B(), v7.V8B(), v3.V8B()); 674 __ cls(v29.V16B(), v5.V16B()); 675 __ 
cls(v21.V2S(), v0.V2S()); 676 __ cls(v1.V4H(), v12.V4H()); 677 __ cls(v27.V4S(), v10.V4S()); 678 __ cls(v19.V8B(), v4.V8B()); 679 __ cls(v15.V8H(), v14.V8H()); 680 __ clz(v1.V16B(), v4.V16B()); 681 __ clz(v27.V2S(), v17.V2S()); 682 __ clz(v9.V4H(), v9.V4H()); 683 __ clz(v31.V4S(), v15.V4S()); 684 __ clz(v14.V8B(), v19.V8B()); 685 __ clz(v6.V8H(), v11.V8H()); 686 __ cmeq(d18, d5, d29); 687 __ cmeq(d14, d31, 0); 688 __ cmeq(v19.V16B(), v3.V16B(), v22.V16B()); 689 __ cmeq(v15.V16B(), v9.V16B(), 0); 690 __ cmeq(v12.V2D(), v16.V2D(), v10.V2D()); 691 __ cmeq(v8.V2D(), v22.V2D(), 0); 692 __ cmeq(v2.V2S(), v3.V2S(), v9.V2S()); 693 __ cmeq(v16.V2S(), v25.V2S(), 0); 694 __ cmeq(v6.V4H(), v23.V4H(), v20.V4H()); 695 __ cmeq(v16.V4H(), v13.V4H(), 0); 696 __ cmeq(v21.V4S(), v17.V4S(), v2.V4S()); 697 __ cmeq(v6.V4S(), v25.V4S(), 0); 698 __ cmeq(v16.V8B(), v13.V8B(), v2.V8B()); 699 __ cmeq(v21.V8B(), v16.V8B(), 0); 700 __ cmeq(v20.V8H(), v7.V8H(), v25.V8H()); 701 __ cmeq(v26.V8H(), v8.V8H(), 0); 702 __ cmge(d16, d13, d31); 703 __ cmge(d25, d24, 0); 704 __ cmge(v17.V16B(), v19.V16B(), v17.V16B()); 705 __ cmge(v22.V16B(), v30.V16B(), 0); 706 __ cmge(v28.V2D(), v20.V2D(), v26.V2D()); 707 __ cmge(v6.V2D(), v23.V2D(), 0); 708 __ cmge(v25.V2S(), v22.V2S(), v3.V2S()); 709 __ cmge(v21.V2S(), v11.V2S(), 0); 710 __ cmge(v16.V4H(), v3.V4H(), v12.V4H()); 711 __ cmge(v23.V4H(), v9.V4H(), 0); 712 __ cmge(v7.V4S(), v2.V4S(), v11.V4S()); 713 __ cmge(v0.V4S(), v22.V4S(), 0); 714 __ cmge(v10.V8B(), v30.V8B(), v9.V8B()); 715 __ cmge(v21.V8B(), v8.V8B(), 0); 716 __ cmge(v2.V8H(), v7.V8H(), v26.V8H()); 717 __ cmge(v19.V8H(), v10.V8H(), 0); 718 __ cmgt(d6, d13, d1); 719 __ cmgt(d30, d24, 0); 720 __ cmgt(v20.V16B(), v25.V16B(), v27.V16B()); 721 __ cmgt(v0.V16B(), v25.V16B(), 0); 722 __ cmgt(v22.V2D(), v25.V2D(), v1.V2D()); 723 __ cmgt(v16.V2D(), v16.V2D(), 0); 724 __ cmgt(v5.V2S(), v9.V2S(), v15.V2S()); 725 __ cmgt(v12.V2S(), v18.V2S(), 0); 726 __ cmgt(v28.V4H(), v18.V4H(), v11.V4H()); 727 __ 
cmgt(v22.V4H(), v3.V4H(), 0); 728 __ cmgt(v5.V4S(), v11.V4S(), v27.V4S()); 729 __ cmgt(v13.V4S(), v20.V4S(), 0); 730 __ cmgt(v27.V8B(), v31.V8B(), v7.V8B()); 731 __ cmgt(v5.V8B(), v0.V8B(), 0); 732 __ cmgt(v22.V8H(), v28.V8H(), v13.V8H()); 733 __ cmgt(v6.V8H(), v2.V8H(), 0); 734 __ cmhi(d21, d8, d22); 735 __ cmhi(v18.V16B(), v19.V16B(), v19.V16B()); 736 __ cmhi(v7.V2D(), v0.V2D(), v21.V2D()); 737 __ cmhi(v15.V2S(), v19.V2S(), v0.V2S()); 738 __ cmhi(v31.V4H(), v7.V4H(), v12.V4H()); 739 __ cmhi(v9.V4S(), v16.V4S(), v22.V4S()); 740 __ cmhi(v7.V8B(), v24.V8B(), v28.V8B()); 741 __ cmhi(v11.V8H(), v10.V8H(), v25.V8H()); 742 __ cmhs(d1, d12, d17); 743 __ cmhs(v21.V16B(), v25.V16B(), v30.V16B()); 744 __ cmhs(v8.V2D(), v2.V2D(), v26.V2D()); 745 __ cmhs(v1.V2S(), v22.V2S(), v29.V2S()); 746 __ cmhs(v26.V4H(), v30.V4H(), v30.V4H()); 747 __ cmhs(v19.V4S(), v20.V4S(), v16.V4S()); 748 __ cmhs(v1.V8B(), v3.V8B(), v26.V8B()); 749 __ cmhs(v20.V8H(), v28.V8H(), v8.V8H()); 750 __ cmle(d30, d24, 0); 751 __ cmle(v0.V16B(), v3.V16B(), 0); 752 __ cmle(v2.V2D(), v30.V2D(), 0); 753 __ cmle(v7.V2S(), v10.V2S(), 0); 754 __ cmle(v9.V4H(), v31.V4H(), 0); 755 __ cmle(v9.V4S(), v18.V4S(), 0); 756 __ cmle(v21.V8B(), v31.V8B(), 0); 757 __ cmle(v29.V8H(), v21.V8H(), 0); 758 __ cmlt(d25, d23, 0); 759 __ cmlt(v7.V16B(), v21.V16B(), 0); 760 __ cmlt(v7.V2D(), v30.V2D(), 0); 761 __ cmlt(v25.V2S(), v28.V2S(), 0); 762 __ cmlt(v0.V4H(), v11.V4H(), 0); 763 __ cmlt(v24.V4S(), v5.V4S(), 0); 764 __ cmlt(v26.V8B(), v11.V8B(), 0); 765 __ cmlt(v1.V8H(), v21.V8H(), 0); 766 __ cmtst(d28, d23, d30); 767 __ cmtst(v26.V16B(), v6.V16B(), v31.V16B()); 768 __ cmtst(v1.V2D(), v21.V2D(), v4.V2D()); 769 __ cmtst(v27.V2S(), v26.V2S(), v20.V2S()); 770 __ cmtst(v26.V4H(), v0.V4H(), v18.V4H()); 771 __ cmtst(v25.V4S(), v16.V4S(), v4.V4S()); 772 __ cmtst(v11.V8B(), v10.V8B(), v9.V8B()); 773 __ cmtst(v0.V8H(), v2.V8H(), v1.V8H()); 774 __ cnt(v25.V16B(), v15.V16B()); 775 __ cnt(v28.V8B(), v6.V8B()); 776 __ dup(v6.V16B(), v7.B(), 7); 
777 __ dup(v9.V16B(), w20); 778 __ dup(v12.V2D(), v13.D(), 1); 779 __ dup(v9.V2D(), xzr); 780 __ dup(v4.V2S(), v26.S(), 2); 781 __ dup(v3.V2S(), w12); 782 __ dup(v22.V4H(), v5.H(), 7); 783 __ dup(v16.V4H(), w25); 784 __ dup(v20.V4S(), v10.S(), 2); 785 __ dup(v10.V4S(), w7); 786 __ dup(v30.V8B(), v30.B(), 2); 787 __ dup(v31.V8B(), w15); 788 __ dup(v28.V8H(), v17.H(), 4); 789 __ dup(v2.V8H(), w3); 790 __ eor(v29.V16B(), v25.V16B(), v3.V16B()); 791 __ eor(v3.V8B(), v16.V8B(), v28.V8B()); 792 __ ext(v1.V16B(), v26.V16B(), v6.V16B(), 1); 793 __ ext(v2.V8B(), v30.V8B(), v1.V8B(), 1); 794 __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0)); 795 __ ld1(v23.V16B(), 796 v24.V16B(), 797 v25.V16B(), 798 v26.V16B(), 799 MemOperand(x1, x2, PostIndex)); 800 __ ld1(v5.V16B(), 801 v6.V16B(), 802 v7.V16B(), 803 v8.V16B(), 804 MemOperand(x1, 64, PostIndex)); 805 __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), MemOperand(x0)); 806 __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), MemOperand(x1, x2, PostIndex)); 807 __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x1, 48, PostIndex)); 808 __ ld1(v17.V16B(), v18.V16B(), MemOperand(x0)); 809 __ ld1(v20.V16B(), v21.V16B(), MemOperand(x1, x2, PostIndex)); 810 __ ld1(v28.V16B(), v29.V16B(), MemOperand(x1, 32, PostIndex)); 811 __ ld1(v29.V16B(), MemOperand(x0)); 812 __ ld1(v21.V16B(), MemOperand(x1, x2, PostIndex)); 813 __ ld1(v4.V16B(), MemOperand(x1, 16, PostIndex)); 814 __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), MemOperand(x0)); 815 __ ld1(v17.V1D(), 816 v18.V1D(), 817 v19.V1D(), 818 v20.V1D(), 819 MemOperand(x1, x2, PostIndex)); 820 __ ld1(v28.V1D(), 821 v29.V1D(), 822 v30.V1D(), 823 v31.V1D(), 824 MemOperand(x1, 32, PostIndex)); 825 __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), MemOperand(x0)); 826 __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), MemOperand(x1, x2, PostIndex)); 827 __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), MemOperand(x1, 24, PostIndex)); 828 __ ld1(v29.V1D(), v30.V1D(), MemOperand(x0)); 829 __ ld1(v31.V1D(), v0.V1D(), 
MemOperand(x1, x2, PostIndex)); 830 __ ld1(v3.V1D(), v4.V1D(), MemOperand(x1, 16, PostIndex)); 831 __ ld1(v28.V1D(), MemOperand(x0)); 832 __ ld1(v11.V1D(), MemOperand(x1, x2, PostIndex)); 833 __ ld1(v29.V1D(), MemOperand(x1, 8, PostIndex)); 834 __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), MemOperand(x0)); 835 __ ld1(v8.V2D(), 836 v9.V2D(), 837 v10.V2D(), 838 v11.V2D(), 839 MemOperand(x1, x2, PostIndex)); 840 __ ld1(v14.V2D(), 841 v15.V2D(), 842 v16.V2D(), 843 v17.V2D(), 844 MemOperand(x1, 64, PostIndex)); 845 __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x0)); 846 __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); 847 __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x1, 48, PostIndex)); 848 __ ld1(v18.V2D(), v19.V2D(), MemOperand(x0)); 849 __ ld1(v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex)); 850 __ ld1(v17.V2D(), v18.V2D(), MemOperand(x1, 32, PostIndex)); 851 __ ld1(v5.V2D(), MemOperand(x0)); 852 __ ld1(v6.V2D(), MemOperand(x1, x2, PostIndex)); 853 __ ld1(v15.V2D(), MemOperand(x1, 16, PostIndex)); 854 __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x0)); 855 __ ld1(v24.V2S(), 856 v25.V2S(), 857 v26.V2S(), 858 v27.V2S(), 859 MemOperand(x1, x2, PostIndex)); 860 __ ld1(v27.V2S(), 861 v28.V2S(), 862 v29.V2S(), 863 v30.V2S(), 864 MemOperand(x1, 32, PostIndex)); 865 __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x0)); 866 __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), MemOperand(x1, x2, PostIndex)); 867 __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x1, 24, PostIndex)); 868 __ ld1(v0.V2S(), v1.V2S(), MemOperand(x0)); 869 __ ld1(v13.V2S(), v14.V2S(), MemOperand(x1, x2, PostIndex)); 870 __ ld1(v3.V2S(), v4.V2S(), MemOperand(x1, 16, PostIndex)); 871 __ ld1(v26.V2S(), MemOperand(x0)); 872 __ ld1(v0.V2S(), MemOperand(x1, x2, PostIndex)); 873 __ ld1(v11.V2S(), MemOperand(x1, 8, PostIndex)); 874 __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0)); 875 __ ld1(v24.V4H(), 876 v25.V4H(), 877 v26.V4H(), 878 v27.V4H(), 
879 MemOperand(x1, x2, PostIndex)); 880 __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex)); 881 __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), MemOperand(x0)); 882 __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), MemOperand(x1, x2, PostIndex)); 883 __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 24, PostIndex)); 884 __ ld1(v3.V4H(), v4.V4H(), MemOperand(x0)); 885 __ ld1(v3.V4H(), v4.V4H(), MemOperand(x1, x2, PostIndex)); 886 __ ld1(v23.V4H(), v24.V4H(), MemOperand(x1, 16, PostIndex)); 887 __ ld1(v26.V4H(), MemOperand(x0)); 888 __ ld1(v1.V4H(), MemOperand(x1, x2, PostIndex)); 889 __ ld1(v14.V4H(), MemOperand(x1, 8, PostIndex)); 890 __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), MemOperand(x0)); 891 __ ld1(v28.V4S(), 892 v29.V4S(), 893 v30.V4S(), 894 v31.V4S(), 895 MemOperand(x1, x2, PostIndex)); 896 __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), MemOperand(x1, 64, PostIndex)); 897 __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0)); 898 __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), MemOperand(x1, x2, PostIndex)); 899 __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x1, 48, PostIndex)); 900 __ ld1(v20.V4S(), v21.V4S(), MemOperand(x0)); 901 __ ld1(v30.V4S(), v31.V4S(), MemOperand(x1, x2, PostIndex)); 902 __ ld1(v11.V4S(), v12.V4S(), MemOperand(x1, 32, PostIndex)); 903 __ ld1(v15.V4S(), MemOperand(x0)); 904 __ ld1(v12.V4S(), MemOperand(x1, x2, PostIndex)); 905 __ ld1(v0.V4S(), MemOperand(x1, 16, PostIndex)); 906 __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), MemOperand(x0)); 907 __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, x2, PostIndex)); 908 __ ld1(v9.V8B(), 909 v10.V8B(), 910 v11.V8B(), 911 v12.V8B(), 912 MemOperand(x1, 32, PostIndex)); 913 __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), MemOperand(x0)); 914 __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x1, x2, PostIndex)); 915 __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex)); 916 __ ld1(v10.V8B(), v11.V8B(), MemOperand(x0)); 917 __ ld1(v11.V8B(), v12.V8B(), 
MemOperand(x1, x2, PostIndex)); 918 __ ld1(v27.V8B(), v28.V8B(), MemOperand(x1, 16, PostIndex)); 919 __ ld1(v31.V8B(), MemOperand(x0)); 920 __ ld1(v10.V8B(), MemOperand(x1, x2, PostIndex)); 921 __ ld1(v28.V8B(), MemOperand(x1, 8, PostIndex)); 922 __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0)); 923 __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); 924 __ ld1(v10.V8H(), 925 v11.V8H(), 926 v12.V8H(), 927 v13.V8H(), 928 MemOperand(x1, 64, PostIndex)); 929 __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0)); 930 __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); 931 __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), MemOperand(x1, 48, PostIndex)); 932 __ ld1(v4.V8H(), v5.V8H(), MemOperand(x0)); 933 __ ld1(v21.V8H(), v22.V8H(), MemOperand(x1, x2, PostIndex)); 934 __ ld1(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex)); 935 __ ld1(v9.V8H(), MemOperand(x0)); 936 __ ld1(v27.V8H(), MemOperand(x1, x2, PostIndex)); 937 __ ld1(v26.V8H(), MemOperand(x1, 16, PostIndex)); 938 __ ld1(v19.B(), 1, MemOperand(x0)); 939 __ ld1(v12.B(), 3, MemOperand(x1, x2, PostIndex)); 940 __ ld1(v27.B(), 12, MemOperand(x1, 1, PostIndex)); 941 __ ld1(v10.D(), 1, MemOperand(x0)); 942 __ ld1(v26.D(), 1, MemOperand(x1, x2, PostIndex)); 943 __ ld1(v7.D(), 1, MemOperand(x1, 8, PostIndex)); 944 __ ld1(v19.H(), 5, MemOperand(x0)); 945 __ ld1(v10.H(), 1, MemOperand(x1, x2, PostIndex)); 946 __ ld1(v5.H(), 4, MemOperand(x1, 2, PostIndex)); 947 __ ld1(v21.S(), 2, MemOperand(x0)); 948 __ ld1(v13.S(), 2, MemOperand(x1, x2, PostIndex)); 949 __ ld1(v1.S(), 2, MemOperand(x1, 4, PostIndex)); 950 __ ld1r(v2.V16B(), MemOperand(x0)); 951 __ ld1r(v2.V16B(), MemOperand(x1, x2, PostIndex)); 952 __ ld1r(v22.V16B(), MemOperand(x1, 1, PostIndex)); 953 __ ld1r(v25.V1D(), MemOperand(x0)); 954 __ ld1r(v9.V1D(), MemOperand(x1, x2, PostIndex)); 955 __ ld1r(v23.V1D(), MemOperand(x1, 8, PostIndex)); 956 __ ld1r(v19.V2D(), MemOperand(x0)); 957 __ ld1r(v21.V2D(), MemOperand(x1, 
x2, PostIndex)); 958 __ ld1r(v30.V2D(), MemOperand(x1, 8, PostIndex)); 959 __ ld1r(v24.V2S(), MemOperand(x0)); 960 __ ld1r(v26.V2S(), MemOperand(x1, x2, PostIndex)); 961 __ ld1r(v28.V2S(), MemOperand(x1, 4, PostIndex)); 962 __ ld1r(v19.V4H(), MemOperand(x0)); 963 __ ld1r(v1.V4H(), MemOperand(x1, x2, PostIndex)); 964 __ ld1r(v21.V4H(), MemOperand(x1, 2, PostIndex)); 965 __ ld1r(v15.V4S(), MemOperand(x0)); 966 __ ld1r(v21.V4S(), MemOperand(x1, x2, PostIndex)); 967 __ ld1r(v23.V4S(), MemOperand(x1, 4, PostIndex)); 968 __ ld1r(v26.V8B(), MemOperand(x0)); 969 __ ld1r(v14.V8B(), MemOperand(x1, x2, PostIndex)); 970 __ ld1r(v19.V8B(), MemOperand(x1, 1, PostIndex)); 971 __ ld1r(v13.V8H(), MemOperand(x0)); 972 __ ld1r(v30.V8H(), MemOperand(x1, x2, PostIndex)); 973 __ ld1r(v27.V8H(), MemOperand(x1, 2, PostIndex)); 974 __ ld2(v21.V16B(), v22.V16B(), MemOperand(x0)); 975 __ ld2(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex)); 976 __ ld2(v12.V16B(), v13.V16B(), MemOperand(x1, 32, PostIndex)); 977 __ ld2(v14.V2D(), v15.V2D(), MemOperand(x0)); 978 __ ld2(v0.V2D(), v1.V2D(), MemOperand(x1, x2, PostIndex)); 979 __ ld2(v12.V2D(), v13.V2D(), MemOperand(x1, 32, PostIndex)); 980 __ ld2(v27.V2S(), v28.V2S(), MemOperand(x0)); 981 __ ld2(v2.V2S(), v3.V2S(), MemOperand(x1, x2, PostIndex)); 982 __ ld2(v12.V2S(), v13.V2S(), MemOperand(x1, 16, PostIndex)); 983 __ ld2(v9.V4H(), v10.V4H(), MemOperand(x0)); 984 __ ld2(v23.V4H(), v24.V4H(), MemOperand(x1, x2, PostIndex)); 985 __ ld2(v1.V4H(), v2.V4H(), MemOperand(x1, 16, PostIndex)); 986 __ ld2(v20.V4S(), v21.V4S(), MemOperand(x0)); 987 __ ld2(v10.V4S(), v11.V4S(), MemOperand(x1, x2, PostIndex)); 988 __ ld2(v24.V4S(), v25.V4S(), MemOperand(x1, 32, PostIndex)); 989 __ ld2(v17.V8B(), v18.V8B(), MemOperand(x0)); 990 __ ld2(v13.V8B(), v14.V8B(), MemOperand(x1, x2, PostIndex)); 991 __ ld2(v7.V8B(), v8.V8B(), MemOperand(x1, 16, PostIndex)); 992 __ ld2(v30.V8H(), v31.V8H(), MemOperand(x0)); 993 __ ld2(v4.V8H(), v5.V8H(), MemOperand(x1, x2, 
PostIndex)); 994 __ ld2(v13.V8H(), v14.V8H(), MemOperand(x1, 32, PostIndex)); 995 __ ld2(v5.B(), v6.B(), 12, MemOperand(x0)); 996 __ ld2(v16.B(), v17.B(), 7, MemOperand(x1, x2, PostIndex)); 997 __ ld2(v29.B(), v30.B(), 2, MemOperand(x1, 2, PostIndex)); 998 __ ld2(v11.D(), v12.D(), 1, MemOperand(x0)); 999 __ ld2(v26.D(), v27.D(), 0, MemOperand(x1, x2, PostIndex)); 1000 __ ld2(v25.D(), v26.D(), 0, MemOperand(x1, 16, PostIndex)); 1001 __ ld2(v18.H(), v19.H(), 7, MemOperand(x0)); 1002 __ ld2(v17.H(), v18.H(), 5, MemOperand(x1, x2, PostIndex)); 1003 __ ld2(v30.H(), v31.H(), 2, MemOperand(x1, 4, PostIndex)); 1004 __ ld2(v29.S(), v30.S(), 3, MemOperand(x0)); 1005 __ ld2(v28.S(), v29.S(), 0, MemOperand(x1, x2, PostIndex)); 1006 __ ld2(v6.S(), v7.S(), 1, MemOperand(x1, 8, PostIndex)); 1007 __ ld2r(v26.V16B(), v27.V16B(), MemOperand(x0)); 1008 __ ld2r(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex)); 1009 __ ld2r(v5.V16B(), v6.V16B(), MemOperand(x1, 2, PostIndex)); 1010 __ ld2r(v26.V1D(), v27.V1D(), MemOperand(x0)); 1011 __ ld2r(v14.V1D(), v15.V1D(), MemOperand(x1, x2, PostIndex)); 1012 __ ld2r(v23.V1D(), v24.V1D(), MemOperand(x1, 16, PostIndex)); 1013 __ ld2r(v11.V2D(), v12.V2D(), MemOperand(x0)); 1014 __ ld2r(v29.V2D(), v30.V2D(), MemOperand(x1, x2, PostIndex)); 1015 __ ld2r(v15.V2D(), v16.V2D(), MemOperand(x1, 16, PostIndex)); 1016 __ ld2r(v26.V2S(), v27.V2S(), MemOperand(x0)); 1017 __ ld2r(v22.V2S(), v23.V2S(), MemOperand(x1, x2, PostIndex)); 1018 __ ld2r(v2.V2S(), v3.V2S(), MemOperand(x1, 8, PostIndex)); 1019 __ ld2r(v2.V4H(), v3.V4H(), MemOperand(x0)); 1020 __ ld2r(v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex)); 1021 __ ld2r(v6.V4H(), v7.V4H(), MemOperand(x1, 4, PostIndex)); 1022 __ ld2r(v7.V4S(), v8.V4S(), MemOperand(x0)); 1023 __ ld2r(v19.V4S(), v20.V4S(), MemOperand(x1, x2, PostIndex)); 1024 __ ld2r(v21.V4S(), v22.V4S(), MemOperand(x1, 8, PostIndex)); 1025 __ ld2r(v26.V8B(), v27.V8B(), MemOperand(x0)); 1026 __ ld2r(v20.V8B(), v21.V8B(), MemOperand(x1, x2, 
PostIndex)); 1027 __ ld2r(v11.V8B(), v12.V8B(), MemOperand(x1, 2, PostIndex)); 1028 __ ld2r(v12.V8H(), v13.V8H(), MemOperand(x0)); 1029 __ ld2r(v6.V8H(), v7.V8H(), MemOperand(x1, x2, PostIndex)); 1030 __ ld2r(v25.V8H(), v26.V8H(), MemOperand(x1, 4, PostIndex)); 1031 __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x0)); 1032 __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), MemOperand(x1, x2, PostIndex)); 1033 __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x1, 48, PostIndex)); 1034 __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), MemOperand(x0)); 1035 __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x1, x2, PostIndex)); 1036 __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), MemOperand(x1, 48, PostIndex)); 1037 __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x0)); 1038 __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), MemOperand(x1, x2, PostIndex)); 1039 __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), MemOperand(x1, 24, PostIndex)); 1040 __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), MemOperand(x0)); 1041 __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), MemOperand(x1, x2, PostIndex)); 1042 __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 24, PostIndex)); 1043 __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0)); 1044 __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), MemOperand(x1, x2, PostIndex)); 1045 __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x1, 48, PostIndex)); 1046 __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x0)); 1047 __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex)); 1048 __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex)); 1049 __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), MemOperand(x0)); 1050 __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x1, x2, PostIndex)); 1051 __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), MemOperand(x1, 48, PostIndex)); 1052 __ ld3(v21.B(), v22.B(), v23.B(), 11, MemOperand(x0)); 1053 __ ld3(v5.B(), v6.B(), v7.B(), 9, MemOperand(x1, x2, PostIndex)); 1054 __ ld3(v23.B(), v24.B(), v25.B(), 0, MemOperand(x1, 3, PostIndex)); 1055 __ ld3(v16.D(), 
v17.D(), v18.D(), 0, MemOperand(x0)); 1056 __ ld3(v30.D(), v31.D(), v0.D(), 0, MemOperand(x1, x2, PostIndex)); 1057 __ ld3(v28.D(), v29.D(), v30.D(), 1, MemOperand(x1, 24, PostIndex)); 1058 __ ld3(v13.H(), v14.H(), v15.H(), 2, MemOperand(x0)); 1059 __ ld3(v22.H(), v23.H(), v24.H(), 7, MemOperand(x1, x2, PostIndex)); 1060 __ ld3(v14.H(), v15.H(), v16.H(), 3, MemOperand(x1, 6, PostIndex)); 1061 __ ld3(v22.S(), v23.S(), v24.S(), 3, MemOperand(x0)); 1062 __ ld3(v30.S(), v31.S(), v0.S(), 2, MemOperand(x1, x2, PostIndex)); 1063 __ ld3(v12.S(), v13.S(), v14.S(), 1, MemOperand(x1, 12, PostIndex)); 1064 __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x0)); 1065 __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, x2, PostIndex)); 1066 __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x1, 3, PostIndex)); 1067 __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), MemOperand(x0)); 1068 __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), MemOperand(x1, x2, PostIndex)); 1069 __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), MemOperand(x1, 24, PostIndex)); 1070 __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x0)); 1071 __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex)); 1072 __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), MemOperand(x1, 24, PostIndex)); 1073 __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), MemOperand(x0)); 1074 __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x1, x2, PostIndex)); 1075 __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, 12, PostIndex)); 1076 __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), MemOperand(x0)); 1077 __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x1, x2, PostIndex)); 1078 __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 6, PostIndex)); 1079 __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), MemOperand(x0)); 1080 __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x1, x2, PostIndex)); 1081 __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, 12, PostIndex)); 1082 __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x0)); 1083 __ ld3r(v10.V8B(), v11.V8B(), 
v12.V8B(), MemOperand(x1, x2, PostIndex)); 1084 __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), MemOperand(x1, 3, PostIndex)); 1085 __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0)); 1086 __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x1, x2, PostIndex)); 1087 __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), MemOperand(x1, 6, PostIndex)); 1088 __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), MemOperand(x0)); 1089 __ ld4(v2.V16B(), 1090 v3.V16B(), 1091 v4.V16B(), 1092 v5.V16B(), 1093 MemOperand(x1, x2, PostIndex)); 1094 __ ld4(v5.V16B(), 1095 v6.V16B(), 1096 v7.V16B(), 1097 v8.V16B(), 1098 MemOperand(x1, 64, PostIndex)); 1099 __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x0)); 1100 __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); 1101 __ ld4(v29.V2D(), 1102 v30.V2D(), 1103 v31.V2D(), 1104 v0.V2D(), 1105 MemOperand(x1, 64, PostIndex)); 1106 __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), MemOperand(x0)); 1107 __ ld4(v24.V2S(), 1108 v25.V2S(), 1109 v26.V2S(), 1110 v27.V2S(), 1111 MemOperand(x1, x2, PostIndex)); 1112 __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), MemOperand(x1, 32, PostIndex)); 1113 __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0)); 1114 __ ld4(v23.V4H(), 1115 v24.V4H(), 1116 v25.V4H(), 1117 v26.V4H(), 1118 MemOperand(x1, x2, PostIndex)); 1119 __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 32, PostIndex)); 1120 __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), MemOperand(x0)); 1121 __ ld4(v28.V4S(), 1122 v29.V4S(), 1123 v30.V4S(), 1124 v31.V4S(), 1125 MemOperand(x1, x2, PostIndex)); 1126 __ ld4(v29.V4S(), 1127 v30.V4S(), 1128 v31.V4S(), 1129 v0.V4S(), 1130 MemOperand(x1, 64, PostIndex)); 1131 __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), MemOperand(x0)); 1132 __ ld4(v27.V8B(), 1133 v28.V8B(), 1134 v29.V8B(), 1135 v30.V8B(), 1136 MemOperand(x1, x2, PostIndex)); 1137 __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, 32, PostIndex)); 1138 __ ld4(v25.V8H(), v26.V8H(), 
v27.V8H(), v28.V8H(), MemOperand(x0)); 1139 __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); 1140 __ ld4(v20.V8H(), 1141 v21.V8H(), 1142 v22.V8H(), 1143 v23.V8H(), 1144 MemOperand(x1, 64, PostIndex)); 1145 __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, MemOperand(x0)); 1146 __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, MemOperand(x1, x2, PostIndex)); 1147 __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, MemOperand(x1, 4, PostIndex)); 1148 __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x0)); 1149 __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex)); 1150 __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, MemOperand(x1, 32, PostIndex)); 1151 __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, MemOperand(x0)); 1152 __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, MemOperand(x1, x2, PostIndex)); 1153 __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, MemOperand(x1, 8, PostIndex)); 1154 __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, MemOperand(x0)); 1155 __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, MemOperand(x1, x2, PostIndex)); 1156 __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, MemOperand(x1, 16, PostIndex)); 1157 __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), MemOperand(x0)); 1158 __ ld4r(v13.V16B(), 1159 v14.V16B(), 1160 v15.V16B(), 1161 v16.V16B(), 1162 MemOperand(x1, x2, PostIndex)); 1163 __ ld4r(v9.V16B(), 1164 v10.V16B(), 1165 v11.V16B(), 1166 v12.V16B(), 1167 MemOperand(x1, 4, PostIndex)); 1168 __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), MemOperand(x0)); 1169 __ ld4r(v4.V1D(), 1170 v5.V1D(), 1171 v6.V1D(), 1172 v7.V1D(), 1173 MemOperand(x1, x2, PostIndex)); 1174 __ ld4r(v26.V1D(), 1175 v27.V1D(), 1176 v28.V1D(), 1177 v29.V1D(), 1178 MemOperand(x1, 32, PostIndex)); 1179 __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x0)); 1180 __ ld4r(v28.V2D(), 1181 v29.V2D(), 1182 v30.V2D(), 1183 v31.V2D(), 1184 MemOperand(x1, x2, PostIndex)); 1185 __ ld4r(v15.V2D(), 1186 v16.V2D(), 1187 v17.V2D(), 1188 v18.V2D(), 1189 
MemOperand(x1, 32, PostIndex)); 1190 __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x0)); 1191 __ ld4r(v28.V2S(), 1192 v29.V2S(), 1193 v30.V2S(), 1194 v31.V2S(), 1195 MemOperand(x1, x2, PostIndex)); 1196 __ ld4r(v11.V2S(), 1197 v12.V2S(), 1198 v13.V2S(), 1199 v14.V2S(), 1200 MemOperand(x1, 16, PostIndex)); 1201 __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), MemOperand(x0)); 1202 __ ld4r(v22.V4H(), 1203 v23.V4H(), 1204 v24.V4H(), 1205 v25.V4H(), 1206 MemOperand(x1, x2, PostIndex)); 1207 __ ld4r(v20.V4H(), 1208 v21.V4H(), 1209 v22.V4H(), 1210 v23.V4H(), 1211 MemOperand(x1, 8, PostIndex)); 1212 __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x0)); 1213 __ ld4r(v25.V4S(), 1214 v26.V4S(), 1215 v27.V4S(), 1216 v28.V4S(), 1217 MemOperand(x1, x2, PostIndex)); 1218 __ ld4r(v23.V4S(), 1219 v24.V4S(), 1220 v25.V4S(), 1221 v26.V4S(), 1222 MemOperand(x1, 16, PostIndex)); 1223 __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), MemOperand(x0)); 1224 __ ld4r(v27.V8B(), 1225 v28.V8B(), 1226 v29.V8B(), 1227 v30.V8B(), 1228 MemOperand(x1, x2, PostIndex)); 1229 __ ld4r(v29.V8B(), 1230 v30.V8B(), 1231 v31.V8B(), 1232 v0.V8B(), 1233 MemOperand(x1, 4, PostIndex)); 1234 __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x0)); 1235 __ ld4r(v25.V8H(), 1236 v26.V8H(), 1237 v27.V8H(), 1238 v28.V8H(), 1239 MemOperand(x1, x2, PostIndex)); 1240 __ ld4r(v22.V8H(), 1241 v23.V8H(), 1242 v24.V8H(), 1243 v25.V8H(), 1244 MemOperand(x1, 8, PostIndex)); 1245 __ mla(v29.V16B(), v7.V16B(), v26.V16B()); 1246 __ mla(v6.V2S(), v4.V2S(), v14.V2S()); 1247 __ mla(v9.V2S(), v11.V2S(), v0.S(), 2); 1248 __ mla(v5.V4H(), v17.V4H(), v25.V4H()); 1249 __ mla(v24.V4H(), v7.V4H(), v11.H(), 3); 1250 __ mla(v12.V4S(), v3.V4S(), v4.V4S()); 1251 __ mla(v10.V4S(), v7.V4S(), v7.S(), 3); 1252 __ mla(v3.V8B(), v16.V8B(), v9.V8B()); 1253 __ mla(v19.V8H(), v22.V8H(), v18.V8H()); 1254 __ mla(v6.V8H(), v2.V8H(), v0.H(), 0); 1255 __ mls(v23.V16B(), v10.V16B(), v11.V16B()); 1256 __ 
mls(v14.V2S(), v31.V2S(), v22.V2S()); 1257 __ mls(v28.V2S(), v13.V2S(), v1.S(), 3); 1258 __ mls(v2.V4H(), v19.V4H(), v13.V4H()); 1259 __ mls(v18.V4H(), v15.V4H(), v12.H(), 6); 1260 __ mls(v6.V4S(), v11.V4S(), v16.V4S()); 1261 __ mls(v23.V4S(), v16.V4S(), v10.S(), 2); 1262 __ mls(v26.V8B(), v13.V8B(), v23.V8B()); 1263 __ mls(v10.V8H(), v10.V8H(), v12.V8H()); 1264 __ mls(v14.V8H(), v0.V8H(), v14.H(), 7); 1265 __ mov(b22, v1.B(), 3); 1266 __ mov(d7, v13.D(), 1); 1267 __ mov(h26, v21.H(), 2); 1268 __ mov(s26, v19.S(), 0); 1269 __ mov(v26.V16B(), v11.V16B()); 1270 __ mov(v20.V8B(), v0.V8B()); 1271 __ mov(v19.B(), 13, v6.B(), 4); 1272 __ mov(v4.B(), 13, w19); 1273 __ mov(v11.D(), 1, v8.D(), 0); 1274 __ mov(v3.D(), 0, x30); 1275 __ mov(v29.H(), 4, v11.H(), 7); 1276 __ mov(v2.H(), 6, w6); 1277 __ mov(v22.S(), 0, v5.S(), 2); 1278 __ mov(v24.S(), 3, w8); 1279 __ mov(w18, v1.S(), 3); 1280 __ mov(x28, v21.D(), 0); 1281 __ movi(d24, 0xffff0000ffffff); 1282 __ movi(v29.V16B(), 0x80); 1283 __ movi(v12.V2D(), 0xffff00ff00ffff00); 1284 __ movi(v12.V2S(), 0xec, LSL, 24); 1285 __ movi(v10.V2S(), 0x4c, MSL, 16); 1286 __ movi(v26.V4H(), 0xc0, LSL); 1287 __ movi(v24.V4S(), 0x98, LSL, 16); 1288 __ movi(v1.V4S(), 0xde, MSL, 16); 1289 __ movi(v21.V8B(), 0x4d); 1290 __ movi(v29.V8H(), 0x69, LSL); 1291 __ mul(v1.V16B(), v15.V16B(), v17.V16B()); 1292 __ mul(v21.V2S(), v19.V2S(), v29.V2S()); 1293 __ mul(v19.V2S(), v5.V2S(), v3.S(), 0); 1294 __ mul(v29.V4H(), v11.V4H(), v2.V4H()); 1295 __ mul(v2.V4H(), v7.V4H(), v0.H(), 0); 1296 __ mul(v25.V4S(), v26.V4S(), v16.V4S()); 1297 __ mul(v26.V4S(), v6.V4S(), v15.S(), 2); 1298 __ mul(v11.V8B(), v15.V8B(), v31.V8B()); 1299 __ mul(v20.V8H(), v31.V8H(), v15.V8H()); 1300 __ mul(v29.V8H(), v5.V8H(), v9.H(), 4); 1301 __ mvn(v13.V16B(), v21.V16B()); 1302 __ mvn(v28.V8B(), v19.V8B()); 1303 __ mvni(v25.V2S(), 0xb8, LSL, 8); 1304 __ mvni(v17.V2S(), 0x6c, MSL, 16); 1305 __ mvni(v29.V4H(), 0x48, LSL); 1306 __ mvni(v20.V4S(), 0x7a, LSL, 16); 1307 __ mvni(v0.V4S(), 
0x1e, MSL, 8); 1308 __ mvni(v31.V8H(), 0x3e, LSL); 1309 __ neg(d25, d11); 1310 __ neg(v4.V16B(), v9.V16B()); 1311 __ neg(v11.V2D(), v25.V2D()); 1312 __ neg(v7.V2S(), v18.V2S()); 1313 __ neg(v7.V4H(), v15.V4H()); 1314 __ neg(v17.V4S(), v18.V4S()); 1315 __ neg(v20.V8B(), v17.V8B()); 1316 __ neg(v0.V8H(), v11.V8H()); 1317 __ orn(v13.V16B(), v11.V16B(), v31.V16B()); 1318 __ orn(v22.V8B(), v16.V8B(), v22.V8B()); 1319 __ orr(v17.V16B(), v17.V16B(), v23.V16B()); 1320 __ orr(v8.V2S(), 0xe3); 1321 __ orr(v11.V4H(), 0x97, 8); 1322 __ orr(v7.V4S(), 0xab); 1323 __ orr(v8.V8B(), v4.V8B(), v3.V8B()); 1324 __ orr(v31.V8H(), 0xb0, 8); 1325 __ pmul(v11.V16B(), v18.V16B(), v23.V16B()); 1326 __ pmul(v8.V8B(), v24.V8B(), v5.V8B()); 1327 __ pmull(v24.V8H(), v18.V8B(), v22.V8B()); 1328 __ pmull2(v13.V8H(), v3.V16B(), v21.V16B()); 1329 __ raddhn(v22.V2S(), v10.V2D(), v21.V2D()); 1330 __ raddhn(v5.V4H(), v13.V4S(), v13.V4S()); 1331 __ raddhn(v10.V8B(), v17.V8H(), v26.V8H()); 1332 __ raddhn2(v9.V16B(), v29.V8H(), v13.V8H()); 1333 __ raddhn2(v27.V4S(), v23.V2D(), v26.V2D()); 1334 __ raddhn2(v0.V8H(), v29.V4S(), v7.V4S()); 1335 __ rbit(v22.V16B(), v15.V16B()); 1336 __ rbit(v30.V8B(), v3.V8B()); 1337 __ rev16(v31.V16B(), v27.V16B()); 1338 __ rev16(v12.V8B(), v26.V8B()); 1339 __ rev32(v5.V16B(), v4.V16B()); 1340 __ rev32(v16.V4H(), v26.V4H()); 1341 __ rev32(v20.V8B(), v3.V8B()); 1342 __ rev32(v20.V8H(), v28.V8H()); 1343 __ rev64(v9.V16B(), v19.V16B()); 1344 __ rev64(v5.V2S(), v16.V2S()); 1345 __ rev64(v7.V4H(), v31.V4H()); 1346 __ rev64(v15.V4S(), v26.V4S()); 1347 __ rev64(v25.V8B(), v9.V8B()); 1348 __ rev64(v11.V8H(), v5.V8H()); 1349 __ rshrn(v18.V2S(), v13.V2D(), 1); 1350 __ rshrn(v25.V4H(), v30.V4S(), 2); 1351 __ rshrn(v13.V8B(), v9.V8H(), 8); 1352 __ rshrn2(v3.V16B(), v6.V8H(), 8); 1353 __ rshrn2(v0.V4S(), v29.V2D(), 25); 1354 __ rshrn2(v27.V8H(), v26.V4S(), 15); 1355 __ rsubhn(v15.V2S(), v25.V2D(), v4.V2D()); 1356 __ rsubhn(v23.V4H(), v9.V4S(), v3.V4S()); 1357 __ rsubhn(v6.V8B(), 
v30.V8H(), v24.V8H()); 1358 __ rsubhn2(v4.V16B(), v24.V8H(), v20.V8H()); 1359 __ rsubhn2(v1.V4S(), v23.V2D(), v22.V2D()); 1360 __ rsubhn2(v19.V8H(), v2.V4S(), v20.V4S()); 1361 __ saba(v28.V16B(), v9.V16B(), v25.V16B()); 1362 __ saba(v9.V2S(), v28.V2S(), v20.V2S()); 1363 __ saba(v17.V4H(), v22.V4H(), v22.V4H()); 1364 __ saba(v29.V4S(), v5.V4S(), v27.V4S()); 1365 __ saba(v20.V8B(), v21.V8B(), v18.V8B()); 1366 __ saba(v27.V8H(), v17.V8H(), v30.V8H()); 1367 __ sabal(v20.V2D(), v13.V2S(), v7.V2S()); 1368 __ sabal(v4.V4S(), v12.V4H(), v4.V4H()); 1369 __ sabal(v23.V8H(), v24.V8B(), v20.V8B()); 1370 __ sabal2(v26.V2D(), v21.V4S(), v18.V4S()); 1371 __ sabal2(v27.V4S(), v28.V8H(), v8.V8H()); 1372 __ sabal2(v12.V8H(), v16.V16B(), v21.V16B()); 1373 __ sabd(v0.V16B(), v15.V16B(), v13.V16B()); 1374 __ sabd(v15.V2S(), v7.V2S(), v30.V2S()); 1375 __ sabd(v17.V4H(), v17.V4H(), v12.V4H()); 1376 __ sabd(v7.V4S(), v4.V4S(), v22.V4S()); 1377 __ sabd(v23.V8B(), v3.V8B(), v26.V8B()); 1378 __ sabd(v20.V8H(), v28.V8H(), v5.V8H()); 1379 __ sabdl(v27.V2D(), v22.V2S(), v20.V2S()); 1380 __ sabdl(v31.V4S(), v20.V4H(), v23.V4H()); 1381 __ sabdl(v0.V8H(), v20.V8B(), v27.V8B()); 1382 __ sabdl2(v31.V2D(), v11.V4S(), v3.V4S()); 1383 __ sabdl2(v26.V4S(), v11.V8H(), v27.V8H()); 1384 __ sabdl2(v6.V8H(), v8.V16B(), v18.V16B()); 1385 __ sadalp(v8.V1D(), v26.V2S()); 1386 __ sadalp(v12.V2D(), v26.V4S()); 1387 __ sadalp(v12.V2S(), v26.V4H()); 1388 __ sadalp(v4.V4H(), v1.V8B()); 1389 __ sadalp(v15.V4S(), v17.V8H()); 1390 __ sadalp(v21.V8H(), v25.V16B()); 1391 __ saddl(v5.V2D(), v10.V2S(), v14.V2S()); 1392 __ saddl(v18.V4S(), v3.V4H(), v15.V4H()); 1393 __ saddl(v15.V8H(), v2.V8B(), v23.V8B()); 1394 __ saddl2(v16.V2D(), v16.V4S(), v27.V4S()); 1395 __ saddl2(v6.V4S(), v24.V8H(), v0.V8H()); 1396 __ saddl2(v7.V8H(), v20.V16B(), v28.V16B()); 1397 __ saddlp(v10.V1D(), v25.V2S()); 1398 __ saddlp(v15.V2D(), v16.V4S()); 1399 __ saddlp(v18.V2S(), v10.V4H()); 1400 __ saddlp(v29.V4H(), v26.V8B()); 1401 __ 
saddlp(v10.V4S(), v1.V8H()); 1402 __ saddlp(v0.V8H(), v21.V16B()); 1403 __ saddlv(d12, v7.V4S()); 1404 __ saddlv(h14, v28.V16B()); 1405 __ saddlv(h30, v30.V8B()); 1406 __ saddlv(s27, v3.V4H()); 1407 __ saddlv(s16, v16.V8H()); 1408 __ saddw(v24.V2D(), v11.V2D(), v18.V2S()); 1409 __ saddw(v13.V4S(), v12.V4S(), v6.V4H()); 1410 __ saddw(v19.V8H(), v19.V8H(), v7.V8B()); 1411 __ saddw2(v27.V2D(), v9.V2D(), v26.V4S()); 1412 __ saddw2(v19.V4S(), v23.V4S(), v21.V8H()); 1413 __ saddw2(v15.V8H(), v25.V8H(), v30.V16B()); 1414 __ shadd(v7.V16B(), v4.V16B(), v9.V16B()); 1415 __ shadd(v29.V2S(), v25.V2S(), v24.V2S()); 1416 __ shadd(v31.V4H(), v10.V4H(), v13.V4H()); 1417 __ shadd(v21.V4S(), v16.V4S(), v8.V4S()); 1418 __ shadd(v14.V8B(), v29.V8B(), v22.V8B()); 1419 __ shadd(v19.V8H(), v24.V8H(), v20.V8H()); 1420 __ shl(d22, d25, 23); 1421 __ shl(v5.V16B(), v17.V16B(), 7); 1422 __ shl(v2.V2D(), v4.V2D(), 21); 1423 __ shl(v4.V2S(), v3.V2S(), 26); 1424 __ shl(v3.V4H(), v28.V4H(), 8); 1425 __ shl(v4.V4S(), v31.V4S(), 24); 1426 __ shl(v18.V8B(), v16.V8B(), 2); 1427 __ shl(v0.V8H(), v11.V8H(), 3); 1428 __ shll(v5.V2D(), v24.V2S(), 32); 1429 __ shll(v26.V4S(), v20.V4H(), 16); 1430 __ shll(v5.V8H(), v9.V8B(), 8); 1431 __ shll2(v21.V2D(), v28.V4S(), 32); 1432 __ shll2(v22.V4S(), v1.V8H(), 16); 1433 __ shll2(v30.V8H(), v25.V16B(), 8); 1434 __ shrn(v5.V2S(), v1.V2D(), 28); 1435 __ shrn(v29.V4H(), v18.V4S(), 7); 1436 __ shrn(v17.V8B(), v29.V8H(), 2); 1437 __ shrn2(v5.V16B(), v30.V8H(), 3); 1438 __ shrn2(v24.V4S(), v1.V2D(), 1); 1439 __ shrn2(v5.V8H(), v14.V4S(), 16); 1440 __ shsub(v30.V16B(), v22.V16B(), v23.V16B()); 1441 __ shsub(v22.V2S(), v27.V2S(), v25.V2S()); 1442 __ shsub(v13.V4H(), v22.V4H(), v1.V4H()); 1443 __ shsub(v10.V4S(), v8.V4S(), v23.V4S()); 1444 __ shsub(v6.V8B(), v9.V8B(), v31.V8B()); 1445 __ shsub(v8.V8H(), v31.V8H(), v8.V8H()); 1446 __ sli(d19, d29, 20); 1447 __ sli(v9.V16B(), v24.V16B(), 0); 1448 __ sli(v22.V2D(), v9.V2D(), 10); 1449 __ sli(v11.V2S(), v27.V2S(), 20); 1450 
__ sli(v16.V4H(), v15.V4H(), 5); 1451 __ sli(v8.V4S(), v8.V4S(), 25); 1452 __ sli(v10.V8B(), v30.V8B(), 0); 1453 __ sli(v7.V8H(), v28.V8H(), 6); 1454 __ smax(v18.V16B(), v8.V16B(), v1.V16B()); 1455 __ smax(v30.V2S(), v5.V2S(), v1.V2S()); 1456 __ smax(v17.V4H(), v25.V4H(), v19.V4H()); 1457 __ smax(v1.V4S(), v24.V4S(), v31.V4S()); 1458 __ smax(v17.V8B(), v24.V8B(), v24.V8B()); 1459 __ smax(v11.V8H(), v26.V8H(), v10.V8H()); 1460 __ smaxp(v12.V16B(), v14.V16B(), v7.V16B()); 1461 __ smaxp(v31.V2S(), v24.V2S(), v6.V2S()); 1462 __ smaxp(v10.V4H(), v29.V4H(), v10.V4H()); 1463 __ smaxp(v18.V4S(), v11.V4S(), v7.V4S()); 1464 __ smaxp(v21.V8B(), v0.V8B(), v18.V8B()); 1465 __ smaxp(v26.V8H(), v8.V8H(), v15.V8H()); 1466 __ smaxv(b4, v5.V16B()); 1467 __ smaxv(b23, v0.V8B()); 1468 __ smaxv(h6, v0.V4H()); 1469 __ smaxv(h24, v8.V8H()); 1470 __ smaxv(s3, v16.V4S()); 1471 __ smin(v24.V16B(), v8.V16B(), v18.V16B()); 1472 __ smin(v29.V2S(), v8.V2S(), v23.V2S()); 1473 __ smin(v6.V4H(), v11.V4H(), v21.V4H()); 1474 __ smin(v24.V4S(), v23.V4S(), v15.V4S()); 1475 __ smin(v8.V8B(), v16.V8B(), v4.V8B()); 1476 __ smin(v12.V8H(), v1.V8H(), v10.V8H()); 1477 __ sminp(v13.V16B(), v18.V16B(), v28.V16B()); 1478 __ sminp(v22.V2S(), v28.V2S(), v16.V2S()); 1479 __ sminp(v15.V4H(), v12.V4H(), v5.V4H()); 1480 __ sminp(v15.V4S(), v17.V4S(), v8.V4S()); 1481 __ sminp(v21.V8B(), v2.V8B(), v6.V8B()); 1482 __ sminp(v21.V8H(), v12.V8H(), v6.V8H()); 1483 __ sminv(b8, v6.V16B()); 1484 __ sminv(b6, v18.V8B()); 1485 __ sminv(h20, v1.V4H()); 1486 __ sminv(h7, v17.V8H()); 1487 __ sminv(s21, v4.V4S()); 1488 __ smlal(v24.V2D(), v14.V2S(), v21.V2S()); 1489 __ smlal(v31.V2D(), v3.V2S(), v14.S(), 2); 1490 __ smlal(v7.V4S(), v20.V4H(), v21.V4H()); 1491 __ smlal(v19.V4S(), v16.V4H(), v9.H(), 3); 1492 __ smlal(v29.V8H(), v14.V8B(), v1.V8B()); 1493 __ smlal2(v30.V2D(), v26.V4S(), v16.V4S()); 1494 __ smlal2(v31.V2D(), v30.V4S(), v1.S(), 0); 1495 __ smlal2(v17.V4S(), v6.V8H(), v3.V8H()); 1496 __ smlal2(v11.V4S(), v31.V8H(), 
v5.H(), 7); 1497 __ smlal2(v30.V8H(), v16.V16B(), v29.V16B()); 1498 __ smlsl(v1.V2D(), v20.V2S(), v17.V2S()); 1499 __ smlsl(v29.V2D(), v12.V2S(), v5.S(), 3); 1500 __ smlsl(v0.V4S(), v26.V4H(), v1.V4H()); 1501 __ smlsl(v3.V4S(), v5.V4H(), v6.H(), 5); 1502 __ smlsl(v4.V8H(), v0.V8B(), v26.V8B()); 1503 __ smlsl2(v14.V2D(), v14.V4S(), v5.V4S()); 1504 __ smlsl2(v15.V2D(), v5.V4S(), v0.S(), 1); 1505 __ smlsl2(v29.V4S(), v17.V8H(), v31.V8H()); 1506 __ smlsl2(v6.V4S(), v15.V8H(), v9.H(), 6); 1507 __ smlsl2(v30.V8H(), v15.V16B(), v15.V16B()); 1508 __ smov(w21, v6.B(), 3); 1509 __ smov(w13, v26.H(), 7); 1510 __ smov(x24, v16.B(), 7); 1511 __ smov(x7, v4.H(), 3); 1512 __ smov(x29, v7.S(), 1); 1513 __ smull(v4.V2D(), v29.V2S(), v17.V2S()); 1514 __ smull(v30.V2D(), v21.V2S(), v6.S(), 2); 1515 __ smull(v23.V4S(), v5.V4H(), v23.V4H()); 1516 __ smull(v8.V4S(), v9.V4H(), v2.H(), 1); 1517 __ smull(v31.V8H(), v17.V8B(), v1.V8B()); 1518 __ smull2(v3.V2D(), v3.V4S(), v23.V4S()); 1519 __ smull2(v15.V2D(), v29.V4S(), v6.S(), 1); 1520 __ smull2(v19.V4S(), v20.V8H(), v30.V8H()); 1521 __ smull2(v6.V4S(), v10.V8H(), v7.H(), 4); 1522 __ smull2(v25.V8H(), v8.V16B(), v27.V16B()); 1523 __ sqabs(b3, b15); 1524 __ sqabs(d14, d9); 1525 __ sqabs(h31, h28); 1526 __ sqabs(s8, s0); 1527 __ sqabs(v14.V16B(), v7.V16B()); 1528 __ sqabs(v23.V2D(), v19.V2D()); 1529 __ sqabs(v10.V2S(), v24.V2S()); 1530 __ sqabs(v31.V4H(), v19.V4H()); 1531 __ sqabs(v23.V4S(), v0.V4S()); 1532 __ sqabs(v29.V8B(), v23.V8B()); 1533 __ sqabs(v17.V8H(), v21.V8H()); 1534 __ sqadd(b9, b23, b13); 1535 __ sqadd(d2, d25, d26); 1536 __ sqadd(h7, h29, h25); 1537 __ sqadd(s11, s7, s24); 1538 __ sqadd(v20.V16B(), v16.V16B(), v29.V16B()); 1539 __ sqadd(v23.V2D(), v30.V2D(), v28.V2D()); 1540 __ sqadd(v8.V2S(), v19.V2S(), v2.V2S()); 1541 __ sqadd(v20.V4H(), v12.V4H(), v31.V4H()); 1542 __ sqadd(v14.V4S(), v15.V4S(), v17.V4S()); 1543 __ sqadd(v2.V8B(), v29.V8B(), v13.V8B()); 1544 __ sqadd(v7.V8H(), v19.V8H(), v14.V8H()); 1545 __ sqdmlal(d15, s5, 
s30); 1546 __ sqdmlal(d24, s10, v2.S(), 3); 1547 __ sqdmlal(s9, h19, h8); 1548 __ sqdmlal(s14, h1, v12.H(), 3); 1549 __ sqdmlal(v30.V2D(), v5.V2S(), v31.V2S()); 1550 __ sqdmlal(v25.V2D(), v14.V2S(), v10.S(), 1); 1551 __ sqdmlal(v19.V4S(), v17.V4H(), v16.V4H()); 1552 __ sqdmlal(v8.V4S(), v5.V4H(), v8.H(), 1); 1553 __ sqdmlal2(v1.V2D(), v23.V4S(), v3.V4S()); 1554 __ sqdmlal2(v19.V2D(), v0.V4S(), v9.S(), 0); 1555 __ sqdmlal2(v26.V4S(), v22.V8H(), v11.V8H()); 1556 __ sqdmlal2(v6.V4S(), v28.V8H(), v13.H(), 4); 1557 __ sqdmlsl(d10, s29, s20); 1558 __ sqdmlsl(d10, s9, v10.S(), 1); 1559 __ sqdmlsl(s30, h9, h24); 1560 __ sqdmlsl(s13, h24, v6.H(), 1); 1561 __ sqdmlsl(v27.V2D(), v10.V2S(), v20.V2S()); 1562 __ sqdmlsl(v23.V2D(), v23.V2S(), v3.S(), 3); 1563 __ sqdmlsl(v7.V4S(), v17.V4H(), v29.V4H()); 1564 __ sqdmlsl(v22.V4S(), v21.V4H(), v3.H(), 4); 1565 __ sqdmlsl2(v12.V2D(), v7.V4S(), v22.V4S()); 1566 __ sqdmlsl2(v20.V2D(), v25.V4S(), v8.S(), 0); 1567 __ sqdmlsl2(v25.V4S(), v26.V8H(), v18.V8H()); 1568 __ sqdmlsl2(v25.V4S(), v19.V8H(), v5.H(), 0); 1569 __ sqdmulh(h17, h27, h12); 1570 __ sqdmulh(h16, h5, v11.H(), 0); 1571 __ sqdmulh(s1, s19, s16); 1572 __ sqdmulh(s1, s16, v2.S(), 0); 1573 __ sqdmulh(v28.V2S(), v1.V2S(), v8.V2S()); 1574 __ sqdmulh(v28.V2S(), v8.V2S(), v3.S(), 0); 1575 __ sqdmulh(v11.V4H(), v25.V4H(), v5.V4H()); 1576 __ sqdmulh(v30.V4H(), v14.V4H(), v8.H(), 5); 1577 __ sqdmulh(v25.V4S(), v21.V4S(), v13.V4S()); 1578 __ sqdmulh(v23.V4S(), v2.V4S(), v10.S(), 3); 1579 __ sqdmulh(v26.V8H(), v5.V8H(), v23.V8H()); 1580 __ sqdmulh(v4.V8H(), v22.V8H(), v4.H(), 3); 1581 __ sqdmull(d25, s2, s26); 1582 __ sqdmull(d30, s14, v5.S(), 1); 1583 __ sqdmull(s29, h18, h11); 1584 __ sqdmull(s11, h13, v7.H(), 6); 1585 __ sqdmull(v23.V2D(), v9.V2S(), v8.V2S()); 1586 __ sqdmull(v18.V2D(), v29.V2S(), v4.S(), 1); 1587 __ sqdmull(v17.V4S(), v24.V4H(), v7.V4H()); 1588 __ sqdmull(v8.V4S(), v15.V4H(), v5.H(), 1); 1589 __ sqdmull2(v28.V2D(), v14.V4S(), v2.V4S()); 1590 __ sqdmull2(v1.V2D(), 
v24.V4S(), v13.S(), 2); 1591 __ sqdmull2(v11.V4S(), v17.V8H(), v31.V8H()); 1592 __ sqdmull2(v1.V4S(), v20.V8H(), v11.H(), 3); 1593 __ sqneg(b2, b0); 1594 __ sqneg(d24, d2); 1595 __ sqneg(h29, h3); 1596 __ sqneg(s4, s9); 1597 __ sqneg(v14.V16B(), v29.V16B()); 1598 __ sqneg(v30.V2D(), v12.V2D()); 1599 __ sqneg(v28.V2S(), v26.V2S()); 1600 __ sqneg(v4.V4H(), v4.V4H()); 1601 __ sqneg(v9.V4S(), v8.V4S()); 1602 __ sqneg(v20.V8B(), v20.V8B()); 1603 __ sqneg(v27.V8H(), v10.V8H()); 1604 __ sqrdmulh(h7, h24, h0); 1605 __ sqrdmulh(h14, h3, v4.H(), 6); 1606 __ sqrdmulh(s27, s19, s24); 1607 __ sqrdmulh(s31, s21, v4.S(), 0); 1608 __ sqrdmulh(v18.V2S(), v25.V2S(), v1.V2S()); 1609 __ sqrdmulh(v22.V2S(), v5.V2S(), v13.S(), 0); 1610 __ sqrdmulh(v22.V4H(), v24.V4H(), v9.V4H()); 1611 __ sqrdmulh(v13.V4H(), v2.V4H(), v12.H(), 6); 1612 __ sqrdmulh(v9.V4S(), v27.V4S(), v2.V4S()); 1613 __ sqrdmulh(v3.V4S(), v23.V4S(), v7.S(), 1); 1614 __ sqrdmulh(v2.V8H(), v0.V8H(), v7.V8H()); 1615 __ sqrdmulh(v16.V8H(), v9.V8H(), v8.H(), 2); 1616 __ sqrshl(b8, b21, b13); 1617 __ sqrshl(d29, d7, d20); 1618 __ sqrshl(h28, h14, h10); 1619 __ sqrshl(s26, s18, s2); 1620 __ sqrshl(v18.V16B(), v31.V16B(), v26.V16B()); 1621 __ sqrshl(v28.V2D(), v4.V2D(), v0.V2D()); 1622 __ sqrshl(v3.V2S(), v6.V2S(), v0.V2S()); 1623 __ sqrshl(v1.V4H(), v18.V4H(), v22.V4H()); 1624 __ sqrshl(v16.V4S(), v25.V4S(), v7.V4S()); 1625 __ sqrshl(v0.V8B(), v21.V8B(), v5.V8B()); 1626 __ sqrshl(v30.V8H(), v19.V8H(), v8.V8H()); 1627 __ sqrshrn(b6, h21, 4); 1628 __ sqrshrn(h14, s17, 11); 1629 __ sqrshrn(s25, d27, 10); 1630 __ sqrshrn(v6.V2S(), v13.V2D(), 18); 1631 __ sqrshrn(v5.V4H(), v9.V4S(), 15); 1632 __ sqrshrn(v19.V8B(), v12.V8H(), 1); 1633 __ sqrshrn2(v19.V16B(), v21.V8H(), 7); 1634 __ sqrshrn2(v29.V4S(), v24.V2D(), 13); 1635 __ sqrshrn2(v12.V8H(), v2.V4S(), 10); 1636 __ sqrshrun(b16, h9, 5); 1637 __ sqrshrun(h3, s24, 15); 1638 __ sqrshrun(s16, d18, 8); 1639 __ sqrshrun(v28.V2S(), v23.V2D(), 8); 1640 __ sqrshrun(v31.V4H(), v25.V4S(), 10); 
1641 __ sqrshrun(v19.V8B(), v23.V8H(), 2); 1642 __ sqrshrun2(v24.V16B(), v0.V8H(), 8); 1643 __ sqrshrun2(v22.V4S(), v1.V2D(), 23); 1644 __ sqrshrun2(v28.V8H(), v21.V4S(), 13); 1645 __ sqshl(b6, b21, b8); 1646 __ sqshl(b11, b26, 2); 1647 __ sqshl(d29, d0, d4); 1648 __ sqshl(d21, d7, 35); 1649 __ sqshl(h20, h25, h17); 1650 __ sqshl(h20, h0, 8); 1651 __ sqshl(s29, s13, s4); 1652 __ sqshl(s10, s11, 20); 1653 __ sqshl(v8.V16B(), v18.V16B(), v28.V16B()); 1654 __ sqshl(v29.V16B(), v29.V16B(), 2); 1655 __ sqshl(v8.V2D(), v31.V2D(), v16.V2D()); 1656 __ sqshl(v7.V2D(), v14.V2D(), 37); 1657 __ sqshl(v0.V2S(), v26.V2S(), v7.V2S()); 1658 __ sqshl(v5.V2S(), v11.V2S(), 19); 1659 __ sqshl(v11.V4H(), v30.V4H(), v0.V4H()); 1660 __ sqshl(v1.V4H(), v18.V4H(), 7); 1661 __ sqshl(v22.V4S(), v3.V4S(), v30.V4S()); 1662 __ sqshl(v16.V4S(), v15.V4S(), 28); 1663 __ sqshl(v6.V8B(), v28.V8B(), v25.V8B()); 1664 __ sqshl(v0.V8B(), v15.V8B(), 0); 1665 __ sqshl(v6.V8H(), v16.V8H(), v30.V8H()); 1666 __ sqshl(v3.V8H(), v20.V8H(), 14); 1667 __ sqshlu(b13, b14, 6); 1668 __ sqshlu(d0, d16, 44); 1669 __ sqshlu(h5, h29, 15); 1670 __ sqshlu(s29, s8, 13); 1671 __ sqshlu(v27.V16B(), v20.V16B(), 2); 1672 __ sqshlu(v24.V2D(), v12.V2D(), 11); 1673 __ sqshlu(v12.V2S(), v19.V2S(), 22); 1674 __ sqshlu(v8.V4H(), v12.V4H(), 11); 1675 __ sqshlu(v18.V4S(), v3.V4S(), 8); 1676 __ sqshlu(v3.V8B(), v10.V8B(), 1); 1677 __ sqshlu(v30.V8H(), v24.V8H(), 4); 1678 __ sqshrn(b1, h28, 1); 1679 __ sqshrn(h31, s7, 10); 1680 __ sqshrn(s4, d10, 24); 1681 __ sqshrn(v10.V2S(), v1.V2D(), 29); 1682 __ sqshrn(v3.V4H(), v13.V4S(), 14); 1683 __ sqshrn(v27.V8B(), v6.V8H(), 7); 1684 __ sqshrn2(v14.V16B(), v23.V8H(), 1); 1685 __ sqshrn2(v25.V4S(), v22.V2D(), 27); 1686 __ sqshrn2(v31.V8H(), v12.V4S(), 10); 1687 __ sqshrun(b9, h0, 1); 1688 __ sqshrun(h11, s6, 7); 1689 __ sqshrun(s13, d12, 13); 1690 __ sqshrun(v10.V2S(), v30.V2D(), 1); 1691 __ sqshrun(v31.V4H(), v3.V4S(), 11); 1692 __ sqshrun(v28.V8B(), v30.V8H(), 8); 1693 __ sqshrun2(v16.V16B(), 
v27.V8H(), 3); 1694 __ sqshrun2(v27.V4S(), v14.V2D(), 18); 1695 __ sqshrun2(v23.V8H(), v14.V4S(), 1); 1696 __ sqsub(b19, b29, b11); 1697 __ sqsub(d21, d31, d6); 1698 __ sqsub(h18, h10, h19); 1699 __ sqsub(s6, s5, s0); 1700 __ sqsub(v21.V16B(), v22.V16B(), v0.V16B()); 1701 __ sqsub(v22.V2D(), v10.V2D(), v17.V2D()); 1702 __ sqsub(v8.V2S(), v21.V2S(), v2.V2S()); 1703 __ sqsub(v18.V4H(), v25.V4H(), v27.V4H()); 1704 __ sqsub(v13.V4S(), v3.V4S(), v6.V4S()); 1705 __ sqsub(v28.V8B(), v29.V8B(), v16.V8B()); 1706 __ sqsub(v17.V8H(), v6.V8H(), v10.V8H()); 1707 __ sqxtn(b27, h26); 1708 __ sqxtn(h17, s11); 1709 __ sqxtn(s22, d31); 1710 __ sqxtn(v26.V2S(), v5.V2D()); 1711 __ sqxtn(v13.V4H(), v7.V4S()); 1712 __ sqxtn(v19.V8B(), v19.V8H()); 1713 __ sqxtn2(v19.V16B(), v3.V8H()); 1714 __ sqxtn2(v23.V4S(), v1.V2D()); 1715 __ sqxtn2(v13.V8H(), v3.V4S()); 1716 __ sqxtun(b26, h9); 1717 __ sqxtun(h19, s12); 1718 __ sqxtun(s3, d6); 1719 __ sqxtun(v29.V2S(), v26.V2D()); 1720 __ sqxtun(v26.V4H(), v10.V4S()); 1721 __ sqxtun(v7.V8B(), v29.V8H()); 1722 __ sqxtun2(v21.V16B(), v14.V8H()); 1723 __ sqxtun2(v24.V4S(), v15.V2D()); 1724 __ sqxtun2(v30.V8H(), v1.V4S()); 1725 __ srhadd(v21.V16B(), v17.V16B(), v15.V16B()); 1726 __ srhadd(v28.V2S(), v21.V2S(), v29.V2S()); 1727 __ srhadd(v9.V4H(), v1.V4H(), v30.V4H()); 1728 __ srhadd(v24.V4S(), v0.V4S(), v2.V4S()); 1729 __ srhadd(v6.V8B(), v17.V8B(), v15.V8B()); 1730 __ srhadd(v5.V8H(), v7.V8H(), v21.V8H()); 1731 __ sri(d14, d14, 49); 1732 __ sri(v23.V16B(), v8.V16B(), 4); 1733 __ sri(v20.V2D(), v13.V2D(), 20); 1734 __ sri(v16.V2S(), v2.V2S(), 24); 1735 __ sri(v5.V4H(), v23.V4H(), 11); 1736 __ sri(v27.V4S(), v15.V4S(), 23); 1737 __ sri(v19.V8B(), v29.V8B(), 4); 1738 __ sri(v7.V8H(), v29.V8H(), 3); 1739 __ srshl(d2, d9, d26); 1740 __ srshl(v29.V16B(), v17.V16B(), v11.V16B()); 1741 __ srshl(v8.V2D(), v15.V2D(), v4.V2D()); 1742 __ srshl(v25.V2S(), v17.V2S(), v8.V2S()); 1743 __ srshl(v19.V4H(), v7.V4H(), v7.V4H()); 1744 __ srshl(v13.V4S(), v2.V4S(), 
v17.V4S()); 1745 __ srshl(v22.V8B(), v6.V8B(), v21.V8B()); 1746 __ srshl(v10.V8H(), v17.V8H(), v4.V8H()); 1747 __ srshr(d21, d18, 45); 1748 __ srshr(v3.V16B(), v11.V16B(), 7); 1749 __ srshr(v21.V2D(), v26.V2D(), 53); 1750 __ srshr(v11.V2S(), v5.V2S(), 28); 1751 __ srshr(v7.V4H(), v18.V4H(), 12); 1752 __ srshr(v7.V4S(), v3.V4S(), 30); 1753 __ srshr(v14.V8B(), v2.V8B(), 6); 1754 __ srshr(v21.V8H(), v20.V8H(), 3); 1755 __ srsra(d21, d30, 63); 1756 __ srsra(v27.V16B(), v30.V16B(), 6); 1757 __ srsra(v20.V2D(), v12.V2D(), 27); 1758 __ srsra(v0.V2S(), v17.V2S(), 5); 1759 __ srsra(v14.V4H(), v16.V4H(), 15); 1760 __ srsra(v18.V4S(), v3.V4S(), 20); 1761 __ srsra(v21.V8B(), v1.V8B(), 1); 1762 __ srsra(v31.V8H(), v25.V8H(), 2); 1763 __ sshl(d1, d13, d9); 1764 __ sshl(v17.V16B(), v31.V16B(), v15.V16B()); 1765 __ sshl(v13.V2D(), v16.V2D(), v0.V2D()); 1766 __ sshl(v0.V2S(), v7.V2S(), v22.V2S()); 1767 __ sshl(v23.V4H(), v19.V4H(), v4.V4H()); 1768 __ sshl(v5.V4S(), v5.V4S(), v11.V4S()); 1769 __ sshl(v23.V8B(), v27.V8B(), v7.V8B()); 1770 __ sshl(v29.V8H(), v10.V8H(), v5.V8H()); 1771 __ sshll(v0.V2D(), v2.V2S(), 23); 1772 __ sshll(v11.V4S(), v8.V4H(), 8); 1773 __ sshll(v4.V8H(), v29.V8B(), 1); 1774 __ sshll2(v10.V2D(), v4.V4S(), 14); 1775 __ sshll2(v26.V4S(), v31.V8H(), 6); 1776 __ sshll2(v3.V8H(), v26.V16B(), 4); 1777 __ sshr(d19, d21, 20); 1778 __ sshr(v15.V16B(), v23.V16B(), 5); 1779 __ sshr(v17.V2D(), v14.V2D(), 38); 1780 __ sshr(v3.V2S(), v29.V2S(), 23); 1781 __ sshr(v23.V4H(), v27.V4H(), 4); 1782 __ sshr(v28.V4S(), v3.V4S(), 4); 1783 __ sshr(v14.V8B(), v2.V8B(), 6); 1784 __ sshr(v3.V8H(), v8.V8H(), 6); 1785 __ ssra(d12, d28, 44); 1786 __ ssra(v29.V16B(), v31.V16B(), 4); 1787 __ ssra(v3.V2D(), v0.V2D(), 24); 1788 __ ssra(v14.V2S(), v28.V2S(), 6); 1789 __ ssra(v18.V4H(), v8.V4H(), 7); 1790 __ ssra(v31.V4S(), v14.V4S(), 24); 1791 __ ssra(v28.V8B(), v26.V8B(), 5); 1792 __ ssra(v9.V8H(), v9.V8H(), 14); 1793 __ ssubl(v13.V2D(), v14.V2S(), v3.V2S()); 1794 __ ssubl(v5.V4S(), v16.V4H(), 
v8.V4H()); 1795 __ ssubl(v0.V8H(), v28.V8B(), v6.V8B()); 1796 __ ssubl2(v5.V2D(), v13.V4S(), v25.V4S()); 1797 __ ssubl2(v3.V4S(), v15.V8H(), v17.V8H()); 1798 __ ssubl2(v15.V8H(), v15.V16B(), v14.V16B()); 1799 __ ssubw(v25.V2D(), v23.V2D(), v26.V2S()); 1800 __ ssubw(v21.V4S(), v18.V4S(), v24.V4H()); 1801 __ ssubw(v30.V8H(), v22.V8H(), v3.V8B()); 1802 __ ssubw2(v16.V2D(), v24.V2D(), v28.V4S()); 1803 __ ssubw2(v31.V4S(), v11.V4S(), v15.V8H()); 1804 __ ssubw2(v4.V8H(), v8.V8H(), v16.V16B()); 1805 __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0)); 1806 __ st1(v10.V16B(), 1807 v11.V16B(), 1808 v12.V16B(), 1809 v13.V16B(), 1810 MemOperand(x1, x2, PostIndex)); 1811 __ st1(v27.V16B(), 1812 v28.V16B(), 1813 v29.V16B(), 1814 v30.V16B(), 1815 MemOperand(x1, 64, PostIndex)); 1816 __ st1(v16.V16B(), v17.V16B(), v18.V16B(), MemOperand(x0)); 1817 __ st1(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex)); 1818 __ st1(v9.V16B(), v10.V16B(), v11.V16B(), MemOperand(x1, 48, PostIndex)); 1819 __ st1(v7.V16B(), v8.V16B(), MemOperand(x0)); 1820 __ st1(v26.V16B(), v27.V16B(), MemOperand(x1, x2, PostIndex)); 1821 __ st1(v22.V16B(), v23.V16B(), MemOperand(x1, 32, PostIndex)); 1822 __ st1(v23.V16B(), MemOperand(x0)); 1823 __ st1(v28.V16B(), MemOperand(x1, x2, PostIndex)); 1824 __ st1(v2.V16B(), MemOperand(x1, 16, PostIndex)); 1825 __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), MemOperand(x0)); 1826 __ st1(v12.V1D(), 1827 v13.V1D(), 1828 v14.V1D(), 1829 v15.V1D(), 1830 MemOperand(x1, x2, PostIndex)); 1831 __ st1(v30.V1D(), 1832 v31.V1D(), 1833 v0.V1D(), 1834 v1.V1D(), 1835 MemOperand(x1, 32, PostIndex)); 1836 __ st1(v16.V1D(), v17.V1D(), v18.V1D(), MemOperand(x0)); 1837 __ st1(v3.V1D(), v4.V1D(), v5.V1D(), MemOperand(x1, x2, PostIndex)); 1838 __ st1(v14.V1D(), v15.V1D(), v16.V1D(), MemOperand(x1, 24, PostIndex)); 1839 __ st1(v18.V1D(), v19.V1D(), MemOperand(x0)); 1840 __ st1(v5.V1D(), v6.V1D(), MemOperand(x1, x2, PostIndex)); 1841 __ st1(v2.V1D(), 
v3.V1D(), MemOperand(x1, 16, PostIndex)); 1842 __ st1(v4.V1D(), MemOperand(x0)); 1843 __ st1(v27.V1D(), MemOperand(x1, x2, PostIndex)); 1844 __ st1(v23.V1D(), MemOperand(x1, 8, PostIndex)); 1845 __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), MemOperand(x0)); 1846 __ st1(v22.V2D(), 1847 v23.V2D(), 1848 v24.V2D(), 1849 v25.V2D(), 1850 MemOperand(x1, x2, PostIndex)); 1851 __ st1(v28.V2D(), 1852 v29.V2D(), 1853 v30.V2D(), 1854 v31.V2D(), 1855 MemOperand(x1, 64, PostIndex)); 1856 __ st1(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); 1857 __ st1(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x1, x2, PostIndex)); 1858 __ st1(v22.V2D(), v23.V2D(), v24.V2D(), MemOperand(x1, 48, PostIndex)); 1859 __ st1(v21.V2D(), v22.V2D(), MemOperand(x0)); 1860 __ st1(v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); 1861 __ st1(v27.V2D(), v28.V2D(), MemOperand(x1, 32, PostIndex)); 1862 __ st1(v21.V2D(), MemOperand(x0)); 1863 __ st1(v29.V2D(), MemOperand(x1, x2, PostIndex)); 1864 __ st1(v20.V2D(), MemOperand(x1, 16, PostIndex)); 1865 __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x0)); 1866 __ st1(v8.V2S(), 1867 v9.V2S(), 1868 v10.V2S(), 1869 v11.V2S(), 1870 MemOperand(x1, x2, PostIndex)); 1871 __ st1(v15.V2S(), 1872 v16.V2S(), 1873 v17.V2S(), 1874 v18.V2S(), 1875 MemOperand(x1, 32, PostIndex)); 1876 __ st1(v2.V2S(), v3.V2S(), v4.V2S(), MemOperand(x0)); 1877 __ st1(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, x2, PostIndex)); 1878 __ st1(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x1, 24, PostIndex)); 1879 __ st1(v28.V2S(), v29.V2S(), MemOperand(x0)); 1880 __ st1(v29.V2S(), v30.V2S(), MemOperand(x1, x2, PostIndex)); 1881 __ st1(v23.V2S(), v24.V2S(), MemOperand(x1, 16, PostIndex)); 1882 __ st1(v6.V2S(), MemOperand(x0)); 1883 __ st1(v11.V2S(), MemOperand(x1, x2, PostIndex)); 1884 __ st1(v17.V2S(), MemOperand(x1, 8, PostIndex)); 1885 __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x0)); 1886 __ st1(v9.V4H(), 1887 v10.V4H(), 1888 v11.V4H(), 1889 v12.V4H(), 1890 
MemOperand(x1, x2, PostIndex)); 1891 __ st1(v25.V4H(), 1892 v26.V4H(), 1893 v27.V4H(), 1894 v28.V4H(), 1895 MemOperand(x1, 32, PostIndex)); 1896 __ st1(v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x0)); 1897 __ st1(v10.V4H(), v11.V4H(), v12.V4H(), MemOperand(x1, x2, PostIndex)); 1898 __ st1(v12.V4H(), v13.V4H(), v14.V4H(), MemOperand(x1, 24, PostIndex)); 1899 __ st1(v13.V4H(), v14.V4H(), MemOperand(x0)); 1900 __ st1(v15.V4H(), v16.V4H(), MemOperand(x1, x2, PostIndex)); 1901 __ st1(v21.V4H(), v22.V4H(), MemOperand(x1, 16, PostIndex)); 1902 __ st1(v16.V4H(), MemOperand(x0)); 1903 __ st1(v8.V4H(), MemOperand(x1, x2, PostIndex)); 1904 __ st1(v30.V4H(), MemOperand(x1, 8, PostIndex)); 1905 __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), MemOperand(x0)); 1906 __ st1(v25.V4S(), 1907 v26.V4S(), 1908 v27.V4S(), 1909 v28.V4S(), 1910 MemOperand(x1, x2, PostIndex)); 1911 __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 64, PostIndex)); 1912 __ st1(v31.V4S(), v0.V4S(), v1.V4S(), MemOperand(x0)); 1913 __ st1(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex)); 1914 __ st1(v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 48, PostIndex)); 1915 __ st1(v17.V4S(), v18.V4S(), MemOperand(x0)); 1916 __ st1(v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex)); 1917 __ st1(v1.V4S(), v2.V4S(), MemOperand(x1, 32, PostIndex)); 1918 __ st1(v26.V4S(), MemOperand(x0)); 1919 __ st1(v15.V4S(), MemOperand(x1, x2, PostIndex)); 1920 __ st1(v13.V4S(), MemOperand(x1, 16, PostIndex)); 1921 __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); 1922 __ st1(v10.V8B(), 1923 v11.V8B(), 1924 v12.V8B(), 1925 v13.V8B(), 1926 MemOperand(x1, x2, PostIndex)); 1927 __ st1(v15.V8B(), 1928 v16.V8B(), 1929 v17.V8B(), 1930 v18.V8B(), 1931 MemOperand(x1, 32, PostIndex)); 1932 __ st1(v19.V8B(), v20.V8B(), v21.V8B(), MemOperand(x0)); 1933 __ st1(v31.V8B(), v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex)); 1934 __ st1(v9.V8B(), v10.V8B(), v11.V8B(), MemOperand(x1, 24, PostIndex)); 1935 __ 
st1(v12.V8B(), v13.V8B(), MemOperand(x0)); 1936 __ st1(v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex)); 1937 __ st1(v0.V8B(), v1.V8B(), MemOperand(x1, 16, PostIndex)); 1938 __ st1(v16.V8B(), MemOperand(x0)); 1939 __ st1(v25.V8B(), MemOperand(x1, x2, PostIndex)); 1940 __ st1(v31.V8B(), MemOperand(x1, 8, PostIndex)); 1941 __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x0)); 1942 __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), MemOperand(x1, x2, PostIndex)); 1943 __ st1(v26.V8H(), 1944 v27.V8H(), 1945 v28.V8H(), 1946 v29.V8H(), 1947 MemOperand(x1, 64, PostIndex)); 1948 __ st1(v10.V8H(), v11.V8H(), v12.V8H(), MemOperand(x0)); 1949 __ st1(v21.V8H(), v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex)); 1950 __ st1(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex)); 1951 __ st1(v26.V8H(), v27.V8H(), MemOperand(x0)); 1952 __ st1(v24.V8H(), v25.V8H(), MemOperand(x1, x2, PostIndex)); 1953 __ st1(v17.V8H(), v18.V8H(), MemOperand(x1, 32, PostIndex)); 1954 __ st1(v29.V8H(), MemOperand(x0)); 1955 __ st1(v19.V8H(), MemOperand(x1, x2, PostIndex)); 1956 __ st1(v23.V8H(), MemOperand(x1, 16, PostIndex)); 1957 __ st1(v19.B(), 15, MemOperand(x0)); 1958 __ st1(v25.B(), 9, MemOperand(x1, x2, PostIndex)); 1959 __ st1(v4.B(), 8, MemOperand(x1, 1, PostIndex)); 1960 __ st1(v13.D(), 0, MemOperand(x0)); 1961 __ st1(v30.D(), 0, MemOperand(x1, x2, PostIndex)); 1962 __ st1(v3.D(), 0, MemOperand(x1, 8, PostIndex)); 1963 __ st1(v22.H(), 0, MemOperand(x0)); 1964 __ st1(v31.H(), 7, MemOperand(x1, x2, PostIndex)); 1965 __ st1(v23.H(), 3, MemOperand(x1, 2, PostIndex)); 1966 __ st1(v0.S(), 0, MemOperand(x0)); 1967 __ st1(v11.S(), 3, MemOperand(x1, x2, PostIndex)); 1968 __ st1(v24.S(), 3, MemOperand(x1, 4, PostIndex)); 1969 __ st2(v7.V16B(), v8.V16B(), MemOperand(x0)); 1970 __ st2(v5.V16B(), v6.V16B(), MemOperand(x1, x2, PostIndex)); 1971 __ st2(v18.V16B(), v19.V16B(), MemOperand(x1, 32, PostIndex)); 1972 __ st2(v14.V2D(), v15.V2D(), MemOperand(x0)); 1973 __ st2(v7.V2D(), 
v8.V2D(), MemOperand(x1, x2, PostIndex)); 1974 __ st2(v24.V2D(), v25.V2D(), MemOperand(x1, 32, PostIndex)); 1975 __ st2(v22.V2S(), v23.V2S(), MemOperand(x0)); 1976 __ st2(v4.V2S(), v5.V2S(), MemOperand(x1, x2, PostIndex)); 1977 __ st2(v2.V2S(), v3.V2S(), MemOperand(x1, 16, PostIndex)); 1978 __ st2(v23.V4H(), v24.V4H(), MemOperand(x0)); 1979 __ st2(v8.V4H(), v9.V4H(), MemOperand(x1, x2, PostIndex)); 1980 __ st2(v7.V4H(), v8.V4H(), MemOperand(x1, 16, PostIndex)); 1981 __ st2(v17.V4S(), v18.V4S(), MemOperand(x0)); 1982 __ st2(v6.V4S(), v7.V4S(), MemOperand(x1, x2, PostIndex)); 1983 __ st2(v26.V4S(), v27.V4S(), MemOperand(x1, 32, PostIndex)); 1984 __ st2(v31.V8B(), v0.V8B(), MemOperand(x0)); 1985 __ st2(v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex)); 1986 __ st2(v21.V8B(), v22.V8B(), MemOperand(x1, 16, PostIndex)); 1987 __ st2(v7.V8H(), v8.V8H(), MemOperand(x0)); 1988 __ st2(v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex)); 1989 __ st2(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex)); 1990 __ st2(v8.B(), v9.B(), 15, MemOperand(x0)); 1991 __ st2(v8.B(), v9.B(), 15, MemOperand(x1, x2, PostIndex)); 1992 __ st2(v7.B(), v8.B(), 4, MemOperand(x1, 2, PostIndex)); 1993 __ st2(v25.D(), v26.D(), 0, MemOperand(x0)); 1994 __ st2(v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex)); 1995 __ st2(v3.D(), v4.D(), 1, MemOperand(x1, 16, PostIndex)); 1996 __ st2(v4.H(), v5.H(), 3, MemOperand(x0)); 1997 __ st2(v0.H(), v1.H(), 5, MemOperand(x1, x2, PostIndex)); 1998 __ st2(v22.H(), v23.H(), 2, MemOperand(x1, 4, PostIndex)); 1999 __ st2(v14.S(), v15.S(), 3, MemOperand(x0)); 2000 __ st2(v23.S(), v24.S(), 3, MemOperand(x1, x2, PostIndex)); 2001 __ st2(v0.S(), v1.S(), 2, MemOperand(x1, 8, PostIndex)); 2002 __ st3(v26.V16B(), v27.V16B(), v28.V16B(), MemOperand(x0)); 2003 __ st3(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex)); 2004 __ st3(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, 48, PostIndex)); 2005 __ st3(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); 
2006 __ st3(v23.V2D(), v24.V2D(), v25.V2D(), MemOperand(x1, x2, PostIndex)); 2007 __ st3(v10.V2D(), v11.V2D(), v12.V2D(), MemOperand(x1, 48, PostIndex)); 2008 __ st3(v9.V2S(), v10.V2S(), v11.V2S(), MemOperand(x0)); 2009 __ st3(v13.V2S(), v14.V2S(), v15.V2S(), MemOperand(x1, x2, PostIndex)); 2010 __ st3(v22.V2S(), v23.V2S(), v24.V2S(), MemOperand(x1, 24, PostIndex)); 2011 __ st3(v31.V4H(), v0.V4H(), v1.V4H(), MemOperand(x0)); 2012 __ st3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex)); 2013 __ st3(v19.V4H(), v20.V4H(), v21.V4H(), MemOperand(x1, 24, PostIndex)); 2014 __ st3(v18.V4S(), v19.V4S(), v20.V4S(), MemOperand(x0)); 2015 __ st3(v25.V4S(), v26.V4S(), v27.V4S(), MemOperand(x1, x2, PostIndex)); 2016 __ st3(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x1, 48, PostIndex)); 2017 __ st3(v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); 2018 __ st3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x1, x2, PostIndex)); 2019 __ st3(v30.V8B(), v31.V8B(), v0.V8B(), MemOperand(x1, 24, PostIndex)); 2020 __ st3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x0)); 2021 __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, x2, PostIndex)); 2022 __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex)); 2023 __ st3(v31.B(), v0.B(), v1.B(), 10, MemOperand(x0)); 2024 __ st3(v4.B(), v5.B(), v6.B(), 5, MemOperand(x1, x2, PostIndex)); 2025 __ st3(v5.B(), v6.B(), v7.B(), 1, MemOperand(x1, 3, PostIndex)); 2026 __ st3(v5.D(), v6.D(), v7.D(), 0, MemOperand(x0)); 2027 __ st3(v6.D(), v7.D(), v8.D(), 0, MemOperand(x1, x2, PostIndex)); 2028 __ st3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x1, 24, PostIndex)); 2029 __ st3(v31.H(), v0.H(), v1.H(), 2, MemOperand(x0)); 2030 __ st3(v14.H(), v15.H(), v16.H(), 5, MemOperand(x1, x2, PostIndex)); 2031 __ st3(v21.H(), v22.H(), v23.H(), 6, MemOperand(x1, 6, PostIndex)); 2032 __ st3(v21.S(), v22.S(), v23.S(), 0, MemOperand(x0)); 2033 __ st3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x1, x2, PostIndex)); 2034 __ st3(v15.S(), v16.S(), v17.S(), 
0, MemOperand(x1, 12, PostIndex)); 2035 __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), MemOperand(x0)); 2036 __ st4(v24.V16B(), 2037 v25.V16B(), 2038 v26.V16B(), 2039 v27.V16B(), 2040 MemOperand(x1, x2, PostIndex)); 2041 __ st4(v15.V16B(), 2042 v16.V16B(), 2043 v17.V16B(), 2044 v18.V16B(), 2045 MemOperand(x1, 64, PostIndex)); 2046 __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); 2047 __ st4(v17.V2D(), 2048 v18.V2D(), 2049 v19.V2D(), 2050 v20.V2D(), 2051 MemOperand(x1, x2, PostIndex)); 2052 __ st4(v9.V2D(), 2053 v10.V2D(), 2054 v11.V2D(), 2055 v12.V2D(), 2056 MemOperand(x1, 64, PostIndex)); 2057 __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), MemOperand(x0)); 2058 __ st4(v15.V2S(), 2059 v16.V2S(), 2060 v17.V2S(), 2061 v18.V2S(), 2062 MemOperand(x1, x2, PostIndex)); 2063 __ st4(v24.V2S(), 2064 v25.V2S(), 2065 v26.V2S(), 2066 v27.V2S(), 2067 MemOperand(x1, 32, PostIndex)); 2068 __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), MemOperand(x0)); 2069 __ st4(v18.V4H(), 2070 v19.V4H(), 2071 v20.V4H(), 2072 v21.V4H(), 2073 MemOperand(x1, x2, PostIndex)); 2074 __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex)); 2075 __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), MemOperand(x0)); 2076 __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), MemOperand(x1, x2, PostIndex)); 2077 __ st4(v15.V4S(), 2078 v16.V4S(), 2079 v17.V4S(), 2080 v18.V4S(), 2081 MemOperand(x1, 64, PostIndex)); 2082 __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); 2083 __ st4(v25.V8B(), 2084 v26.V8B(), 2085 v27.V8B(), 2086 v28.V8B(), 2087 MemOperand(x1, x2, PostIndex)); 2088 __ st4(v19.V8B(), 2089 v20.V8B(), 2090 v21.V8B(), 2091 v22.V8B(), 2092 MemOperand(x1, 32, PostIndex)); 2093 __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), MemOperand(x0)); 2094 __ st4(v15.V8H(), 2095 v16.V8H(), 2096 v17.V8H(), 2097 v18.V8H(), 2098 MemOperand(x1, x2, PostIndex)); 2099 __ st4(v31.V8H(), 2100 v0.V8H(), 2101 v1.V8H(), 2102 v2.V8H(), 2103 
MemOperand(x1, 64, PostIndex)); 2104 __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, MemOperand(x0)); 2105 __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, MemOperand(x1, x2, PostIndex)); 2106 __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, MemOperand(x1, 4, PostIndex)); 2107 __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, MemOperand(x0)); 2108 __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, MemOperand(x1, x2, PostIndex)); 2109 __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, MemOperand(x1, 32, PostIndex)); 2110 __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, MemOperand(x0)); 2111 __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, MemOperand(x1, x2, PostIndex)); 2112 __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, MemOperand(x1, 8, PostIndex)); 2113 __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, MemOperand(x0)); 2114 __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, MemOperand(x1, x2, PostIndex)); 2115 __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, MemOperand(x1, 16, PostIndex)); 2116 __ sub(d12, d17, d2); 2117 __ sub(v20.V16B(), v24.V16B(), v8.V16B()); 2118 __ sub(v8.V2D(), v29.V2D(), v5.V2D()); 2119 __ sub(v2.V2S(), v28.V2S(), v24.V2S()); 2120 __ sub(v24.V4H(), v10.V4H(), v4.V4H()); 2121 __ sub(v28.V4S(), v4.V4S(), v17.V4S()); 2122 __ sub(v16.V8B(), v27.V8B(), v2.V8B()); 2123 __ sub(v20.V8H(), v10.V8H(), v13.V8H()); 2124 __ subhn(v5.V2S(), v14.V2D(), v13.V2D()); 2125 __ subhn(v10.V4H(), v5.V4S(), v8.V4S()); 2126 __ subhn(v6.V8B(), v10.V8H(), v22.V8H()); 2127 __ subhn2(v11.V16B(), v6.V8H(), v9.V8H()); 2128 __ subhn2(v25.V4S(), v18.V2D(), v24.V2D()); 2129 __ subhn2(v20.V8H(), v21.V4S(), v1.V4S()); 2130 __ suqadd(b25, b11); 2131 __ suqadd(d13, d1); 2132 __ suqadd(h0, h9); 2133 __ suqadd(s22, s8); 2134 __ suqadd(v24.V16B(), v27.V16B()); 2135 __ suqadd(v26.V2D(), v14.V2D()); 2136 __ suqadd(v7.V2S(), v10.V2S()); 2137 __ suqadd(v25.V4H(), v12.V4H()); 2138 __ suqadd(v4.V4S(), v3.V4S()); 2139 __ suqadd(v14.V8B(), v18.V8B()); 2140 __ suqadd(v31.V8H(), v8.V8H()); 2141 __ sxtl(v16.V2D(), v20.V2S()); 2142 __ sxtl(v27.V4S(), 
v28.V4H()); 2143 __ sxtl(v0.V8H(), v22.V8B()); 2144 __ sxtl2(v6.V2D(), v7.V4S()); 2145 __ sxtl2(v9.V4S(), v27.V8H()); 2146 __ sxtl2(v16.V8H(), v16.V16B()); 2147 __ tbl(v25.V16B(), 2148 v17.V16B(), 2149 v18.V16B(), 2150 v19.V16B(), 2151 v20.V16B(), 2152 v22.V16B()); 2153 __ tbl(v28.V16B(), v13.V16B(), v14.V16B(), v15.V16B(), v4.V16B()); 2154 __ tbl(v3.V16B(), v0.V16B(), v1.V16B(), v2.V16B()); 2155 __ tbl(v20.V16B(), v15.V16B(), v4.V16B()); 2156 __ tbl(v7.V8B(), v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), v20.V8B()); 2157 __ tbl(v8.V8B(), v1.V16B(), v2.V16B(), v3.V16B(), v31.V8B()); 2158 __ tbl(v8.V8B(), v25.V16B(), v26.V16B(), v16.V8B()); 2159 __ tbl(v11.V8B(), v19.V16B(), v30.V8B()); 2160 __ tbx(v25.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v5.V16B()); 2161 __ tbx(v21.V16B(), v29.V16B(), v30.V16B(), v31.V16B(), v24.V16B()); 2162 __ tbx(v6.V16B(), v16.V16B(), v17.V16B(), v1.V16B()); 2163 __ tbx(v13.V16B(), v3.V16B(), v20.V16B()); 2164 __ tbx(v24.V8B(), v29.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v9.V8B()); 2165 __ tbx(v17.V8B(), v9.V16B(), v10.V16B(), v11.V16B(), v26.V8B()); 2166 __ tbx(v5.V8B(), v3.V16B(), v4.V16B(), v21.V8B()); 2167 __ tbx(v16.V8B(), v11.V16B(), v29.V8B()); 2168 __ trn1(v19.V16B(), v24.V16B(), v12.V16B()); 2169 __ trn1(v2.V2D(), v7.V2D(), v10.V2D()); 2170 __ trn1(v22.V2S(), v0.V2S(), v21.V2S()); 2171 __ trn1(v12.V4H(), v15.V4H(), v20.V4H()); 2172 __ trn1(v30.V4S(), v17.V4S(), v9.V4S()); 2173 __ trn1(v12.V8B(), v19.V8B(), v29.V8B()); 2174 __ trn1(v23.V8H(), v8.V8H(), v9.V8H()); 2175 __ trn2(v28.V16B(), v30.V16B(), v25.V16B()); 2176 __ trn2(v7.V2D(), v27.V2D(), v7.V2D()); 2177 __ trn2(v30.V2S(), v16.V2S(), v19.V2S()); 2178 __ trn2(v24.V4H(), v6.V4H(), v25.V4H()); 2179 __ trn2(v2.V4S(), v19.V4S(), v11.V4S()); 2180 __ trn2(v25.V8B(), v27.V8B(), v18.V8B()); 2181 __ trn2(v12.V8H(), v4.V8H(), v15.V8H()); 2182 __ uaba(v31.V16B(), v12.V16B(), v28.V16B()); 2183 __ uaba(v18.V2S(), v5.V2S(), v14.V2S()); 2184 __ uaba(v9.V4H(), v20.V4H(), 
v21.V4H()); 2185 __ uaba(v6.V4S(), v20.V4S(), v2.V4S()); 2186 __ uaba(v16.V8B(), v12.V8B(), v5.V8B()); 2187 __ uaba(v15.V8H(), v26.V8H(), v30.V8H()); 2188 __ uabal(v10.V2D(), v18.V2S(), v15.V2S()); 2189 __ uabal(v30.V4S(), v19.V4H(), v7.V4H()); 2190 __ uabal(v4.V8H(), v27.V8B(), v0.V8B()); 2191 __ uabal2(v19.V2D(), v12.V4S(), v2.V4S()); 2192 __ uabal2(v26.V4S(), v5.V8H(), v12.V8H()); 2193 __ uabal2(v19.V8H(), v20.V16B(), v28.V16B()); 2194 __ uabd(v18.V16B(), v4.V16B(), v21.V16B()); 2195 __ uabd(v30.V2S(), v21.V2S(), v16.V2S()); 2196 __ uabd(v8.V4H(), v28.V4H(), v25.V4H()); 2197 __ uabd(v28.V4S(), v12.V4S(), v21.V4S()); 2198 __ uabd(v19.V8B(), v16.V8B(), v28.V8B()); 2199 __ uabd(v9.V8H(), v12.V8H(), v29.V8H()); 2200 __ uabdl(v26.V2D(), v0.V2S(), v8.V2S()); 2201 __ uabdl(v29.V4S(), v31.V4H(), v25.V4H()); 2202 __ uabdl(v27.V8H(), v29.V8B(), v14.V8B()); 2203 __ uabdl2(v20.V2D(), v20.V4S(), v8.V4S()); 2204 __ uabdl2(v22.V4S(), v15.V8H(), v18.V8H()); 2205 __ uabdl2(v9.V8H(), v18.V16B(), v23.V16B()); 2206 __ uadalp(v9.V1D(), v15.V2S()); 2207 __ uadalp(v14.V2D(), v12.V4S()); 2208 __ uadalp(v28.V2S(), v12.V4H()); 2209 __ uadalp(v0.V4H(), v17.V8B()); 2210 __ uadalp(v1.V4S(), v29.V8H()); 2211 __ uadalp(v15.V8H(), v22.V16B()); 2212 __ uaddl(v1.V2D(), v20.V2S(), v27.V2S()); 2213 __ uaddl(v31.V4S(), v25.V4H(), v5.V4H()); 2214 __ uaddl(v12.V8H(), v3.V8B(), v3.V8B()); 2215 __ uaddl2(v5.V2D(), v23.V4S(), v6.V4S()); 2216 __ uaddl2(v1.V4S(), v5.V8H(), v25.V8H()); 2217 __ uaddl2(v22.V8H(), v30.V16B(), v28.V16B()); 2218 __ uaddlp(v7.V1D(), v9.V2S()); 2219 __ uaddlp(v26.V2D(), v4.V4S()); 2220 __ uaddlp(v28.V2S(), v1.V4H()); 2221 __ uaddlp(v20.V4H(), v31.V8B()); 2222 __ uaddlp(v16.V4S(), v17.V8H()); 2223 __ uaddlp(v6.V8H(), v2.V16B()); 2224 __ uaddlv(d28, v22.V4S()); 2225 __ uaddlv(h0, v19.V16B()); 2226 __ uaddlv(h30, v30.V8B()); 2227 __ uaddlv(s24, v18.V4H()); 2228 __ uaddlv(s10, v0.V8H()); 2229 __ uaddw(v9.V2D(), v17.V2D(), v14.V2S()); 2230 __ uaddw(v9.V4S(), v25.V4S(), v3.V4H()); 2231 
__ uaddw(v18.V8H(), v1.V8H(), v0.V8B()); 2232 __ uaddw2(v18.V2D(), v5.V2D(), v6.V4S()); 2233 __ uaddw2(v17.V4S(), v15.V4S(), v11.V8H()); 2234 __ uaddw2(v29.V8H(), v11.V8H(), v7.V16B()); 2235 __ uhadd(v13.V16B(), v9.V16B(), v3.V16B()); 2236 __ uhadd(v17.V2S(), v25.V2S(), v24.V2S()); 2237 __ uhadd(v25.V4H(), v23.V4H(), v13.V4H()); 2238 __ uhadd(v0.V4S(), v20.V4S(), v16.V4S()); 2239 __ uhadd(v5.V8B(), v5.V8B(), v25.V8B()); 2240 __ uhadd(v3.V8H(), v29.V8H(), v18.V8H()); 2241 __ uhsub(v1.V16B(), v22.V16B(), v13.V16B()); 2242 __ uhsub(v14.V2S(), v30.V2S(), v30.V2S()); 2243 __ uhsub(v29.V4H(), v14.V4H(), v17.V4H()); 2244 __ uhsub(v26.V4S(), v5.V4S(), v18.V4S()); 2245 __ uhsub(v3.V8B(), v7.V8B(), v12.V8B()); 2246 __ uhsub(v25.V8H(), v21.V8H(), v5.V8H()); 2247 __ umax(v28.V16B(), v12.V16B(), v6.V16B()); 2248 __ umax(v20.V2S(), v19.V2S(), v26.V2S()); 2249 __ umax(v0.V4H(), v31.V4H(), v18.V4H()); 2250 __ umax(v6.V4S(), v21.V4S(), v28.V4S()); 2251 __ umax(v0.V8B(), v2.V8B(), v20.V8B()); 2252 __ umax(v4.V8H(), v11.V8H(), v22.V8H()); 2253 __ umaxp(v1.V16B(), v6.V16B(), v29.V16B()); 2254 __ umaxp(v19.V2S(), v17.V2S(), v27.V2S()); 2255 __ umaxp(v21.V4H(), v16.V4H(), v7.V4H()); 2256 __ umaxp(v9.V4S(), v20.V4S(), v29.V4S()); 2257 __ umaxp(v13.V8B(), v1.V8B(), v16.V8B()); 2258 __ umaxp(v19.V8H(), v23.V8H(), v26.V8H()); 2259 __ umaxv(b17, v30.V16B()); 2260 __ umaxv(b23, v12.V8B()); 2261 __ umaxv(h31, v15.V4H()); 2262 __ umaxv(h15, v25.V8H()); 2263 __ umaxv(s18, v21.V4S()); 2264 __ umin(v22.V16B(), v0.V16B(), v18.V16B()); 2265 __ umin(v1.V2S(), v21.V2S(), v16.V2S()); 2266 __ umin(v17.V4H(), v4.V4H(), v25.V4H()); 2267 __ umin(v24.V4S(), v26.V4S(), v13.V4S()); 2268 __ umin(v20.V8B(), v1.V8B(), v5.V8B()); 2269 __ umin(v26.V8H(), v25.V8H(), v23.V8H()); 2270 __ uminp(v5.V16B(), v1.V16B(), v23.V16B()); 2271 __ uminp(v7.V2S(), v26.V2S(), v30.V2S()); 2272 __ uminp(v9.V4H(), v5.V4H(), v25.V4H()); 2273 __ uminp(v23.V4S(), v10.V4S(), v1.V4S()); 2274 __ uminp(v4.V8B(), v29.V8B(), v14.V8B()); 2275 
__ uminp(v21.V8H(), v0.V8H(), v14.V8H()); 2276 __ uminv(b0, v17.V16B()); 2277 __ uminv(b0, v31.V8B()); 2278 __ uminv(h24, v0.V4H()); 2279 __ uminv(h29, v14.V8H()); 2280 __ uminv(s30, v3.V4S()); 2281 __ umlal(v11.V2D(), v11.V2S(), v24.V2S()); 2282 __ umlal(v30.V2D(), v16.V2S(), v11.S(), 3); 2283 __ umlal(v0.V4S(), v9.V4H(), v26.V4H()); 2284 __ umlal(v20.V4S(), v24.V4H(), v12.H(), 4); 2285 __ umlal(v16.V8H(), v21.V8B(), v6.V8B()); 2286 __ umlal2(v17.V2D(), v19.V4S(), v23.V4S()); 2287 __ umlal2(v5.V2D(), v30.V4S(), v8.S(), 0); 2288 __ umlal2(v16.V4S(), v8.V8H(), v15.V8H()); 2289 __ umlal2(v15.V4S(), v26.V8H(), v1.H(), 5); 2290 __ umlal2(v30.V8H(), v1.V16B(), v17.V16B()); 2291 __ umlsl(v18.V2D(), v19.V2S(), v28.V2S()); 2292 __ umlsl(v7.V2D(), v7.V2S(), v8.S(), 0); 2293 __ umlsl(v24.V4S(), v8.V4H(), v4.V4H()); 2294 __ umlsl(v18.V4S(), v22.V4H(), v12.H(), 4); 2295 __ umlsl(v28.V8H(), v14.V8B(), v20.V8B()); 2296 __ umlsl2(v11.V2D(), v0.V4S(), v9.V4S()); 2297 __ umlsl2(v26.V2D(), v16.V4S(), v9.S(), 2); 2298 __ umlsl2(v3.V4S(), v11.V8H(), v9.V8H()); 2299 __ umlsl2(v10.V4S(), v25.V8H(), v9.H(), 4); 2300 __ umlsl2(v24.V8H(), v16.V16B(), v28.V16B()); 2301 __ umov(x30, v25.D(), 1); 2302 __ umull(v12.V2D(), v10.V2S(), v29.V2S()); 2303 __ umull(v22.V2D(), v30.V2S(), v5.S(), 3); 2304 __ umull(v7.V4S(), v0.V4H(), v25.V4H()); 2305 __ umull(v11.V4S(), v13.V4H(), v3.H(), 2); 2306 __ umull(v25.V8H(), v16.V8B(), v10.V8B()); 2307 __ umull2(v17.V2D(), v3.V4S(), v26.V4S()); 2308 __ umull2(v26.V2D(), v11.V4S(), v2.S(), 3); 2309 __ umull2(v12.V4S(), v17.V8H(), v23.V8H()); 2310 __ umull2(v4.V4S(), v31.V8H(), v1.H(), 2); 2311 __ umull2(v5.V8H(), v12.V16B(), v17.V16B()); 2312 __ uqadd(b30, b4, b28); 2313 __ uqadd(d27, d20, d16); 2314 __ uqadd(h7, h14, h28); 2315 __ uqadd(s28, s17, s4); 2316 __ uqadd(v19.V16B(), v22.V16B(), v21.V16B()); 2317 __ uqadd(v16.V2D(), v4.V2D(), v11.V2D()); 2318 __ uqadd(v20.V2S(), v14.V2S(), v4.V2S()); 2319 __ uqadd(v5.V4H(), v0.V4H(), v16.V4H()); 2320 __ 
uqadd(v21.V4S(), v31.V4S(), v9.V4S()); 2321 __ uqadd(v23.V8B(), v24.V8B(), v3.V8B()); 2322 __ uqadd(v17.V8H(), v27.V8H(), v11.V8H()); 2323 __ uqrshl(b10, b22, b10); 2324 __ uqrshl(d29, d5, d11); 2325 __ uqrshl(h27, h24, h30); 2326 __ uqrshl(s10, s13, s8); 2327 __ uqrshl(v9.V16B(), v18.V16B(), v14.V16B()); 2328 __ uqrshl(v24.V2D(), v15.V2D(), v17.V2D()); 2329 __ uqrshl(v4.V2S(), v14.V2S(), v27.V2S()); 2330 __ uqrshl(v15.V4H(), v5.V4H(), v8.V4H()); 2331 __ uqrshl(v21.V4S(), v29.V4S(), v0.V4S()); 2332 __ uqrshl(v16.V8B(), v24.V8B(), v9.V8B()); 2333 __ uqrshl(v2.V8H(), v0.V8H(), v15.V8H()); 2334 __ uqrshrn(b11, h26, 4); 2335 __ uqrshrn(h7, s30, 5); 2336 __ uqrshrn(s10, d8, 21); 2337 __ uqrshrn(v15.V2S(), v6.V2D(), 11); 2338 __ uqrshrn(v5.V4H(), v26.V4S(), 12); 2339 __ uqrshrn(v28.V8B(), v25.V8H(), 5); 2340 __ uqrshrn2(v25.V16B(), v30.V8H(), 2); 2341 __ uqrshrn2(v21.V4S(), v14.V2D(), 32); 2342 __ uqrshrn2(v13.V8H(), v7.V4S(), 2); 2343 __ uqshl(b13, b0, b23); 2344 __ uqshl(b9, b17, 4); 2345 __ uqshl(d23, d6, d4); 2346 __ uqshl(d8, d11, 44); 2347 __ uqshl(h19, h13, h15); 2348 __ uqshl(h25, h26, 6); 2349 __ uqshl(s4, s24, s10); 2350 __ uqshl(s19, s14, 1); 2351 __ uqshl(v14.V16B(), v30.V16B(), v25.V16B()); 2352 __ uqshl(v6.V16B(), v10.V16B(), 5); 2353 __ uqshl(v18.V2D(), v8.V2D(), v7.V2D()); 2354 __ uqshl(v25.V2D(), v14.V2D(), 18); 2355 __ uqshl(v25.V2S(), v16.V2S(), v23.V2S()); 2356 __ uqshl(v13.V2S(), v15.V2S(), 31); 2357 __ uqshl(v28.V4H(), v24.V4H(), v15.V4H()); 2358 __ uqshl(v4.V4H(), v17.V4H(), 1); 2359 __ uqshl(v9.V4S(), v31.V4S(), v23.V4S()); 2360 __ uqshl(v18.V4S(), v28.V4S(), 31); 2361 __ uqshl(v31.V8B(), v21.V8B(), v15.V8B()); 2362 __ uqshl(v6.V8B(), v21.V8B(), 1); 2363 __ uqshl(v28.V8H(), v2.V8H(), v17.V8H()); 2364 __ uqshl(v24.V8H(), v8.V8H(), 14); 2365 __ uqshrn(b21, h27, 7); 2366 __ uqshrn(h28, s26, 11); 2367 __ uqshrn(s13, d31, 17); 2368 __ uqshrn(v21.V2S(), v16.V2D(), 8); 2369 __ uqshrn(v24.V4H(), v24.V4S(), 2); 2370 __ uqshrn(v5.V8B(), v1.V8H(), 8); 2371 
__ uqshrn2(v16.V16B(), v29.V8H(), 6); 2372 __ uqshrn2(v2.V4S(), v6.V2D(), 1); 2373 __ uqshrn2(v16.V8H(), v10.V4S(), 14); 2374 __ uqsub(b28, b20, b26); 2375 __ uqsub(d0, d7, d10); 2376 __ uqsub(h26, h24, h7); 2377 __ uqsub(s23, s23, s16); 2378 __ uqsub(v14.V16B(), v16.V16B(), v24.V16B()); 2379 __ uqsub(v11.V2D(), v17.V2D(), v6.V2D()); 2380 __ uqsub(v10.V2S(), v10.V2S(), v8.V2S()); 2381 __ uqsub(v9.V4H(), v15.V4H(), v12.V4H()); 2382 __ uqsub(v23.V4S(), v18.V4S(), v7.V4S()); 2383 __ uqsub(v9.V8B(), v19.V8B(), v17.V8B()); 2384 __ uqsub(v20.V8H(), v2.V8H(), v6.V8H()); 2385 __ uqxtn(b29, h19); 2386 __ uqxtn(h0, s13); 2387 __ uqxtn(s26, d22); 2388 __ uqxtn(v5.V2S(), v31.V2D()); 2389 __ uqxtn(v30.V4H(), v19.V4S()); 2390 __ uqxtn(v15.V8B(), v2.V8H()); 2391 __ uqxtn2(v29.V16B(), v3.V8H()); 2392 __ uqxtn2(v13.V4S(), v17.V2D()); 2393 __ uqxtn2(v28.V8H(), v11.V4S()); 2394 __ urecpe(v23.V2S(), v15.V2S()); 2395 __ urecpe(v27.V4S(), v7.V4S()); 2396 __ urhadd(v2.V16B(), v15.V16B(), v27.V16B()); 2397 __ urhadd(v15.V2S(), v1.V2S(), v18.V2S()); 2398 __ urhadd(v17.V4H(), v4.V4H(), v26.V4H()); 2399 __ urhadd(v2.V4S(), v27.V4S(), v14.V4S()); 2400 __ urhadd(v5.V8B(), v17.V8B(), v14.V8B()); 2401 __ urhadd(v30.V8H(), v2.V8H(), v25.V8H()); 2402 __ urshl(d4, d28, d30); 2403 __ urshl(v13.V16B(), v31.V16B(), v19.V16B()); 2404 __ urshl(v14.V2D(), v23.V2D(), v21.V2D()); 2405 __ urshl(v10.V2S(), v7.V2S(), v8.V2S()); 2406 __ urshl(v15.V4H(), v21.V4H(), v28.V4H()); 2407 __ urshl(v30.V4S(), v8.V4S(), v23.V4S()); 2408 __ urshl(v31.V8B(), v20.V8B(), v5.V8B()); 2409 __ urshl(v30.V8H(), v27.V8H(), v30.V8H()); 2410 __ urshr(d4, d13, 49); 2411 __ urshr(v2.V16B(), v20.V16B(), 1); 2412 __ urshr(v13.V2D(), v11.V2D(), 51); 2413 __ urshr(v21.V2S(), v31.V2S(), 10); 2414 __ urshr(v21.V4H(), v17.V4H(), 11); 2415 __ urshr(v4.V4S(), v22.V4S(), 1); 2416 __ urshr(v0.V8B(), v1.V8B(), 7); 2417 __ urshr(v13.V8H(), v20.V8H(), 1); 2418 __ ursqrte(v20.V2S(), v16.V2S()); 2419 __ ursqrte(v28.V4S(), v8.V4S()); 2420 __ 
ursra(d27, d16, 45); 2421 __ ursra(v18.V16B(), v17.V16B(), 3); 2422 __ ursra(v26.V2D(), v28.V2D(), 58); 2423 __ ursra(v8.V2S(), v22.V2S(), 31); 2424 __ ursra(v31.V4H(), v4.V4H(), 7); 2425 __ ursra(v31.V4S(), v15.V4S(), 2); 2426 __ ursra(v3.V8B(), v1.V8B(), 5); 2427 __ ursra(v18.V8H(), v14.V8H(), 13); 2428 __ ushl(d31, d0, d16); 2429 __ ushl(v0.V16B(), v6.V16B(), v2.V16B()); 2430 __ ushl(v18.V2D(), v1.V2D(), v18.V2D()); 2431 __ ushl(v27.V2S(), v7.V2S(), v29.V2S()); 2432 __ ushl(v14.V4H(), v14.V4H(), v13.V4H()); 2433 __ ushl(v22.V4S(), v4.V4S(), v9.V4S()); 2434 __ ushl(v23.V8B(), v22.V8B(), v27.V8B()); 2435 __ ushl(v21.V8H(), v25.V8H(), v8.V8H()); 2436 __ ushll(v11.V2D(), v0.V2S(), 21); 2437 __ ushll(v2.V4S(), v17.V4H(), 8); 2438 __ ushll(v11.V8H(), v14.V8B(), 1); 2439 __ ushll2(v8.V2D(), v29.V4S(), 7); 2440 __ ushll2(v29.V4S(), v9.V8H(), 2); 2441 __ ushll2(v5.V8H(), v24.V16B(), 6); 2442 __ ushr(d28, d27, 53); 2443 __ ushr(v1.V16B(), v9.V16B(), 7); 2444 __ ushr(v2.V2D(), v24.V2D(), 43); 2445 __ ushr(v30.V2S(), v25.V2S(), 11); 2446 __ ushr(v10.V4H(), v26.V4H(), 12); 2447 __ ushr(v4.V4S(), v5.V4S(), 30); 2448 __ ushr(v30.V8B(), v2.V8B(), 1); 2449 __ ushr(v6.V8H(), v12.V8H(), 2); 2450 __ usqadd(b19, b5); 2451 __ usqadd(d9, d2); 2452 __ usqadd(h2, h16); 2453 __ usqadd(s16, s3); 2454 __ usqadd(v31.V16B(), v29.V16B()); 2455 __ usqadd(v8.V2D(), v10.V2D()); 2456 __ usqadd(v18.V2S(), v9.V2S()); 2457 __ usqadd(v24.V4H(), v14.V4H()); 2458 __ usqadd(v10.V4S(), v30.V4S()); 2459 __ usqadd(v16.V8B(), v20.V8B()); 2460 __ usqadd(v12.V8H(), v16.V8H()); 2461 __ usra(d28, d27, 37); 2462 __ usra(v5.V16B(), v22.V16B(), 5); 2463 __ usra(v2.V2D(), v19.V2D(), 33); 2464 __ usra(v0.V2S(), v0.V2S(), 21); 2465 __ usra(v7.V4H(), v6.V4H(), 12); 2466 __ usra(v4.V4S(), v17.V4S(), 9); 2467 __ usra(v9.V8B(), v12.V8B(), 7); 2468 __ usra(v3.V8H(), v27.V8H(), 14); 2469 __ usubl(v29.V2D(), v12.V2S(), v30.V2S()); 2470 __ usubl(v29.V4S(), v28.V4H(), v6.V4H()); 2471 __ usubl(v12.V8H(), v4.V8B(), v14.V8B()); 
2472 __ usubl2(v1.V2D(), v24.V4S(), v17.V4S()); 2473 __ usubl2(v4.V4S(), v1.V8H(), v3.V8H()); 2474 __ usubl2(v23.V8H(), v4.V16B(), v7.V16B()); 2475 __ usubw(v9.V2D(), v20.V2D(), v30.V2S()); 2476 __ usubw(v20.V4S(), v16.V4S(), v23.V4H()); 2477 __ usubw(v25.V8H(), v8.V8H(), v29.V8B()); 2478 __ usubw2(v18.V2D(), v29.V2D(), v6.V4S()); 2479 __ usubw2(v6.V4S(), v6.V4S(), v20.V8H()); 2480 __ usubw2(v18.V8H(), v4.V8H(), v16.V16B()); 2481 __ uxtl(v27.V2D(), v21.V2S()); 2482 __ uxtl(v0.V4S(), v31.V4H()); 2483 __ uxtl(v27.V8H(), v10.V8B()); 2484 __ uxtl2(v6.V2D(), v16.V4S()); 2485 __ uxtl2(v22.V4S(), v20.V8H()); 2486 __ uxtl2(v20.V8H(), v21.V16B()); 2487 __ uzp1(v30.V16B(), v9.V16B(), v17.V16B()); 2488 __ uzp1(v7.V2D(), v26.V2D(), v28.V2D()); 2489 __ uzp1(v26.V2S(), v16.V2S(), v22.V2S()); 2490 __ uzp1(v14.V4H(), v19.V4H(), v6.V4H()); 2491 __ uzp1(v17.V4S(), v23.V4S(), v30.V4S()); 2492 __ uzp1(v28.V8B(), v27.V8B(), v13.V8B()); 2493 __ uzp1(v17.V8H(), v1.V8H(), v12.V8H()); 2494 __ uzp2(v8.V16B(), v18.V16B(), v26.V16B()); 2495 __ uzp2(v21.V2D(), v22.V2D(), v24.V2D()); 2496 __ uzp2(v20.V2S(), v21.V2S(), v2.V2S()); 2497 __ uzp2(v16.V4H(), v31.V4H(), v6.V4H()); 2498 __ uzp2(v25.V4S(), v11.V4S(), v8.V4S()); 2499 __ uzp2(v31.V8B(), v31.V8B(), v13.V8B()); 2500 __ uzp2(v8.V8H(), v17.V8H(), v1.V8H()); 2501 __ xtn(v17.V2S(), v26.V2D()); 2502 __ xtn(v3.V4H(), v0.V4S()); 2503 __ xtn(v18.V8B(), v8.V8H()); 2504 __ xtn2(v0.V16B(), v0.V8H()); 2505 __ xtn2(v15.V4S(), v4.V2D()); 2506 __ xtn2(v31.V8H(), v18.V4S()); 2507 __ zip1(v22.V16B(), v9.V16B(), v6.V16B()); 2508 __ zip1(v23.V2D(), v11.V2D(), v2.V2D()); 2509 __ zip1(v26.V2S(), v16.V2S(), v9.V2S()); 2510 __ zip1(v1.V4H(), v9.V4H(), v7.V4H()); 2511 __ zip1(v0.V4S(), v30.V4S(), v20.V4S()); 2512 __ zip1(v30.V8B(), v17.V8B(), v15.V8B()); 2513 __ zip1(v17.V8H(), v8.V8H(), v2.V8H()); 2514 __ zip2(v23.V16B(), v10.V16B(), v11.V16B()); 2515 __ zip2(v30.V2D(), v6.V2D(), v14.V2D()); 2516 __ zip2(v9.V2S(), v10.V2S(), v21.V2S()); 2517 __ zip2(v8.V4H(), 
v24.V4H(), v29.V4H());
  __ zip2(v0.V4S(), v21.V4S(), v23.V4S());
  __ zip2(v25.V8B(), v23.V8B(), v30.V8B());
  __ zip2(v7.V8H(), v10.V8H(), v30.V8H());
}  // NOLINT(readability/fn_size)


// Emit the NEON floating-point part of the instruction sequence that
// TraceTestHelper and PrintDisassemblerTestHelper assemble.
//
// The output of those helpers is diffed against reference files (see
// CheckOrGenerateTrace), so the order and operands of the instructions below
// are part of the expected output: adding, removing or reordering any of them
// requires the reference files to be regenerated.
static void GenerateTestSequenceNEONFP(MacroAssembler* masm) {
  // Allow up to the whole remaining buffer to be used by this sequence.
  ExactAssemblyScope guard(masm,
                           masm->GetBuffer()->GetRemainingBytes(),
                           ExactAssemblyScope::kMaximumSize);

  // NEON floating point instructions.
  __ fabd(v3.V2D(), v25.V2D(), v8.V2D());
  __ fabd(v14.V2S(), v27.V2S(), v11.V2S());
  __ fabd(v9.V4S(), v22.V4S(), v18.V4S());
  __ fabs(v1.V2D(), v29.V2D());
  __ fabs(v6.V2S(), v21.V2S());
  __ fabs(v12.V4S(), v25.V4S());
  __ facge(v18.V2D(), v5.V2D(), v0.V2D());
  __ facge(v15.V2S(), v11.V2S(), v6.V2S());
  __ facge(v30.V4S(), v10.V4S(), v25.V4S());
  __ facgt(v28.V2D(), v16.V2D(), v31.V2D());
  __ facgt(v15.V2S(), v1.V2S(), v4.V2S());
  __ facgt(v22.V4S(), v3.V4S(), v10.V4S());
  __ fadd(v7.V2D(), v10.V2D(), v24.V2D());
  __ fadd(v10.V2S(), v23.V2S(), v7.V2S());
  __ fadd(v16.V4S(), v22.V4S(), v11.V4S());
  __ faddp(d27, v28.V2D());
  __ faddp(s20, v23.V2S());
  __ faddp(v21.V2D(), v4.V2D(), v11.V2D());
  __ faddp(v31.V2S(), v26.V2S(), v1.V2S());
  __ faddp(v13.V4S(), v27.V4S(), v28.V4S());
  __ fcmeq(v17.V2D(), v13.V2D(), v20.V2D());
  __ fcmeq(v24.V2D(), v16.V2D(), 0.0);
  __ fcmeq(v26.V2S(), v17.V2S(), v10.V2S());
  __ fcmeq(v24.V2S(), v4.V2S(), 0.0);
  __ fcmeq(v8.V4S(), v4.V4S(), v14.V4S());
  __ fcmeq(v26.V4S(), v25.V4S(), 0.0);
  __ fcmge(v27.V2D(), v0.V2D(), v0.V2D());
  __ fcmge(v22.V2D(), v30.V2D(), 0.0);
  __ fcmge(v7.V2S(), v21.V2S(), v25.V2S());
  __ fcmge(v15.V2S(), v15.V2S(), 0.0);
  __ fcmge(v29.V4S(), v4.V4S(), v27.V4S());
  __ fcmge(v22.V4S(), v21.V4S(), 0.0);
  __ fcmgt(v1.V2D(), v26.V2D(), v15.V2D());
  __ fcmgt(v15.V2D(), v23.V2D(), 0.0);
  __ fcmgt(v21.V2S(), v16.V2S(), v6.V2S());
  __ fcmgt(v1.V2S(), v13.V2S(), 0.0);
  __ fcmgt(v14.V4S(), v0.V4S(), v25.V4S());
  __ fcmgt(v13.V4S(), v8.V4S(), 0.0);
  __ fcmle(v4.V2D(), v6.V2D(), 0.0);
  __ fcmle(v24.V2S(), v31.V2S(), 0.0);
  __ fcmle(v8.V4S(), v23.V4S(), 0.0);
  __ fcmlt(v7.V2D(), v3.V2D(), 0.0);
  __ fcmlt(v15.V2S(), v21.V2S(), 0.0);
  __ fcmlt(v1.V4S(), v2.V4S(), 0.0);
  __ fcvtas(v6.V2D(), v8.V2D());
  __ fcvtas(v1.V2S(), v9.V2S());
  __ fcvtas(v8.V4S(), v19.V4S());
  __ fcvtau(v5.V2D(), v31.V2D());
  __ fcvtau(v28.V2S(), v29.V2S());
  __ fcvtau(v11.V4S(), v26.V4S());
  __ fcvtl(v8.V2D(), v25.V2S());
  __ fcvtl(v27.V4S(), v14.V4H());
  __ fcvtl2(v1.V2D(), v6.V4S());
  __ fcvtl2(v24.V4S(), v9.V8H());
  __ fcvtms(v9.V2D(), v24.V2D());
  __ fcvtms(v7.V2S(), v11.V2S());
  __ fcvtms(v23.V4S(), v21.V4S());
  __ fcvtmu(v13.V2D(), v1.V2D());
  __ fcvtmu(v26.V2S(), v12.V2S());
  __ fcvtmu(v21.V4S(), v21.V4S());
  __ fcvtn(v11.V2S(), v1.V2D());
  __ fcvtn(v8.V4H(), v2.V4S());
  __ fcvtn2(v24.V4S(), v29.V2D());
  __ fcvtn2(v4.V8H(), v10.V4S());
  __ fcvtns(v25.V2D(), v10.V2D());
  __ fcvtns(v4.V2S(), v8.V2S());
  __ fcvtns(v29.V4S(), v27.V4S());
  __ fcvtnu(v18.V2D(), v27.V2D());
  __ fcvtnu(v11.V2S(), v14.V2S());
  __ fcvtnu(v27.V4S(), v21.V4S());
  __ fcvtps(v23.V2D(), v5.V2D());
  __ fcvtps(v24.V2S(), v15.V2S());
  __ fcvtps(v5.V4S(), v19.V4S());
  __ fcvtpu(v3.V2D(), v21.V2D());
  __ fcvtpu(v3.V2S(), v21.V2S());
  __ fcvtpu(v0.V4S(), v7.V4S());
  __ fcvtxn(v29.V2S(), v11.V2D());
  __ fcvtxn2(v31.V4S(), v25.V2D());
  __ fcvtzs(v19.V2D(), v17.V2D());
  __ fcvtzs(v12.V2D(), v24.V2D(), 64);
  __ fcvtzs(v9.V2S(), v2.V2S());
  __ fcvtzs(v5.V2S(), v20.V2S(), 29);
  __ fcvtzs(v21.V4S(), v25.V4S());
  __ fcvtzs(v26.V4S(), v1.V4S(), 6);
  __ fcvtzu(v13.V2D(), v25.V2D());
  __ fcvtzu(v28.V2D(), v13.V2D(), 32);
  __ fcvtzu(v26.V2S(), v6.V2S());
  __ fcvtzu(v9.V2S(), v10.V2S(), 15);
  __ fcvtzu(v30.V4S(), v6.V4S());
  __ fcvtzu(v19.V4S(), v22.V4S(), 18);
  __ fdiv(v15.V2D(), v8.V2D(), v15.V2D());
  __ fdiv(v12.V2S(), v9.V2S(), v26.V2S());
  __ fdiv(v19.V4S(), v22.V4S(), v19.V4S());
  __ fmax(v19.V2D(), v7.V2D(), v8.V2D());
  __ fmax(v25.V2S(), v12.V2S(), v29.V2S());
  __ fmax(v6.V4S(), v15.V4S(), v5.V4S());
  __ fmaxnm(v16.V2D(), v8.V2D(), v20.V2D());
  __ fmaxnm(v15.V2S(), v26.V2S(), v25.V2S());
  __ fmaxnm(v23.V4S(), v14.V4S(), v16.V4S());
  __ fmaxnmp(d6, v19.V2D());
  __ fmaxnmp(s27, v26.V2S());
  __ fmaxnmp(v8.V2D(), v12.V2D(), v23.V2D());
  __ fmaxnmp(v13.V2S(), v25.V2S(), v22.V2S());
  __ fmaxnmp(v15.V4S(), v11.V4S(), v17.V4S());
  __ fmaxnmv(s27, v19.V4S());
  __ fmaxp(d20, v14.V2D());
  __ fmaxp(s18, v2.V2S());
  __ fmaxp(v9.V2D(), v23.V2D(), v31.V2D());
  __ fmaxp(v7.V2S(), v22.V2S(), v31.V2S());
  __ fmaxp(v18.V4S(), v7.V4S(), v29.V4S());
  __ fmaxv(s31, v29.V4S());
  __ fmin(v2.V2D(), v5.V2D(), v2.V2D());
  __ fmin(v31.V2S(), v17.V2S(), v10.V2S());
  __ fmin(v10.V4S(), v4.V4S(), v16.V4S());
  __ fminnm(v21.V2D(), v6.V2D(), v5.V2D());
  __ fminnm(v22.V2S(), v18.V2S(), v14.V2S());
  __ fminnm(v25.V4S(), v31.V4S(), v3.V4S());
  __ fminnmp(d9, v1.V2D());
  __ fminnmp(s21, v20.V2S());
  __ fminnmp(v16.V2D(), v21.V2D(), v19.V2D());
  __ fminnmp(v16.V2S(), v31.V2S(), v25.V2S());
  __ fminnmp(v26.V4S(), v16.V4S(), v15.V4S());
  __ fminnmv(s3, v4.V4S());
  __ fminp(d24, v26.V2D());
  __ fminp(s7, v17.V2S());
  __ fminp(v23.V2D(), v19.V2D(), v3.V2D());
  __ fminp(v29.V2S(), v21.V2S(), v9.V2S());
  __ fminp(v0.V4S(), v24.V4S(), v21.V4S());
  __ fminv(s25, v8.V4S());
  __ fmla(d23, d0, v9.D(), 1);
  __ fmla(s23, s15, v7.S(), 0);
  __ fmla(v17.V2D(), v11.V2D(), v6.V2D());
  __ fmla(v30.V2D(), v30.V2D(), v11.D(), 0);
  __ fmla(v19.V2S(), v12.V2S(), v6.V2S());
  __ fmla(v24.V2S(), v17.V2S(), v9.S(), 0);
  __ fmla(v16.V4S(), v11.V4S(), v11.V4S());
  __ fmla(v27.V4S(), v23.V4S(), v9.S(), 2);
  __ fmls(d27, d30, v6.D(), 0);
  __ fmls(s21, s16, v2.S(), 0);
  __ fmls(v5.V2D(), v19.V2D(), v21.V2D());
  __ fmls(v18.V2D(), v30.V2D(), v12.D(), 0);
  __ fmls(v5.V2S(), v16.V2S(), v7.V2S());
  __ fmls(v3.V2S(), v18.V2S(), v11.S(), 1);
  __ fmls(v27.V4S(), v5.V4S(), v30.V4S());
  __ fmls(v26.V4S(), v20.V4S(), v4.S(), 3);
  __ fmov(v14.V2D(), -0.34375);
  __ fmov(v26.V2S(), 0.90625f);
  __ fmov(v31.V4S(), -5.0000f);
  __ fmov(v28.D(), 1, x25);
  __ fmov(x18, v2.D(), 1);
  __ fmul(d12, d4, v1.D(), 1);
  __ fmul(s30, s1, v15.S(), 3);
  __ fmul(v25.V2D(), v0.V2D(), v21.V2D());
  __ fmul(v10.V2D(), v24.V2D(), v10.D(), 1);
  __ fmul(v7.V2S(), v24.V2S(), v16.V2S());
  __ fmul(v1.V2S(), v16.V2S(), v4.S(), 2);
  __ fmul(v5.V4S(), v28.V4S(), v25.V4S());
  __ fmul(v11.V4S(), v3.V4S(), v8.S(), 0);
  __ fmulx(d28, d9, v3.D(), 1);
  __ fmulx(s25, s21, v15.S(), 1);
  __ fmulx(v31.V2D(), v28.V2D(), v8.V2D());
  __ fmulx(v3.V2D(), v21.V2D(), v6.D(), 0);
  __ fmulx(v9.V2S(), v1.V2S(), v0.V2S());
  __ fmulx(v16.V2S(), v27.V2S(), v6.S(), 0);
  __ fmulx(v2.V4S(), v4.V4S(), v5.V4S());
  __ fmulx(v18.V4S(), v7.V4S(), v4.S(), 0);
  __ fneg(v1.V2D(), v25.V2D());
  __ fneg(v14.V2S(), v31.V2S());
  __ fneg(v5.V4S(), v4.V4S());
  __ frecpe(v18.V2D(), v12.V2D());
  __ frecpe(v10.V2S(), v22.V2S());
  __ frecpe(v5.V4S(), v6.V4S());
  __ frecps(v22.V2D(), v7.V2D(), v26.V2D());
  __ frecps(v31.V2S(), v27.V2S(), v2.V2S());
  __ frecps(v18.V4S(), v6.V4S(), v27.V4S());
  __ frinta(v26.V2D(), v13.V2D());
  __ frinta(v15.V2S(), v26.V2S());
  __ frinta(v13.V4S(), v16.V4S());
  __ frinti(v9.V2D(), v12.V2D());
  __ frinti(v5.V2S(), v19.V2S());
  __ frinti(v15.V4S(), v11.V4S());
  __ frintm(v17.V2D(), v29.V2D());
  __ frintm(v30.V2S(), v11.V2S());
  __ frintm(v1.V4S(), v20.V4S());
  __ frintn(v24.V2D(), v6.V2D());
  __ frintn(v12.V2S(), v17.V2S());
  __ frintn(v29.V4S(), v11.V4S());
  __ frintp(v10.V2D(), v7.V2D());
  __ frintp(v12.V2S(), v18.V2S());
  __ frintp(v26.V4S(), v31.V4S());
  __ frintx(v24.V2D(), v13.V2D());
  __ frintx(v7.V2S(), v9.V2S());
  __ frintx(v18.V4S(), v21.V4S());
  __ frintz(v19.V2D(), v25.V2D());
  __ frintz(v15.V2S(), v8.V2S());
  __ frintz(v20.V4S(), v3.V4S());
  __ frsqrte(v23.V2D(), v5.V2D());
  __ frsqrte(v9.V2S(), v7.V2S());
  __ frsqrte(v3.V4S(), v9.V4S());
  __ frsqrts(v25.V2D(), v28.V2D(), v15.V2D());
  __ frsqrts(v9.V2S(), v26.V2S(), v10.V2S());
  __ frsqrts(v5.V4S(), v1.V4S(), v10.V4S());
  __ fsqrt(v6.V2D(), v18.V2D());
  __ fsqrt(v6.V2S(), v18.V2S());
  __ fsqrt(v0.V4S(), v31.V4S());
  __ fsub(v31.V2D(), v30.V2D(), v31.V2D());
  __ fsub(v11.V2S(), v8.V2S(), v6.V2S());
  __ fsub(v16.V4S(), v0.V4S(), v31.V4S());
  __ scvtf(v25.V2D(), v31.V2D());
  __ scvtf(v10.V2D(), v13.V2D(), 45);
  __ scvtf(v10.V2S(), v15.V2S());
  __ scvtf(v18.V2S(), v4.V2S(), 27);
  __ scvtf(v17.V4S(), v5.V4S());
  __ scvtf(v11.V4S(), v25.V4S(), 24);
  __ ucvtf(v9.V2D(), v3.V2D());
  __ ucvtf(v26.V2D(), v30.V2D(), 46);
  __ ucvtf(v11.V2S(), v4.V2S());
  __ ucvtf(v29.V2S(), v3.V2S(), 25);
  __ ucvtf(v22.V4S(), v23.V4S());
  __ ucvtf(v18.V4S(), v9.V4S(), 25);
}


// Emit the SVE part of the instruction sequence that TraceTestHelper and
// PrintDisassemblerTestHelper assemble. Every access uses x0 as its base
// register; TraceTestHelper points x0 at a scratch buffer before running.
//
// As with the other sequences, the order of the instructions is part of the
// expected (reference) output.
static void GenerateTestSequenceSVE(MacroAssembler* masm) {
  // Allow up to the whole remaining buffer to be used by this sequence.
  ExactAssemblyScope guard(masm,
                           masm->GetBuffer()->GetRemainingBytes(),
                           ExactAssemblyScope::kMaximumSize);
  // NOTE(review): presumably required so that the SVE instructions below are
  // accepted by the assembler - confirm against CPUFeaturesScope.
  CPUFeaturesScope feature_guard(masm, CPUFeatures::kSVE);

  // Simple, unpredicated loads and stores.
__ str(p12.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ str(p13.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ str(p14.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ str(p15.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ ldr(p8.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ ldr(p9.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ ldr(p10.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ ldr(p11.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));

  // The same accesses, using Z registers instead of P registers.
  __ str(z0.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ str(z1.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ str(z2.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ str(z3.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ ldr(z20.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ ldr(z21.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ ldr(z22.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
  __ ldr(z23.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));

  // Structured accesses.
  // Each group below mixes the immediate (MUL VL) operand form with the
  // register-offset form (x2 or x3, optionally shifted with LSL).
  __ st1b(z0.VnB(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ st1h(z1.VnH(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ st1w(z2.VnS(), p1, SVEMemOperand(x0, x3, LSL, 2));
  __ st1d(z3.VnD(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1b(z20.VnB(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1h(z21.VnH(), p2.Zeroing(), SVEMemOperand(x0, x2, LSL, 1));
  __ ld1w(z22.VnS(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1d(z23.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));

  // Structured, packed accesses.
  // Here the register lane size (VnH/VnS/VnD) is wider than the access size
  // named by the mnemonic (b/h/w).
  __ st1b(z2.VnH(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ st1b(z3.VnS(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ st1b(z4.VnD(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ st1h(z0.VnS(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ st1h(z1.VnD(), p1, SVEMemOperand(x0, x2, LSL, 1));
  __ st1w(z2.VnD(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1b(z20.VnH(), p1.Zeroing(), SVEMemOperand(x0, x2));
  __ ld1b(z21.VnS(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1b(z22.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1h(z23.VnS(), p2.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1h(z24.VnD(), p2.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1w(z20.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1sb(z21.VnH(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1sb(z22.VnS(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1sb(z23.VnD(), p2.Zeroing(), SVEMemOperand(x0, x2));
  __ ld1sh(z24.VnS(), p2.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1sh(z20.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld1sw(z21.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));

  // Structured, interleaved accesses.
  // Two-, three- and four-register forms, in that order.
  __ st2b(z0.VnB(), z1.VnB(), p4, SVEMemOperand(x0, 4, SVE_MUL_VL));
  __ st2h(z1.VnH(), z2.VnH(), p4, SVEMemOperand(x0, 4, SVE_MUL_VL));
  __ st2w(z2.VnS(), z3.VnS(), p3, SVEMemOperand(x0, x2, LSL, 2));
  __ st2d(z3.VnD(), z4.VnD(), p4, SVEMemOperand(x0, 4, SVE_MUL_VL));
  __ ld2b(z20.VnB(), z21.VnB(), p5.Zeroing(), SVEMemOperand(x0, x2));
  __ ld2h(z21.VnH(), z22.VnH(), p6.Zeroing(), SVEMemOperand(x0, 4, SVE_MUL_VL));
  __ ld2w(z22.VnS(), z23.VnS(), p6.Zeroing(), SVEMemOperand(x0, 4, SVE_MUL_VL));
  __ ld2d(z23.VnD(), z24.VnD(), p5.Zeroing(), SVEMemOperand(x0, 4, SVE_MUL_VL));

  __ st3b(z4.VnB(), z5.VnB(), z6.VnB(), p4, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ st3h(z5.VnH(), z6.VnH(), z7.VnH(), p4, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ st3w(z6.VnS(), z7.VnS(), z8.VnS(), p3, SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ st3d(z7.VnD(), z8.VnD(), z9.VnD(), p4, SVEMemOperand(x0, x2, LSL, 3));
  __ ld3b(z24.VnB(),
          z25.VnB(),
          z26.VnB(),
          p5.Zeroing(),
          SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld3h(z25.VnH(),
          z26.VnH(),
          z27.VnH(),
          p6.Zeroing(),
          SVEMemOperand(x0, x2, LSL, 1));
  __ ld3w(z26.VnS(),
          z27.VnS(),
          z28.VnS(),
          p6.Zeroing(),
          SVEMemOperand(x0, 3, SVE_MUL_VL));
  __ ld3d(z27.VnD(),
          z28.VnD(),
          z29.VnD(),
          p5.Zeroing(),
          SVEMemOperand(x0, 3, SVE_MUL_VL));

  __ st4b(z31.VnB(),
          z0.VnB(),
          z1.VnB(),
          z2.VnB(),
          p4,
          SVEMemOperand(x0, 4, SVE_MUL_VL));
  __ st4h(z0.VnH(),
          z1.VnH(),
          z2.VnH(),
          z3.VnH(),
          p4,
          SVEMemOperand(x0, 4, SVE_MUL_VL));
  __ st4w(z1.VnS(),
          z2.VnS(),
          z3.VnS(),
          z4.VnS(),
          p3,
          SVEMemOperand(x0, 4, SVE_MUL_VL));
  __ st4d(z2.VnD(),
          z3.VnD(),
          z4.VnD(),
          z5.VnD(),
          p4,
          SVEMemOperand(x0, x2, LSL, 3));
  __ ld4b(z25.VnB(),
          z26.VnB(),
          z27.VnB(),
          z28.VnB(),
          p5.Zeroing(),
          SVEMemOperand(x0, 4, SVE_MUL_VL));
2873 __ ld4h(z26.VnH(), 2874 z27.VnH(), 2875 z28.VnH(), 2876 z29.VnH(), 2877 p6.Zeroing(), 2878 SVEMemOperand(x0, 4, SVE_MUL_VL)); 2879 __ ld4w(z27.VnS(), 2880 z28.VnS(), 2881 z29.VnS(), 2882 z30.VnS(), 2883 p6.Zeroing(), 2884 SVEMemOperand(x0, x2, LSL, 2)); 2885 __ ld4d(z28.VnD(), 2886 z29.VnD(), 2887 z30.VnD(), 2888 z31.VnD(), 2889 p5.Zeroing(), 2890 SVEMemOperand(x0, 4, SVE_MUL_VL)); 2891} 2892 2893static void GenerateTestSequenceAtomics(MacroAssembler* masm) { 2894 ExactAssemblyScope guard(masm, 2895 masm->GetBuffer()->GetRemainingBytes(), 2896 ExactAssemblyScope::kMaximumSize); 2897 CPUFeaturesScope feature_guard(masm, CPUFeatures::kAtomics); 2898 __ sub(sp, sp, 16); // Claim some working space on the stack. 2899 __ mov(x0, 0x5555555555555555); 2900 __ str(x0, MemOperand(sp)); // Initialise working space. 2901 2902#define INST_LIST(OP) \ 2903 __ ld##OP##b(w0, w0, MemOperand(sp)); \ 2904 __ ld##OP##ab(w0, w1, MemOperand(sp)); \ 2905 __ ld##OP##lb(w0, w2, MemOperand(sp)); \ 2906 __ ld##OP##alb(w0, w3, MemOperand(sp)); \ 2907 __ ld##OP##h(w0, w0, MemOperand(sp)); \ 2908 __ ld##OP##ah(w0, w1, MemOperand(sp)); \ 2909 __ ld##OP##lh(w0, w2, MemOperand(sp)); \ 2910 __ ld##OP##alh(w0, w3, MemOperand(sp)); \ 2911 __ ld##OP(w0, w0, MemOperand(sp)); \ 2912 __ ld##OP##a(w0, w1, MemOperand(sp)); \ 2913 __ ld##OP##l(w0, w2, MemOperand(sp)); \ 2914 __ ld##OP##al(w0, w3, MemOperand(sp)); \ 2915 __ ld##OP(x0, x0, MemOperand(sp)); \ 2916 __ ld##OP##a(x0, x1, MemOperand(sp)); \ 2917 __ ld##OP##l(x0, x2, MemOperand(sp)); \ 2918 __ ld##OP##al(x0, x3, MemOperand(sp)); \ 2919 __ st##OP##b(w0, MemOperand(sp)); \ 2920 __ st##OP##lb(w0, MemOperand(sp)); \ 2921 __ st##OP##h(w0, MemOperand(sp)); \ 2922 __ st##OP##lh(w0, MemOperand(sp)); \ 2923 __ st##OP(w0, MemOperand(sp)); \ 2924 __ st##OP##l(w0, MemOperand(sp)); \ 2925 __ st##OP(x0, MemOperand(sp)); \ 2926 __ st##OP##l(x0, MemOperand(sp)); 2927 2928 INST_LIST(add); 2929 INST_LIST(set); 2930 INST_LIST(eor); 2931 INST_LIST(smin); 2932 
INST_LIST(smax); 2933 INST_LIST(umin); 2934 INST_LIST(umax); 2935 INST_LIST(clr); 2936 2937#undef INST_LIST 2938 2939 __ add(sp, sp, 16); // Restore stack pointer. 2940} 2941 2942static void MaskAddresses(const char* trace) { 2943#define VIXL_COLOUR "(\x1b\\[[01];([0-9][0-9])?m)?" 2944 // All patterns are replaced with "$1~~~~~~~~~~~~~~~~". 2945 std::regex patterns[] = 2946 {// Mask registers that hold addresses that change from run to run. 2947 std::regex("((x0|x1|x2|sp): " VIXL_COLOUR "0x)[0-9a-f]{16}"), 2948 // Mask accessed memory addresses. 2949 std::regex("((<-|->) " VIXL_COLOUR "0x)[0-9a-f]{16}"), 2950 // Mask instruction addresses. 2951 std::regex("^(0x)[0-9a-f]{16}"), 2952 // Mask branch targets. 2953 std::regex("(Branch" VIXL_COLOUR " to 0x)[0-9a-f]{16}"), 2954 // Mask explicit address annotations. 2955 std::regex("(addr 0x)[0-9a-f]+")}; 2956#undef VIXL_COLOUR 2957 2958 std::vector<std::string> lines; 2959 std::ifstream in(trace); 2960 while (!in.eof()) { 2961 std::string line; 2962 std::getline(in, line); 2963 for (auto&& pattern : patterns) { 2964 line = std::regex_replace(line, pattern, "$1~~~~~~~~~~~~~~~~"); 2965 } 2966 lines.push_back(line); 2967 } 2968 in.close(); 2969 2970 // `getline` produces an empty line after a terminal "\n". 2971 if (lines.back().empty()) lines.pop_back(); 2972 2973 std::ofstream out(trace, std::ofstream::trunc); 2974 for (auto&& line : lines) { 2975 out << line << "\n"; 2976 } 2977} 2978 2979static void PrintFile(const char* name) { 2980 FILE* file = fopen(name, "r"); 2981 char buffer[1024]; // The buffer size is arbitrary. 2982 while (fgets(buffer, sizeof(buffer), file) != NULL) fputs(buffer, stdout); 2983 fclose(file); 2984} 2985 2986static bool CheckOrGenerateTrace(const char* filename, const char* ref_file) { 2987 bool trace_matched_reference; 2988 if (Test::generate_test_trace()) { 2989 // Copy trace_stream to stdout. 
2990 FILE* trace_stream = fopen(filename, "r"); 2991 VIXL_ASSERT(trace_stream != NULL); 2992 fseek(trace_stream, 0, SEEK_SET); 2993 int c; 2994 while (1) { 2995 c = getc(trace_stream); 2996 if (c == EOF) break; 2997 putc(c, stdout); 2998 } 2999 fclose(trace_stream); 3000 trace_matched_reference = true; 3001 } else { 3002 // Check trace_stream against ref_file. 3003 char command[1024]; 3004 size_t length = 3005 snprintf(command, sizeof(command), "diff -u %s %s", ref_file, filename); 3006 VIXL_CHECK(length < sizeof(command)); 3007 trace_matched_reference = (system(command) == 0); 3008 } 3009 return trace_matched_reference; 3010} 3011 3012 3013// Trace tests can only work with the simulator. 3014#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 3015 3016static void TraceTestHelper(bool coloured_trace, 3017 TraceParameters trace_parameters, 3018 const char* ref_file) { 3019 MacroAssembler masm(12 * KBytes); 3020 3021 char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX"; 3022 FILE* trace_stream = fdopen(mkstemp(trace_stream_filename), "w"); 3023 3024 Decoder decoder; 3025 Simulator simulator(&decoder, trace_stream); 3026 simulator.SetColouredTrace(coloured_trace); 3027 simulator.SetTraceParameters(trace_parameters); 3028 simulator.SilenceExclusiveAccessWarning(); 3029 3030 const int vl_in_bytes = 5 * kZRegMinSizeInBytes; 3031 const int vl_in_bits = vl_in_bytes * kBitsPerByte; 3032 const int pl_in_bits = vl_in_bits / kZRegBitsPerPRegBit; 3033 simulator.SetVectorLengthInBits(vl_in_bits); 3034 3035 // Set up a scratch buffer so we can test loads and stores. 3036 const int kScratchSize = vl_in_bytes * 1024; 3037 const int kScratchGuardSize = vl_in_bytes; 3038 char scratch_buffer[kScratchSize + kScratchGuardSize]; 3039 for (size_t i = 0; i < (sizeof(scratch_buffer) / sizeof(scratch_buffer[0])); 3040 i++) { 3041 scratch_buffer[i] = i & 0xff; 3042 } 3043 // Used for offset addressing. 
3044 simulator.WriteXRegister(0, reinterpret_cast<uintptr_t>(scratch_buffer)); 3045 // Used for pre-/post-index addressing. 3046 simulator.WriteXRegister(1, reinterpret_cast<uintptr_t>(scratch_buffer)); 3047 3048 const int kPostIndexRegisterStep = 13; // Arbitrary interesting value. 3049 // Used for post-index offsets. 3050 simulator.WriteXRegister(2, kPostIndexRegisterStep); 3051 3052 // Initialize the other registers with unique values. 3053 uint64_t initial_base_u64 = 0x0100001000100101; 3054 for (unsigned i = 3; i < kNumberOfRegisters; i++) { 3055 if (i == kLinkRegCode) continue; 3056 if (i == kZeroRegCode) continue; 3057 // NoRegLog suppresses the log now, but the registers will still be logged 3058 // before the first instruction is executed since they have been written but 3059 // not printed. 3060 simulator.WriteRegister(i, initial_base_u64 * i, Simulator::NoRegLog); 3061 } 3062 for (unsigned r = 0; r < kNumberOfVRegisters; r++) { 3063 LogicVRegister reg(simulator.ReadVRegister(r)); 3064 // Try to initialise Z registers with reasonable FP values. We prioritise 3065 // setting double values, then floats and half-precision values. The lanes 3066 // overlap, so this is a compromise, but d0, s0 and h0 views all see similar 3067 // arithmetic values. 3068 // 3069 // The exponent of each value is set to the (biased) register number. We set 3070 // the double, float and half-precision exponents where we can. 3071 uint64_t base = 0x3ff000003f803c00 + (0x0010000000800400 * (0x7f + r)); 3072 for (unsigned lane = 0; lane < (vl_in_bytes / kDRegSizeInBytes); lane++) { 3073 uint64_t mantissas = 0x0000000100010001 * (lane & 0x7f); 3074 reg.SetUint(kFormatVnD, lane, base | mantissas); 3075 } 3076 } 3077 for (unsigned r = 0; r < kNumberOfPRegisters; r++) { 3078 LogicPRegister reg(simulator.ReadPRegister(r)); 3079 // Set `r` active lanes between each inactive lane. 
3080 for (unsigned bit = 0; bit < pl_in_bits; bit++) { 3081 reg.SetActive(kFormatVnB, bit, ((bit + 1) % (r + 2)) != 0); 3082 } 3083 // Completely clear some Q-sized blocks. The trace will completely omit 3084 // these for stores. 3085 for (unsigned chunk = 0; chunk < (vl_in_bits / kQRegSize); chunk++) { 3086 if (((chunk + 1) % (r + 2)) == 0) { 3087 reg.SetActiveMask(chunk, static_cast<uint16_t>(0)); 3088 } 3089 } 3090 } 3091 3092 GenerateTestSequenceBase(&masm); 3093 GenerateTestSequenceFP(&masm); 3094 GenerateTestSequenceNEON(&masm); 3095 GenerateTestSequenceNEONFP(&masm); 3096 GenerateTestSequenceSVE(&masm); 3097 GenerateTestSequenceAtomics(&masm); 3098 masm.Ret(); 3099 masm.FinalizeCode(); 3100 3101 if (Test::disassemble()) { 3102 PrintDisassembler disasm(stdout); 3103 Instruction* start = masm.GetBuffer()->GetStartAddress<Instruction*>(); 3104 Instruction* end = masm.GetBuffer()->GetEndAddress<Instruction*>(); 3105 disasm.DisassembleBuffer(start, end); 3106 } 3107 3108 simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); 3109 3110 fclose(trace_stream); 3111 3112 // We already traced into the temporary file, so just print the file. 3113 // Note that these tests need to control the trace flags, so we ignore all 3114 // --trace-* options here except for --trace-sim. 3115 if (Test::trace_sim()) PrintFile(trace_stream_filename); 3116 3117 MaskAddresses(trace_stream_filename); 3118 3119 bool trace_matched_reference = 3120 CheckOrGenerateTrace(trace_stream_filename, ref_file); 3121 remove(trace_stream_filename); // Clean up before checking the result. 3122 VIXL_CHECK(trace_matched_reference); 3123 3124 uint64_t offset_base = simulator.ReadRegister<uint64_t>(0); 3125 uint64_t index_base = simulator.ReadRegister<uint64_t>(1); 3126 3127 VIXL_CHECK(index_base >= offset_base); 3128 VIXL_CHECK((index_base - offset_base) <= kScratchSize); 3129} 3130 3131 3132// Test individual options. 
TEST(disasm) { TraceTestHelper(false, LOG_DISASM, REF("log-disasm")); }
TEST(regs) { TraceTestHelper(false, LOG_REGS, REF("log-regs")); }
TEST(vregs) { TraceTestHelper(false, LOG_VREGS, REF("log-vregs")); }
TEST(sysregs) { TraceTestHelper(false, LOG_SYSREGS, REF("log-sysregs")); }
TEST(write) { TraceTestHelper(false, LOG_WRITE, REF("log-write")); }
// NOTE(review): this passes LOG_WRITE, the same option as TEST(write), while
// comparing against its own reference file. If a dedicated branch-tracing
// option exists (e.g. LOG_BRANCH - confirm against TraceParameters), this looks
// like a copy-paste slip. Changing the option would also require the
// "log-branch" reference to be regenerated.
TEST(branch) { TraceTestHelper(false, LOG_WRITE, REF("log-branch")); }

// Test standard combinations.
TEST(none) { TraceTestHelper(false, LOG_NONE, REF("log-none")); }
TEST(state) { TraceTestHelper(false, LOG_STATE, REF("log-state")); }
TEST(all) { TraceTestHelper(false, LOG_ALL, REF("log-all")); }


// Test individual options (with colour).
// These mirror the tests above, but enable coloured output and compare against
// the "-colour" reference files.
TEST(disasm_colour) {
  TraceTestHelper(true, LOG_DISASM, REF("log-disasm-colour"));
}
TEST(regs_colour) { TraceTestHelper(true, LOG_REGS, REF("log-regs-colour")); }
TEST(vregs_colour) {
  TraceTestHelper(true, LOG_VREGS, REF("log-vregs-colour"));
}
TEST(sysregs_colour) {
  TraceTestHelper(true, LOG_SYSREGS, REF("log-sysregs-colour"));
}
TEST(write_colour) {
  TraceTestHelper(true, LOG_WRITE, REF("log-write-colour"));
}
// NOTE(review): as for TEST(branch), this passes LOG_WRITE - confirm whether a
// branch-specific option was intended.
TEST(branch_colour) {
  TraceTestHelper(true, LOG_WRITE, REF("log-branch-colour"));
}

// Test standard combinations (with colour).
3165TEST(none_colour) { TraceTestHelper(true, LOG_NONE, REF("log-none-colour")); } 3166TEST(state_colour) { 3167 TraceTestHelper(true, LOG_STATE, REF("log-state-colour")); 3168} 3169TEST(all_colour) { TraceTestHelper(true, LOG_ALL, REF("log-all-colour")); } 3170 3171#endif // VIXL_INCLUDE_SIMULATOR_AARCH64 3172 3173static void PrintDisassemblerTestHelper(const char* prefix, 3174 const char* suffix, 3175 const char* ref_file) { 3176 MacroAssembler masm(12 * KBytes); 3177 3178 char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX"; 3179 FILE* trace_stream = fdopen(mkstemp(trace_stream_filename), "w"); 3180 3181 // We don't need to execute this code so there's no need for the execution 3182 // environment setup from TraceTestHelper. 3183 3184 GenerateTestSequenceBase(&masm); 3185 GenerateTestSequenceFP(&masm); 3186 GenerateTestSequenceNEON(&masm); 3187 GenerateTestSequenceNEONFP(&masm); 3188 GenerateTestSequenceSVE(&masm); 3189 GenerateTestSequenceAtomics(&masm); 3190 masm.FinalizeCode(); 3191 3192 Decoder decoder; 3193 CPUFeaturesAuditor auditor(&decoder); 3194 PrintDisassembler disasm(trace_stream); 3195 if (prefix != NULL) disasm.SetCPUFeaturesPrefix(prefix); 3196 if (suffix != NULL) disasm.SetCPUFeaturesSuffix(suffix); 3197 disasm.RegisterCPUFeaturesAuditor(&auditor); 3198 decoder.AppendVisitor(&disasm); 3199 3200 Instruction* instruction = masm.GetBuffer()->GetStartAddress<Instruction*>(); 3201 Instruction* end = masm.GetCursorAddress<Instruction*>(); 3202 while (instruction != end) { 3203 decoder.Decode(instruction); 3204 instruction += kInstructionSize; 3205 } 3206 3207 fclose(trace_stream); 3208 3209 // We already disassembled into the temporary file, so just print the file. 
3210 if (Test::disassemble()) PrintFile(trace_stream_filename); 3211 3212 MaskAddresses(trace_stream_filename); 3213 3214 bool trace_matched_reference = 3215 CheckOrGenerateTrace(trace_stream_filename, ref_file); 3216 remove(trace_stream_filename); // Clean up before checking the result. 3217 VIXL_CHECK(trace_matched_reference); 3218} 3219 3220 3221// Test CPUFeatures disassembly annotations. 3222TEST(cpufeatures) { 3223 PrintDisassemblerTestHelper(NULL, NULL, REF("log-cpufeatures")); 3224} 3225TEST(cpufeatures_custom) { 3226 PrintDisassemblerTestHelper("### {", "} ###", REF("log-cpufeatures-custom")); 3227} 3228TEST(cpufeatures_colour) { 3229 // The colour chosen is arbitrary. 3230 PrintDisassemblerTestHelper("\033[1;35m", // Prefix: Bold magenta. 3231 "\033[0;m", // Suffix: Reset colour. 3232 REF("log-cpufeatures-colour")); 3233} 3234} // namespace aarch64 3235} // namespace vixl 3236