// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20b8021494Sopenharmony_ci// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21b8021494Sopenharmony_ci// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22b8021494Sopenharmony_ci// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23b8021494Sopenharmony_ci// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24b8021494Sopenharmony_ci// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25b8021494Sopenharmony_ci// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26b8021494Sopenharmony_ci 27b8021494Sopenharmony_ci#include "macro-assembler-aarch64.h" 28b8021494Sopenharmony_ci 29b8021494Sopenharmony_cinamespace vixl { 30b8021494Sopenharmony_cinamespace aarch64 { 31b8021494Sopenharmony_ci 32b8021494Sopenharmony_civoid MacroAssembler::AddSubHelper(AddSubHelperOption option, 33b8021494Sopenharmony_ci const ZRegister& zd, 34b8021494Sopenharmony_ci const ZRegister& zn, 35b8021494Sopenharmony_ci IntegerOperand imm) { 36b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInLane(zd)); 37b8021494Sopenharmony_ci 38b8021494Sopenharmony_ci // Simple, encodable cases. 39b8021494Sopenharmony_ci if (TrySingleAddSub(option, zd, zn, imm)) return; 40b8021494Sopenharmony_ci 41b8021494Sopenharmony_ci VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate)); 42b8021494Sopenharmony_ci bool add_imm = (option == kAddImmediate); 43b8021494Sopenharmony_ci 44b8021494Sopenharmony_ci // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one 45b8021494Sopenharmony_ci // instruction. Also interpret the immediate as signed, so we can convert 46b8021494Sopenharmony_ci // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc. 
47b8021494Sopenharmony_ci IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits())); 48b8021494Sopenharmony_ci if (signed_imm.IsNegative()) { 49b8021494Sopenharmony_ci AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate; 50b8021494Sopenharmony_ci IntegerOperand n_imm(signed_imm.GetMagnitude()); 51b8021494Sopenharmony_ci // IntegerOperand can represent -INT_MIN, so this is always safe. 52b8021494Sopenharmony_ci VIXL_ASSERT(n_imm.IsPositiveOrZero()); 53b8021494Sopenharmony_ci if (TrySingleAddSub(n_option, zd, zn, n_imm)) return; 54b8021494Sopenharmony_ci } 55b8021494Sopenharmony_ci 56b8021494Sopenharmony_ci // Otherwise, fall back to dup + ADD_z_z/SUB_z_z. 57b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 58b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); 59b8021494Sopenharmony_ci Dup(scratch, imm); 60b8021494Sopenharmony_ci 61b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 62b8021494Sopenharmony_ci if (add_imm) { 63b8021494Sopenharmony_ci add(zd, zn, scratch); 64b8021494Sopenharmony_ci } else { 65b8021494Sopenharmony_ci sub(zd, zn, scratch); 66b8021494Sopenharmony_ci } 67b8021494Sopenharmony_ci} 68b8021494Sopenharmony_ci 69b8021494Sopenharmony_cibool MacroAssembler::TrySingleAddSub(AddSubHelperOption option, 70b8021494Sopenharmony_ci const ZRegister& zd, 71b8021494Sopenharmony_ci const ZRegister& zn, 72b8021494Sopenharmony_ci IntegerOperand imm) { 73b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInLane(zd)); 74b8021494Sopenharmony_ci 75b8021494Sopenharmony_ci int imm8; 76b8021494Sopenharmony_ci int shift = -1; 77b8021494Sopenharmony_ci if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || 78b8021494Sopenharmony_ci imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { 79b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 80b8021494Sopenharmony_ci switch (option) { 81b8021494Sopenharmony_ci case kAddImmediate: 82b8021494Sopenharmony_ci 
add(zd, zd, imm8, shift); 83b8021494Sopenharmony_ci return true; 84b8021494Sopenharmony_ci case kSubImmediate: 85b8021494Sopenharmony_ci sub(zd, zd, imm8, shift); 86b8021494Sopenharmony_ci return true; 87b8021494Sopenharmony_ci } 88b8021494Sopenharmony_ci } 89b8021494Sopenharmony_ci return false; 90b8021494Sopenharmony_ci} 91b8021494Sopenharmony_ci 92b8021494Sopenharmony_civoid MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn, 93b8021494Sopenharmony_ci SVEArithPredicatedFn reg_macro, 94b8021494Sopenharmony_ci const ZRegister& zd, 95b8021494Sopenharmony_ci const ZRegister& zn, 96b8021494Sopenharmony_ci IntegerOperand imm, 97b8021494Sopenharmony_ci bool is_signed) { 98b8021494Sopenharmony_ci if (is_signed) { 99b8021494Sopenharmony_ci // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi 100b8021494Sopenharmony_ci if (imm.IsInt8()) { 101b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 102b8021494Sopenharmony_ci (this->*imm_fn)(zd, zd, imm.AsInt8()); 103b8021494Sopenharmony_ci return; 104b8021494Sopenharmony_ci } 105b8021494Sopenharmony_ci } else { 106b8021494Sopenharmony_ci // E.g. UMIN_z_zi, UMAX_z_zi 107b8021494Sopenharmony_ci if (imm.IsUint8()) { 108b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 109b8021494Sopenharmony_ci (this->*imm_fn)(zd, zd, imm.AsUint8()); 110b8021494Sopenharmony_ci return; 111b8021494Sopenharmony_ci } 112b8021494Sopenharmony_ci } 113b8021494Sopenharmony_ci 114b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 115b8021494Sopenharmony_ci PRegister pg = temps.AcquireGoverningP(); 116b8021494Sopenharmony_ci Ptrue(pg.WithSameLaneSizeAs(zd)); 117b8021494Sopenharmony_ci 118b8021494Sopenharmony_ci // Try to re-use zd if we can, so we can avoid a movprfx. 119b8021494Sopenharmony_ci ZRegister scratch = 120b8021494Sopenharmony_ci zd.Aliases(zn) ? 
temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()) 121b8021494Sopenharmony_ci : zd; 122b8021494Sopenharmony_ci Dup(scratch, imm); 123b8021494Sopenharmony_ci 124b8021494Sopenharmony_ci // The vector-form macro for commutative operations will swap the arguments to 125b8021494Sopenharmony_ci // avoid movprfx, if necessary. 126b8021494Sopenharmony_ci (this->*reg_macro)(zd, pg.Merging(), zn, scratch); 127b8021494Sopenharmony_ci} 128b8021494Sopenharmony_ci 129b8021494Sopenharmony_civoid MacroAssembler::Mul(const ZRegister& zd, 130b8021494Sopenharmony_ci const ZRegister& zn, 131b8021494Sopenharmony_ci IntegerOperand imm) { 132b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 133b8021494Sopenharmony_ci IntArithImmFn imm_fn = &Assembler::mul; 134b8021494Sopenharmony_ci SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul; 135b8021494Sopenharmony_ci IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); 136b8021494Sopenharmony_ci} 137b8021494Sopenharmony_ci 138b8021494Sopenharmony_civoid MacroAssembler::Smin(const ZRegister& zd, 139b8021494Sopenharmony_ci const ZRegister& zn, 140b8021494Sopenharmony_ci IntegerOperand imm) { 141b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 142b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInSignedLane(zd)); 143b8021494Sopenharmony_ci IntArithImmFn imm_fn = &Assembler::smin; 144b8021494Sopenharmony_ci SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin; 145b8021494Sopenharmony_ci IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); 146b8021494Sopenharmony_ci} 147b8021494Sopenharmony_ci 148b8021494Sopenharmony_civoid MacroAssembler::Smax(const ZRegister& zd, 149b8021494Sopenharmony_ci const ZRegister& zn, 150b8021494Sopenharmony_ci IntegerOperand imm) { 151b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 152b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInSignedLane(zd)); 153b8021494Sopenharmony_ci IntArithImmFn imm_fn = &Assembler::smax; 154b8021494Sopenharmony_ci SVEArithPredicatedFn reg_fn = 
&MacroAssembler::Smax; 155b8021494Sopenharmony_ci IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); 156b8021494Sopenharmony_ci} 157b8021494Sopenharmony_ci 158b8021494Sopenharmony_civoid MacroAssembler::Umax(const ZRegister& zd, 159b8021494Sopenharmony_ci const ZRegister& zn, 160b8021494Sopenharmony_ci IntegerOperand imm) { 161b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 162b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); 163b8021494Sopenharmony_ci IntArithImmFn imm_fn = &Assembler::umax; 164b8021494Sopenharmony_ci SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax; 165b8021494Sopenharmony_ci IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); 166b8021494Sopenharmony_ci} 167b8021494Sopenharmony_ci 168b8021494Sopenharmony_civoid MacroAssembler::Umin(const ZRegister& zd, 169b8021494Sopenharmony_ci const ZRegister& zn, 170b8021494Sopenharmony_ci IntegerOperand imm) { 171b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 172b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); 173b8021494Sopenharmony_ci IntArithImmFn imm_fn = &Assembler::umin; 174b8021494Sopenharmony_ci SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin; 175b8021494Sopenharmony_ci IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); 176b8021494Sopenharmony_ci} 177b8021494Sopenharmony_ci 178b8021494Sopenharmony_civoid MacroAssembler::Addpl(const Register& xd, 179b8021494Sopenharmony_ci const Register& xn, 180b8021494Sopenharmony_ci int64_t multiplier) { 181b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 182b8021494Sopenharmony_ci 183b8021494Sopenharmony_ci // This macro relies on `Rdvl` to handle some out-of-range cases. Check that 184b8021494Sopenharmony_ci // `VL * multiplier` cannot overflow, for any possible value of VL. 
185b8021494Sopenharmony_ci VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes)); 186b8021494Sopenharmony_ci VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes)); 187b8021494Sopenharmony_ci 188b8021494Sopenharmony_ci if (xd.IsZero()) return; 189b8021494Sopenharmony_ci if (xn.IsZero() && xd.IsSP()) { 190b8021494Sopenharmony_ci // TODO: This operation doesn't make much sense, but we could support it 191b8021494Sopenharmony_ci // with a scratch register if necessary. 192b8021494Sopenharmony_ci VIXL_UNIMPLEMENTED(); 193b8021494Sopenharmony_ci } 194b8021494Sopenharmony_ci 195b8021494Sopenharmony_ci // Handling xzr requires an extra move, so defer it until later so we can try 196b8021494Sopenharmony_ci // to use `rdvl` instead (via `Addvl`). 197b8021494Sopenharmony_ci if (IsInt6(multiplier) && !xn.IsZero()) { 198b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 199b8021494Sopenharmony_ci addpl(xd, xn, static_cast<int>(multiplier)); 200b8021494Sopenharmony_ci return; 201b8021494Sopenharmony_ci } 202b8021494Sopenharmony_ci 203b8021494Sopenharmony_ci // If `multiplier` is a multiple of 8, we can use `Addvl` instead. 204b8021494Sopenharmony_ci if ((multiplier % kZRegBitsPerPRegBit) == 0) { 205b8021494Sopenharmony_ci Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit); 206b8021494Sopenharmony_ci return; 207b8021494Sopenharmony_ci } 208b8021494Sopenharmony_ci 209b8021494Sopenharmony_ci if (IsInt6(multiplier)) { 210b8021494Sopenharmony_ci VIXL_ASSERT(xn.IsZero()); // Other cases were handled with `addpl`. 211b8021494Sopenharmony_ci // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so 212b8021494Sopenharmony_ci // materialise a zero. 
213b8021494Sopenharmony_ci MacroEmissionCheckScope guard(this); 214b8021494Sopenharmony_ci movz(xd, 0); 215b8021494Sopenharmony_ci addpl(xd, xd, static_cast<int>(multiplier)); 216b8021494Sopenharmony_ci return; 217b8021494Sopenharmony_ci } 218b8021494Sopenharmony_ci 219b8021494Sopenharmony_ci // TODO: Some probable cases result in rather long sequences. For example, 220b8021494Sopenharmony_ci // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just 221b8021494Sopenharmony_ci // outside the encodable range. We should look for ways to cover such cases 222b8021494Sopenharmony_ci // without drastically increasing the complexity of this logic. 223b8021494Sopenharmony_ci 224b8021494Sopenharmony_ci // For other cases, calculate xn + (PL * multiplier) using discrete 225b8021494Sopenharmony_ci // instructions. This requires two scratch registers in the general case, so 226b8021494Sopenharmony_ci // try to re-use the destination as a scratch register. 227b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 228b8021494Sopenharmony_ci temps.Include(xd); 229b8021494Sopenharmony_ci temps.Exclude(xn); 230b8021494Sopenharmony_ci 231b8021494Sopenharmony_ci Register scratch = temps.AcquireX(); 232b8021494Sopenharmony_ci // Because there is no `rdpl`, so we have to calculate PL from VL. We can't 233b8021494Sopenharmony_ci // scale the multiplier because (we already know) it isn't a multiple of 8. 234b8021494Sopenharmony_ci Rdvl(scratch, multiplier); 235b8021494Sopenharmony_ci 236b8021494Sopenharmony_ci MacroEmissionCheckScope guard(this); 237b8021494Sopenharmony_ci if (xn.IsZero()) { 238b8021494Sopenharmony_ci asr(xd, scratch, kZRegBitsPerPRegBitLog2); 239b8021494Sopenharmony_ci } else if (xd.IsSP() || xn.IsSP()) { 240b8021494Sopenharmony_ci // TODO: MacroAssembler::Add should be able to handle this. 
241b8021494Sopenharmony_ci asr(scratch, scratch, kZRegBitsPerPRegBitLog2); 242b8021494Sopenharmony_ci add(xd, xn, scratch); 243b8021494Sopenharmony_ci } else { 244b8021494Sopenharmony_ci add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2)); 245b8021494Sopenharmony_ci } 246b8021494Sopenharmony_ci} 247b8021494Sopenharmony_ci 248b8021494Sopenharmony_civoid MacroAssembler::Addvl(const Register& xd, 249b8021494Sopenharmony_ci const Register& xn, 250b8021494Sopenharmony_ci int64_t multiplier) { 251b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 252b8021494Sopenharmony_ci VIXL_ASSERT(xd.IsX()); 253b8021494Sopenharmony_ci VIXL_ASSERT(xn.IsX()); 254b8021494Sopenharmony_ci 255b8021494Sopenharmony_ci // Check that `VL * multiplier` cannot overflow, for any possible value of VL. 256b8021494Sopenharmony_ci VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes)); 257b8021494Sopenharmony_ci VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes)); 258b8021494Sopenharmony_ci 259b8021494Sopenharmony_ci if (xd.IsZero()) return; 260b8021494Sopenharmony_ci if (xn.IsZero() && xd.IsSP()) { 261b8021494Sopenharmony_ci // TODO: This operation doesn't make much sense, but we could support it 262b8021494Sopenharmony_ci // with a scratch register if necessary. `rdvl` cannot write into `sp`. 263b8021494Sopenharmony_ci VIXL_UNIMPLEMENTED(); 264b8021494Sopenharmony_ci } 265b8021494Sopenharmony_ci 266b8021494Sopenharmony_ci if (IsInt6(multiplier)) { 267b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 268b8021494Sopenharmony_ci if (xn.IsZero()) { 269b8021494Sopenharmony_ci rdvl(xd, static_cast<int>(multiplier)); 270b8021494Sopenharmony_ci } else { 271b8021494Sopenharmony_ci addvl(xd, xn, static_cast<int>(multiplier)); 272b8021494Sopenharmony_ci } 273b8021494Sopenharmony_ci return; 274b8021494Sopenharmony_ci } 275b8021494Sopenharmony_ci 276b8021494Sopenharmony_ci // TODO: Some probable cases result in rather long sequences. 
For example, 277b8021494Sopenharmony_ci // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just 278b8021494Sopenharmony_ci // outside the encodable range. We should look for ways to cover such cases 279b8021494Sopenharmony_ci // without drastically increasing the complexity of this logic. 280b8021494Sopenharmony_ci 281b8021494Sopenharmony_ci // For other cases, calculate xn + (VL * multiplier) using discrete 282b8021494Sopenharmony_ci // instructions. This requires two scratch registers in the general case, so 283b8021494Sopenharmony_ci // we try to re-use the destination as a scratch register. 284b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 285b8021494Sopenharmony_ci temps.Include(xd); 286b8021494Sopenharmony_ci temps.Exclude(xn); 287b8021494Sopenharmony_ci 288b8021494Sopenharmony_ci Register a = temps.AcquireX(); 289b8021494Sopenharmony_ci Mov(a, multiplier); 290b8021494Sopenharmony_ci 291b8021494Sopenharmony_ci MacroEmissionCheckScope guard(this); 292b8021494Sopenharmony_ci Register b = temps.AcquireX(); 293b8021494Sopenharmony_ci rdvl(b, 1); 294b8021494Sopenharmony_ci if (xn.IsZero()) { 295b8021494Sopenharmony_ci mul(xd, a, b); 296b8021494Sopenharmony_ci } else if (xd.IsSP() || xn.IsSP()) { 297b8021494Sopenharmony_ci mul(a, a, b); 298b8021494Sopenharmony_ci add(xd, xn, a); 299b8021494Sopenharmony_ci } else { 300b8021494Sopenharmony_ci madd(xd, a, b, xn); 301b8021494Sopenharmony_ci } 302b8021494Sopenharmony_ci} 303b8021494Sopenharmony_ci 304b8021494Sopenharmony_civoid MacroAssembler::CalculateSVEAddress(const Register& xd, 305b8021494Sopenharmony_ci const SVEMemOperand& addr, 306b8021494Sopenharmony_ci int vl_divisor_log2) { 307b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 308b8021494Sopenharmony_ci VIXL_ASSERT(!addr.IsScatterGather()); 309b8021494Sopenharmony_ci VIXL_ASSERT(xd.IsX()); 310b8021494Sopenharmony_ci 311b8021494Sopenharmony_ci // The lower bound is where a whole Z register is accessed. 
312b8021494Sopenharmony_ci VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0)); 313b8021494Sopenharmony_ci // The upper bound is for P register accesses, and for instructions like 314b8021494Sopenharmony_ci // "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane. 315b8021494Sopenharmony_ci VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2)); 316b8021494Sopenharmony_ci 317b8021494Sopenharmony_ci SVEOffsetModifier mod = addr.GetOffsetModifier(); 318b8021494Sopenharmony_ci Register base = addr.GetScalarBase(); 319b8021494Sopenharmony_ci 320b8021494Sopenharmony_ci if (addr.IsEquivalentToScalar()) { 321b8021494Sopenharmony_ci // For example: 322b8021494Sopenharmony_ci // [x0] 323b8021494Sopenharmony_ci // [x0, #0] 324b8021494Sopenharmony_ci // [x0, xzr, LSL 2] 325b8021494Sopenharmony_ci Mov(xd, base); 326b8021494Sopenharmony_ci } else if (addr.IsScalarPlusImmediate()) { 327b8021494Sopenharmony_ci // For example: 328b8021494Sopenharmony_ci // [x0, #42] 329b8021494Sopenharmony_ci // [x0, #42, MUL VL] 330b8021494Sopenharmony_ci int64_t offset = addr.GetImmediateOffset(); 331b8021494Sopenharmony_ci VIXL_ASSERT(offset != 0); // Handled by IsEquivalentToScalar. 332b8021494Sopenharmony_ci if (addr.IsMulVl()) { 333b8021494Sopenharmony_ci int vl_divisor = 1 << vl_divisor_log2; 334b8021494Sopenharmony_ci // For all possible values of vl_divisor, we can simply use `Addpl`. This 335b8021494Sopenharmony_ci // will select `addvl` if necessary. 336b8021494Sopenharmony_ci VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0); 337b8021494Sopenharmony_ci Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor)); 338b8021494Sopenharmony_ci } else { 339b8021494Sopenharmony_ci // IsScalarPlusImmediate() ensures that no other modifiers can occur. 
340b8021494Sopenharmony_ci VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); 341b8021494Sopenharmony_ci Add(xd, base, offset); 342b8021494Sopenharmony_ci } 343b8021494Sopenharmony_ci } else if (addr.IsScalarPlusScalar()) { 344b8021494Sopenharmony_ci // For example: 345b8021494Sopenharmony_ci // [x0, x1] 346b8021494Sopenharmony_ci // [x0, x1, LSL #4] 347b8021494Sopenharmony_ci Register offset = addr.GetScalarOffset(); 348b8021494Sopenharmony_ci VIXL_ASSERT(!offset.IsZero()); // Handled by IsEquivalentToScalar. 349b8021494Sopenharmony_ci if (mod == SVE_LSL) { 350b8021494Sopenharmony_ci Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount())); 351b8021494Sopenharmony_ci } else { 352b8021494Sopenharmony_ci // IsScalarPlusScalar() ensures that no other modifiers can occur. 353b8021494Sopenharmony_ci VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); 354b8021494Sopenharmony_ci Add(xd, base, offset); 355b8021494Sopenharmony_ci } 356b8021494Sopenharmony_ci } else { 357b8021494Sopenharmony_ci // All other forms are scatter-gather addresses, which cannot be evaluated 358b8021494Sopenharmony_ci // into an X register. 
359b8021494Sopenharmony_ci VIXL_UNREACHABLE(); 360b8021494Sopenharmony_ci } 361b8021494Sopenharmony_ci} 362b8021494Sopenharmony_ci 363b8021494Sopenharmony_civoid MacroAssembler::Cpy(const ZRegister& zd, 364b8021494Sopenharmony_ci const PRegister& pg, 365b8021494Sopenharmony_ci IntegerOperand imm) { 366b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 367b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInLane(zd)); 368b8021494Sopenharmony_ci int imm8; 369b8021494Sopenharmony_ci int shift; 370b8021494Sopenharmony_ci if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || 371b8021494Sopenharmony_ci imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { 372b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 373b8021494Sopenharmony_ci cpy(zd, pg, imm8, shift); 374b8021494Sopenharmony_ci return; 375b8021494Sopenharmony_ci } 376b8021494Sopenharmony_ci 377b8021494Sopenharmony_ci // The fallbacks rely on `cpy` variants that only support merging predication. 378b8021494Sopenharmony_ci // If zeroing predication was requested, zero the destination first. 379b8021494Sopenharmony_ci if (pg.IsZeroing()) { 380b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 381b8021494Sopenharmony_ci dup(zd, 0); 382b8021494Sopenharmony_ci } 383b8021494Sopenharmony_ci PRegisterM pg_m = pg.Merging(); 384b8021494Sopenharmony_ci 385b8021494Sopenharmony_ci // Try to encode the immediate using fcpy. 
386b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInLane(zd)); 387b8021494Sopenharmony_ci if (zd.GetLaneSizeInBits() >= kHRegSize) { 388b8021494Sopenharmony_ci double fp_imm = 0.0; 389b8021494Sopenharmony_ci switch (zd.GetLaneSizeInBits()) { 390b8021494Sopenharmony_ci case kHRegSize: 391b8021494Sopenharmony_ci fp_imm = 392b8021494Sopenharmony_ci FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN); 393b8021494Sopenharmony_ci break; 394b8021494Sopenharmony_ci case kSRegSize: 395b8021494Sopenharmony_ci fp_imm = RawbitsToFloat(imm.AsUint32()); 396b8021494Sopenharmony_ci break; 397b8021494Sopenharmony_ci case kDRegSize: 398b8021494Sopenharmony_ci fp_imm = RawbitsToDouble(imm.AsUint64()); 399b8021494Sopenharmony_ci break; 400b8021494Sopenharmony_ci default: 401b8021494Sopenharmony_ci VIXL_UNREACHABLE(); 402b8021494Sopenharmony_ci break; 403b8021494Sopenharmony_ci } 404b8021494Sopenharmony_ci // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so 405b8021494Sopenharmony_ci // we can use IsImmFP64 for all lane sizes. 406b8021494Sopenharmony_ci if (IsImmFP64(fp_imm)) { 407b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 408b8021494Sopenharmony_ci fcpy(zd, pg_m, fp_imm); 409b8021494Sopenharmony_ci return; 410b8021494Sopenharmony_ci } 411b8021494Sopenharmony_ci } 412b8021494Sopenharmony_ci 413b8021494Sopenharmony_ci // Fall back to using a scratch register. 414b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 415b8021494Sopenharmony_ci Register scratch = temps.AcquireRegisterToHoldLane(zd); 416b8021494Sopenharmony_ci Mov(scratch, imm); 417b8021494Sopenharmony_ci 418b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 419b8021494Sopenharmony_ci cpy(zd, pg_m, scratch); 420b8021494Sopenharmony_ci} 421b8021494Sopenharmony_ci 422b8021494Sopenharmony_ci// TODO: We implement Fcpy (amongst other things) for all FP types because it 423b8021494Sopenharmony_ci// allows us to preserve user-specified NaNs. 
We should come up with some 424b8021494Sopenharmony_ci// FPImmediate type to abstract this, and avoid all the duplication below (and 425b8021494Sopenharmony_ci// elsewhere). 426b8021494Sopenharmony_ci 427b8021494Sopenharmony_civoid MacroAssembler::Fcpy(const ZRegister& zd, 428b8021494Sopenharmony_ci const PRegisterM& pg, 429b8021494Sopenharmony_ci double imm) { 430b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 431b8021494Sopenharmony_ci VIXL_ASSERT(pg.IsMerging()); 432b8021494Sopenharmony_ci 433b8021494Sopenharmony_ci if (IsImmFP64(imm)) { 434b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 435b8021494Sopenharmony_ci fcpy(zd, pg, imm); 436b8021494Sopenharmony_ci return; 437b8021494Sopenharmony_ci } 438b8021494Sopenharmony_ci 439b8021494Sopenharmony_ci // As a fall-back, cast the immediate to the required lane size, and try to 440b8021494Sopenharmony_ci // encode the bit pattern using `Cpy`. 441b8021494Sopenharmony_ci Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); 442b8021494Sopenharmony_ci} 443b8021494Sopenharmony_ci 444b8021494Sopenharmony_civoid MacroAssembler::Fcpy(const ZRegister& zd, 445b8021494Sopenharmony_ci const PRegisterM& pg, 446b8021494Sopenharmony_ci float imm) { 447b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 448b8021494Sopenharmony_ci VIXL_ASSERT(pg.IsMerging()); 449b8021494Sopenharmony_ci 450b8021494Sopenharmony_ci if (IsImmFP32(imm)) { 451b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 452b8021494Sopenharmony_ci fcpy(zd, pg, imm); 453b8021494Sopenharmony_ci return; 454b8021494Sopenharmony_ci } 455b8021494Sopenharmony_ci 456b8021494Sopenharmony_ci // As a fall-back, cast the immediate to the required lane size, and try to 457b8021494Sopenharmony_ci // encode the bit pattern using `Cpy`. 
458b8021494Sopenharmony_ci Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); 459b8021494Sopenharmony_ci} 460b8021494Sopenharmony_ci 461b8021494Sopenharmony_civoid MacroAssembler::Fcpy(const ZRegister& zd, 462b8021494Sopenharmony_ci const PRegisterM& pg, 463b8021494Sopenharmony_ci Float16 imm) { 464b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 465b8021494Sopenharmony_ci VIXL_ASSERT(pg.IsMerging()); 466b8021494Sopenharmony_ci 467b8021494Sopenharmony_ci if (IsImmFP16(imm)) { 468b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 469b8021494Sopenharmony_ci fcpy(zd, pg, imm); 470b8021494Sopenharmony_ci return; 471b8021494Sopenharmony_ci } 472b8021494Sopenharmony_ci 473b8021494Sopenharmony_ci // As a fall-back, cast the immediate to the required lane size, and try to 474b8021494Sopenharmony_ci // encode the bit pattern using `Cpy`. 475b8021494Sopenharmony_ci Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); 476b8021494Sopenharmony_ci} 477b8021494Sopenharmony_ci 478b8021494Sopenharmony_civoid MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) { 479b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 480b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInLane(zd)); 481b8021494Sopenharmony_ci unsigned lane_size = zd.GetLaneSizeInBits(); 482b8021494Sopenharmony_ci int imm8; 483b8021494Sopenharmony_ci int shift; 484b8021494Sopenharmony_ci if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || 485b8021494Sopenharmony_ci imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { 486b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 487b8021494Sopenharmony_ci dup(zd, imm8, shift); 488b8021494Sopenharmony_ci } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) { 489b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 490b8021494Sopenharmony_ci dupm(zd, imm.AsUintN(lane_size)); 491b8021494Sopenharmony_ci } else { 492b8021494Sopenharmony_ci UseScratchRegisterScope 
temps(this); 493b8021494Sopenharmony_ci Register scratch = temps.AcquireRegisterToHoldLane(zd); 494b8021494Sopenharmony_ci Mov(scratch, imm); 495b8021494Sopenharmony_ci 496b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 497b8021494Sopenharmony_ci dup(zd, scratch); 498b8021494Sopenharmony_ci } 499b8021494Sopenharmony_ci} 500b8021494Sopenharmony_ci 501b8021494Sopenharmony_civoid MacroAssembler::NoncommutativeArithmeticHelper( 502b8021494Sopenharmony_ci const ZRegister& zd, 503b8021494Sopenharmony_ci const PRegisterM& pg, 504b8021494Sopenharmony_ci const ZRegister& zn, 505b8021494Sopenharmony_ci const ZRegister& zm, 506b8021494Sopenharmony_ci SVEArithPredicatedFn fn, 507b8021494Sopenharmony_ci SVEArithPredicatedFn rev_fn) { 508b8021494Sopenharmony_ci if (zd.Aliases(zn)) { 509b8021494Sopenharmony_ci // E.g. zd = zd / zm 510b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 511b8021494Sopenharmony_ci (this->*fn)(zd, pg, zn, zm); 512b8021494Sopenharmony_ci } else if (zd.Aliases(zm)) { 513b8021494Sopenharmony_ci // E.g. zd = zn / zd 514b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 515b8021494Sopenharmony_ci (this->*rev_fn)(zd, pg, zm, zn); 516b8021494Sopenharmony_ci } else { 517b8021494Sopenharmony_ci // E.g. 
zd = zn / zm 518b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, pg, zn); 519b8021494Sopenharmony_ci (this->*fn)(zd, pg, zd, zm); 520b8021494Sopenharmony_ci } 521b8021494Sopenharmony_ci} 522b8021494Sopenharmony_ci 523b8021494Sopenharmony_civoid MacroAssembler::FPCommutativeArithmeticHelper( 524b8021494Sopenharmony_ci const ZRegister& zd, 525b8021494Sopenharmony_ci const PRegisterM& pg, 526b8021494Sopenharmony_ci const ZRegister& zn, 527b8021494Sopenharmony_ci const ZRegister& zm, 528b8021494Sopenharmony_ci SVEArithPredicatedFn fn, 529b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 530b8021494Sopenharmony_ci ResolveFPNaNPropagationOption(&nan_option); 531b8021494Sopenharmony_ci 532b8021494Sopenharmony_ci if (zd.Aliases(zn)) { 533b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 534b8021494Sopenharmony_ci (this->*fn)(zd, pg, zd, zm); 535b8021494Sopenharmony_ci } else if (zd.Aliases(zm)) { 536b8021494Sopenharmony_ci switch (nan_option) { 537b8021494Sopenharmony_ci case FastNaNPropagation: { 538b8021494Sopenharmony_ci // Swap the arguments. 539b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 540b8021494Sopenharmony_ci (this->*fn)(zd, pg, zd, zn); 541b8021494Sopenharmony_ci return; 542b8021494Sopenharmony_ci } 543b8021494Sopenharmony_ci case StrictNaNPropagation: { 544b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 545b8021494Sopenharmony_ci // Use a scratch register to keep the argument order exactly as 546b8021494Sopenharmony_ci // specified. 
547b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); 548b8021494Sopenharmony_ci { 549b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, pg, zn); 550b8021494Sopenharmony_ci (this->*fn)(scratch, pg, scratch, zm); 551b8021494Sopenharmony_ci } 552b8021494Sopenharmony_ci Mov(zd, scratch); 553b8021494Sopenharmony_ci return; 554b8021494Sopenharmony_ci } 555b8021494Sopenharmony_ci case NoFPMacroNaNPropagationSelected: 556b8021494Sopenharmony_ci VIXL_UNREACHABLE(); 557b8021494Sopenharmony_ci return; 558b8021494Sopenharmony_ci } 559b8021494Sopenharmony_ci } else { 560b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, pg, zn); 561b8021494Sopenharmony_ci (this->*fn)(zd, pg, zd, zm); 562b8021494Sopenharmony_ci } 563b8021494Sopenharmony_ci} 564b8021494Sopenharmony_ci 565b8021494Sopenharmony_ci// Instructions of the form "inst zda, zn, zm, #num", where they are 566b8021494Sopenharmony_ci// non-commutative and no reversed form is provided. 567b8021494Sopenharmony_ci#define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \ 568b8021494Sopenharmony_ci V(Cmla, cmla) \ 569b8021494Sopenharmony_ci V(Sqrdcmlah, sqrdcmlah) 570b8021494Sopenharmony_ci 571b8021494Sopenharmony_ci#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ 572b8021494Sopenharmony_ci void MacroAssembler::MASMFN(const ZRegister& zd, \ 573b8021494Sopenharmony_ci const ZRegister& za, \ 574b8021494Sopenharmony_ci const ZRegister& zn, \ 575b8021494Sopenharmony_ci const ZRegister& zm, \ 576b8021494Sopenharmony_ci int imm) { \ 577b8021494Sopenharmony_ci if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \ 578b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); \ 579b8021494Sopenharmony_ci VIXL_ASSERT(AreSameLaneSize(zn, zm)); \ 580b8021494Sopenharmony_ci ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); \ 581b8021494Sopenharmony_ci Mov(ztmp, zd.Aliases(zn) ? 
zn : zm); \ 582b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); \ 583b8021494Sopenharmony_ci ASMFN(zd, \ 584b8021494Sopenharmony_ci (zd.Aliases(zn) ? ztmp : zn), \ 585b8021494Sopenharmony_ci (zd.Aliases(zm) ? ztmp : zm), \ 586b8021494Sopenharmony_ci imm); \ 587b8021494Sopenharmony_ci } else { \ 588b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); \ 589b8021494Sopenharmony_ci ASMFN(zd, zn, zm, imm); \ 590b8021494Sopenharmony_ci } \ 591b8021494Sopenharmony_ci } 592b8021494Sopenharmony_ciVIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC) 593b8021494Sopenharmony_ci#undef VIXL_DEFINE_MASM_FUNC 594b8021494Sopenharmony_ci 595b8021494Sopenharmony_ci// Instructions of the form "inst zda, zn, zm, #num, #num", where they are 596b8021494Sopenharmony_ci// non-commutative and no reversed form is provided. 597b8021494Sopenharmony_ci#define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \ 598b8021494Sopenharmony_ci V(Cmla, cmla) \ 599b8021494Sopenharmony_ci V(Sqrdcmlah, sqrdcmlah) 600b8021494Sopenharmony_ci 601b8021494Sopenharmony_ci// This doesn't handle zm when it's out of the range that can be encoded in 602b8021494Sopenharmony_ci// instruction. The range depends on element size: z0-z7 for H, z0-15 for S. 
603b8021494Sopenharmony_ci#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ 604b8021494Sopenharmony_ci void MacroAssembler::MASMFN(const ZRegister& zd, \ 605b8021494Sopenharmony_ci const ZRegister& za, \ 606b8021494Sopenharmony_ci const ZRegister& zn, \ 607b8021494Sopenharmony_ci const ZRegister& zm, \ 608b8021494Sopenharmony_ci int index, \ 609b8021494Sopenharmony_ci int rot) { \ 610b8021494Sopenharmony_ci if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \ 611b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); \ 612b8021494Sopenharmony_ci ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); \ 613b8021494Sopenharmony_ci { \ 614b8021494Sopenharmony_ci MovprfxHelperScope guard(this, ztmp, za); \ 615b8021494Sopenharmony_ci ASMFN(ztmp, zn, zm, index, rot); \ 616b8021494Sopenharmony_ci } \ 617b8021494Sopenharmony_ci Mov(zd, ztmp); \ 618b8021494Sopenharmony_ci } else { \ 619b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); \ 620b8021494Sopenharmony_ci ASMFN(zd, zn, zm, index, rot); \ 621b8021494Sopenharmony_ci } \ 622b8021494Sopenharmony_ci } 623b8021494Sopenharmony_ciVIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC) 624b8021494Sopenharmony_ci#undef VIXL_DEFINE_MASM_FUNC 625b8021494Sopenharmony_ci 626b8021494Sopenharmony_ci// Instructions of the form "inst zda, pg, zda, zn", where they are 627b8021494Sopenharmony_ci// non-commutative and no reversed form is provided. 
628b8021494Sopenharmony_ci#define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \ 629b8021494Sopenharmony_ci V(Addp, addp) \ 630b8021494Sopenharmony_ci V(Bic, bic) \ 631b8021494Sopenharmony_ci V(Faddp, faddp) \ 632b8021494Sopenharmony_ci V(Fmaxnmp, fmaxnmp) \ 633b8021494Sopenharmony_ci V(Fminnmp, fminnmp) \ 634b8021494Sopenharmony_ci V(Fmaxp, fmaxp) \ 635b8021494Sopenharmony_ci V(Fminp, fminp) \ 636b8021494Sopenharmony_ci V(Fscale, fscale) \ 637b8021494Sopenharmony_ci V(Smaxp, smaxp) \ 638b8021494Sopenharmony_ci V(Sminp, sminp) \ 639b8021494Sopenharmony_ci V(Suqadd, suqadd) \ 640b8021494Sopenharmony_ci V(Umaxp, umaxp) \ 641b8021494Sopenharmony_ci V(Uminp, uminp) \ 642b8021494Sopenharmony_ci V(Usqadd, usqadd) 643b8021494Sopenharmony_ci 644b8021494Sopenharmony_ci#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ 645b8021494Sopenharmony_ci void MacroAssembler::MASMFN(const ZRegister& zd, \ 646b8021494Sopenharmony_ci const PRegisterM& pg, \ 647b8021494Sopenharmony_ci const ZRegister& zn, \ 648b8021494Sopenharmony_ci const ZRegister& zm) { \ 649b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); \ 650b8021494Sopenharmony_ci if (zd.Aliases(zm) && !zd.Aliases(zn)) { \ 651b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); \ 652b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \ 653b8021494Sopenharmony_ci Mov(scratch, zm); \ 654b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, pg, zn); \ 655b8021494Sopenharmony_ci ASMFN(zd, pg, zd, scratch); \ 656b8021494Sopenharmony_ci } else { \ 657b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, pg, zn); \ 658b8021494Sopenharmony_ci ASMFN(zd, pg, zd, zm); \ 659b8021494Sopenharmony_ci } \ 660b8021494Sopenharmony_ci } 661b8021494Sopenharmony_ciVIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC) 662b8021494Sopenharmony_ci#undef VIXL_DEFINE_MASM_FUNC 663b8021494Sopenharmony_ci 664b8021494Sopenharmony_ci// Instructions of the form "inst zda, pg, zda, zn", where they are 
665b8021494Sopenharmony_ci// non-commutative and a reversed form is provided. 666b8021494Sopenharmony_ci#define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \ 667b8021494Sopenharmony_ci V(Asr, asr) \ 668b8021494Sopenharmony_ci V(Fdiv, fdiv) \ 669b8021494Sopenharmony_ci V(Fsub, fsub) \ 670b8021494Sopenharmony_ci V(Lsl, lsl) \ 671b8021494Sopenharmony_ci V(Lsr, lsr) \ 672b8021494Sopenharmony_ci V(Sdiv, sdiv) \ 673b8021494Sopenharmony_ci V(Shsub, shsub) \ 674b8021494Sopenharmony_ci V(Sqrshl, sqrshl) \ 675b8021494Sopenharmony_ci V(Sqshl, sqshl) \ 676b8021494Sopenharmony_ci V(Sqsub, sqsub) \ 677b8021494Sopenharmony_ci V(Srshl, srshl) \ 678b8021494Sopenharmony_ci V(Sub, sub) \ 679b8021494Sopenharmony_ci V(Udiv, udiv) \ 680b8021494Sopenharmony_ci V(Uhsub, uhsub) \ 681b8021494Sopenharmony_ci V(Uqrshl, uqrshl) \ 682b8021494Sopenharmony_ci V(Uqshl, uqshl) \ 683b8021494Sopenharmony_ci V(Uqsub, uqsub) \ 684b8021494Sopenharmony_ci V(Urshl, urshl) 685b8021494Sopenharmony_ci 686b8021494Sopenharmony_ci#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ 687b8021494Sopenharmony_ci void MacroAssembler::MASMFN(const ZRegister& zd, \ 688b8021494Sopenharmony_ci const PRegisterM& pg, \ 689b8021494Sopenharmony_ci const ZRegister& zn, \ 690b8021494Sopenharmony_ci const ZRegister& zm) { \ 691b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); \ 692b8021494Sopenharmony_ci NoncommutativeArithmeticHelper(zd, \ 693b8021494Sopenharmony_ci pg, \ 694b8021494Sopenharmony_ci zn, \ 695b8021494Sopenharmony_ci zm, \ 696b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( \ 697b8021494Sopenharmony_ci &Assembler::ASMFN), \ 698b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( \ 699b8021494Sopenharmony_ci &Assembler::ASMFN##r)); \ 700b8021494Sopenharmony_ci } 701b8021494Sopenharmony_ciVIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC) 702b8021494Sopenharmony_ci#undef VIXL_DEFINE_MASM_FUNC 703b8021494Sopenharmony_ci 704b8021494Sopenharmony_civoid MacroAssembler::Fadd(const 
ZRegister& zd, 705b8021494Sopenharmony_ci const PRegisterM& pg, 706b8021494Sopenharmony_ci const ZRegister& zn, 707b8021494Sopenharmony_ci const ZRegister& zm, 708b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 709b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 710b8021494Sopenharmony_ci FPCommutativeArithmeticHelper(zd, 711b8021494Sopenharmony_ci pg, 712b8021494Sopenharmony_ci zn, 713b8021494Sopenharmony_ci zm, 714b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( 715b8021494Sopenharmony_ci &Assembler::fadd), 716b8021494Sopenharmony_ci nan_option); 717b8021494Sopenharmony_ci} 718b8021494Sopenharmony_ci 719b8021494Sopenharmony_civoid MacroAssembler::Fabd(const ZRegister& zd, 720b8021494Sopenharmony_ci const PRegisterM& pg, 721b8021494Sopenharmony_ci const ZRegister& zn, 722b8021494Sopenharmony_ci const ZRegister& zm, 723b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 724b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 725b8021494Sopenharmony_ci FPCommutativeArithmeticHelper(zd, 726b8021494Sopenharmony_ci pg, 727b8021494Sopenharmony_ci zn, 728b8021494Sopenharmony_ci zm, 729b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( 730b8021494Sopenharmony_ci &Assembler::fabd), 731b8021494Sopenharmony_ci nan_option); 732b8021494Sopenharmony_ci} 733b8021494Sopenharmony_ci 734b8021494Sopenharmony_civoid MacroAssembler::Fmul(const ZRegister& zd, 735b8021494Sopenharmony_ci const PRegisterM& pg, 736b8021494Sopenharmony_ci const ZRegister& zn, 737b8021494Sopenharmony_ci const ZRegister& zm, 738b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 739b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 740b8021494Sopenharmony_ci FPCommutativeArithmeticHelper(zd, 741b8021494Sopenharmony_ci pg, 742b8021494Sopenharmony_ci zn, 743b8021494Sopenharmony_ci zm, 744b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( 745b8021494Sopenharmony_ci &Assembler::fmul), 
746b8021494Sopenharmony_ci nan_option); 747b8021494Sopenharmony_ci} 748b8021494Sopenharmony_ci 749b8021494Sopenharmony_civoid MacroAssembler::Fmulx(const ZRegister& zd, 750b8021494Sopenharmony_ci const PRegisterM& pg, 751b8021494Sopenharmony_ci const ZRegister& zn, 752b8021494Sopenharmony_ci const ZRegister& zm, 753b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 754b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 755b8021494Sopenharmony_ci FPCommutativeArithmeticHelper(zd, 756b8021494Sopenharmony_ci pg, 757b8021494Sopenharmony_ci zn, 758b8021494Sopenharmony_ci zm, 759b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( 760b8021494Sopenharmony_ci &Assembler::fmulx), 761b8021494Sopenharmony_ci nan_option); 762b8021494Sopenharmony_ci} 763b8021494Sopenharmony_ci 764b8021494Sopenharmony_civoid MacroAssembler::Fmax(const ZRegister& zd, 765b8021494Sopenharmony_ci const PRegisterM& pg, 766b8021494Sopenharmony_ci const ZRegister& zn, 767b8021494Sopenharmony_ci const ZRegister& zm, 768b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 769b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 770b8021494Sopenharmony_ci FPCommutativeArithmeticHelper(zd, 771b8021494Sopenharmony_ci pg, 772b8021494Sopenharmony_ci zn, 773b8021494Sopenharmony_ci zm, 774b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( 775b8021494Sopenharmony_ci &Assembler::fmax), 776b8021494Sopenharmony_ci nan_option); 777b8021494Sopenharmony_ci} 778b8021494Sopenharmony_ci 779b8021494Sopenharmony_civoid MacroAssembler::Fmin(const ZRegister& zd, 780b8021494Sopenharmony_ci const PRegisterM& pg, 781b8021494Sopenharmony_ci const ZRegister& zn, 782b8021494Sopenharmony_ci const ZRegister& zm, 783b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 784b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 785b8021494Sopenharmony_ci FPCommutativeArithmeticHelper(zd, 786b8021494Sopenharmony_ci pg, 787b8021494Sopenharmony_ci zn, 
788b8021494Sopenharmony_ci zm, 789b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( 790b8021494Sopenharmony_ci &Assembler::fmin), 791b8021494Sopenharmony_ci nan_option); 792b8021494Sopenharmony_ci} 793b8021494Sopenharmony_ci 794b8021494Sopenharmony_civoid MacroAssembler::Fmaxnm(const ZRegister& zd, 795b8021494Sopenharmony_ci const PRegisterM& pg, 796b8021494Sopenharmony_ci const ZRegister& zn, 797b8021494Sopenharmony_ci const ZRegister& zm, 798b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 799b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 800b8021494Sopenharmony_ci FPCommutativeArithmeticHelper(zd, 801b8021494Sopenharmony_ci pg, 802b8021494Sopenharmony_ci zn, 803b8021494Sopenharmony_ci zm, 804b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( 805b8021494Sopenharmony_ci &Assembler::fmaxnm), 806b8021494Sopenharmony_ci nan_option); 807b8021494Sopenharmony_ci} 808b8021494Sopenharmony_ci 809b8021494Sopenharmony_civoid MacroAssembler::Fminnm(const ZRegister& zd, 810b8021494Sopenharmony_ci const PRegisterM& pg, 811b8021494Sopenharmony_ci const ZRegister& zn, 812b8021494Sopenharmony_ci const ZRegister& zm, 813b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 814b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 815b8021494Sopenharmony_ci FPCommutativeArithmeticHelper(zd, 816b8021494Sopenharmony_ci pg, 817b8021494Sopenharmony_ci zn, 818b8021494Sopenharmony_ci zm, 819b8021494Sopenharmony_ci static_cast<SVEArithPredicatedFn>( 820b8021494Sopenharmony_ci &Assembler::fminnm), 821b8021494Sopenharmony_ci nan_option); 822b8021494Sopenharmony_ci} 823b8021494Sopenharmony_ci 824b8021494Sopenharmony_civoid MacroAssembler::Fdup(const ZRegister& zd, double imm) { 825b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 826b8021494Sopenharmony_ci 827b8021494Sopenharmony_ci switch (zd.GetLaneSizeInBits()) { 828b8021494Sopenharmony_ci case kHRegSize: 829b8021494Sopenharmony_ci Fdup(zd, Float16(imm)); 
830b8021494Sopenharmony_ci break; 831b8021494Sopenharmony_ci case kSRegSize: 832b8021494Sopenharmony_ci Fdup(zd, static_cast<float>(imm)); 833b8021494Sopenharmony_ci break; 834b8021494Sopenharmony_ci case kDRegSize: 835b8021494Sopenharmony_ci uint64_t bits = DoubleToRawbits(imm); 836b8021494Sopenharmony_ci if (IsImmFP64(bits)) { 837b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 838b8021494Sopenharmony_ci fdup(zd, imm); 839b8021494Sopenharmony_ci } else { 840b8021494Sopenharmony_ci Dup(zd, bits); 841b8021494Sopenharmony_ci } 842b8021494Sopenharmony_ci break; 843b8021494Sopenharmony_ci } 844b8021494Sopenharmony_ci} 845b8021494Sopenharmony_ci 846b8021494Sopenharmony_civoid MacroAssembler::Fdup(const ZRegister& zd, float imm) { 847b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 848b8021494Sopenharmony_ci 849b8021494Sopenharmony_ci switch (zd.GetLaneSizeInBits()) { 850b8021494Sopenharmony_ci case kHRegSize: 851b8021494Sopenharmony_ci Fdup(zd, Float16(imm)); 852b8021494Sopenharmony_ci break; 853b8021494Sopenharmony_ci case kSRegSize: 854b8021494Sopenharmony_ci if (IsImmFP32(imm)) { 855b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 856b8021494Sopenharmony_ci fdup(zd, imm); 857b8021494Sopenharmony_ci } else { 858b8021494Sopenharmony_ci Dup(zd, FloatToRawbits(imm)); 859b8021494Sopenharmony_ci } 860b8021494Sopenharmony_ci break; 861b8021494Sopenharmony_ci case kDRegSize: 862b8021494Sopenharmony_ci Fdup(zd, static_cast<double>(imm)); 863b8021494Sopenharmony_ci break; 864b8021494Sopenharmony_ci } 865b8021494Sopenharmony_ci} 866b8021494Sopenharmony_ci 867b8021494Sopenharmony_civoid MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) { 868b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 869b8021494Sopenharmony_ci 870b8021494Sopenharmony_ci switch (zd.GetLaneSizeInBits()) { 871b8021494Sopenharmony_ci case kHRegSize: 872b8021494Sopenharmony_ci if (IsImmFP16(imm)) { 873b8021494Sopenharmony_ci SingleEmissionCheckScope 
guard(this); 874b8021494Sopenharmony_ci fdup(zd, imm); 875b8021494Sopenharmony_ci } else { 876b8021494Sopenharmony_ci Dup(zd, Float16ToRawbits(imm)); 877b8021494Sopenharmony_ci } 878b8021494Sopenharmony_ci break; 879b8021494Sopenharmony_ci case kSRegSize: 880b8021494Sopenharmony_ci Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN)); 881b8021494Sopenharmony_ci break; 882b8021494Sopenharmony_ci case kDRegSize: 883b8021494Sopenharmony_ci Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); 884b8021494Sopenharmony_ci break; 885b8021494Sopenharmony_ci } 886b8021494Sopenharmony_ci} 887b8021494Sopenharmony_ci 888b8021494Sopenharmony_civoid MacroAssembler::Index(const ZRegister& zd, 889b8021494Sopenharmony_ci const Operand& start, 890b8021494Sopenharmony_ci const Operand& step) { 891b8021494Sopenharmony_ci class IndexOperand : public Operand { 892b8021494Sopenharmony_ci public: 893b8021494Sopenharmony_ci static IndexOperand Prepare(MacroAssembler* masm, 894b8021494Sopenharmony_ci UseScratchRegisterScope* temps, 895b8021494Sopenharmony_ci const Operand& op, 896b8021494Sopenharmony_ci const ZRegister& zd_inner) { 897b8021494Sopenharmony_ci // Look for encodable immediates. 898b8021494Sopenharmony_ci int imm; 899b8021494Sopenharmony_ci if (op.IsImmediate()) { 900b8021494Sopenharmony_ci if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) { 901b8021494Sopenharmony_ci return IndexOperand(imm); 902b8021494Sopenharmony_ci } 903b8021494Sopenharmony_ci Register scratch = temps->AcquireRegisterToHoldLane(zd_inner); 904b8021494Sopenharmony_ci masm->Mov(scratch, op); 905b8021494Sopenharmony_ci return IndexOperand(scratch); 906b8021494Sopenharmony_ci } else { 907b8021494Sopenharmony_ci // Plain registers can be encoded directly. 
908b8021494Sopenharmony_ci VIXL_ASSERT(op.IsPlainRegister()); 909b8021494Sopenharmony_ci return IndexOperand(op.GetRegister()); 910b8021494Sopenharmony_ci } 911b8021494Sopenharmony_ci } 912b8021494Sopenharmony_ci 913b8021494Sopenharmony_ci int GetImm5() const { 914b8021494Sopenharmony_ci int64_t imm = GetImmediate(); 915b8021494Sopenharmony_ci VIXL_ASSERT(IsInt5(imm)); 916b8021494Sopenharmony_ci return static_cast<int>(imm); 917b8021494Sopenharmony_ci } 918b8021494Sopenharmony_ci 919b8021494Sopenharmony_ci private: 920b8021494Sopenharmony_ci explicit IndexOperand(const Register& reg) : Operand(reg) {} 921b8021494Sopenharmony_ci explicit IndexOperand(int64_t imm) : Operand(imm) {} 922b8021494Sopenharmony_ci }; 923b8021494Sopenharmony_ci 924b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 925b8021494Sopenharmony_ci IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd); 926b8021494Sopenharmony_ci IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd); 927b8021494Sopenharmony_ci 928b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 929b8021494Sopenharmony_ci if (start_enc.IsImmediate()) { 930b8021494Sopenharmony_ci if (step_enc.IsImmediate()) { 931b8021494Sopenharmony_ci index(zd, start_enc.GetImm5(), step_enc.GetImm5()); 932b8021494Sopenharmony_ci } else { 933b8021494Sopenharmony_ci index(zd, start_enc.GetImm5(), step_enc.GetRegister()); 934b8021494Sopenharmony_ci } 935b8021494Sopenharmony_ci } else { 936b8021494Sopenharmony_ci if (step_enc.IsImmediate()) { 937b8021494Sopenharmony_ci index(zd, start_enc.GetRegister(), step_enc.GetImm5()); 938b8021494Sopenharmony_ci } else { 939b8021494Sopenharmony_ci index(zd, start_enc.GetRegister(), step_enc.GetRegister()); 940b8021494Sopenharmony_ci } 941b8021494Sopenharmony_ci } 942b8021494Sopenharmony_ci} 943b8021494Sopenharmony_ci 944b8021494Sopenharmony_civoid MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) { 945b8021494Sopenharmony_ci 
VIXL_ASSERT(allow_macro_instructions_); 946b8021494Sopenharmony_ci VIXL_ASSERT(imm.FitsInLane(zdn)); 947b8021494Sopenharmony_ci 948b8021494Sopenharmony_ci if (imm.IsZero()) { 949b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 950b8021494Sopenharmony_ci insr(zdn, xzr); 951b8021494Sopenharmony_ci return; 952b8021494Sopenharmony_ci } 953b8021494Sopenharmony_ci 954b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 955b8021494Sopenharmony_ci Register scratch = temps.AcquireRegisterToHoldLane(zdn); 956b8021494Sopenharmony_ci 957b8021494Sopenharmony_ci // TODO: There are many cases where we could optimise immediates, such as by 958b8021494Sopenharmony_ci // detecting repeating patterns or FP immediates. We should optimise and 959b8021494Sopenharmony_ci // abstract this for use in other SVE mov-immediate-like macros. 960b8021494Sopenharmony_ci Mov(scratch, imm); 961b8021494Sopenharmony_ci 962b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 963b8021494Sopenharmony_ci insr(zdn, scratch); 964b8021494Sopenharmony_ci} 965b8021494Sopenharmony_ci 966b8021494Sopenharmony_civoid MacroAssembler::Mla(const ZRegister& zd, 967b8021494Sopenharmony_ci const PRegisterM& pg, 968b8021494Sopenharmony_ci const ZRegister& za, 969b8021494Sopenharmony_ci const ZRegister& zn, 970b8021494Sopenharmony_ci const ZRegister& zm) { 971b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 972b8021494Sopenharmony_ci if (zd.Aliases(za)) { 973b8021494Sopenharmony_ci // zda = zda + (zn * zm) 974b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 975b8021494Sopenharmony_ci mla(zd, pg, zn, zm); 976b8021494Sopenharmony_ci } else if (zd.Aliases(zn)) { 977b8021494Sopenharmony_ci // zdn = za + (zdn * zm) 978b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 979b8021494Sopenharmony_ci mad(zd, pg, zm, za); 980b8021494Sopenharmony_ci } else if (zd.Aliases(zm)) { 981b8021494Sopenharmony_ci // Multiplication is commutative, so we can swap zn and zm. 
982b8021494Sopenharmony_ci // zdm = za + (zdm * zn) 983b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 984b8021494Sopenharmony_ci mad(zd, pg, zn, za); 985b8021494Sopenharmony_ci } else { 986b8021494Sopenharmony_ci // zd = za + (zn * zm) 987b8021494Sopenharmony_ci ExactAssemblyScope guard(this, 2 * kInstructionSize); 988b8021494Sopenharmony_ci movprfx(zd, pg, za); 989b8021494Sopenharmony_ci mla(zd, pg, zn, zm); 990b8021494Sopenharmony_ci } 991b8021494Sopenharmony_ci} 992b8021494Sopenharmony_ci 993b8021494Sopenharmony_civoid MacroAssembler::Mls(const ZRegister& zd, 994b8021494Sopenharmony_ci const PRegisterM& pg, 995b8021494Sopenharmony_ci const ZRegister& za, 996b8021494Sopenharmony_ci const ZRegister& zn, 997b8021494Sopenharmony_ci const ZRegister& zm) { 998b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 999b8021494Sopenharmony_ci if (zd.Aliases(za)) { 1000b8021494Sopenharmony_ci // zda = zda - (zn * zm) 1001b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1002b8021494Sopenharmony_ci mls(zd, pg, zn, zm); 1003b8021494Sopenharmony_ci } else if (zd.Aliases(zn)) { 1004b8021494Sopenharmony_ci // zdn = za - (zdn * zm) 1005b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1006b8021494Sopenharmony_ci msb(zd, pg, zm, za); 1007b8021494Sopenharmony_ci } else if (zd.Aliases(zm)) { 1008b8021494Sopenharmony_ci // Multiplication is commutative, so we can swap zn and zm. 
1009b8021494Sopenharmony_ci // zdm = za - (zdm * zn) 1010b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1011b8021494Sopenharmony_ci msb(zd, pg, zn, za); 1012b8021494Sopenharmony_ci } else { 1013b8021494Sopenharmony_ci // zd = za - (zn * zm) 1014b8021494Sopenharmony_ci ExactAssemblyScope guard(this, 2 * kInstructionSize); 1015b8021494Sopenharmony_ci movprfx(zd, pg, za); 1016b8021494Sopenharmony_ci mls(zd, pg, zn, zm); 1017b8021494Sopenharmony_ci } 1018b8021494Sopenharmony_ci} 1019b8021494Sopenharmony_ci 1020b8021494Sopenharmony_civoid MacroAssembler::CompareHelper(Condition cond, 1021b8021494Sopenharmony_ci const PRegisterWithLaneSize& pd, 1022b8021494Sopenharmony_ci const PRegisterZ& pg, 1023b8021494Sopenharmony_ci const ZRegister& zn, 1024b8021494Sopenharmony_ci IntegerOperand imm) { 1025b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1026b8021494Sopenharmony_ci ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); 1027b8021494Sopenharmony_ci Dup(zm, imm); 1028b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1029b8021494Sopenharmony_ci cmp(cond, pd, pg, zn, zm); 1030b8021494Sopenharmony_ci} 1031b8021494Sopenharmony_ci 1032b8021494Sopenharmony_civoid MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd, 1033b8021494Sopenharmony_ci const PRegister& pg, 1034b8021494Sopenharmony_ci const PRegisterWithLaneSize& pn) { 1035b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1036b8021494Sopenharmony_ci VIXL_ASSERT(pd.IsLaneSizeB()); 1037b8021494Sopenharmony_ci VIXL_ASSERT(pn.IsLaneSizeB()); 1038b8021494Sopenharmony_ci if (pd.Is(pn)) { 1039b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1040b8021494Sopenharmony_ci pfirst(pd, pg, pn); 1041b8021494Sopenharmony_ci } else { 1042b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1043b8021494Sopenharmony_ci PRegister temp_pg = pg; 1044b8021494Sopenharmony_ci if (pd.Aliases(pg)) { 1045b8021494Sopenharmony_ci temp_pg = temps.AcquireP(); 
1046b8021494Sopenharmony_ci Mov(temp_pg.VnB(), pg.VnB()); 1047b8021494Sopenharmony_ci } 1048b8021494Sopenharmony_ci Mov(pd, pn); 1049b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1050b8021494Sopenharmony_ci pfirst(pd, temp_pg, pd); 1051b8021494Sopenharmony_ci } 1052b8021494Sopenharmony_ci} 1053b8021494Sopenharmony_ci 1054b8021494Sopenharmony_civoid MacroAssembler::Pnext(const PRegisterWithLaneSize& pd, 1055b8021494Sopenharmony_ci const PRegister& pg, 1056b8021494Sopenharmony_ci const PRegisterWithLaneSize& pn) { 1057b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1058b8021494Sopenharmony_ci VIXL_ASSERT(AreSameFormat(pd, pn)); 1059b8021494Sopenharmony_ci if (pd.Is(pn)) { 1060b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1061b8021494Sopenharmony_ci pnext(pd, pg, pn); 1062b8021494Sopenharmony_ci } else { 1063b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1064b8021494Sopenharmony_ci PRegister temp_pg = pg; 1065b8021494Sopenharmony_ci if (pd.Aliases(pg)) { 1066b8021494Sopenharmony_ci temp_pg = temps.AcquireP(); 1067b8021494Sopenharmony_ci Mov(temp_pg.VnB(), pg.VnB()); 1068b8021494Sopenharmony_ci } 1069b8021494Sopenharmony_ci Mov(pd.VnB(), pn.VnB()); 1070b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1071b8021494Sopenharmony_ci pnext(pd, temp_pg, pd); 1072b8021494Sopenharmony_ci } 1073b8021494Sopenharmony_ci} 1074b8021494Sopenharmony_ci 1075b8021494Sopenharmony_civoid MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd, 1076b8021494Sopenharmony_ci SVEPredicateConstraint pattern, 1077b8021494Sopenharmony_ci FlagsUpdate s) { 1078b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1079b8021494Sopenharmony_ci switch (s) { 1080b8021494Sopenharmony_ci case LeaveFlags: 1081b8021494Sopenharmony_ci Ptrue(pd, pattern); 1082b8021494Sopenharmony_ci return; 1083b8021494Sopenharmony_ci case SetFlags: 1084b8021494Sopenharmony_ci Ptrues(pd, pattern); 1085b8021494Sopenharmony_ci return; 
1086b8021494Sopenharmony_ci } 1087b8021494Sopenharmony_ci VIXL_UNREACHABLE(); 1088b8021494Sopenharmony_ci} 1089b8021494Sopenharmony_ci 1090b8021494Sopenharmony_civoid MacroAssembler::Sub(const ZRegister& zd, 1091b8021494Sopenharmony_ci IntegerOperand imm, 1092b8021494Sopenharmony_ci const ZRegister& zm) { 1093b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1094b8021494Sopenharmony_ci 1095b8021494Sopenharmony_ci int imm8; 1096b8021494Sopenharmony_ci int shift = -1; 1097b8021494Sopenharmony_ci if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || 1098b8021494Sopenharmony_ci imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { 1099b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zm); 1100b8021494Sopenharmony_ci subr(zd, zd, imm8, shift); 1101b8021494Sopenharmony_ci } else { 1102b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1103b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits()); 1104b8021494Sopenharmony_ci Dup(scratch, imm); 1105b8021494Sopenharmony_ci 1106b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1107b8021494Sopenharmony_ci sub(zd, scratch, zm); 1108b8021494Sopenharmony_ci } 1109b8021494Sopenharmony_ci} 1110b8021494Sopenharmony_ci 1111b8021494Sopenharmony_civoid MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt, 1112b8021494Sopenharmony_ci const PRegisterZ& pg, 1113b8021494Sopenharmony_ci const SVEMemOperand& addr, 1114b8021494Sopenharmony_ci SVELoadBroadcastFn fn, 1115b8021494Sopenharmony_ci int divisor) { 1116b8021494Sopenharmony_ci VIXL_ASSERT(addr.IsScalarPlusImmediate()); 1117b8021494Sopenharmony_ci int64_t imm = addr.GetImmediateOffset(); 1118b8021494Sopenharmony_ci if ((imm % divisor == 0) && IsUint6(imm / divisor)) { 1119b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1120b8021494Sopenharmony_ci (this->*fn)(zt, pg, addr); 1121b8021494Sopenharmony_ci } else { 1122b8021494Sopenharmony_ci 
UseScratchRegisterScope temps(this); 1123b8021494Sopenharmony_ci Register scratch = temps.AcquireX(); 1124b8021494Sopenharmony_ci CalculateSVEAddress(scratch, addr, zt); 1125b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1126b8021494Sopenharmony_ci (this->*fn)(zt, pg, SVEMemOperand(scratch)); 1127b8021494Sopenharmony_ci } 1128b8021494Sopenharmony_ci} 1129b8021494Sopenharmony_ci 1130b8021494Sopenharmony_civoid MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt, 1131b8021494Sopenharmony_ci const SVEMemOperand& addr, 1132b8021494Sopenharmony_ci SVELoadStoreFn fn) { 1133b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1134b8021494Sopenharmony_ci VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister()); 1135b8021494Sopenharmony_ci 1136b8021494Sopenharmony_ci if (addr.IsPlainScalar() || 1137b8021494Sopenharmony_ci (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) && 1138b8021494Sopenharmony_ci addr.IsMulVl())) { 1139b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1140b8021494Sopenharmony_ci (this->*fn)(rt, addr); 1141b8021494Sopenharmony_ci return; 1142b8021494Sopenharmony_ci } 1143b8021494Sopenharmony_ci 1144b8021494Sopenharmony_ci if (addr.IsEquivalentToScalar()) { 1145b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1146b8021494Sopenharmony_ci (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase())); 1147b8021494Sopenharmony_ci return; 1148b8021494Sopenharmony_ci } 1149b8021494Sopenharmony_ci 1150b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1151b8021494Sopenharmony_ci Register scratch = temps.AcquireX(); 1152b8021494Sopenharmony_ci CalculateSVEAddress(scratch, addr, rt); 1153b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1154b8021494Sopenharmony_ci (this->*fn)(rt, SVEMemOperand(scratch)); 1155b8021494Sopenharmony_ci} 1156b8021494Sopenharmony_ci 1157b8021494Sopenharmony_citemplate <typename Tg, typename Tf> 1158b8021494Sopenharmony_civoid 
MacroAssembler::SVELoadStoreNTBroadcastQOHelper( 1159b8021494Sopenharmony_ci const ZRegister& zt, 1160b8021494Sopenharmony_ci const Tg& pg, 1161b8021494Sopenharmony_ci const SVEMemOperand& addr, 1162b8021494Sopenharmony_ci Tf fn, 1163b8021494Sopenharmony_ci int imm_bits, 1164b8021494Sopenharmony_ci int shift_amount, 1165b8021494Sopenharmony_ci SVEOffsetModifier supported_modifier, 1166b8021494Sopenharmony_ci int vl_divisor_log2) { 1167b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1168b8021494Sopenharmony_ci int imm_divisor = 1 << shift_amount; 1169b8021494Sopenharmony_ci 1170b8021494Sopenharmony_ci if (addr.IsPlainScalar() || 1171b8021494Sopenharmony_ci (addr.IsScalarPlusImmediate() && 1172b8021494Sopenharmony_ci IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) && 1173b8021494Sopenharmony_ci ((addr.GetImmediateOffset() % imm_divisor) == 0) && 1174b8021494Sopenharmony_ci (addr.GetOffsetModifier() == supported_modifier))) { 1175b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1176b8021494Sopenharmony_ci (this->*fn)(zt, pg, addr); 1177b8021494Sopenharmony_ci return; 1178b8021494Sopenharmony_ci } 1179b8021494Sopenharmony_ci 1180b8021494Sopenharmony_ci if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && 1181b8021494Sopenharmony_ci addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) { 1182b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1183b8021494Sopenharmony_ci (this->*fn)(zt, pg, addr); 1184b8021494Sopenharmony_ci return; 1185b8021494Sopenharmony_ci } 1186b8021494Sopenharmony_ci 1187b8021494Sopenharmony_ci if (addr.IsEquivalentToScalar()) { 1188b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1189b8021494Sopenharmony_ci (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); 1190b8021494Sopenharmony_ci return; 1191b8021494Sopenharmony_ci } 1192b8021494Sopenharmony_ci 1193b8021494Sopenharmony_ci if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) && 1194b8021494Sopenharmony_ci 
(vl_divisor_log2 == -1)) { 1195b8021494Sopenharmony_ci // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL 1196b8021494Sopenharmony_ci // dependent. 1197b8021494Sopenharmony_ci VIXL_UNIMPLEMENTED(); 1198b8021494Sopenharmony_ci } 1199b8021494Sopenharmony_ci 1200b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1201b8021494Sopenharmony_ci Register scratch = temps.AcquireX(); 1202b8021494Sopenharmony_ci CalculateSVEAddress(scratch, addr, vl_divisor_log2); 1203b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1204b8021494Sopenharmony_ci (this->*fn)(zt, pg, SVEMemOperand(scratch)); 1205b8021494Sopenharmony_ci} 1206b8021494Sopenharmony_ci 1207b8021494Sopenharmony_citemplate <typename Tg, typename Tf> 1208b8021494Sopenharmony_civoid MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2, 1209b8021494Sopenharmony_ci const ZRegister& zt, 1210b8021494Sopenharmony_ci const Tg& pg, 1211b8021494Sopenharmony_ci const SVEMemOperand& addr, 1212b8021494Sopenharmony_ci Tf fn) { 1213b8021494Sopenharmony_ci if (addr.IsPlainScalar() || 1214b8021494Sopenharmony_ci (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && 1215b8021494Sopenharmony_ci addr.IsEquivalentToLSL(msize_in_bytes_log2)) || 1216b8021494Sopenharmony_ci (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) && 1217b8021494Sopenharmony_ci addr.IsMulVl())) { 1218b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1219b8021494Sopenharmony_ci (this->*fn)(zt, pg, addr); 1220b8021494Sopenharmony_ci return; 1221b8021494Sopenharmony_ci } 1222b8021494Sopenharmony_ci 1223b8021494Sopenharmony_ci if (addr.IsEquivalentToScalar()) { 1224b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1225b8021494Sopenharmony_ci (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); 1226b8021494Sopenharmony_ci return; 1227b8021494Sopenharmony_ci } 1228b8021494Sopenharmony_ci 1229b8021494Sopenharmony_ci if (addr.IsVectorPlusImmediate()) { 
1230b8021494Sopenharmony_ci uint64_t offset = addr.GetImmediateOffset(); 1231b8021494Sopenharmony_ci if (IsMultiple(offset, (1 << msize_in_bytes_log2)) && 1232b8021494Sopenharmony_ci IsUint5(offset >> msize_in_bytes_log2)) { 1233b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1234b8021494Sopenharmony_ci (this->*fn)(zt, pg, addr); 1235b8021494Sopenharmony_ci return; 1236b8021494Sopenharmony_ci } 1237b8021494Sopenharmony_ci } 1238b8021494Sopenharmony_ci 1239b8021494Sopenharmony_ci if (addr.IsScalarPlusVector()) { 1240b8021494Sopenharmony_ci VIXL_ASSERT(addr.IsScatterGather()); 1241b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1242b8021494Sopenharmony_ci (this->*fn)(zt, pg, addr); 1243b8021494Sopenharmony_ci return; 1244b8021494Sopenharmony_ci } 1245b8021494Sopenharmony_ci 1246b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1247b8021494Sopenharmony_ci if (addr.IsScatterGather()) { 1248b8021494Sopenharmony_ci // In scatter-gather modes, zt and zn/zm have the same lane size. However, 1249b8021494Sopenharmony_ci // for 32-bit accesses, the result of each lane's address calculation still 1250b8021494Sopenharmony_ci // requires 64 bits; we can't naively use `Adr` for the address calculation 1251b8021494Sopenharmony_ci // because it would truncate each address to 32 bits. 1252b8021494Sopenharmony_ci 1253b8021494Sopenharmony_ci if (addr.IsVectorPlusImmediate()) { 1254b8021494Sopenharmony_ci // Synthesise the immediate in an X register, then use a 1255b8021494Sopenharmony_ci // scalar-plus-vector access with the original vector. 1256b8021494Sopenharmony_ci Register scratch = temps.AcquireX(); 1257b8021494Sopenharmony_ci Mov(scratch, addr.GetImmediateOffset()); 1258b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1259b8021494Sopenharmony_ci SVEOffsetModifier om = 1260b8021494Sopenharmony_ci zt.IsLaneSizeS() ? 
SVE_UXTW : NO_SVE_OFFSET_MODIFIER; 1261b8021494Sopenharmony_ci (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om)); 1262b8021494Sopenharmony_ci return; 1263b8021494Sopenharmony_ci } 1264b8021494Sopenharmony_ci 1265b8021494Sopenharmony_ci VIXL_UNIMPLEMENTED(); 1266b8021494Sopenharmony_ci } else { 1267b8021494Sopenharmony_ci Register scratch = temps.AcquireX(); 1268b8021494Sopenharmony_ci // TODO: If we have an immediate offset that is a multiple of 1269b8021494Sopenharmony_ci // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to 1270b8021494Sopenharmony_ci // save an instruction. 1271b8021494Sopenharmony_ci int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2; 1272b8021494Sopenharmony_ci CalculateSVEAddress(scratch, addr, vl_divisor_log2); 1273b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1274b8021494Sopenharmony_ci (this->*fn)(zt, pg, SVEMemOperand(scratch)); 1275b8021494Sopenharmony_ci } 1276b8021494Sopenharmony_ci} 1277b8021494Sopenharmony_ci 1278b8021494Sopenharmony_citemplate <typename Tf> 1279b8021494Sopenharmony_civoid MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2, 1280b8021494Sopenharmony_ci const ZRegister& zt, 1281b8021494Sopenharmony_ci const PRegisterZ& pg, 1282b8021494Sopenharmony_ci const SVEMemOperand& addr, 1283b8021494Sopenharmony_ci Tf fn) { 1284b8021494Sopenharmony_ci if (addr.IsScatterGather()) { 1285b8021494Sopenharmony_ci // Scatter-gather first-fault loads share encodings with normal loads. 1286b8021494Sopenharmony_ci SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn); 1287b8021494Sopenharmony_ci return; 1288b8021494Sopenharmony_ci } 1289b8021494Sopenharmony_ci 1290b8021494Sopenharmony_ci // Contiguous first-faulting loads have no scalar-plus-immediate form at all, 1291b8021494Sopenharmony_ci // so we don't do immediate synthesis. 
1292b8021494Sopenharmony_ci 1293b8021494Sopenharmony_ci // We cannot currently distinguish "[x0]" from "[x0, #0]", and this 1294b8021494Sopenharmony_ci // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here. 1295b8021494Sopenharmony_ci if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() && 1296b8021494Sopenharmony_ci addr.IsEquivalentToLSL(msize_in_bytes_log2))) { 1297b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1298b8021494Sopenharmony_ci (this->*fn)(zt, pg, addr); 1299b8021494Sopenharmony_ci return; 1300b8021494Sopenharmony_ci } 1301b8021494Sopenharmony_ci 1302b8021494Sopenharmony_ci VIXL_UNIMPLEMENTED(); 1303b8021494Sopenharmony_ci} 1304b8021494Sopenharmony_ci 1305b8021494Sopenharmony_civoid MacroAssembler::Ld1b(const ZRegister& zt, 1306b8021494Sopenharmony_ci const PRegisterZ& pg, 1307b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1308b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1309b8021494Sopenharmony_ci SVELoadStore1Helper(kBRegSizeInBytesLog2, 1310b8021494Sopenharmony_ci zt, 1311b8021494Sopenharmony_ci pg, 1312b8021494Sopenharmony_ci addr, 1313b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ld1b)); 1314b8021494Sopenharmony_ci} 1315b8021494Sopenharmony_ci 1316b8021494Sopenharmony_civoid MacroAssembler::Ld1h(const ZRegister& zt, 1317b8021494Sopenharmony_ci const PRegisterZ& pg, 1318b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1319b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1320b8021494Sopenharmony_ci SVELoadStore1Helper(kHRegSizeInBytesLog2, 1321b8021494Sopenharmony_ci zt, 1322b8021494Sopenharmony_ci pg, 1323b8021494Sopenharmony_ci addr, 1324b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ld1h)); 1325b8021494Sopenharmony_ci} 1326b8021494Sopenharmony_ci 1327b8021494Sopenharmony_civoid MacroAssembler::Ld1w(const ZRegister& zt, 1328b8021494Sopenharmony_ci const PRegisterZ& pg, 1329b8021494Sopenharmony_ci const SVEMemOperand& addr) { 
1330b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1331b8021494Sopenharmony_ci SVELoadStore1Helper(kWRegSizeInBytesLog2, 1332b8021494Sopenharmony_ci zt, 1333b8021494Sopenharmony_ci pg, 1334b8021494Sopenharmony_ci addr, 1335b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ld1w)); 1336b8021494Sopenharmony_ci} 1337b8021494Sopenharmony_ci 1338b8021494Sopenharmony_civoid MacroAssembler::Ld1d(const ZRegister& zt, 1339b8021494Sopenharmony_ci const PRegisterZ& pg, 1340b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1341b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1342b8021494Sopenharmony_ci SVELoadStore1Helper(kDRegSizeInBytesLog2, 1343b8021494Sopenharmony_ci zt, 1344b8021494Sopenharmony_ci pg, 1345b8021494Sopenharmony_ci addr, 1346b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ld1d)); 1347b8021494Sopenharmony_ci} 1348b8021494Sopenharmony_ci 1349b8021494Sopenharmony_civoid MacroAssembler::Ld1sb(const ZRegister& zt, 1350b8021494Sopenharmony_ci const PRegisterZ& pg, 1351b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1352b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1353b8021494Sopenharmony_ci SVELoadStore1Helper(kBRegSizeInBytesLog2, 1354b8021494Sopenharmony_ci zt, 1355b8021494Sopenharmony_ci pg, 1356b8021494Sopenharmony_ci addr, 1357b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ld1sb)); 1358b8021494Sopenharmony_ci} 1359b8021494Sopenharmony_ci 1360b8021494Sopenharmony_civoid MacroAssembler::Ld1sh(const ZRegister& zt, 1361b8021494Sopenharmony_ci const PRegisterZ& pg, 1362b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1363b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1364b8021494Sopenharmony_ci SVELoadStore1Helper(kHRegSizeInBytesLog2, 1365b8021494Sopenharmony_ci zt, 1366b8021494Sopenharmony_ci pg, 1367b8021494Sopenharmony_ci addr, 1368b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ld1sh)); 1369b8021494Sopenharmony_ci} 
1370b8021494Sopenharmony_ci 1371b8021494Sopenharmony_civoid MacroAssembler::Ld1sw(const ZRegister& zt, 1372b8021494Sopenharmony_ci const PRegisterZ& pg, 1373b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1374b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1375b8021494Sopenharmony_ci SVELoadStore1Helper(kSRegSizeInBytesLog2, 1376b8021494Sopenharmony_ci zt, 1377b8021494Sopenharmony_ci pg, 1378b8021494Sopenharmony_ci addr, 1379b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ld1sw)); 1380b8021494Sopenharmony_ci} 1381b8021494Sopenharmony_ci 1382b8021494Sopenharmony_civoid MacroAssembler::St1b(const ZRegister& zt, 1383b8021494Sopenharmony_ci const PRegister& pg, 1384b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1385b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1386b8021494Sopenharmony_ci SVELoadStore1Helper(kBRegSizeInBytesLog2, 1387b8021494Sopenharmony_ci zt, 1388b8021494Sopenharmony_ci pg, 1389b8021494Sopenharmony_ci addr, 1390b8021494Sopenharmony_ci static_cast<SVEStore1Fn>(&Assembler::st1b)); 1391b8021494Sopenharmony_ci} 1392b8021494Sopenharmony_ci 1393b8021494Sopenharmony_civoid MacroAssembler::St1h(const ZRegister& zt, 1394b8021494Sopenharmony_ci const PRegister& pg, 1395b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1396b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1397b8021494Sopenharmony_ci SVELoadStore1Helper(kHRegSizeInBytesLog2, 1398b8021494Sopenharmony_ci zt, 1399b8021494Sopenharmony_ci pg, 1400b8021494Sopenharmony_ci addr, 1401b8021494Sopenharmony_ci static_cast<SVEStore1Fn>(&Assembler::st1h)); 1402b8021494Sopenharmony_ci} 1403b8021494Sopenharmony_ci 1404b8021494Sopenharmony_civoid MacroAssembler::St1w(const ZRegister& zt, 1405b8021494Sopenharmony_ci const PRegister& pg, 1406b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1407b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1408b8021494Sopenharmony_ci SVELoadStore1Helper(kSRegSizeInBytesLog2, 
1409b8021494Sopenharmony_ci zt, 1410b8021494Sopenharmony_ci pg, 1411b8021494Sopenharmony_ci addr, 1412b8021494Sopenharmony_ci static_cast<SVEStore1Fn>(&Assembler::st1w)); 1413b8021494Sopenharmony_ci} 1414b8021494Sopenharmony_ci 1415b8021494Sopenharmony_civoid MacroAssembler::St1d(const ZRegister& zt, 1416b8021494Sopenharmony_ci const PRegister& pg, 1417b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1418b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1419b8021494Sopenharmony_ci SVELoadStore1Helper(kDRegSizeInBytesLog2, 1420b8021494Sopenharmony_ci zt, 1421b8021494Sopenharmony_ci pg, 1422b8021494Sopenharmony_ci addr, 1423b8021494Sopenharmony_ci static_cast<SVEStore1Fn>(&Assembler::st1d)); 1424b8021494Sopenharmony_ci} 1425b8021494Sopenharmony_ci 1426b8021494Sopenharmony_civoid MacroAssembler::Ldff1b(const ZRegister& zt, 1427b8021494Sopenharmony_ci const PRegisterZ& pg, 1428b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1429b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1430b8021494Sopenharmony_ci SVELoadFFHelper(kBRegSizeInBytesLog2, 1431b8021494Sopenharmony_ci zt, 1432b8021494Sopenharmony_ci pg, 1433b8021494Sopenharmony_ci addr, 1434b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ldff1b)); 1435b8021494Sopenharmony_ci} 1436b8021494Sopenharmony_ci 1437b8021494Sopenharmony_civoid MacroAssembler::Ldff1h(const ZRegister& zt, 1438b8021494Sopenharmony_ci const PRegisterZ& pg, 1439b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1440b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1441b8021494Sopenharmony_ci SVELoadFFHelper(kHRegSizeInBytesLog2, 1442b8021494Sopenharmony_ci zt, 1443b8021494Sopenharmony_ci pg, 1444b8021494Sopenharmony_ci addr, 1445b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ldff1h)); 1446b8021494Sopenharmony_ci} 1447b8021494Sopenharmony_ci 1448b8021494Sopenharmony_civoid MacroAssembler::Ldff1w(const ZRegister& zt, 1449b8021494Sopenharmony_ci const PRegisterZ& pg, 
1450b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1451b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1452b8021494Sopenharmony_ci SVELoadFFHelper(kSRegSizeInBytesLog2, 1453b8021494Sopenharmony_ci zt, 1454b8021494Sopenharmony_ci pg, 1455b8021494Sopenharmony_ci addr, 1456b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ldff1w)); 1457b8021494Sopenharmony_ci} 1458b8021494Sopenharmony_ci 1459b8021494Sopenharmony_civoid MacroAssembler::Ldff1d(const ZRegister& zt, 1460b8021494Sopenharmony_ci const PRegisterZ& pg, 1461b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1462b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1463b8021494Sopenharmony_ci SVELoadFFHelper(kDRegSizeInBytesLog2, 1464b8021494Sopenharmony_ci zt, 1465b8021494Sopenharmony_ci pg, 1466b8021494Sopenharmony_ci addr, 1467b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ldff1d)); 1468b8021494Sopenharmony_ci} 1469b8021494Sopenharmony_ci 1470b8021494Sopenharmony_civoid MacroAssembler::Ldff1sb(const ZRegister& zt, 1471b8021494Sopenharmony_ci const PRegisterZ& pg, 1472b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1473b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1474b8021494Sopenharmony_ci SVELoadFFHelper(kBRegSizeInBytesLog2, 1475b8021494Sopenharmony_ci zt, 1476b8021494Sopenharmony_ci pg, 1477b8021494Sopenharmony_ci addr, 1478b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ldff1sb)); 1479b8021494Sopenharmony_ci} 1480b8021494Sopenharmony_ci 1481b8021494Sopenharmony_civoid MacroAssembler::Ldff1sh(const ZRegister& zt, 1482b8021494Sopenharmony_ci const PRegisterZ& pg, 1483b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1484b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1485b8021494Sopenharmony_ci SVELoadFFHelper(kHRegSizeInBytesLog2, 1486b8021494Sopenharmony_ci zt, 1487b8021494Sopenharmony_ci pg, 1488b8021494Sopenharmony_ci addr, 1489b8021494Sopenharmony_ci 
static_cast<SVELoad1Fn>(&Assembler::ldff1sh)); 1490b8021494Sopenharmony_ci} 1491b8021494Sopenharmony_ci 1492b8021494Sopenharmony_civoid MacroAssembler::Ldff1sw(const ZRegister& zt, 1493b8021494Sopenharmony_ci const PRegisterZ& pg, 1494b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1495b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1496b8021494Sopenharmony_ci SVELoadFFHelper(kSRegSizeInBytesLog2, 1497b8021494Sopenharmony_ci zt, 1498b8021494Sopenharmony_ci pg, 1499b8021494Sopenharmony_ci addr, 1500b8021494Sopenharmony_ci static_cast<SVELoad1Fn>(&Assembler::ldff1sw)); 1501b8021494Sopenharmony_ci} 1502b8021494Sopenharmony_ci 1503b8021494Sopenharmony_ci#define VIXL_SVE_LD1R_LIST(V) \ 1504b8021494Sopenharmony_ci V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5) 1505b8021494Sopenharmony_ci 1506b8021494Sopenharmony_ci#define VIXL_DEFINE_MASM_FUNC(SZ, SH) \ 1507b8021494Sopenharmony_ci void MacroAssembler::Ld1r##SZ(const ZRegister& zt, \ 1508b8021494Sopenharmony_ci const PRegisterZ& pg, \ 1509b8021494Sopenharmony_ci const SVEMemOperand& addr) { \ 1510b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); \ 1511b8021494Sopenharmony_ci SVELoadStoreNTBroadcastQOHelper(zt, \ 1512b8021494Sopenharmony_ci pg, \ 1513b8021494Sopenharmony_ci addr, \ 1514b8021494Sopenharmony_ci &MacroAssembler::ld1r##SZ, \ 1515b8021494Sopenharmony_ci 4, \ 1516b8021494Sopenharmony_ci SH, \ 1517b8021494Sopenharmony_ci NO_SVE_OFFSET_MODIFIER, \ 1518b8021494Sopenharmony_ci -1); \ 1519b8021494Sopenharmony_ci } 1520b8021494Sopenharmony_ci 1521b8021494Sopenharmony_ciVIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC) 1522b8021494Sopenharmony_ci 1523b8021494Sopenharmony_ci#undef VIXL_DEFINE_MASM_FUNC 1524b8021494Sopenharmony_ci#undef VIXL_SVE_LD1R_LIST 1525b8021494Sopenharmony_ci 1526b8021494Sopenharmony_civoid MacroAssembler::Ldnt1b(const ZRegister& zt, 1527b8021494Sopenharmony_ci const PRegisterZ& pg, 1528b8021494Sopenharmony_ci const SVEMemOperand& addr) { 
1529b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1530b8021494Sopenharmony_ci if (addr.IsVectorPlusScalar()) { 1531b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1532b8021494Sopenharmony_ci ldnt1b(zt, pg, addr); 1533b8021494Sopenharmony_ci } else { 1534b8021494Sopenharmony_ci SVELoadStoreNTBroadcastQOHelper(zt, 1535b8021494Sopenharmony_ci pg, 1536b8021494Sopenharmony_ci addr, 1537b8021494Sopenharmony_ci &MacroAssembler::ldnt1b, 1538b8021494Sopenharmony_ci 4, 1539b8021494Sopenharmony_ci 0, 1540b8021494Sopenharmony_ci SVE_MUL_VL); 1541b8021494Sopenharmony_ci } 1542b8021494Sopenharmony_ci} 1543b8021494Sopenharmony_ci 1544b8021494Sopenharmony_civoid MacroAssembler::Ldnt1d(const ZRegister& zt, 1545b8021494Sopenharmony_ci const PRegisterZ& pg, 1546b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1547b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1548b8021494Sopenharmony_ci if (addr.IsVectorPlusScalar()) { 1549b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1550b8021494Sopenharmony_ci ldnt1d(zt, pg, addr); 1551b8021494Sopenharmony_ci } else { 1552b8021494Sopenharmony_ci SVELoadStoreNTBroadcastQOHelper(zt, 1553b8021494Sopenharmony_ci pg, 1554b8021494Sopenharmony_ci addr, 1555b8021494Sopenharmony_ci &MacroAssembler::ldnt1d, 1556b8021494Sopenharmony_ci 4, 1557b8021494Sopenharmony_ci 0, 1558b8021494Sopenharmony_ci SVE_MUL_VL); 1559b8021494Sopenharmony_ci } 1560b8021494Sopenharmony_ci} 1561b8021494Sopenharmony_ci 1562b8021494Sopenharmony_civoid MacroAssembler::Ldnt1h(const ZRegister& zt, 1563b8021494Sopenharmony_ci const PRegisterZ& pg, 1564b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1565b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1566b8021494Sopenharmony_ci if (addr.IsVectorPlusScalar()) { 1567b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1568b8021494Sopenharmony_ci ldnt1h(zt, pg, addr); 1569b8021494Sopenharmony_ci } else { 1570b8021494Sopenharmony_ci 
SVELoadStoreNTBroadcastQOHelper(zt, 1571b8021494Sopenharmony_ci pg, 1572b8021494Sopenharmony_ci addr, 1573b8021494Sopenharmony_ci &MacroAssembler::ldnt1h, 1574b8021494Sopenharmony_ci 4, 1575b8021494Sopenharmony_ci 0, 1576b8021494Sopenharmony_ci SVE_MUL_VL); 1577b8021494Sopenharmony_ci } 1578b8021494Sopenharmony_ci} 1579b8021494Sopenharmony_ci 1580b8021494Sopenharmony_civoid MacroAssembler::Ldnt1w(const ZRegister& zt, 1581b8021494Sopenharmony_ci const PRegisterZ& pg, 1582b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1583b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1584b8021494Sopenharmony_ci if (addr.IsVectorPlusScalar()) { 1585b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1586b8021494Sopenharmony_ci ldnt1w(zt, pg, addr); 1587b8021494Sopenharmony_ci } else { 1588b8021494Sopenharmony_ci SVELoadStoreNTBroadcastQOHelper(zt, 1589b8021494Sopenharmony_ci pg, 1590b8021494Sopenharmony_ci addr, 1591b8021494Sopenharmony_ci &MacroAssembler::ldnt1w, 1592b8021494Sopenharmony_ci 4, 1593b8021494Sopenharmony_ci 0, 1594b8021494Sopenharmony_ci SVE_MUL_VL); 1595b8021494Sopenharmony_ci } 1596b8021494Sopenharmony_ci} 1597b8021494Sopenharmony_ci 1598b8021494Sopenharmony_civoid MacroAssembler::Stnt1b(const ZRegister& zt, 1599b8021494Sopenharmony_ci const PRegister& pg, 1600b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1601b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1602b8021494Sopenharmony_ci if (addr.IsVectorPlusScalar()) { 1603b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1604b8021494Sopenharmony_ci stnt1b(zt, pg, addr); 1605b8021494Sopenharmony_ci } else { 1606b8021494Sopenharmony_ci SVELoadStoreNTBroadcastQOHelper(zt, 1607b8021494Sopenharmony_ci pg, 1608b8021494Sopenharmony_ci addr, 1609b8021494Sopenharmony_ci &MacroAssembler::stnt1b, 1610b8021494Sopenharmony_ci 4, 1611b8021494Sopenharmony_ci 0, 1612b8021494Sopenharmony_ci SVE_MUL_VL); 1613b8021494Sopenharmony_ci } 1614b8021494Sopenharmony_ci} 
1615b8021494Sopenharmony_civoid MacroAssembler::Stnt1d(const ZRegister& zt, 1616b8021494Sopenharmony_ci const PRegister& pg, 1617b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1618b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1619b8021494Sopenharmony_ci if (addr.IsVectorPlusScalar()) { 1620b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1621b8021494Sopenharmony_ci stnt1d(zt, pg, addr); 1622b8021494Sopenharmony_ci } else { 1623b8021494Sopenharmony_ci SVELoadStoreNTBroadcastQOHelper(zt, 1624b8021494Sopenharmony_ci pg, 1625b8021494Sopenharmony_ci addr, 1626b8021494Sopenharmony_ci &MacroAssembler::stnt1d, 1627b8021494Sopenharmony_ci 4, 1628b8021494Sopenharmony_ci 0, 1629b8021494Sopenharmony_ci SVE_MUL_VL); 1630b8021494Sopenharmony_ci } 1631b8021494Sopenharmony_ci} 1632b8021494Sopenharmony_civoid MacroAssembler::Stnt1h(const ZRegister& zt, 1633b8021494Sopenharmony_ci const PRegister& pg, 1634b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1635b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1636b8021494Sopenharmony_ci if (addr.IsVectorPlusScalar()) { 1637b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1638b8021494Sopenharmony_ci stnt1h(zt, pg, addr); 1639b8021494Sopenharmony_ci } else { 1640b8021494Sopenharmony_ci SVELoadStoreNTBroadcastQOHelper(zt, 1641b8021494Sopenharmony_ci pg, 1642b8021494Sopenharmony_ci addr, 1643b8021494Sopenharmony_ci &MacroAssembler::stnt1h, 1644b8021494Sopenharmony_ci 4, 1645b8021494Sopenharmony_ci 0, 1646b8021494Sopenharmony_ci SVE_MUL_VL); 1647b8021494Sopenharmony_ci } 1648b8021494Sopenharmony_ci} 1649b8021494Sopenharmony_civoid MacroAssembler::Stnt1w(const ZRegister& zt, 1650b8021494Sopenharmony_ci const PRegister& pg, 1651b8021494Sopenharmony_ci const SVEMemOperand& addr) { 1652b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1653b8021494Sopenharmony_ci if (addr.IsVectorPlusScalar()) { 1654b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 
1655b8021494Sopenharmony_ci stnt1w(zt, pg, addr); 1656b8021494Sopenharmony_ci } else { 1657b8021494Sopenharmony_ci SVELoadStoreNTBroadcastQOHelper(zt, 1658b8021494Sopenharmony_ci pg, 1659b8021494Sopenharmony_ci addr, 1660b8021494Sopenharmony_ci &MacroAssembler::stnt1w, 1661b8021494Sopenharmony_ci 4, 1662b8021494Sopenharmony_ci 0, 1663b8021494Sopenharmony_ci SVE_MUL_VL); 1664b8021494Sopenharmony_ci } 1665b8021494Sopenharmony_ci} 1666b8021494Sopenharmony_ci 1667b8021494Sopenharmony_civoid MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn, 1668b8021494Sopenharmony_ci const ZRegister& zd, 1669b8021494Sopenharmony_ci const ZRegister& za, 1670b8021494Sopenharmony_ci const ZRegister& zn, 1671b8021494Sopenharmony_ci const ZRegister& zm, 1672b8021494Sopenharmony_ci int index) { 1673b8021494Sopenharmony_ci if (zd.Aliases(za)) { 1674b8021494Sopenharmony_ci // zda = zda + (zn . zm) 1675b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1676b8021494Sopenharmony_ci (this->*fn)(zd, zn, zm, index); 1677b8021494Sopenharmony_ci 1678b8021494Sopenharmony_ci } else if (zd.Aliases(zn) || zd.Aliases(zm)) { 1679b8021494Sopenharmony_ci // zdn = za + (zdn . zm[index]) 1680b8021494Sopenharmony_ci // zdm = za + (zn . zdm[index]) 1681b8021494Sopenharmony_ci // zdnm = za + (zdnm . zdnm[index]) 1682b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1683b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); 1684b8021494Sopenharmony_ci { 1685b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, za); 1686b8021494Sopenharmony_ci (this->*fn)(scratch, zn, zm, index); 1687b8021494Sopenharmony_ci } 1688b8021494Sopenharmony_ci 1689b8021494Sopenharmony_ci Mov(zd, scratch); 1690b8021494Sopenharmony_ci } else { 1691b8021494Sopenharmony_ci // zd = za + (zn . 
zm) 1692b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1693b8021494Sopenharmony_ci (this->*fn)(zd, zn, zm, index); 1694b8021494Sopenharmony_ci } 1695b8021494Sopenharmony_ci} 1696b8021494Sopenharmony_ci 1697b8021494Sopenharmony_civoid MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn, 1698b8021494Sopenharmony_ci const ZRegister& zd, 1699b8021494Sopenharmony_ci const ZRegister& za, 1700b8021494Sopenharmony_ci const ZRegister& zn, 1701b8021494Sopenharmony_ci const ZRegister& zm) { 1702b8021494Sopenharmony_ci if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { 1703b8021494Sopenharmony_ci // zd = za . zd . zm 1704b8021494Sopenharmony_ci // zd = za . zn . zd 1705b8021494Sopenharmony_ci // zd = za . zd . zd 1706b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1707b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); 1708b8021494Sopenharmony_ci { 1709b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, za); 1710b8021494Sopenharmony_ci (this->*fn)(scratch, zn, zm); 1711b8021494Sopenharmony_ci } 1712b8021494Sopenharmony_ci 1713b8021494Sopenharmony_ci Mov(zd, scratch); 1714b8021494Sopenharmony_ci } else { 1715b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1716b8021494Sopenharmony_ci (this->*fn)(zd, zn, zm); 1717b8021494Sopenharmony_ci } 1718b8021494Sopenharmony_ci} 1719b8021494Sopenharmony_ci 1720b8021494Sopenharmony_civoid MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn, 1721b8021494Sopenharmony_ci const ZRegister& zd, 1722b8021494Sopenharmony_ci const ZRegister& za, 1723b8021494Sopenharmony_ci const ZRegister& zn, 1724b8021494Sopenharmony_ci const ZRegister& zm) { 1725b8021494Sopenharmony_ci if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { 1726b8021494Sopenharmony_ci // zd = za . zd . zm 1727b8021494Sopenharmony_ci // zd = za . zn . zd 1728b8021494Sopenharmony_ci // zd = za . zd . 
zd 1729b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1730b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); 1731b8021494Sopenharmony_ci { 1732b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, za); 1733b8021494Sopenharmony_ci (this->*fn)(scratch, scratch, zn, zm); 1734b8021494Sopenharmony_ci } 1735b8021494Sopenharmony_ci 1736b8021494Sopenharmony_ci Mov(zd, scratch); 1737b8021494Sopenharmony_ci } else { 1738b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1739b8021494Sopenharmony_ci (this->*fn)(zd, zd, zn, zm); 1740b8021494Sopenharmony_ci } 1741b8021494Sopenharmony_ci} 1742b8021494Sopenharmony_ci 1743b8021494Sopenharmony_civoid MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn, 1744b8021494Sopenharmony_ci const ZRegister& zd, 1745b8021494Sopenharmony_ci const ZRegister& za, 1746b8021494Sopenharmony_ci const ZRegister& zn, 1747b8021494Sopenharmony_ci const ZRegister& zm, 1748b8021494Sopenharmony_ci int imm) { 1749b8021494Sopenharmony_ci if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { 1750b8021494Sopenharmony_ci // zd = za . zd . zm[i] 1751b8021494Sopenharmony_ci // zd = za . zn . zd[i] 1752b8021494Sopenharmony_ci // zd = za . zd . zd[i] 1753b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1754b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); 1755b8021494Sopenharmony_ci { 1756b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, za); 1757b8021494Sopenharmony_ci (this->*fn)(scratch, zn, zm, imm); 1758b8021494Sopenharmony_ci } 1759b8021494Sopenharmony_ci 1760b8021494Sopenharmony_ci Mov(zd, scratch); 1761b8021494Sopenharmony_ci } else { 1762b8021494Sopenharmony_ci // zd = za . zn . 
zm[i] 1763b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1764b8021494Sopenharmony_ci (this->*fn)(zd, zn, zm, imm); 1765b8021494Sopenharmony_ci } 1766b8021494Sopenharmony_ci} 1767b8021494Sopenharmony_ci 1768b8021494Sopenharmony_civoid MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn, 1769b8021494Sopenharmony_ci const ZRegister& zd, 1770b8021494Sopenharmony_ci const ZRegister& za, 1771b8021494Sopenharmony_ci const ZRegister& zn, 1772b8021494Sopenharmony_ci const ZRegister& zm) { 1773b8021494Sopenharmony_ci if (zn.Aliases(zm)) { 1774b8021494Sopenharmony_ci // If zn == zm, the difference is zero. 1775b8021494Sopenharmony_ci if (!zd.Aliases(za)) { 1776b8021494Sopenharmony_ci Mov(zd, za); 1777b8021494Sopenharmony_ci } 1778b8021494Sopenharmony_ci } else if (zd.Aliases(za)) { 1779b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1780b8021494Sopenharmony_ci (this->*fn)(zd, zn, zm); 1781b8021494Sopenharmony_ci } else if (zd.Aliases(zn)) { 1782b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1783b8021494Sopenharmony_ci ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); 1784b8021494Sopenharmony_ci Mov(ztmp, zn); 1785b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1786b8021494Sopenharmony_ci (this->*fn)(zd, ztmp, zm); 1787b8021494Sopenharmony_ci } else if (zd.Aliases(zm)) { 1788b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1789b8021494Sopenharmony_ci ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); 1790b8021494Sopenharmony_ci Mov(ztmp, zm); 1791b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1792b8021494Sopenharmony_ci (this->*fn)(zd, zn, ztmp); 1793b8021494Sopenharmony_ci } else { 1794b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1795b8021494Sopenharmony_ci (this->*fn)(zd, zn, zm); 1796b8021494Sopenharmony_ci } 1797b8021494Sopenharmony_ci} 1798b8021494Sopenharmony_ci 1799b8021494Sopenharmony_ci#define 
VIXL_SVE_4REG_LIST(V) \ 1800b8021494Sopenharmony_ci V(Saba, saba, AbsoluteDifferenceAccumulate) \ 1801b8021494Sopenharmony_ci V(Uaba, uaba, AbsoluteDifferenceAccumulate) \ 1802b8021494Sopenharmony_ci V(Sabalb, sabalb, AbsoluteDifferenceAccumulate) \ 1803b8021494Sopenharmony_ci V(Sabalt, sabalt, AbsoluteDifferenceAccumulate) \ 1804b8021494Sopenharmony_ci V(Uabalb, uabalb, AbsoluteDifferenceAccumulate) \ 1805b8021494Sopenharmony_ci V(Uabalt, uabalt, AbsoluteDifferenceAccumulate) \ 1806b8021494Sopenharmony_ci V(Sdot, sdot, FourRegDestructiveHelper) \ 1807b8021494Sopenharmony_ci V(Udot, udot, FourRegDestructiveHelper) \ 1808b8021494Sopenharmony_ci V(Adclb, adclb, FourRegDestructiveHelper) \ 1809b8021494Sopenharmony_ci V(Adclt, adclt, FourRegDestructiveHelper) \ 1810b8021494Sopenharmony_ci V(Sbclb, sbclb, FourRegDestructiveHelper) \ 1811b8021494Sopenharmony_ci V(Sbclt, sbclt, FourRegDestructiveHelper) \ 1812b8021494Sopenharmony_ci V(Smlalb, smlalb, FourRegDestructiveHelper) \ 1813b8021494Sopenharmony_ci V(Smlalt, smlalt, FourRegDestructiveHelper) \ 1814b8021494Sopenharmony_ci V(Smlslb, smlslb, FourRegDestructiveHelper) \ 1815b8021494Sopenharmony_ci V(Smlslt, smlslt, FourRegDestructiveHelper) \ 1816b8021494Sopenharmony_ci V(Umlalb, umlalb, FourRegDestructiveHelper) \ 1817b8021494Sopenharmony_ci V(Umlalt, umlalt, FourRegDestructiveHelper) \ 1818b8021494Sopenharmony_ci V(Umlslb, umlslb, FourRegDestructiveHelper) \ 1819b8021494Sopenharmony_ci V(Umlslt, umlslt, FourRegDestructiveHelper) \ 1820b8021494Sopenharmony_ci V(Bcax, bcax, FourRegDestructiveHelper) \ 1821b8021494Sopenharmony_ci V(Bsl, bsl, FourRegDestructiveHelper) \ 1822b8021494Sopenharmony_ci V(Bsl1n, bsl1n, FourRegDestructiveHelper) \ 1823b8021494Sopenharmony_ci V(Bsl2n, bsl2n, FourRegDestructiveHelper) \ 1824b8021494Sopenharmony_ci V(Eor3, eor3, FourRegDestructiveHelper) \ 1825b8021494Sopenharmony_ci V(Nbsl, nbsl, FourRegDestructiveHelper) \ 1826b8021494Sopenharmony_ci V(Fmlalb, fmlalb, FourRegDestructiveHelper) \ 
1827b8021494Sopenharmony_ci V(Fmlalt, fmlalt, FourRegDestructiveHelper) \ 1828b8021494Sopenharmony_ci V(Fmlslb, fmlslb, FourRegDestructiveHelper) \ 1829b8021494Sopenharmony_ci V(Fmlslt, fmlslt, FourRegDestructiveHelper) \ 1830b8021494Sopenharmony_ci V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper) \ 1831b8021494Sopenharmony_ci V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \ 1832b8021494Sopenharmony_ci V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper) \ 1833b8021494Sopenharmony_ci V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper) \ 1834b8021494Sopenharmony_ci V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \ 1835b8021494Sopenharmony_ci V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper) \ 1836b8021494Sopenharmony_ci V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper) \ 1837b8021494Sopenharmony_ci V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper) \ 1838b8021494Sopenharmony_ci V(Fmmla, fmmla, FourRegDestructiveHelper) \ 1839b8021494Sopenharmony_ci V(Smmla, smmla, FourRegDestructiveHelper) \ 1840b8021494Sopenharmony_ci V(Ummla, ummla, FourRegDestructiveHelper) \ 1841b8021494Sopenharmony_ci V(Usmmla, usmmla, FourRegDestructiveHelper) \ 1842b8021494Sopenharmony_ci V(Usdot, usdot, FourRegDestructiveHelper) 1843b8021494Sopenharmony_ci 1844b8021494Sopenharmony_ci#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \ 1845b8021494Sopenharmony_ci void MacroAssembler::MASMFN(const ZRegister& zd, \ 1846b8021494Sopenharmony_ci const ZRegister& za, \ 1847b8021494Sopenharmony_ci const ZRegister& zn, \ 1848b8021494Sopenharmony_ci const ZRegister& zm) { \ 1849b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); \ 1850b8021494Sopenharmony_ci HELPER(&Assembler::ASMFN, zd, za, zn, zm); \ 1851b8021494Sopenharmony_ci } 1852b8021494Sopenharmony_ciVIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC) 1853b8021494Sopenharmony_ci#undef VIXL_DEFINE_MASM_FUNC 1854b8021494Sopenharmony_ci 1855b8021494Sopenharmony_ci#define VIXL_SVE_4REG_1IMM_LIST(V) \ 1856b8021494Sopenharmony_ci V(Fmla, fmla, 
FourRegOneImmDestructiveHelper) \ 1857b8021494Sopenharmony_ci V(Fmls, fmls, FourRegOneImmDestructiveHelper) \ 1858b8021494Sopenharmony_ci V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper) \ 1859b8021494Sopenharmony_ci V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper) \ 1860b8021494Sopenharmony_ci V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper) \ 1861b8021494Sopenharmony_ci V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper) \ 1862b8021494Sopenharmony_ci V(Mla, mla, FourRegOneImmDestructiveHelper) \ 1863b8021494Sopenharmony_ci V(Mls, mls, FourRegOneImmDestructiveHelper) \ 1864b8021494Sopenharmony_ci V(Smlalb, smlalb, FourRegOneImmDestructiveHelper) \ 1865b8021494Sopenharmony_ci V(Smlalt, smlalt, FourRegOneImmDestructiveHelper) \ 1866b8021494Sopenharmony_ci V(Smlslb, smlslb, FourRegOneImmDestructiveHelper) \ 1867b8021494Sopenharmony_ci V(Smlslt, smlslt, FourRegOneImmDestructiveHelper) \ 1868b8021494Sopenharmony_ci V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \ 1869b8021494Sopenharmony_ci V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \ 1870b8021494Sopenharmony_ci V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \ 1871b8021494Sopenharmony_ci V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \ 1872b8021494Sopenharmony_ci V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \ 1873b8021494Sopenharmony_ci V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \ 1874b8021494Sopenharmony_ci V(Umlalb, umlalb, FourRegOneImmDestructiveHelper) \ 1875b8021494Sopenharmony_ci V(Umlalt, umlalt, FourRegOneImmDestructiveHelper) \ 1876b8021494Sopenharmony_ci V(Umlslb, umlslb, FourRegOneImmDestructiveHelper) \ 1877b8021494Sopenharmony_ci V(Umlslt, umlslt, FourRegOneImmDestructiveHelper) 1878b8021494Sopenharmony_ci 1879b8021494Sopenharmony_ci#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \ 1880b8021494Sopenharmony_ci void MacroAssembler::MASMFN(const ZRegister& zd, \ 1881b8021494Sopenharmony_ci const ZRegister& za, \ 1882b8021494Sopenharmony_ci const 
ZRegister& zn, \ 1883b8021494Sopenharmony_ci const ZRegister& zm, \ 1884b8021494Sopenharmony_ci int imm) { \ 1885b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); \ 1886b8021494Sopenharmony_ci HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm); \ 1887b8021494Sopenharmony_ci } 1888b8021494Sopenharmony_ciVIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC) 1889b8021494Sopenharmony_ci#undef VIXL_DEFINE_MASM_FUNC 1890b8021494Sopenharmony_ci 1891b8021494Sopenharmony_civoid MacroAssembler::Sdot(const ZRegister& zd, 1892b8021494Sopenharmony_ci const ZRegister& za, 1893b8021494Sopenharmony_ci const ZRegister& zn, 1894b8021494Sopenharmony_ci const ZRegister& zm, 1895b8021494Sopenharmony_ci int index) { 1896b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1897b8021494Sopenharmony_ci SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index); 1898b8021494Sopenharmony_ci} 1899b8021494Sopenharmony_ci 1900b8021494Sopenharmony_civoid MacroAssembler::Udot(const ZRegister& zd, 1901b8021494Sopenharmony_ci const ZRegister& za, 1902b8021494Sopenharmony_ci const ZRegister& zn, 1903b8021494Sopenharmony_ci const ZRegister& zm, 1904b8021494Sopenharmony_ci int index) { 1905b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1906b8021494Sopenharmony_ci SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index); 1907b8021494Sopenharmony_ci} 1908b8021494Sopenharmony_ci 1909b8021494Sopenharmony_civoid MacroAssembler::Sudot(const ZRegister& zd, 1910b8021494Sopenharmony_ci const ZRegister& za, 1911b8021494Sopenharmony_ci const ZRegister& zn, 1912b8021494Sopenharmony_ci const ZRegister& zm, 1913b8021494Sopenharmony_ci int index) { 1914b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1915b8021494Sopenharmony_ci SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index); 1916b8021494Sopenharmony_ci} 1917b8021494Sopenharmony_ci 1918b8021494Sopenharmony_civoid MacroAssembler::Usdot(const ZRegister& zd, 1919b8021494Sopenharmony_ci const ZRegister& za, 
1920b8021494Sopenharmony_ci const ZRegister& zn, 1921b8021494Sopenharmony_ci const ZRegister& zm, 1922b8021494Sopenharmony_ci int index) { 1923b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 1924b8021494Sopenharmony_ci SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index); 1925b8021494Sopenharmony_ci} 1926b8021494Sopenharmony_ci 1927b8021494Sopenharmony_civoid MacroAssembler::Cdot(const ZRegister& zd, 1928b8021494Sopenharmony_ci const ZRegister& za, 1929b8021494Sopenharmony_ci const ZRegister& zn, 1930b8021494Sopenharmony_ci const ZRegister& zm, 1931b8021494Sopenharmony_ci int index, 1932b8021494Sopenharmony_ci int rot) { 1933b8021494Sopenharmony_ci // This doesn't handle zm when it's out of the range that can be encoded in 1934b8021494Sopenharmony_ci // instruction. The range depends on element size: z0-z7 for B, z0-15 for H. 1935b8021494Sopenharmony_ci if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { 1936b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1937b8021494Sopenharmony_ci ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); 1938b8021494Sopenharmony_ci { 1939b8021494Sopenharmony_ci MovprfxHelperScope guard(this, ztmp, za); 1940b8021494Sopenharmony_ci cdot(ztmp, zn, zm, index, rot); 1941b8021494Sopenharmony_ci } 1942b8021494Sopenharmony_ci Mov(zd, ztmp); 1943b8021494Sopenharmony_ci } else { 1944b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1945b8021494Sopenharmony_ci cdot(zd, zn, zm, index, rot); 1946b8021494Sopenharmony_ci } 1947b8021494Sopenharmony_ci} 1948b8021494Sopenharmony_ci 1949b8021494Sopenharmony_civoid MacroAssembler::Cdot(const ZRegister& zd, 1950b8021494Sopenharmony_ci const ZRegister& za, 1951b8021494Sopenharmony_ci const ZRegister& zn, 1952b8021494Sopenharmony_ci const ZRegister& zm, 1953b8021494Sopenharmony_ci int rot) { 1954b8021494Sopenharmony_ci if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { 1955b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 
1956b8021494Sopenharmony_ci VIXL_ASSERT(AreSameLaneSize(zn, zm)); 1957b8021494Sopenharmony_ci ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); 1958b8021494Sopenharmony_ci Mov(ztmp, zd.Aliases(zn) ? zn : zm); 1959b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1960b8021494Sopenharmony_ci cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot); 1961b8021494Sopenharmony_ci } else { 1962b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 1963b8021494Sopenharmony_ci cdot(zd, zn, zm, rot); 1964b8021494Sopenharmony_ci } 1965b8021494Sopenharmony_ci} 1966b8021494Sopenharmony_ci 1967b8021494Sopenharmony_civoid MacroAssembler::FPMulAddHelper(const ZRegister& zd, 1968b8021494Sopenharmony_ci const PRegisterM& pg, 1969b8021494Sopenharmony_ci const ZRegister& za, 1970b8021494Sopenharmony_ci const ZRegister& zn, 1971b8021494Sopenharmony_ci const ZRegister& zm, 1972b8021494Sopenharmony_ci SVEMulAddPredicatedZdaFn fn_zda, 1973b8021494Sopenharmony_ci SVEMulAddPredicatedZdnFn fn_zdn, 1974b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 1975b8021494Sopenharmony_ci ResolveFPNaNPropagationOption(&nan_option); 1976b8021494Sopenharmony_ci 1977b8021494Sopenharmony_ci if (zd.Aliases(za)) { 1978b8021494Sopenharmony_ci // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. 1979b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1980b8021494Sopenharmony_ci (this->*fn_zda)(zd, pg, zn, zm); 1981b8021494Sopenharmony_ci } else if (zd.Aliases(zn)) { 1982b8021494Sopenharmony_ci // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb. 
1983b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1984b8021494Sopenharmony_ci (this->*fn_zdn)(zd, pg, zm, za); 1985b8021494Sopenharmony_ci } else if (zd.Aliases(zm)) { 1986b8021494Sopenharmony_ci switch (nan_option) { 1987b8021494Sopenharmony_ci case FastNaNPropagation: { 1988b8021494Sopenharmony_ci // We treat multiplication as commutative in the fast mode, so we can 1989b8021494Sopenharmony_ci // swap zn and zm. 1990b8021494Sopenharmony_ci // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb. 1991b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 1992b8021494Sopenharmony_ci (this->*fn_zdn)(zd, pg, zn, za); 1993b8021494Sopenharmony_ci return; 1994b8021494Sopenharmony_ci } 1995b8021494Sopenharmony_ci case StrictNaNPropagation: { 1996b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 1997b8021494Sopenharmony_ci // Use a scratch register to keep the argument order exactly as 1998b8021494Sopenharmony_ci // specified. 1999b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); 2000b8021494Sopenharmony_ci { 2001b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, pg, za); 2002b8021494Sopenharmony_ci // scratch = (-)za + ((-)zn * zm) 2003b8021494Sopenharmony_ci (this->*fn_zda)(scratch, pg, zn, zm); 2004b8021494Sopenharmony_ci } 2005b8021494Sopenharmony_ci Mov(zd, scratch); 2006b8021494Sopenharmony_ci return; 2007b8021494Sopenharmony_ci } 2008b8021494Sopenharmony_ci case NoFPMacroNaNPropagationSelected: 2009b8021494Sopenharmony_ci VIXL_UNREACHABLE(); 2010b8021494Sopenharmony_ci return; 2011b8021494Sopenharmony_ci } 2012b8021494Sopenharmony_ci } else { 2013b8021494Sopenharmony_ci // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. 
2014b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, pg, za); 2015b8021494Sopenharmony_ci (this->*fn_zda)(zd, pg, zn, zm); 2016b8021494Sopenharmony_ci } 2017b8021494Sopenharmony_ci} 2018b8021494Sopenharmony_ci 2019b8021494Sopenharmony_civoid MacroAssembler::Fmla(const ZRegister& zd, 2020b8021494Sopenharmony_ci const PRegisterM& pg, 2021b8021494Sopenharmony_ci const ZRegister& za, 2022b8021494Sopenharmony_ci const ZRegister& zn, 2023b8021494Sopenharmony_ci const ZRegister& zm, 2024b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 2025b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2026b8021494Sopenharmony_ci FPMulAddHelper(zd, 2027b8021494Sopenharmony_ci pg, 2028b8021494Sopenharmony_ci za, 2029b8021494Sopenharmony_ci zn, 2030b8021494Sopenharmony_ci zm, 2031b8021494Sopenharmony_ci &Assembler::fmla, 2032b8021494Sopenharmony_ci &Assembler::fmad, 2033b8021494Sopenharmony_ci nan_option); 2034b8021494Sopenharmony_ci} 2035b8021494Sopenharmony_ci 2036b8021494Sopenharmony_civoid MacroAssembler::Fmls(const ZRegister& zd, 2037b8021494Sopenharmony_ci const PRegisterM& pg, 2038b8021494Sopenharmony_ci const ZRegister& za, 2039b8021494Sopenharmony_ci const ZRegister& zn, 2040b8021494Sopenharmony_ci const ZRegister& zm, 2041b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 2042b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2043b8021494Sopenharmony_ci FPMulAddHelper(zd, 2044b8021494Sopenharmony_ci pg, 2045b8021494Sopenharmony_ci za, 2046b8021494Sopenharmony_ci zn, 2047b8021494Sopenharmony_ci zm, 2048b8021494Sopenharmony_ci &Assembler::fmls, 2049b8021494Sopenharmony_ci &Assembler::fmsb, 2050b8021494Sopenharmony_ci nan_option); 2051b8021494Sopenharmony_ci} 2052b8021494Sopenharmony_ci 2053b8021494Sopenharmony_civoid MacroAssembler::Fnmla(const ZRegister& zd, 2054b8021494Sopenharmony_ci const PRegisterM& pg, 2055b8021494Sopenharmony_ci const ZRegister& za, 2056b8021494Sopenharmony_ci const ZRegister& zn, 
2057b8021494Sopenharmony_ci const ZRegister& zm, 2058b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 2059b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2060b8021494Sopenharmony_ci FPMulAddHelper(zd, 2061b8021494Sopenharmony_ci pg, 2062b8021494Sopenharmony_ci za, 2063b8021494Sopenharmony_ci zn, 2064b8021494Sopenharmony_ci zm, 2065b8021494Sopenharmony_ci &Assembler::fnmla, 2066b8021494Sopenharmony_ci &Assembler::fnmad, 2067b8021494Sopenharmony_ci nan_option); 2068b8021494Sopenharmony_ci} 2069b8021494Sopenharmony_ci 2070b8021494Sopenharmony_civoid MacroAssembler::Fnmls(const ZRegister& zd, 2071b8021494Sopenharmony_ci const PRegisterM& pg, 2072b8021494Sopenharmony_ci const ZRegister& za, 2073b8021494Sopenharmony_ci const ZRegister& zn, 2074b8021494Sopenharmony_ci const ZRegister& zm, 2075b8021494Sopenharmony_ci FPMacroNaNPropagationOption nan_option) { 2076b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2077b8021494Sopenharmony_ci FPMulAddHelper(zd, 2078b8021494Sopenharmony_ci pg, 2079b8021494Sopenharmony_ci za, 2080b8021494Sopenharmony_ci zn, 2081b8021494Sopenharmony_ci zm, 2082b8021494Sopenharmony_ci &Assembler::fnmls, 2083b8021494Sopenharmony_ci &Assembler::fnmsb, 2084b8021494Sopenharmony_ci nan_option); 2085b8021494Sopenharmony_ci} 2086b8021494Sopenharmony_ci 2087b8021494Sopenharmony_civoid MacroAssembler::Ftmad(const ZRegister& zd, 2088b8021494Sopenharmony_ci const ZRegister& zn, 2089b8021494Sopenharmony_ci const ZRegister& zm, 2090b8021494Sopenharmony_ci int imm3) { 2091b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2092b8021494Sopenharmony_ci if (zd.Aliases(zm) && !zd.Aliases(zn)) { 2093b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 2094b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); 2095b8021494Sopenharmony_ci Mov(scratch, zm); 2096b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 2097b8021494Sopenharmony_ci ftmad(zd, zd, scratch, 
imm3); 2098b8021494Sopenharmony_ci } else { 2099b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 2100b8021494Sopenharmony_ci ftmad(zd, zd, zm, imm3); 2101b8021494Sopenharmony_ci } 2102b8021494Sopenharmony_ci} 2103b8021494Sopenharmony_ci 2104b8021494Sopenharmony_civoid MacroAssembler::Fcadd(const ZRegister& zd, 2105b8021494Sopenharmony_ci const PRegisterM& pg, 2106b8021494Sopenharmony_ci const ZRegister& zn, 2107b8021494Sopenharmony_ci const ZRegister& zm, 2108b8021494Sopenharmony_ci int rot) { 2109b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2110b8021494Sopenharmony_ci if (zd.Aliases(zm) && !zd.Aliases(zn)) { 2111b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 2112b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); 2113b8021494Sopenharmony_ci { 2114b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, pg, zn); 2115b8021494Sopenharmony_ci fcadd(scratch, pg, scratch, zm, rot); 2116b8021494Sopenharmony_ci } 2117b8021494Sopenharmony_ci Mov(zd, scratch); 2118b8021494Sopenharmony_ci } else { 2119b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, pg, zn); 2120b8021494Sopenharmony_ci fcadd(zd, pg, zd, zm, rot); 2121b8021494Sopenharmony_ci } 2122b8021494Sopenharmony_ci} 2123b8021494Sopenharmony_ci 2124b8021494Sopenharmony_civoid MacroAssembler::Fcmla(const ZRegister& zd, 2125b8021494Sopenharmony_ci const PRegisterM& pg, 2126b8021494Sopenharmony_ci const ZRegister& za, 2127b8021494Sopenharmony_ci const ZRegister& zn, 2128b8021494Sopenharmony_ci const ZRegister& zm, 2129b8021494Sopenharmony_ci int rot) { 2130b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2131b8021494Sopenharmony_ci if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { 2132b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 2133b8021494Sopenharmony_ci ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); 2134b8021494Sopenharmony_ci { 2135b8021494Sopenharmony_ci 
MovprfxHelperScope guard(this, ztmp, za); 2136b8021494Sopenharmony_ci fcmla(ztmp, pg, zn, zm, rot); 2137b8021494Sopenharmony_ci } 2138b8021494Sopenharmony_ci Mov(zd, pg, ztmp); 2139b8021494Sopenharmony_ci } else { 2140b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, pg, za); 2141b8021494Sopenharmony_ci fcmla(zd, pg, zn, zm, rot); 2142b8021494Sopenharmony_ci } 2143b8021494Sopenharmony_ci} 2144b8021494Sopenharmony_ci 2145b8021494Sopenharmony_civoid MacroAssembler::Splice(const ZRegister& zd, 2146b8021494Sopenharmony_ci const PRegister& pg, 2147b8021494Sopenharmony_ci const ZRegister& zn, 2148b8021494Sopenharmony_ci const ZRegister& zm) { 2149b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2150b8021494Sopenharmony_ci if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) { 2151b8021494Sopenharmony_ci SingleEmissionCheckScope guard(this); 2152b8021494Sopenharmony_ci splice(zd, pg, zn, zm); 2153b8021494Sopenharmony_ci } else if (zd.Aliases(zm) && !zd.Aliases(zn)) { 2154b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 2155b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); 2156b8021494Sopenharmony_ci { 2157b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, zn); 2158b8021494Sopenharmony_ci splice(scratch, pg, scratch, zm); 2159b8021494Sopenharmony_ci } 2160b8021494Sopenharmony_ci Mov(zd, scratch); 2161b8021494Sopenharmony_ci } else { 2162b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 2163b8021494Sopenharmony_ci splice(zd, pg, zd, zm); 2164b8021494Sopenharmony_ci } 2165b8021494Sopenharmony_ci} 2166b8021494Sopenharmony_ci 2167b8021494Sopenharmony_civoid MacroAssembler::Clasta(const ZRegister& zd, 2168b8021494Sopenharmony_ci const PRegister& pg, 2169b8021494Sopenharmony_ci const ZRegister& zn, 2170b8021494Sopenharmony_ci const ZRegister& zm) { 2171b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2172b8021494Sopenharmony_ci if 
(zd.Aliases(zm) && !zd.Aliases(zn)) { 2173b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 2174b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); 2175b8021494Sopenharmony_ci { 2176b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, zn); 2177b8021494Sopenharmony_ci clasta(scratch, pg, scratch, zm); 2178b8021494Sopenharmony_ci } 2179b8021494Sopenharmony_ci Mov(zd, scratch); 2180b8021494Sopenharmony_ci } else { 2181b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 2182b8021494Sopenharmony_ci clasta(zd, pg, zd, zm); 2183b8021494Sopenharmony_ci } 2184b8021494Sopenharmony_ci} 2185b8021494Sopenharmony_ci 2186b8021494Sopenharmony_civoid MacroAssembler::Clastb(const ZRegister& zd, 2187b8021494Sopenharmony_ci const PRegister& pg, 2188b8021494Sopenharmony_ci const ZRegister& zn, 2189b8021494Sopenharmony_ci const ZRegister& zm) { 2190b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2191b8021494Sopenharmony_ci if (zd.Aliases(zm) && !zd.Aliases(zn)) { 2192b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 2193b8021494Sopenharmony_ci ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); 2194b8021494Sopenharmony_ci { 2195b8021494Sopenharmony_ci MovprfxHelperScope guard(this, scratch, zn); 2196b8021494Sopenharmony_ci clastb(scratch, pg, scratch, zm); 2197b8021494Sopenharmony_ci } 2198b8021494Sopenharmony_ci Mov(zd, scratch); 2199b8021494Sopenharmony_ci } else { 2200b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 2201b8021494Sopenharmony_ci clastb(zd, pg, zd, zm); 2202b8021494Sopenharmony_ci } 2203b8021494Sopenharmony_ci} 2204b8021494Sopenharmony_ci 2205b8021494Sopenharmony_civoid MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn, 2206b8021494Sopenharmony_ci const ZRegister& zd, 2207b8021494Sopenharmony_ci const ZRegister& za, 2208b8021494Sopenharmony_ci const ZRegister& zn, 2209b8021494Sopenharmony_ci int shift) { 2210b8021494Sopenharmony_ci 
VIXL_ASSERT(allow_macro_instructions_); 2211b8021494Sopenharmony_ci if (!zd.Aliases(za) && zd.Aliases(zn)) { 2212b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 2213b8021494Sopenharmony_ci ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); 2214b8021494Sopenharmony_ci Mov(ztmp, zn); 2215b8021494Sopenharmony_ci { 2216b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 2217b8021494Sopenharmony_ci (this->*fn)(zd, ztmp, shift); 2218b8021494Sopenharmony_ci } 2219b8021494Sopenharmony_ci } else { 2220b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, za); 2221b8021494Sopenharmony_ci (this->*fn)(zd, zn, shift); 2222b8021494Sopenharmony_ci } 2223b8021494Sopenharmony_ci} 2224b8021494Sopenharmony_ci 2225b8021494Sopenharmony_civoid MacroAssembler::Srsra(const ZRegister& zd, 2226b8021494Sopenharmony_ci const ZRegister& za, 2227b8021494Sopenharmony_ci const ZRegister& zn, 2228b8021494Sopenharmony_ci int shift) { 2229b8021494Sopenharmony_ci ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift); 2230b8021494Sopenharmony_ci} 2231b8021494Sopenharmony_ci 2232b8021494Sopenharmony_civoid MacroAssembler::Ssra(const ZRegister& zd, 2233b8021494Sopenharmony_ci const ZRegister& za, 2234b8021494Sopenharmony_ci const ZRegister& zn, 2235b8021494Sopenharmony_ci int shift) { 2236b8021494Sopenharmony_ci ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift); 2237b8021494Sopenharmony_ci} 2238b8021494Sopenharmony_ci 2239b8021494Sopenharmony_civoid MacroAssembler::Ursra(const ZRegister& zd, 2240b8021494Sopenharmony_ci const ZRegister& za, 2241b8021494Sopenharmony_ci const ZRegister& zn, 2242b8021494Sopenharmony_ci int shift) { 2243b8021494Sopenharmony_ci ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift); 2244b8021494Sopenharmony_ci} 2245b8021494Sopenharmony_ci 2246b8021494Sopenharmony_civoid MacroAssembler::Usra(const ZRegister& zd, 2247b8021494Sopenharmony_ci const ZRegister& za, 2248b8021494Sopenharmony_ci const ZRegister& zn, 
2249b8021494Sopenharmony_ci int shift) { 2250b8021494Sopenharmony_ci ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift); 2251b8021494Sopenharmony_ci} 2252b8021494Sopenharmony_ci 2253b8021494Sopenharmony_civoid MacroAssembler::ComplexAddition(ZZZImmFn fn, 2254b8021494Sopenharmony_ci const ZRegister& zd, 2255b8021494Sopenharmony_ci const ZRegister& zn, 2256b8021494Sopenharmony_ci const ZRegister& zm, 2257b8021494Sopenharmony_ci int rot) { 2258b8021494Sopenharmony_ci VIXL_ASSERT(allow_macro_instructions_); 2259b8021494Sopenharmony_ci if (!zd.Aliases(zn) && zd.Aliases(zm)) { 2260b8021494Sopenharmony_ci UseScratchRegisterScope temps(this); 2261b8021494Sopenharmony_ci ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm); 2262b8021494Sopenharmony_ci Mov(ztmp, zm); 2263b8021494Sopenharmony_ci { 2264b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 2265b8021494Sopenharmony_ci (this->*fn)(zd, zd, ztmp, rot); 2266b8021494Sopenharmony_ci } 2267b8021494Sopenharmony_ci } else { 2268b8021494Sopenharmony_ci MovprfxHelperScope guard(this, zd, zn); 2269b8021494Sopenharmony_ci (this->*fn)(zd, zd, zm, rot); 2270b8021494Sopenharmony_ci } 2271b8021494Sopenharmony_ci} 2272b8021494Sopenharmony_ci 2273b8021494Sopenharmony_civoid MacroAssembler::Cadd(const ZRegister& zd, 2274b8021494Sopenharmony_ci const ZRegister& zn, 2275b8021494Sopenharmony_ci const ZRegister& zm, 2276b8021494Sopenharmony_ci int rot) { 2277b8021494Sopenharmony_ci ComplexAddition(&Assembler::cadd, zd, zn, zm, rot); 2278b8021494Sopenharmony_ci} 2279b8021494Sopenharmony_ci 2280b8021494Sopenharmony_civoid MacroAssembler::Sqcadd(const ZRegister& zd, 2281b8021494Sopenharmony_ci const ZRegister& zn, 2282b8021494Sopenharmony_ci const ZRegister& zm, 2283b8021494Sopenharmony_ci int rot) { 2284b8021494Sopenharmony_ci ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot); 2285b8021494Sopenharmony_ci} 2286b8021494Sopenharmony_ci 2287b8021494Sopenharmony_ci} // namespace aarch64 2288b8021494Sopenharmony_ci} // 
namespace vixl 2289