// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "macro-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

void MacroAssembler::AddSubHelper(AddSubHelperOption option,
                                  const ZRegister& zd,
                                  const ZRegister& zn,
                                  IntegerOperand imm) {
  VIXL_ASSERT(imm.FitsInLane(zd));

  // Simple, encodable cases.
  if (TrySingleAddSub(option, zd, zn, imm)) return;

  VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate));
  bool add_imm = (option == kAddImmediate);

  // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one
  // instruction. Also interpret the immediate as signed, so we can convert
  // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc.
  IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits()));
  if (signed_imm.IsNegative()) {
    AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate;
    IntegerOperand n_imm(signed_imm.GetMagnitude());
    // IntegerOperand can represent -INT64_MIN, so this is always safe.
    VIXL_ASSERT(n_imm.IsPositiveOrZero());
    if (TrySingleAddSub(n_option, zd, zn, n_imm)) return;
  }

  // Otherwise, fall back to dup + ADD_z_z/SUB_z_z.
  UseScratchRegisterScope temps(this);
  ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
  Dup(scratch, imm);

  SingleEmissionCheckScope guard(this);
  if (add_imm) {
    add(zd, zn, scratch);
  } else {
    sub(zd, zn, scratch);
  }
}

bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option,
                                     const ZRegister& zd,
                                     const ZRegister& zn,
                                     IntegerOperand imm) {
  VIXL_ASSERT(imm.FitsInLane(zd));

  int imm8;
  int shift = -1;
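  // SVE ADD/SUB (immediate) take an unsigned 8-bit immediate that can
  // optionally be shifted left by eight, so try both encodings.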
  if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
      imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
    MovprfxHelperScope guard(this, zd, zn);
    switch (option) {
      case kAddImmediate:
        add(zd, zd, imm8, shift);
        return true;
      case kSubImmediate:
        sub(zd, zd, imm8, shift);
        return true;
    }
  }
  return false;
}

void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn,
                                      SVEArithPredicatedFn reg_macro,
                                      const ZRegister& zd,
                                      const ZRegister& zn,
                                      IntegerOperand imm,
                                      bool is_signed) {
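  // Use the immediate form if the value fits in a signed byte (for signed
  // operations) or an unsigned byte (for unsigned operations). Otherwise,
  // materialise the immediate in a scratch Z register and use the predicated
  // vector form.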
  if (is_signed) {
    // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi
    if (imm.IsInt8()) {
      MovprfxHelperScope guard(this, zd, zn);
      (this->*imm_fn)(zd, zd, imm.AsInt8());
      return;
    }
  } else {
    // E.g. UMIN_z_zi, UMAX_z_zi
    if (imm.IsUint8()) {
      MovprfxHelperScope guard(this, zd, zn);
      (this->*imm_fn)(zd, zd, imm.AsUint8());
      return;
    }
  }

  UseScratchRegisterScope temps(this);
  PRegister pg = temps.AcquireGoverningP();
  Ptrue(pg.WithSameLaneSizeAs(zd));

  // Try to re-use zd if we can, so we can avoid a movprfx.
  ZRegister scratch =
      zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits())
                     : zd;
  Dup(scratch, imm);

  // The vector-form macro for commutative operations will swap the arguments to
  // avoid movprfx, if necessary.
  (this->*reg_macro)(zd, pg.Merging(), zn, scratch);
}

void MacroAssembler::Mul(const ZRegister& zd,
                         const ZRegister& zn,
                         IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  IntArithImmFn imm_fn = &Assembler::mul;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
}

void MacroAssembler::Smin(const ZRegister& zd,
                          const ZRegister& zn,
                          IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInSignedLane(zd));
  IntArithImmFn imm_fn = &Assembler::smin;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
}

void MacroAssembler::Smax(const ZRegister& zd,
                          const ZRegister& zn,
                          IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInSignedLane(zd));
  IntArithImmFn imm_fn = &Assembler::smax;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
}

void MacroAssembler::Umax(const ZRegister& zd,
                          const ZRegister& zn,
                          IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
  IntArithImmFn imm_fn = &Assembler::umax;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
}

void MacroAssembler::Umin(const ZRegister& zd,
                          const ZRegister& zn,
                          IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
  IntArithImmFn imm_fn = &Assembler::umin;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
}

void MacroAssembler::Addpl(const Register& xd,
                           const Register& xn,
                           int64_t multiplier) {
  VIXL_ASSERT(allow_macro_instructions_);

  // This macro relies on `Rdvl` to handle some out-of-range cases. Check that
  // `VL * multiplier` cannot overflow, for any possible value of VL.
  VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
  VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));

  if (xd.IsZero()) return;
  if (xn.IsZero() && xd.IsSP()) {
    // TODO: This operation doesn't make much sense, but we could support it
    // with a scratch register if necessary.
    VIXL_UNIMPLEMENTED();
  }

  // Handling xzr requires an extra move, so defer it until later so we can try
  // to use `rdvl` instead (via `Addvl`).
  if (IsInt6(multiplier) && !xn.IsZero()) {
    SingleEmissionCheckScope guard(this);
    addpl(xd, xn, static_cast<int>(multiplier));
    return;
  }

  // If `multiplier` is a multiple of 8, we can use `Addvl` instead.
  if ((multiplier % kZRegBitsPerPRegBit) == 0) {
    Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit);
    return;
  }

  if (IsInt6(multiplier)) {
    VIXL_ASSERT(xn.IsZero());  // Other cases were handled with `addpl`.
    // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so
    // materialise a zero.
    MacroEmissionCheckScope guard(this);
    movz(xd, 0);
    addpl(xd, xd, static_cast<int>(multiplier));
    return;
  }

  // TODO: Some probable cases result in rather long sequences. For example,
  // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just
  // outside the encodable range. We should look for ways to cover such cases
  // without drastically increasing the complexity of this logic.

  // For other cases, calculate xn + (PL * multiplier) using discrete
  // instructions. This requires two scratch registers in the general case, so
  // try to re-use the destination as a scratch register.
  UseScratchRegisterScope temps(this);
  temps.Include(xd);
  temps.Exclude(xn);

  Register scratch = temps.AcquireX();
  // There is no `rdpl`, so we have to calculate PL from VL. We can't scale the
  // multiplier because (we already know) it isn't a multiple of 8.
  Rdvl(scratch, multiplier);

  MacroEmissionCheckScope guard(this);
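  // A P register holds VL / 8 bytes, so convert the VL-scaled value to a
  // PL-scaled one by shifting right by kZRegBitsPerPRegBitLog2.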
  if (xn.IsZero()) {
    asr(xd, scratch, kZRegBitsPerPRegBitLog2);
  } else if (xd.IsSP() || xn.IsSP()) {
    // TODO: MacroAssembler::Add should be able to handle this.
    asr(scratch, scratch, kZRegBitsPerPRegBitLog2);
    add(xd, xn, scratch);
  } else {
    add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2));
  }
}

void MacroAssembler::Addvl(const Register& xd,
                           const Register& xn,
                           int64_t multiplier) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(xd.IsX());
  VIXL_ASSERT(xn.IsX());

  // Check that `VL * multiplier` cannot overflow, for any possible value of VL.
  VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
  VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));

  if (xd.IsZero()) return;
  if (xn.IsZero() && xd.IsSP()) {
    // TODO: This operation doesn't make much sense, but we could support it
    // with a scratch register if necessary. `rdvl` cannot write into `sp`.
    VIXL_UNIMPLEMENTED();
  }

  if (IsInt6(multiplier)) {
    SingleEmissionCheckScope guard(this);
    if (xn.IsZero()) {
      rdvl(xd, static_cast<int>(multiplier));
    } else {
      addvl(xd, xn, static_cast<int>(multiplier));
    }
    return;
  }

  // TODO: Some probable cases result in rather long sequences. For example,
  // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just
  // outside the encodable range. We should look for ways to cover such cases
  // without drastically increasing the complexity of this logic.

  // For other cases, calculate xn + (VL * multiplier) using discrete
  // instructions. This requires two scratch registers in the general case, so
  // we try to re-use the destination as a scratch register.
  UseScratchRegisterScope temps(this);
  temps.Include(xd);
  temps.Exclude(xn);

  Register a = temps.AcquireX();
  Mov(a, multiplier);

  MacroEmissionCheckScope guard(this);
  Register b = temps.AcquireX();
  rdvl(b, 1);
  if (xn.IsZero()) {
    mul(xd, a, b);
  } else if (xd.IsSP() || xn.IsSP()) {
    mul(a, a, b);
    add(xd, xn, a);
  } else {
    madd(xd, a, b, xn);
  }
}

void MacroAssembler::CalculateSVEAddress(const Register& xd,
                                         const SVEMemOperand& addr,
                                         int vl_divisor_log2) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!addr.IsScatterGather());
  VIXL_ASSERT(xd.IsX());

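  // `vl_divisor_log2` encodes the in-memory access size relative to VL: each
  // unit of a `MUL VL` offset corresponds to VL / (1 << vl_divisor_log2)
  // bytes.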
  // The lower bound is where a whole Z register is accessed.
  VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0));
  // The upper bound is for P register accesses, and for instructions like
  // "st1b { z0.d }, [...]", where one byte is accessed for every D-sized lane.
  VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2));

  SVEOffsetModifier mod = addr.GetOffsetModifier();
  Register base = addr.GetScalarBase();

  if (addr.IsEquivalentToScalar()) {
    // For example:
    //   [x0]
    //   [x0, #0]
    //   [x0, xzr, LSL 2]
    Mov(xd, base);
  } else if (addr.IsScalarPlusImmediate()) {
    // For example:
    //   [x0, #42]
    //   [x0, #42, MUL VL]
    int64_t offset = addr.GetImmediateOffset();
    VIXL_ASSERT(offset != 0);  // Handled by IsEquivalentToScalar.
    if (addr.IsMulVl()) {
      int vl_divisor = 1 << vl_divisor_log2;
      // For all possible values of vl_divisor, we can simply use `Addpl`. This
      // will select `addvl` if necessary.
      VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0);
      Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor));
    } else {
      // IsScalarPlusImmediate() ensures that no other modifiers can occur.
      VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
      Add(xd, base, offset);
    }
  } else if (addr.IsScalarPlusScalar()) {
    // For example:
    //   [x0, x1]
    //   [x0, x1, LSL #4]
    Register offset = addr.GetScalarOffset();
    VIXL_ASSERT(!offset.IsZero());  // Handled by IsEquivalentToScalar.
    if (mod == SVE_LSL) {
      Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount()));
    } else {
      // IsScalarPlusScalar() ensures that no other modifiers can occur.
      VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
      Add(xd, base, offset);
    }
  } else {
    // All other forms are scatter-gather addresses, which cannot be evaluated
    // into an X register.
    VIXL_UNREACHABLE();
  }
}

void MacroAssembler::Cpy(const ZRegister& zd,
                         const PRegister& pg,
                         IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInLane(zd));
  int imm8;
  int shift;
  if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
      imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
    SingleEmissionCheckScope guard(this);
    cpy(zd, pg, imm8, shift);
    return;
  }

  // The fallbacks rely on `cpy` variants that only support merging predication.
  // If zeroing predication was requested, zero the destination first.
  if (pg.IsZeroing()) {
    SingleEmissionCheckScope guard(this);
    dup(zd, 0);
  }
  PRegisterM pg_m = pg.Merging();

  // Try to encode the immediate using fcpy.
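  // The immediate might not be encodable as a shifted 8-bit integer, but its
  // bit pattern might still be one of the floating-point immediates that fcpy
  // accepts.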
  VIXL_ASSERT(imm.FitsInLane(zd));
  if (zd.GetLaneSizeInBits() >= kHRegSize) {
    double fp_imm = 0.0;
    switch (zd.GetLaneSizeInBits()) {
      case kHRegSize:
        fp_imm =
            FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN);
        break;
      case kSRegSize:
        fp_imm = RawbitsToFloat(imm.AsUint32());
        break;
      case kDRegSize:
        fp_imm = RawbitsToDouble(imm.AsUint64());
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so
    // we can use IsImmFP64 for all lane sizes.
    if (IsImmFP64(fp_imm)) {
      SingleEmissionCheckScope guard(this);
      fcpy(zd, pg_m, fp_imm);
      return;
    }
  }

  // Fall back to using a scratch register.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireRegisterToHoldLane(zd);
  Mov(scratch, imm);

  SingleEmissionCheckScope guard(this);
  cpy(zd, pg_m, scratch);
}

// TODO: We implement Fcpy (amongst other things) for all FP types because it
// allows us to preserve user-specified NaNs. We should come up with some
// FPImmediate type to abstract this, and avoid all the duplication below (and
// elsewhere).

void MacroAssembler::Fcpy(const ZRegister& zd,
                          const PRegisterM& pg,
                          double imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(pg.IsMerging());

  if (IsImmFP64(imm)) {
    SingleEmissionCheckScope guard(this);
    fcpy(zd, pg, imm);
    return;
  }

  // As a fall-back, cast the immediate to the required lane size, and try to
  // encode the bit pattern using `Cpy`.
  Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
}

void MacroAssembler::Fcpy(const ZRegister& zd,
                          const PRegisterM& pg,
                          float imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(pg.IsMerging());

  if (IsImmFP32(imm)) {
    SingleEmissionCheckScope guard(this);
    fcpy(zd, pg, imm);
    return;
  }

  // As a fall-back, cast the immediate to the required lane size, and try to
  // encode the bit pattern using `Cpy`.
  Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
}

void MacroAssembler::Fcpy(const ZRegister& zd,
                          const PRegisterM& pg,
                          Float16 imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(pg.IsMerging());

  if (IsImmFP16(imm)) {
    SingleEmissionCheckScope guard(this);
    fcpy(zd, pg, imm);
    return;
  }

  // As a fall-back, cast the immediate to the required lane size, and try to
  // encode the bit pattern using `Cpy`.
  Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
}

void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInLane(zd));
  unsigned lane_size = zd.GetLaneSizeInBits();
  int imm8;
  int shift;
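  // Try dup (shifted 8-bit immediate) first, then dupm (bitmask immediate),
  // and finally fall back to moving the value via a general-purpose scratch
  // register.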
  if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
      imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
    SingleEmissionCheckScope guard(this);
    dup(zd, imm8, shift);
  } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) {
    SingleEmissionCheckScope guard(this);
    dupm(zd, imm.AsUintN(lane_size));
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireRegisterToHoldLane(zd);
    Mov(scratch, imm);

    SingleEmissionCheckScope guard(this);
    dup(zd, scratch);
  }
}

void MacroAssembler::NoncommutativeArithmeticHelper(
    const ZRegister& zd,
    const PRegisterM& pg,
    const ZRegister& zn,
    const ZRegister& zm,
    SVEArithPredicatedFn fn,
    SVEArithPredicatedFn rev_fn) {
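  // `fn` emits the operation as written, with the first source operand as the
  // destructive operand. `rev_fn` is the reversed variant (e.g. subr for sub),
  // which lets us compute the same result in place when zd aliases zm.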
  if (zd.Aliases(zn)) {
    // E.g. zd = zd / zm
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zd, pg, zn, zm);
  } else if (zd.Aliases(zm)) {
    // E.g. zd = zn / zd
    SingleEmissionCheckScope guard(this);
    (this->*rev_fn)(zd, pg, zm, zn);
  } else {
    // E.g. zd = zn / zm
    MovprfxHelperScope guard(this, zd, pg, zn);
    (this->*fn)(zd, pg, zd, zm);
  }
}

void MacroAssembler::FPCommutativeArithmeticHelper(
    const ZRegister& zd,
    const PRegisterM& pg,
    const ZRegister& zn,
    const ZRegister& zm,
    SVEArithPredicatedFn fn,
    FPMacroNaNPropagationOption nan_option) {
  ResolveFPNaNPropagationOption(&nan_option);

  if (zd.Aliases(zn)) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zd, pg, zd, zm);
  } else if (zd.Aliases(zm)) {
    switch (nan_option) {
      case FastNaNPropagation: {
        // Swap the arguments.
        SingleEmissionCheckScope guard(this);
        (this->*fn)(zd, pg, zd, zn);
        return;
      }
      case StrictNaNPropagation: {
        UseScratchRegisterScope temps(this);
        // Use a scratch register to keep the argument order exactly as
        // specified.
        ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
        {
          MovprfxHelperScope guard(this, scratch, pg, zn);
          (this->*fn)(scratch, pg, scratch, zm);
        }
        Mov(zd, scratch);
        return;
      }
      case NoFPMacroNaNPropagationSelected:
        VIXL_UNREACHABLE();
        return;
    }
  } else {
    MovprfxHelperScope guard(this, zd, pg, zn);
    (this->*fn)(zd, pg, zd, zm);
  }
}

// Instructions of the form "inst zda, zn, zm, #num", where they are
// non-commutative and no reversed form is provided.
#define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \
  V(Cmla, cmla)                              \
  V(Sqrdcmlah, sqrdcmlah)

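// If zd aliases zn or zm (but not za), the movprfx of za into zd would
// clobber that operand, so the macro below copies the aliased operand to a
// scratch register first.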
#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                     \
  void MacroAssembler::MASMFN(const ZRegister& zd,               \
                              const ZRegister& za,               \
                              const ZRegister& zn,               \
                              const ZRegister& zm,               \
                              int imm) {                         \
    if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
      UseScratchRegisterScope temps(this);                       \
      VIXL_ASSERT(AreSameLaneSize(zn, zm));                      \
      ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);  \
      Mov(ztmp, zd.Aliases(zn) ? zn : zm);                       \
      MovprfxHelperScope guard(this, zd, za);                    \
      ASMFN(zd,                                                  \
            (zd.Aliases(zn) ? ztmp : zn),                        \
            (zd.Aliases(zm) ? ztmp : zm),                        \
            imm);                                                \
    } else {                                                     \
      MovprfxHelperScope guard(this, zd, za);                    \
      ASMFN(zd, zn, zm, imm);                                    \
    }                                                            \
  }
VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

// Instructions of the form "inst zda, zn, zm, #num, #num", where they are
// non-commutative and no reversed form is provided.
#define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \
  V(Cmla, cmla)                               \
  V(Sqrdcmlah, sqrdcmlah)

// This doesn't handle zm when it's outside the range that can be encoded in
// the instruction. The range depends on the element size: z0-z7 for H and
// z0-z15 for S.
#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                     \
  void MacroAssembler::MASMFN(const ZRegister& zd,               \
                              const ZRegister& za,               \
                              const ZRegister& zn,               \
                              const ZRegister& zm,               \
                              int index,                         \
                              int rot) {                         \
    if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
      UseScratchRegisterScope temps(this);                       \
      ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);  \
      {                                                          \
        MovprfxHelperScope guard(this, ztmp, za);                \
        ASMFN(ztmp, zn, zm, index, rot);                         \
      }                                                          \
      Mov(zd, ztmp);                                             \
    } else {                                                     \
      MovprfxHelperScope guard(this, zd, za);                    \
      ASMFN(zd, zn, zm, index, rot);                             \
    }                                                            \
  }
VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

// Instructions of the form "inst zda, pg, zda, zn", where they are
// non-commutative and no reversed form is provided.
#define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \
  V(Addp, addp)                             \
  V(Bic, bic)                               \
  V(Faddp, faddp)                           \
  V(Fmaxnmp, fmaxnmp)                       \
  V(Fminnmp, fminnmp)                       \
  V(Fmaxp, fmaxp)                           \
  V(Fminp, fminp)                           \
  V(Fscale, fscale)                         \
  V(Smaxp, smaxp)                           \
  V(Sminp, sminp)                           \
  V(Suqadd, suqadd)                         \
  V(Umaxp, umaxp)                           \
  V(Uminp, uminp)                           \
  V(Usqadd, usqadd)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                       \
  void MacroAssembler::MASMFN(const ZRegister& zd,                 \
                              const PRegisterM& pg,                \
                              const ZRegister& zn,                 \
                              const ZRegister& zm) {               \
    VIXL_ASSERT(allow_macro_instructions_);                        \
    if (zd.Aliases(zm) && !zd.Aliases(zn)) {                       \
      UseScratchRegisterScope temps(this);                         \
      ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \
      Mov(scratch, zm);                                            \
      MovprfxHelperScope guard(this, zd, pg, zn);                  \
      ASMFN(zd, pg, zd, scratch);                                  \
    } else {                                                       \
      MovprfxHelperScope guard(this, zd, pg, zn);                  \
      ASMFN(zd, pg, zd, zm);                                       \
    }                                                              \
  }
VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

// Instructions of the form "inst zda, pg, zda, zn", where they are
// non-commutative and a reversed form is provided.
#define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \
  V(Asr, asr)                                       \
  V(Fdiv, fdiv)                                     \
  V(Fsub, fsub)                                     \
  V(Lsl, lsl)                                       \
  V(Lsr, lsr)                                       \
  V(Sdiv, sdiv)                                     \
  V(Shsub, shsub)                                   \
  V(Sqrshl, sqrshl)                                 \
  V(Sqshl, sqshl)                                   \
  V(Sqsub, sqsub)                                   \
  V(Srshl, srshl)                                   \
  V(Sub, sub)                                       \
  V(Udiv, udiv)                                     \
  V(Uhsub, uhsub)                                   \
  V(Uqrshl, uqrshl)                                 \
  V(Uqshl, uqshl)                                   \
  V(Uqsub, uqsub)                                   \
  V(Urshl, urshl)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                          \
  void MacroAssembler::MASMFN(const ZRegister& zd,                    \
                              const PRegisterM& pg,                   \
                              const ZRegister& zn,                    \
                              const ZRegister& zm) {                  \
    VIXL_ASSERT(allow_macro_instructions_);                           \
    NoncommutativeArithmeticHelper(zd,                                \
                                   pg,                                \
                                   zn,                                \
                                   zm,                                \
                                   static_cast<SVEArithPredicatedFn>( \
                                       &Assembler::ASMFN),            \
                                   static_cast<SVEArithPredicatedFn>( \
                                       &Assembler::ASMFN##r));        \
  }
VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

void MacroAssembler::Fadd(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fadd),
                                nan_option);
}

void MacroAssembler::Fabd(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fabd),
                                nan_option);
}

void MacroAssembler::Fmul(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmul),
                                nan_option);
}

void MacroAssembler::Fmulx(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmulx),
                                nan_option);
}

void MacroAssembler::Fmax(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmax),
                                nan_option);
}

void MacroAssembler::Fmin(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmin),
                                nan_option);
}

void MacroAssembler::Fmaxnm(const ZRegister& zd,
                            const PRegisterM& pg,
                            const ZRegister& zn,
                            const ZRegister& zm,
                            FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmaxnm),
                                nan_option);
}

void MacroAssembler::Fminnm(const ZRegister& zd,
                            const PRegisterM& pg,
                            const ZRegister& zn,
                            const ZRegister& zm,
                            FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fminnm),
                                nan_option);
}

void MacroAssembler::Fdup(const ZRegister& zd, double imm) {
  VIXL_ASSERT(allow_macro_instructions_);

  switch (zd.GetLaneSizeInBits()) {
    case kHRegSize:
      Fdup(zd, Float16(imm));
      break;
    case kSRegSize:
      Fdup(zd, static_cast<float>(imm));
      break;
    case kDRegSize:
      if (IsImmFP64(imm)) {
        SingleEmissionCheckScope guard(this);
        fdup(zd, imm);
      } else {
        Dup(zd, DoubleToRawbits(imm));
      }
      break;
  }
}

void MacroAssembler::Fdup(const ZRegister& zd, float imm) {
  VIXL_ASSERT(allow_macro_instructions_);

  switch (zd.GetLaneSizeInBits()) {
    case kHRegSize:
      Fdup(zd, Float16(imm));
      break;
    case kSRegSize:
      if (IsImmFP32(imm)) {
        SingleEmissionCheckScope guard(this);
        fdup(zd, imm);
      } else {
        Dup(zd, FloatToRawbits(imm));
      }
      break;
    case kDRegSize:
      Fdup(zd, static_cast<double>(imm));
      break;
  }
}

void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) {
  VIXL_ASSERT(allow_macro_instructions_);

  switch (zd.GetLaneSizeInBits()) {
    case kHRegSize:
      if (IsImmFP16(imm)) {
        SingleEmissionCheckScope guard(this);
        fdup(zd, imm);
      } else {
        Dup(zd, Float16ToRawbits(imm));
      }
      break;
    case kSRegSize:
      Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN));
      break;
    case kDRegSize:
      Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
      break;
  }
}

void MacroAssembler::Index(const ZRegister& zd,
                           const Operand& start,
                           const Operand& step) {
  class IndexOperand : public Operand {
   public:
    static IndexOperand Prepare(MacroAssembler* masm,
                                UseScratchRegisterScope* temps,
                                const Operand& op,
                                const ZRegister& zd_inner) {
      // Look for encodable immediates.
      int imm;
      if (op.IsImmediate()) {
        if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) {
          return IndexOperand(imm);
        }
        Register scratch = temps->AcquireRegisterToHoldLane(zd_inner);
        masm->Mov(scratch, op);
        return IndexOperand(scratch);
      } else {
        // Plain registers can be encoded directly.
        VIXL_ASSERT(op.IsPlainRegister());
        return IndexOperand(op.GetRegister());
      }
    }

    int GetImm5() const {
      int64_t imm = GetImmediate();
      VIXL_ASSERT(IsInt5(imm));
      return static_cast<int>(imm);
    }

   private:
    explicit IndexOperand(const Register& reg) : Operand(reg) {}
    explicit IndexOperand(int64_t imm) : Operand(imm) {}
  };

  UseScratchRegisterScope temps(this);
  IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd);
  IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd);

  SingleEmissionCheckScope guard(this);
  if (start_enc.IsImmediate()) {
    if (step_enc.IsImmediate()) {
      index(zd, start_enc.GetImm5(), step_enc.GetImm5());
    } else {
      index(zd, start_enc.GetImm5(), step_enc.GetRegister());
    }
  } else {
    if (step_enc.IsImmediate()) {
      index(zd, start_enc.GetRegister(), step_enc.GetImm5());
    } else {
      index(zd, start_enc.GetRegister(), step_enc.GetRegister());
    }
  }
}

void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInLane(zdn));

  if (imm.IsZero()) {
    SingleEmissionCheckScope guard(this);
    insr(zdn, xzr);
    return;
  }

  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireRegisterToHoldLane(zdn);

  // TODO: There are many cases where we could optimise immediates, such as by
  // detecting repeating patterns or FP immediates. We should optimise and
  // abstract this for use in other SVE mov-immediate-like macros.
  Mov(scratch, imm);

  SingleEmissionCheckScope guard(this);
  insr(zdn, scratch);
}

void MacroAssembler::Mla(const ZRegister& zd,
                         const PRegisterM& pg,
                         const ZRegister& za,
                         const ZRegister& zn,
                         const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(za)) {
    // zda = zda + (zn * zm)
    SingleEmissionCheckScope guard(this);
    mla(zd, pg, zn, zm);
  } else if (zd.Aliases(zn)) {
    // zdn = za + (zdn * zm)
    SingleEmissionCheckScope guard(this);
    mad(zd, pg, zm, za);
  } else if (zd.Aliases(zm)) {
    // Multiplication is commutative, so we can swap zn and zm.
    // zdm = za + (zdm * zn)
    SingleEmissionCheckScope guard(this);
    mad(zd, pg, zn, za);
  } else {
    // zd = za + (zn * zm)
    ExactAssemblyScope guard(this, 2 * kInstructionSize);
    movprfx(zd, pg, za);
    mla(zd, pg, zn, zm);
  }
}

void MacroAssembler::Mls(const ZRegister& zd,
                         const PRegisterM& pg,
                         const ZRegister& za,
                         const ZRegister& zn,
                         const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(za)) {
    // zda = zda - (zn * zm)
    SingleEmissionCheckScope guard(this);
    mls(zd, pg, zn, zm);
  } else if (zd.Aliases(zn)) {
    // zdn = za - (zdn * zm)
    SingleEmissionCheckScope guard(this);
    msb(zd, pg, zm, za);
  } else if (zd.Aliases(zm)) {
    // Multiplication is commutative, so we can swap zn and zm.
    // zdm = za - (zdm * zn)
    SingleEmissionCheckScope guard(this);
    msb(zd, pg, zn, za);
  } else {
    // zd = za - (zn * zm)
    ExactAssemblyScope guard(this, 2 * kInstructionSize);
    movprfx(zd, pg, za);
    mls(zd, pg, zn, zm);
  }
}

void MacroAssembler::CompareHelper(Condition cond,
                                   const PRegisterWithLaneSize& pd,
                                   const PRegisterZ& pg,
                                   const ZRegister& zn,
                                   IntegerOperand imm) {
  UseScratchRegisterScope temps(this);
  ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
  Dup(zm, imm);
  SingleEmissionCheckScope guard(this);
  cmp(cond, pd, pg, zn, zm);
}

void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd,
                            const PRegister& pg,
                            const PRegisterWithLaneSize& pn) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(pd.IsLaneSizeB());
  VIXL_ASSERT(pn.IsLaneSizeB());
  if (pd.Is(pn)) {
    SingleEmissionCheckScope guard(this);
    pfirst(pd, pg, pn);
  } else {
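    // `pfirst` is destructive (it reads and writes pd), so copy pn into pd
    // first. If pd aliases pg, preserve the governing predicate in a scratch
    // register so that the copy doesn't clobber it.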
    UseScratchRegisterScope temps(this);
    PRegister temp_pg = pg;
    if (pd.Aliases(pg)) {
      temp_pg = temps.AcquireP();
      Mov(temp_pg.VnB(), pg.VnB());
    }
    Mov(pd, pn);
    SingleEmissionCheckScope guard(this);
    pfirst(pd, temp_pg, pd);
  }
}

void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd,
                           const PRegister& pg,
                           const PRegisterWithLaneSize& pn) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(AreSameFormat(pd, pn));
  if (pd.Is(pn)) {
    SingleEmissionCheckScope guard(this);
    pnext(pd, pg, pn);
  } else {
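    // As for Pfirst: `pnext` is destructive, so copy pn into pd first,
    // preserving pg in a scratch register if it aliases pd.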
    UseScratchRegisterScope temps(this);
    PRegister temp_pg = pg;
    if (pd.Aliases(pg)) {
      temp_pg = temps.AcquireP();
      Mov(temp_pg.VnB(), pg.VnB());
    }
    Mov(pd.VnB(), pn.VnB());
    SingleEmissionCheckScope guard(this);
    pnext(pd, temp_pg, pd);
  }
}

void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd,
                           SVEPredicateConstraint pattern,
                           FlagsUpdate s) {
  VIXL_ASSERT(allow_macro_instructions_);
  switch (s) {
    case LeaveFlags:
      Ptrue(pd, pattern);
      return;
    case SetFlags:
      Ptrues(pd, pattern);
      return;
  }
  VIXL_UNREACHABLE();
}

void MacroAssembler::Sub(const ZRegister& zd,
                         IntegerOperand imm,
                         const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);

  int imm8;
  int shift = -1;
  if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
      imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
    MovprfxHelperScope guard(this, zd, zm);
    subr(zd, zd, imm8, shift);
  } else {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits());
    Dup(scratch, imm);

    SingleEmissionCheckScope guard(this);
    sub(zd, scratch, zm);
  }
}

void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt,
                                               const PRegisterZ& pg,
                                               const SVEMemOperand& addr,
                                               SVELoadBroadcastFn fn,
                                               int divisor) {
  VIXL_ASSERT(addr.IsScalarPlusImmediate());
  int64_t imm = addr.GetImmediateOffset();
  if ((imm % divisor == 0) && IsUint6(imm / divisor)) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireX();
    CalculateSVEAddress(scratch, addr, zt);
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, SVEMemOperand(scratch));
  }
}

void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt,
                                                 const SVEMemOperand& addr,
                                                 SVELoadStoreFn fn) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister());

  if (addr.IsPlainScalar() ||
      (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) &&
       addr.IsMulVl())) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(rt, addr);
    return;
  }

  if (addr.IsEquivalentToScalar()) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase()));
    return;
  }

  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  CalculateSVEAddress(scratch, addr, rt);
  SingleEmissionCheckScope guard(this);
  (this->*fn)(rt, SVEMemOperand(scratch));
}

template <typename Tg, typename Tf>
void MacroAssembler::SVELoadStoreNTBroadcastQOHelper(
    const ZRegister& zt,
    const Tg& pg,
    const SVEMemOperand& addr,
    Tf fn,
    int imm_bits,
    int shift_amount,
    SVEOffsetModifier supported_modifier,
    int vl_divisor_log2) {
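  // `imm_bits` and `shift_amount` describe the scalar-plus-immediate offsets
  // that this instruction can encode. `vl_divisor_log2` is only used when
  // falling back to CalculateSVEAddress; -1 means that the in-memory access
  // size does not depend on VL.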
  VIXL_ASSERT(allow_macro_instructions_);
  int imm_divisor = 1 << shift_amount;

  if (addr.IsPlainScalar() ||
      (addr.IsScalarPlusImmediate() &&
       IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) &&
       ((addr.GetImmediateOffset() % imm_divisor) == 0) &&
       (addr.GetOffsetModifier() == supported_modifier))) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
      addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  if (addr.IsEquivalentToScalar()) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
    return;
  }

  if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) &&
      (vl_divisor_log2 == -1)) {
    // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL
    // dependent.
    VIXL_UNIMPLEMENTED();
  }

  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  CalculateSVEAddress(scratch, addr, vl_divisor_log2);
  SingleEmissionCheckScope guard(this);
  (this->*fn)(zt, pg, SVEMemOperand(scratch));
}

template <typename Tg, typename Tf>
void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2,
                                         const ZRegister& zt,
                                         const Tg& pg,
                                         const SVEMemOperand& addr,
                                         Tf fn) {
  if (addr.IsPlainScalar() ||
      (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
       addr.IsEquivalentToLSL(msize_in_bytes_log2)) ||
      (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) &&
       addr.IsMulVl())) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  if (addr.IsEquivalentToScalar()) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
    return;
  }

  if (addr.IsVectorPlusImmediate()) {
    uint64_t offset = addr.GetImmediateOffset();
    if (IsMultiple(offset, (1 << msize_in_bytes_log2)) &&
        IsUint5(offset >> msize_in_bytes_log2)) {
      SingleEmissionCheckScope guard(this);
      (this->*fn)(zt, pg, addr);
      return;
    }
  }

  if (addr.IsScalarPlusVector()) {
    VIXL_ASSERT(addr.IsScatterGather());
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  UseScratchRegisterScope temps(this);
  if (addr.IsScatterGather()) {
    // In scatter-gather modes, zt and zn/zm have the same lane size. However,
    // for 32-bit accesses, the result of each lane's address calculation still
    // requires 64 bits; we can't naively use `Adr` for the address calculation
    // because it would truncate each address to 32 bits.

    if (addr.IsVectorPlusImmediate()) {
      // Synthesise the immediate in an X register, then use a
      // scalar-plus-vector access with the original vector.
      Register scratch = temps.AcquireX();
      Mov(scratch, addr.GetImmediateOffset());
      SingleEmissionCheckScope guard(this);
      SVEOffsetModifier om =
          zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER;
      (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om));
      return;
    }

    VIXL_UNIMPLEMENTED();
  } else {
    Register scratch = temps.AcquireX();
    // TODO: If we have an immediate offset that is a multiple of
    // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to
    // save an instruction.
    int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2;
    CalculateSVEAddress(scratch, addr, vl_divisor_log2);
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, SVEMemOperand(scratch));
  }
}

template <typename Tf>
void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2,
                                     const ZRegister& zt,
                                     const PRegisterZ& pg,
                                     const SVEMemOperand& addr,
                                     Tf fn) {
  if (addr.IsScatterGather()) {
    // Scatter-gather first-fault loads share encodings with normal loads.
    SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn);
    return;
  }

  // Contiguous first-faulting loads have no scalar-plus-immediate form at all,
  // so we don't do immediate synthesis.

  // We cannot currently distinguish "[x0]" from "[x0, #0]", and this
  // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here.
  if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() &&
                               addr.IsEquivalentToLSL(msize_in_bytes_log2))) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  VIXL_UNIMPLEMENTED();
}

void MacroAssembler::Ld1b(const ZRegister& zt,
                          const PRegisterZ& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kBRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1b));
}

void MacroAssembler::Ld1h(const ZRegister& zt,
                          const PRegisterZ& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kHRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1h));
}

void MacroAssembler::Ld1w(const ZRegister& zt,
                          const PRegisterZ& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kWRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1w));
}

void MacroAssembler::Ld1d(const ZRegister& zt,
                          const PRegisterZ& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kDRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1d));
}

void MacroAssembler::Ld1sb(const ZRegister& zt,
                           const PRegisterZ& pg,
                           const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kBRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1sb));
}

void MacroAssembler::Ld1sh(const ZRegister& zt,
                           const PRegisterZ& pg,
                           const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kHRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1sh));
}

void MacroAssembler::Ld1sw(const ZRegister& zt,
                           const PRegisterZ& pg,
                           const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kSRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1sw));
}

void MacroAssembler::St1b(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kBRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVEStore1Fn>(&Assembler::st1b));
}

void MacroAssembler::St1h(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kHRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVEStore1Fn>(&Assembler::st1h));
}

void MacroAssembler::St1w(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kSRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVEStore1Fn>(&Assembler::st1w));
}

void MacroAssembler::St1d(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kDRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVEStore1Fn>(&Assembler::st1d));
}

void MacroAssembler::Ldff1b(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kBRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1b));
}

void MacroAssembler::Ldff1h(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kHRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1h));
}

void MacroAssembler::Ldff1w(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kSRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1w));
}

void MacroAssembler::Ldff1d(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kDRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1d));
}

void MacroAssembler::Ldff1sb(const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kBRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1sb));
}

void MacroAssembler::Ldff1sh(const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kHRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1sh));
}

void MacroAssembler::Ldff1sw(const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kSRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1sw));
}

#define VIXL_SVE_LD1R_LIST(V) \
  V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5)
1505
1506#define VIXL_DEFINE_MASM_FUNC(SZ, SH)                          \
1507  void MacroAssembler::Ld1r##SZ(const ZRegister& zt,           \
1508                                const PRegisterZ& pg,          \
1509                                const SVEMemOperand& addr) {   \
1510    VIXL_ASSERT(allow_macro_instructions_);                    \
1511    SVELoadStoreNTBroadcastQOHelper(zt,                        \
1512                                    pg,                        \
1513                                    addr,                      \
1514                                    &MacroAssembler::ld1r##SZ, \
1515                                    4,                         \
1516                                    SH,                        \
1517                                    NO_SVE_OFFSET_MODIFIER,    \
1518                                    -1);                       \
1519  }
1520
1521VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC)
1522
1523#undef VIXL_DEFINE_MASM_FUNC
1524#undef VIXL_SVE_LD1R_LIST
1525
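// For reference, the list above generates one thin wrapper per
// load-and-replicate form; for example, V(qb, 4) expands to (roughly):
//
//   void MacroAssembler::Ld1rqb(const ZRegister& zt,
//                               const PRegisterZ& pg,
//                               const SVEMemOperand& addr) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::ld1rqb,
//                                     4, 4, NO_SVE_OFFSET_MODIFIER, -1);
//   }
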
void MacroAssembler::Ldnt1b(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    ldnt1b(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::ldnt1b,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Ldnt1d(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    ldnt1d(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::ldnt1d,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Ldnt1h(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    ldnt1h(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::ldnt1h,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Ldnt1w(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    ldnt1w(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::ldnt1w,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Stnt1b(const ZRegister& zt,
                            const PRegister& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    stnt1b(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::stnt1b,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Stnt1d(const ZRegister& zt,
                            const PRegister& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    stnt1d(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::stnt1d,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Stnt1h(const ZRegister& zt,
                            const PRegister& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    stnt1h(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::stnt1h,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Stnt1w(const ZRegister& zt,
                            const PRegister& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    stnt1w(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::stnt1w,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

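// Note (illustrative, register choices are arbitrary): the Ldnt1*/Stnt1*
// wrappers above emit a single instruction for the SVE2 vector-plus-scalar
// addressing mode, for example:
//
//   ldnt1b { z0.d }, p0/z, [z1.d, x2]
//
// Every other addressing mode, including "#<imm>, mul vl" offsets, is routed
// through SVELoadStoreNTBroadcastQOHelper.
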
void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn,
                                       const ZRegister& zd,
                                       const ZRegister& za,
                                       const ZRegister& zn,
                                       const ZRegister& zm,
                                       int index) {
  if (zd.Aliases(za)) {
    // zda = zda + (zn . zm)
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zd, zn, zm, index);

  } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
    // zdn = za + (zdn . zm[index])
    // zdm = za + (zn . zdm[index])
    // zdnm = za + (zdnm . zdnm[index])
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, za);
      (this->*fn)(scratch, zn, zm, index);
    }

    Mov(zd, scratch);
  } else {
    // zd = za + (zn . zm)
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, zm, index);
  }
}

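// Illustrative sketch (register choices are arbitrary): when the destination
// of an indexed dot product aliases a multiplicand but not the accumulator,
// SVEDotIndexHelper builds the result in a scratch register. For example,
// Sdot(z0.VnS(), z1.VnS(), z0.VnB(), z2.VnB(), 1) emits roughly:
//
//   movprfx z31, z1               // scratch = accumulator
//   sdot    z31.s, z0.b, z2.b[1]  // accumulate the indexed dot product
//   mov     z0.d, z31.d           // copy the result into zd
//
// When zd aliases za, or aliases neither input, no scratch register is needed.
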
void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn,
                                              const ZRegister& zd,
                                              const ZRegister& za,
                                              const ZRegister& zn,
                                              const ZRegister& zm) {
  if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
    // zd = za . zd . zm
    // zd = za . zn . zd
    // zd = za . zd . zd
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, za);
      (this->*fn)(scratch, zn, zm);
    }

    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, zm);
  }
}

void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn,
                                              const ZRegister& zd,
                                              const ZRegister& za,
                                              const ZRegister& zn,
                                              const ZRegister& zm) {
  if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
    // zd = za . zd . zm
    // zd = za . zn . zd
    // zd = za . zd . zd
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, za);
      (this->*fn)(scratch, scratch, zn, zm);
    }

    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zd, zn, zm);
  }
}

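// Illustrative sketch (register choices are arbitrary): both overloads of
// FourRegDestructiveHelper implement the same strategy for accumulating,
// destructive instructions. When zd does not alias any input, the accumulator
// is moved into place and the instruction is emitted directly; for example,
// Udot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB()) emits roughly:
//
//   movprfx z0, z1
//   udot    z0.s, z2.b, z3.b
//
// When zd aliases zn or zm (but not za), the same sequence targets a scratch
// register and the result is copied back with Mov(zd, scratch).
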
void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn,
                                                    const ZRegister& zd,
                                                    const ZRegister& za,
                                                    const ZRegister& zn,
                                                    const ZRegister& zm,
                                                    int imm) {
  if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
    // zd = za . zd . zm[i]
    // zd = za . zn . zd[i]
    // zd = za . zd . zd[i]
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, za);
      (this->*fn)(scratch, zn, zm, imm);
    }

    Mov(zd, scratch);
  } else {
    // zd = za . zn . zm[i]
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, zm, imm);
  }
}

void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn,
                                                  const ZRegister& zd,
                                                  const ZRegister& za,
                                                  const ZRegister& zn,
                                                  const ZRegister& zm) {
  if (zn.Aliases(zm)) {
    // If zn == zm, the difference is zero.
    if (!zd.Aliases(za)) {
      Mov(zd, za);
    }
  } else if (zd.Aliases(za)) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zd, zn, zm);
  } else if (zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
    Mov(ztmp, zn);
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, ztmp, zm);
  } else if (zd.Aliases(zm)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
    Mov(ztmp, zm);
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, ztmp);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, zm);
  }
}

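// Illustrative sketch (register choices are arbitrary):
// AbsoluteDifferenceAccumulate special-cases identical inputs, since |zn - zm|
// is then zero and only the accumulator needs to be preserved. For example,
// Saba(z0.VnB(), z1.VnB(), z2.VnB(), z2.VnB()) emits roughly:
//
//   mov z0.d, z1.d    // no saba is emitted
//
// Otherwise, an input that aliases zd is first copied to a scratch register so
// that the accumulator can be moved into zd with movprfx.
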
#define VIXL_SVE_4REG_LIST(V)                       \
  V(Saba, saba, AbsoluteDifferenceAccumulate)       \
  V(Uaba, uaba, AbsoluteDifferenceAccumulate)       \
  V(Sabalb, sabalb, AbsoluteDifferenceAccumulate)   \
  V(Sabalt, sabalt, AbsoluteDifferenceAccumulate)   \
  V(Uabalb, uabalb, AbsoluteDifferenceAccumulate)   \
  V(Uabalt, uabalt, AbsoluteDifferenceAccumulate)   \
  V(Sdot, sdot, FourRegDestructiveHelper)           \
  V(Udot, udot, FourRegDestructiveHelper)           \
  V(Adclb, adclb, FourRegDestructiveHelper)         \
  V(Adclt, adclt, FourRegDestructiveHelper)         \
  V(Sbclb, sbclb, FourRegDestructiveHelper)         \
  V(Sbclt, sbclt, FourRegDestructiveHelper)         \
  V(Smlalb, smlalb, FourRegDestructiveHelper)       \
  V(Smlalt, smlalt, FourRegDestructiveHelper)       \
  V(Smlslb, smlslb, FourRegDestructiveHelper)       \
  V(Smlslt, smlslt, FourRegDestructiveHelper)       \
  V(Umlalb, umlalb, FourRegDestructiveHelper)       \
  V(Umlalt, umlalt, FourRegDestructiveHelper)       \
  V(Umlslb, umlslb, FourRegDestructiveHelper)       \
  V(Umlslt, umlslt, FourRegDestructiveHelper)       \
  V(Bcax, bcax, FourRegDestructiveHelper)           \
  V(Bsl, bsl, FourRegDestructiveHelper)             \
  V(Bsl1n, bsl1n, FourRegDestructiveHelper)         \
  V(Bsl2n, bsl2n, FourRegDestructiveHelper)         \
  V(Eor3, eor3, FourRegDestructiveHelper)           \
  V(Nbsl, nbsl, FourRegDestructiveHelper)           \
  V(Fmlalb, fmlalb, FourRegDestructiveHelper)       \
  V(Fmlalt, fmlalt, FourRegDestructiveHelper)       \
  V(Fmlslb, fmlslb, FourRegDestructiveHelper)       \
  V(Fmlslt, fmlslt, FourRegDestructiveHelper)       \
  V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper)   \
  V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \
  V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper)   \
  V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper)   \
  V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \
  V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper)   \
  V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper)   \
  V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper)   \
  V(Fmmla, fmmla, FourRegDestructiveHelper)         \
  V(Smmla, smmla, FourRegDestructiveHelper)         \
  V(Ummla, ummla, FourRegDestructiveHelper)         \
  V(Usmmla, usmmla, FourRegDestructiveHelper)       \
  V(Usdot, usdot, FourRegDestructiveHelper)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
  void MacroAssembler::MASMFN(const ZRegister& zd,   \
                              const ZRegister& za,   \
                              const ZRegister& zn,   \
                              const ZRegister& zm) { \
    VIXL_ASSERT(allow_macro_instructions_);          \
    HELPER(&Assembler::ASMFN, zd, za, zn, zm);       \
  }
VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

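// For reference, each (MASMFN, ASMFN, HELPER) entry in the list above expands
// to a thin wrapper; for example, V(Saba, saba, AbsoluteDifferenceAccumulate)
// produces (roughly):
//
//   void MacroAssembler::Saba(const ZRegister& zd,
//                             const ZRegister& za,
//                             const ZRegister& zn,
//                             const ZRegister& zm) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     AbsoluteDifferenceAccumulate(&Assembler::saba, zd, za, zn, zm);
//   }
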
#define VIXL_SVE_4REG_1IMM_LIST(V)                      \
  V(Fmla, fmla, FourRegOneImmDestructiveHelper)         \
  V(Fmls, fmls, FourRegOneImmDestructiveHelper)         \
  V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper)     \
  V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper)     \
  V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper)     \
  V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper)     \
  V(Mla, mla, FourRegOneImmDestructiveHelper)           \
  V(Mls, mls, FourRegOneImmDestructiveHelper)           \
  V(Smlalb, smlalb, FourRegOneImmDestructiveHelper)     \
  V(Smlalt, smlalt, FourRegOneImmDestructiveHelper)     \
  V(Smlslb, smlslb, FourRegOneImmDestructiveHelper)     \
  V(Smlslt, smlslt, FourRegOneImmDestructiveHelper)     \
  V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \
  V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \
  V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \
  V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \
  V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \
  V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \
  V(Umlalb, umlalb, FourRegOneImmDestructiveHelper)     \
  V(Umlalt, umlalt, FourRegOneImmDestructiveHelper)     \
  V(Umlslb, umlslb, FourRegOneImmDestructiveHelper)     \
  V(Umlslt, umlslt, FourRegOneImmDestructiveHelper)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
  void MacroAssembler::MASMFN(const ZRegister& zd,   \
                              const ZRegister& za,   \
                              const ZRegister& zn,   \
                              const ZRegister& zm,   \
                              int imm) {             \
    VIXL_ASSERT(allow_macro_instructions_);          \
    HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm);  \
  }
VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

void MacroAssembler::Sdot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index);
}

void MacroAssembler::Udot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index);
}

void MacroAssembler::Sudot(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index);
}

void MacroAssembler::Usdot(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index);
}

void MacroAssembler::Cdot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int index,
                          int rot) {
  // This does not handle zm registers outside the range that can be encoded in
  // the instruction. The encodable range depends on the source lane size:
  // z0-z7 for B-sized lanes, z0-z15 for H-sized lanes.
  if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, ztmp, za);
      cdot(ztmp, zn, zm, index, rot);
    }
    Mov(zd, ztmp);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    cdot(zd, zn, zm, index, rot);
  }
}

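// Illustrative sketch (register choices are arbitrary): when zd aliases a
// multiplicand but not the accumulator, the indexed complex dot product above
// is built in a scratch register. For example,
// Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z2.VnB(), 1, 90) emits roughly:
//
//   movprfx z31, z1
//   cdot    z31.s, z0.b, z2.b[1], #90
//   mov     z0.d, z31.d
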
void MacroAssembler::Cdot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int rot) {
  if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
    UseScratchRegisterScope temps(this);
    VIXL_ASSERT(AreSameLaneSize(zn, zm));
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
    Mov(ztmp, zd.Aliases(zn) ? zn : zm);
    MovprfxHelperScope guard(this, zd, za);
    cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    cdot(zd, zn, zm, rot);
  }
}

void MacroAssembler::FPMulAddHelper(const ZRegister& zd,
                                    const PRegisterM& pg,
                                    const ZRegister& za,
                                    const ZRegister& zn,
                                    const ZRegister& zm,
                                    SVEMulAddPredicatedZdaFn fn_zda,
                                    SVEMulAddPredicatedZdnFn fn_zdn,
                                    FPMacroNaNPropagationOption nan_option) {
  ResolveFPNaNPropagationOption(&nan_option);

  if (zd.Aliases(za)) {
    // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
    SingleEmissionCheckScope guard(this);
    (this->*fn_zda)(zd, pg, zn, zm);
  } else if (zd.Aliases(zn)) {
    // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb.
    SingleEmissionCheckScope guard(this);
    (this->*fn_zdn)(zd, pg, zm, za);
  } else if (zd.Aliases(zm)) {
    switch (nan_option) {
      case FastNaNPropagation: {
        // In fast NaN-propagation mode, multiplication is treated as
        // commutative, so zn and zm can be swapped.
        // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb.
        SingleEmissionCheckScope guard(this);
        (this->*fn_zdn)(zd, pg, zn, za);
        return;
      }
      case StrictNaNPropagation: {
        UseScratchRegisterScope temps(this);
        // Use a scratch register to keep the argument order exactly as
        // specified.
        ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
        {
          MovprfxHelperScope guard(this, scratch, pg, za);
          // scratch = (-)za + ((-)zn * zm)
          (this->*fn_zda)(scratch, pg, zn, zm);
        }
        Mov(zd, scratch);
        return;
      }
      case NoFPMacroNaNPropagationSelected:
        VIXL_UNREACHABLE();
        return;
    }
  } else {
    // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
    MovprfxHelperScope guard(this, zd, pg, za);
    (this->*fn_zda)(zd, pg, zn, zm);
  }
}

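// Illustrative sketch (register choices are arbitrary, lane-size suffixes
// omitted): FPMulAddHelper chooses between the accumulating (fmla-style) and
// multiplicand-destructive (fmad-style) forms according to which operand zd
// aliases. For Fmla(zd, pg.Merging(), za, zn, zm), this gives roughly:
//
//   zd == za:  Fmla(z0, p0, z0, z1, z2)  ->  fmla z0, p0/m, z1, z2
//   zd == zn:  Fmla(z0, p0, z1, z0, z2)  ->  fmad z0, p0/m, z2, z1
//   no alias:  Fmla(z0, p0, z1, z2, z3)  ->  movprfx z0, p0/m, z1
//                                            fmla    z0, p0/m, z2, z3
//
// When zd aliases zm, FastNaNPropagation allows zn and zm to be swapped so a
// single fmad suffices; StrictNaNPropagation instead goes through a scratch
// register to preserve the operand order as written.
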
void MacroAssembler::Fmla(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fmla,
                 &Assembler::fmad,
                 nan_option);
}

void MacroAssembler::Fmls(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fmls,
                 &Assembler::fmsb,
                 nan_option);
}

void MacroAssembler::Fnmla(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fnmla,
                 &Assembler::fnmad,
                 nan_option);
}

void MacroAssembler::Fnmls(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fnmls,
                 &Assembler::fnmsb,
                 nan_option);
}

void MacroAssembler::Ftmad(const ZRegister& zd,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int imm3) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm);
    Mov(scratch, zm);
    MovprfxHelperScope guard(this, zd, zn);
    ftmad(zd, zd, scratch, imm3);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    ftmad(zd, zd, zm, imm3);
  }
}

void MacroAssembler::Fcadd(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int rot) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, pg, zn);
      fcadd(scratch, pg, scratch, zm, rot);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, pg, zn);
    fcadd(zd, pg, zd, zm, rot);
  }
}

void MacroAssembler::Fcmla(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int rot) {
  VIXL_ASSERT(allow_macro_instructions_);
  if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, ztmp, za);
      fcmla(ztmp, pg, zn, zm, rot);
    }
    Mov(zd, pg, ztmp);
  } else {
    MovprfxHelperScope guard(this, zd, pg, za);
    fcmla(zd, pg, zn, zm, rot);
  }
}

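// Illustrative note (register choices are arbitrary): unlike the unpredicated
// helpers, the scratch path in Fcmla above merges the result back under the
// governing predicate, so inactive lanes of zd are not overwritten by the
// scratch register. For example,
// Fcmla(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS(), z2.VnS(), 90) emits
// roughly:
//
//   movprfx z31, z1
//   fcmla   z31.s, p0/m, z0.s, z2.s, #90
//   mov     z0.s, p0/m, z31.s
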
void MacroAssembler::Splice(const ZRegister& zd,
                            const PRegister& pg,
                            const ZRegister& zn,
                            const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) {
    SingleEmissionCheckScope guard(this);
    splice(zd, pg, zn, zm);
  } else if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, zn);
      splice(scratch, pg, scratch, zm);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    splice(zd, pg, zd, zm);
  }
}

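// Illustrative note (register choices are arbitrary): with SVE2, Splice above
// can use the constructive form directly when zn and zm are consecutive and zd
// does not alias zn, for example:
//
//   Splice(z0.VnB(), p0, z1.VnB(), z2.VnB())
//   // -> splice z0.b, p0, { z1.b, z2.b }
//
// Otherwise the destructive form is used, prefixed with movprfx, or routed
// through a scratch register when zd aliases zm but not zn.
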
void MacroAssembler::Clasta(const ZRegister& zd,
                            const PRegister& pg,
                            const ZRegister& zn,
                            const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, zn);
      clasta(scratch, pg, scratch, zm);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    clasta(zd, pg, zd, zm);
  }
}

void MacroAssembler::Clastb(const ZRegister& zd,
                            const PRegister& pg,
                            const ZRegister& zn,
                            const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, zn);
      clastb(scratch, pg, scratch, zm);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    clastb(zd, pg, zd, zm);
  }
}

void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn,
                                          const ZRegister& zd,
                                          const ZRegister& za,
                                          const ZRegister& zn,
                                          int shift) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (!zd.Aliases(za) && zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
    Mov(ztmp, zn);
    {
      MovprfxHelperScope guard(this, zd, za);
      (this->*fn)(zd, ztmp, shift);
    }
  } else {
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, shift);
  }
}

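// Illustrative sketch (register choices are arbitrary): ShiftRightAccumulate
// above, and ComplexAddition below, handle the case where zd aliases the
// non-accumulator input by saving that input first. For example,
// Ssra(z0.VnS(), z1.VnS(), z0.VnS(), 3) emits roughly:
//
//   mov     z31.d, z0.d
//   movprfx z0, z1
//   ssra    z0.s, z31.s, #3
//
// In every other case a single movprfx (if needed) followed by the destructive
// instruction is enough.
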
void MacroAssembler::Srsra(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           int shift) {
  ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift);
}

void MacroAssembler::Ssra(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          int shift) {
  ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift);
}

void MacroAssembler::Ursra(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           int shift) {
  ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift);
}

void MacroAssembler::Usra(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          int shift) {
  ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift);
}

void MacroAssembler::ComplexAddition(ZZZImmFn fn,
                                     const ZRegister& zd,
                                     const ZRegister& zn,
                                     const ZRegister& zm,
                                     int rot) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (!zd.Aliases(zn) && zd.Aliases(zm)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm);
    Mov(ztmp, zm);
    {
      MovprfxHelperScope guard(this, zd, zn);
      (this->*fn)(zd, zd, ztmp, rot);
    }
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    (this->*fn)(zd, zd, zm, rot);
  }
}

void MacroAssembler::Cadd(const ZRegister& zd,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int rot) {
  ComplexAddition(&Assembler::cadd, zd, zn, zm, rot);
}

void MacroAssembler::Sqcadd(const ZRegister& zd,
                            const ZRegister& zn,
                            const ZRegister& zm,
                            int rot) {
  ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot);
}

}  // namespace aarch64
}  // namespace vixl