1// Copyright 2017, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include <cstdio>
28#include <cstring>
29#include <string>
30
31#include "test-runner.h"
32#include "test-utils.h"
33
34#include "aarch64/assembler-aarch64.h"
35#include "aarch64/instructions-aarch64.h"
36#include "aarch64/test-utils-aarch64.h"
37
38#define __ assm.
39#define TEST(name) TEST_(AARCH64_API_##name)
40
41namespace vixl {
42namespace aarch64 {
43
44class InstructionReporter : public DecoderVisitor {
45 public:
46  InstructionReporter() : DecoderVisitor(kNonConstVisitor) {}
47
48  void Visit(Metadata* metadata, const Instruction* instr) VIXL_OVERRIDE {
49    USE(instr);
50    instr_form_ = (*metadata)["form"];
51  }
52
53  std::string MoveForm() { return instr_form_; }
54
55 private:
56  std::string instr_form_;
57};
58
59static void CheckAndMaybeDisassembleMovprfxPairs(const CodeBuffer* buffer,
60                                                 bool can_take_movprfx) {
61  const Instruction* pair = buffer->GetStartAddress<Instruction*>();
62  const Instruction* end = buffer->GetEndAddress<Instruction*>();
63  bool any_failures = false;
64  PrintDisassembler print_disasm(stdout);
65  Decoder decoder;
66  InstructionReporter reporter;
67  decoder.AppendVisitor(&reporter);
68
69  while (pair < end) {
70    const Instruction* movprfx = pair;
71    const Instruction* candidate = pair->GetNextInstruction();
72    const Instruction* next_pair = candidate->GetNextInstruction();
73    VIXL_ASSERT(candidate < end);
74
75    Instr inst = candidate->GetInstructionBits();
76    decoder.Decode(reinterpret_cast<Instruction*>(&inst));
77    std::string form = reporter.MoveForm();
78    bool failed =
79        can_take_movprfx != candidate->CanTakeSVEMovprfx(form.c_str(), movprfx);
80    any_failures = any_failures || failed;
81
82    if (failed || Test::disassemble()) {
83      printf("----\n");
84      if (failed) {
85        printf("# ERROR: Expected %sCanTakeSVEMovprfx(movprfx):\n",
86               can_take_movprfx ? "" : "!");
87      }
88      print_disasm.DisassembleBuffer(pair, next_pair);
89    }
90
91    pair = next_pair;
92  }
93  // Abort only at the end, so we can see the individual failures.
94  VIXL_CHECK(!any_failures);
95}
96
TEST(movprfx_negative_aliasing) {
  // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not
  // alias an input to the prefixed instruction.
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
    static const size_t kPairCount = 79;
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    // In every pair below, the movprfx destination register also appears as a
    // source operand of the prefixed instruction (beyond its role as the
    // destructive destination), so each pair must be rejected.
    __ movprfx(z0.VnB(), p0.Merging(), z9.VnB());
    __ abs(z0.VnB(), p0.Merging(), z0.VnB());

    __ movprfx(z1, z17);
    __ add(z1.VnH(), p2.Merging(), z1.VnH(), z1.VnH());

    __ movprfx(z12, z13);
    __ and_(z12.VnD(), p5.Merging(), z12.VnD(), z12.VnD());

    __ movprfx(z2, z4);
    __ asr(z2.VnS(), p2.Merging(), z2.VnS(), z2.VnS());

    __ movprfx(z10, z18);
    __ asr(z10.VnH(), p2.Merging(), z10.VnH(), z10.VnD());

    __ movprfx(z17.VnD(), p5.Zeroing(), z20.VnD());
    __ asr(z17.VnD(), p5.Merging(), z17.VnD(), z17.VnD());

    __ movprfx(z22, z9);
    __ asrr(z22.VnH(), p1.Merging(), z22.VnH(), z22.VnH());

    __ movprfx(z0.VnS(), p6.Zeroing(), z6.VnS());
    __ bic(z0.VnS(), p6.Merging(), z0.VnS(), z0.VnS());

    __ movprfx(z12, z16);
    __ clasta(z12.VnD(), p5, z12.VnD(), z12.VnD());

    __ movprfx(z7, z15);
    __ clastb(z7.VnS(), p7, z7.VnS(), z7.VnS());

    __ movprfx(z10, z29);
    __ cls(z10.VnH(), p2.Merging(), z10.VnH());

    __ movprfx(z6, z13);
    __ clz(z6.VnB(), p4.Merging(), z6.VnB());

    __ movprfx(z14.VnS(), p6.Zeroing(), z3.VnS());
    __ cnot(z14.VnS(), p6.Merging(), z14.VnS());

    __ movprfx(z5.VnD(), p6.Merging(), z4.VnD());
    __ cnt(z5.VnD(), p6.Merging(), z5.VnD());

    __ movprfx(z19.VnB(), p6.Zeroing(), z4.VnB());
    __ eor(z19.VnB(), p6.Merging(), z19.VnB(), z19.VnB());

    __ movprfx(z27, z2);
    __ ext(z27.VnB(), z27.VnB(), z27.VnB(), 42);

    __ movprfx(z4.VnS(), p1.Zeroing(), z22.VnS());
    __ lsl(z4.VnS(), p1.Merging(), z4.VnS(), z4.VnS());

    __ movprfx(z4, z5);
    __ lsl(z4.VnB(), p5.Merging(), z4.VnB(), z4.VnD());

    __ movprfx(z11.VnD(), p4.Merging(), z29.VnD());
    __ lsl(z11.VnD(), p4.Merging(), z11.VnD(), z11.VnD());

    __ movprfx(z12.VnD(), p6.Merging(), z3.VnD());
    __ lslr(z12.VnD(), p6.Merging(), z12.VnD(), z12.VnD());

    __ movprfx(z7, z2);
    __ lsr(z7.VnB(), p4.Merging(), z7.VnB(), z7.VnB());

    __ movprfx(z25.VnH(), p6.Merging(), z28.VnH());
    __ lsr(z25.VnH(), p6.Merging(), z25.VnH(), z25.VnD());

    __ movprfx(z14.VnD(), p6.Merging(), z6.VnD());
    __ lsr(z14.VnD(), p6.Merging(), z14.VnD(), z14.VnD());

    __ movprfx(z26.VnH(), p6.Zeroing(), z27.VnH());
    __ lsrr(z26.VnH(), p6.Merging(), z26.VnH(), z26.VnH());

    __ movprfx(z17.VnS(), p4.Zeroing(), z29.VnS());
    __ mad(z17.VnS(), p4.Merging(), z17.VnS(), z23.VnS());

    __ movprfx(z7, z17);
    __ mad(z7.VnD(), p5.Merging(), z4.VnD(), z7.VnD());

    __ movprfx(z11, z7);
    __ mla(z11.VnS(), p1.Merging(), z11.VnS(), z27.VnS());

    __ movprfx(z7, z5);
    __ mla(z7.VnH(), p0.Merging(), z5.VnH(), z7.VnH());

    __ movprfx(z1.VnH(), p0.Merging(), z17.VnH());
    __ mls(z1.VnH(), p0.Merging(), z1.VnH(), z31.VnH());

    __ movprfx(z22.VnB(), p3.Merging(), z18.VnB());
    __ mls(z22.VnB(), p3.Merging(), z18.VnB(), z22.VnB());

    __ movprfx(z7.VnS(), p0.Merging(), z10.VnS());
    __ msb(z7.VnS(), p0.Merging(), z7.VnS(), z10.VnS());

    __ movprfx(z12, z6);
    __ msb(z12.VnH(), p7.Merging(), z6.VnH(), z12.VnH());

    __ movprfx(z8.VnB(), p4.Merging(), z3.VnB());
    __ mul(z8.VnB(), p4.Merging(), z8.VnB(), z8.VnB());

    __ movprfx(z9, z26);
    __ neg(z9.VnS(), p7.Merging(), z9.VnS());

    __ movprfx(z16, z8);
    __ not_(z16.VnH(), p6.Merging(), z16.VnH());

    __ movprfx(z25.VnH(), p5.Zeroing(), z11.VnH());
    __ orr(z25.VnH(), p5.Merging(), z25.VnH(), z25.VnH());

    __ movprfx(z17.VnH(), p1.Merging(), z22.VnH());
    __ rbit(z17.VnH(), p1.Merging(), z17.VnH());

    __ movprfx(z11, z25);
    __ revb(z11.VnD(), p6.Merging(), z11.VnD());

    __ movprfx(z13, z27);
    __ revh(z13.VnS(), p2.Merging(), z13.VnS());

    __ movprfx(z30.VnD(), p6.Merging(), z20.VnD());
    __ revw(z30.VnD(), p6.Merging(), z30.VnD());

    __ movprfx(z2.VnD(), p2.Merging(), z21.VnD());
    __ sabd(z2.VnD(), p2.Merging(), z2.VnD(), z2.VnD());

    __ movprfx(z0, z7);
    __ sdiv(z0.VnD(), p0.Merging(), z0.VnD(), z0.VnD());

    __ movprfx(z19, z28);
    __ sdivr(z19.VnS(), p1.Merging(), z19.VnS(), z19.VnS());

    __ movprfx(z5, z18);
    __ sdot(z5.VnS(), z18.VnB(), z5.VnB(), 1);

    __ movprfx(z15, z11);
    __ sdot(z15.VnD(), z2.VnH(), z15.VnH(), 1);

    __ movprfx(z30, z13);
    __ sdot(z30.VnD(), z30.VnH(), z13.VnH(), 1);

    __ movprfx(z8, z9);
    __ sdot(z8.VnS(), z8.VnB(), z9.VnB());

    __ movprfx(z23, z14);
    __ sdot(z23.VnS(), z14.VnB(), z23.VnB());

    __ movprfx(z26, z5);
    __ sdot(z26.VnS(), z26.VnB(), z5.VnB(), 1);

    __ movprfx(z14, z15);
    __ smax(z14.VnB(), p2.Merging(), z14.VnB(), z14.VnB());

    __ movprfx(z26.VnS(), p0.Merging(), z10.VnS());
    __ smin(z26.VnS(), p0.Merging(), z26.VnS(), z26.VnS());

    __ movprfx(z22, z18);
    __ smulh(z22.VnB(), p2.Merging(), z22.VnB(), z22.VnB());

    __ movprfx(z8, z19);
    __ splice(z8.VnD(), p2, z8.VnD(), z8.VnD());

    __ movprfx(z23.VnH(), p6.Zeroing(), z2.VnH());
    __ sub(z23.VnH(), p6.Merging(), z23.VnH(), z23.VnH());

    __ movprfx(z25.VnS(), p2.Merging(), z21.VnS());
    __ subr(z25.VnS(), p2.Merging(), z25.VnS(), z25.VnS());

    __ movprfx(z28, z31);
    __ sxtb(z28.VnS(), p6.Merging(), z28.VnS());

    __ movprfx(z14.VnD(), p6.Merging(), z17.VnD());
    __ sxth(z14.VnD(), p6.Merging(), z14.VnD());

    __ movprfx(z21.VnD(), p0.Zeroing(), z28.VnD());
    __ sxtw(z21.VnD(), p0.Merging(), z21.VnD());

    __ movprfx(z25, z30);
    __ uabd(z25.VnB(), p5.Merging(), z25.VnB(), z25.VnB());

    __ movprfx(z13.VnD(), p2.Merging(), z30.VnD());
    __ udiv(z13.VnD(), p2.Merging(), z13.VnD(), z13.VnD());

    __ movprfx(z19.VnD(), p4.Zeroing(), z6.VnD());
    __ udivr(z19.VnD(), p4.Merging(), z19.VnD(), z19.VnD());

    __ movprfx(z1, z20);
    __ udot(z1.VnS(), z18.VnB(), z1.VnB(), 1);

    __ movprfx(z8, z2);
    __ udot(z8.VnD(), z2.VnH(), z8.VnH(), 1);

    __ movprfx(z28, z10);
    __ udot(z28.VnD(), z28.VnH(), z7.VnH(), 1);

    __ movprfx(z21, z11);
    __ udot(z21.VnD(), z21.VnH(), z11.VnH());

    __ movprfx(z1, z22);
    __ udot(z1.VnD(), z10.VnH(), z1.VnH());

    __ movprfx(z8, z23);
    __ udot(z8.VnS(), z8.VnB(), z0.VnB(), 1);

    __ movprfx(z10.VnB(), p5.Zeroing(), z0.VnB());
    __ umax(z10.VnB(), p5.Merging(), z10.VnB(), z10.VnB());

    __ movprfx(z0.VnS(), p2.Zeroing(), z30.VnS());
    __ umin(z0.VnS(), p2.Merging(), z0.VnS(), z0.VnS());

    __ movprfx(z26.VnD(), p6.Zeroing(), z29.VnD());
    __ umulh(z26.VnD(), p6.Merging(), z26.VnD(), z26.VnD());

    __ movprfx(z23, z25);
    __ uxtb(z23.VnS(), p7.Merging(), z23.VnS());

    __ movprfx(z14.VnS(), p3.Zeroing(), z5.VnS());
    __ uxth(z14.VnS(), p3.Merging(), z14.VnS());

    __ movprfx(z14, z5);
    __ uxtw(z14.VnD(), p3.Merging(), z14.VnD());

    // Integer matrix multiply and dot-product (I8MM) forms.
    __ movprfx(z22, z5);
    __ smmla(z22.VnS(), z22.VnB(), z0.VnB());

    __ movprfx(z1, z5);
    __ ummla(z1.VnS(), z10.VnB(), z1.VnB());

    __ movprfx(z30, z5);
    __ usmmla(z30.VnS(), z30.VnB(), z18.VnB());

    __ movprfx(z4, z5);
    __ usdot(z4.VnS(), z3.VnB(), z4.VnB());

    __ movprfx(z10, z5);
    __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0);

    __ movprfx(z1, z5);
    __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1);
  }
  assm.FinalizeCode();

  // Every pair above should be rejected (can_take_movprfx == false).
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}
349
TEST(movprfx_negative_aliasing_fp) {
  // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not
  // alias an input to the prefixed instruction.
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE,
                                 CPUFeatures::kSVEF32MM,
                                 CPUFeatures::kSVEF64MM);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
    static const size_t kPairCount = 80;
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    // As in movprfx_negative_aliasing, but for floating-point instructions:
    // in every pair, the movprfx destination aliases a source operand of the
    // prefixed instruction, so each pair must be rejected.
    __ movprfx(z17.VnS(), p1.Zeroing(), z12.VnS());
    __ fabd(z17.VnS(), p1.Merging(), z17.VnS(), z17.VnS());

    __ movprfx(z13, z23);
    __ fabs(z13.VnS(), p4.Merging(), z13.VnS());

    __ movprfx(z24.VnS(), p5.Merging(), z15.VnS());
    __ fadd(z24.VnS(), p5.Merging(), z24.VnS(), z24.VnS());

    __ movprfx(z28.VnD(), p5.Zeroing(), z14.VnD());
    __ fcadd(z28.VnD(), p5.Merging(), z28.VnD(), z28.VnD(), 90);

    __ movprfx(z5, z0);
    __ fcmla(z5.VnH(), z0.VnH(), z5.VnH(), 2, 180);

    __ movprfx(z10, z4);
    __ fcmla(z10.VnS(), z8.VnS(), z10.VnS(), 1, 270);

    __ movprfx(z12, z26);
    __ fcmla(z12.VnH(), z12.VnH(), z3.VnH(), 2, 180);

    __ movprfx(z8, z1);
    __ fcmla(z8.VnS(), z8.VnS(), z1.VnS(), 1, 270);

    __ movprfx(z16.VnD(), p0.Merging(), z13.VnD());
    __ fcvt(z16.VnD(), p0.Merging(), z16.VnH());

    __ movprfx(z12.VnD(), p7.Zeroing(), z13.VnD());
    __ fcvt(z12.VnD(), p7.Merging(), z12.VnS());

    __ movprfx(z14, z26);
    __ fcvt(z14.VnS(), p5.Merging(), z14.VnD());

    __ movprfx(z26, z2);
    __ fcvt(z26.VnH(), p7.Merging(), z26.VnD());

    __ movprfx(z25.VnD(), p2.Merging(), z13.VnD());
    __ fcvtzs(z25.VnD(), p2.Merging(), z25.VnH());

    __ movprfx(z31, z2);
    __ fcvtzs(z31.VnH(), p7.Merging(), z31.VnH());

    __ movprfx(z21.VnD(), p1.Merging(), z7.VnD());
    __ fcvtzs(z21.VnD(), p1.Merging(), z21.VnS());

    __ movprfx(z5, z17);
    __ fcvtzs(z5.VnS(), p5.Merging(), z5.VnD());

    __ movprfx(z19.VnD(), p1.Zeroing(), z16.VnD());
    __ fcvtzu(z19.VnD(), p1.Merging(), z19.VnH());

    __ movprfx(z2.VnH(), p7.Zeroing(), z28.VnH());
    __ fcvtzu(z2.VnH(), p7.Merging(), z2.VnH());

    __ movprfx(z21.VnD(), p7.Zeroing(), z27.VnD());
    __ fcvtzu(z21.VnD(), p7.Merging(), z21.VnS());

    __ movprfx(z22.VnD(), p4.Zeroing(), z8.VnD());
    __ fcvtzu(z22.VnS(), p4.Merging(), z22.VnD());

    __ movprfx(z0.VnS(), p5.Merging(), z5.VnS());
    __ fdiv(z0.VnS(), p5.Merging(), z0.VnS(), z0.VnS());

    __ movprfx(z12, z24);
    __ fdivr(z12.VnD(), p7.Merging(), z12.VnD(), z12.VnD());

    __ movprfx(z14.VnD(), p6.Zeroing(), z21.VnD());
    __ fmad(z14.VnD(), p6.Merging(), z14.VnD(), z3.VnD());

    __ movprfx(z2.VnS(), p5.Zeroing(), z10.VnS());
    __ fmad(z2.VnS(), p5.Merging(), z14.VnS(), z2.VnS());

    __ movprfx(z24, z5);
    __ fmax(z24.VnS(), p1.Merging(), z24.VnS(), z24.VnS());

    __ movprfx(z15.VnD(), p2.Merging(), z26.VnD());
    __ fmaxnm(z15.VnD(), p2.Merging(), z15.VnD(), z15.VnD());

    __ movprfx(z20, z22);
    __ fmin(z20.VnH(), p0.Merging(), z20.VnH(), z20.VnH());

    __ movprfx(z24.VnS(), p6.Zeroing(), z30.VnS());
    __ fminnm(z24.VnS(), p6.Merging(), z24.VnS(), z24.VnS());

    __ movprfx(z4, z24);
    __ fmla(z4.VnH(), z24.VnH(), z4.VnH(), 7);

    __ movprfx(z4, z7);
    __ fmla(z4.VnS(), z24.VnS(), z4.VnS(), 3);

    __ movprfx(z5, z28);
    __ fmla(z5.VnD(), z28.VnD(), z5.VnD(), 1);

    __ movprfx(z24, z2);
    __ fmla(z24.VnD(), z24.VnD(), z2.VnD(), 1);

    __ movprfx(z7, z21);
    __ fmla(z7.VnH(), p2.Merging(), z7.VnH(), z31.VnH());

    __ movprfx(z25.VnH(), p5.Zeroing(), z29.VnH());
    __ fmla(z25.VnH(), p5.Merging(), z29.VnH(), z25.VnH());

    __ movprfx(z31, z25);
    __ fmla(z31.VnH(), z31.VnH(), z2.VnH(), 7);

    __ movprfx(z15, z4);
    __ fmla(z15.VnS(), z15.VnS(), z4.VnS(), 3);

    __ movprfx(z7, z11);
    __ fmls(z7.VnH(), z11.VnH(), z7.VnH(), 4);

    __ movprfx(z3, z10);
    __ fmls(z3.VnS(), z10.VnS(), z3.VnS(), 3);

    __ movprfx(z5, z16);
    __ fmls(z5.VnD(), z16.VnD(), z5.VnD(), 1);

    __ movprfx(z31, z26);
    __ fmls(z31.VnD(), z31.VnD(), z8.VnD(), 1);

    __ movprfx(z5.VnH(), p3.Merging(), z2.VnH());
    __ fmls(z5.VnH(), p3.Merging(), z5.VnH(), z2.VnH());

    __ movprfx(z22.VnS(), p3.Zeroing(), z17.VnS());
    __ fmls(z22.VnS(), p3.Merging(), z21.VnS(), z22.VnS());

    __ movprfx(z17, z2);
    __ fmls(z17.VnH(), z17.VnH(), z2.VnH(), 4);

    __ movprfx(z28, z11);
    __ fmls(z28.VnS(), z28.VnS(), z0.VnS(), 3);

    __ movprfx(z15.VnD(), p1.Merging(), z31.VnD());
    __ fmsb(z15.VnD(), p1.Merging(), z15.VnD(), z31.VnD());

    __ movprfx(z21.VnD(), p0.Zeroing(), z5.VnD());
    __ fmsb(z21.VnD(), p0.Merging(), z19.VnD(), z21.VnD());

    __ movprfx(z0.VnH(), p3.Merging(), z31.VnH());
    __ fmul(z0.VnH(), p3.Merging(), z0.VnH(), z0.VnH());

    __ movprfx(z31.VnH(), p6.Merging(), z8.VnH());
    __ fmulx(z31.VnH(), p6.Merging(), z31.VnH(), z31.VnH());

    __ movprfx(z17.VnH(), p1.Zeroing(), z10.VnH());
    __ fneg(z17.VnH(), p1.Merging(), z17.VnH());

    __ movprfx(z22, z31);
    __ fnmad(z22.VnH(), p1.Merging(), z22.VnH(), z23.VnH());

    __ movprfx(z14.VnD(), p0.Zeroing(), z26.VnD());
    __ fnmad(z14.VnD(), p0.Merging(), z2.VnD(), z14.VnD());

    __ movprfx(z13.VnH(), p6.Zeroing(), z29.VnH());
    __ fnmla(z13.VnH(), p6.Merging(), z13.VnH(), z26.VnH());

    __ movprfx(z19.VnH(), p7.Zeroing(), z25.VnH());
    __ fnmla(z19.VnH(), p7.Merging(), z25.VnH(), z19.VnH());

    __ movprfx(z27.VnH(), p5.Merging(), z24.VnH());
    __ fnmls(z27.VnH(), p5.Merging(), z27.VnH(), z24.VnH());

    __ movprfx(z6.VnH(), p6.Zeroing(), z21.VnH());
    __ fnmls(z6.VnH(), p6.Merging(), z21.VnH(), z6.VnH());

    __ movprfx(z7.VnS(), p3.Merging(), z23.VnS());
    __ fnmsb(z7.VnS(), p3.Merging(), z7.VnS(), z23.VnS());

    __ movprfx(z29.VnH(), p2.Zeroing(), z24.VnH());
    __ fnmsb(z29.VnH(), p2.Merging(), z24.VnH(), z29.VnH());

    __ movprfx(z7.VnH(), p6.Merging(), z23.VnH());
    __ frecpx(z7.VnH(), p6.Merging(), z7.VnH());

    __ movprfx(z17.VnS(), p5.Zeroing(), z2.VnS());
    __ frinta(z17.VnS(), p5.Merging(), z17.VnS());

    __ movprfx(z0.VnS(), p2.Zeroing(), z7.VnS());
    __ frinti(z0.VnS(), p2.Merging(), z0.VnS());

    __ movprfx(z8.VnH(), p3.Merging(), z20.VnH());
    __ frintm(z8.VnH(), p3.Merging(), z8.VnH());

    __ movprfx(z3.VnD(), p2.Zeroing(), z20.VnD());
    __ frintn(z3.VnD(), p2.Merging(), z3.VnD());

    __ movprfx(z11, z3);
    __ frintp(z11.VnS(), p4.Merging(), z11.VnS());

    __ movprfx(z23, z29);
    __ frintx(z23.VnD(), p4.Merging(), z23.VnD());

    __ movprfx(z4.VnH(), p4.Zeroing(), z14.VnH());
    __ frintz(z4.VnH(), p4.Merging(), z4.VnH());

    __ movprfx(z18.VnH(), p3.Zeroing(), z0.VnH());
    __ fscale(z18.VnH(), p3.Merging(), z18.VnH(), z18.VnH());

    __ movprfx(z2.VnS(), p6.Zeroing(), z4.VnS());
    __ fsqrt(z2.VnS(), p6.Merging(), z2.VnS());

    __ movprfx(z14.VnD(), p4.Zeroing(), z31.VnD());
    __ fsub(z14.VnD(), p4.Merging(), z14.VnD(), z14.VnD());

    __ movprfx(z31.VnH(), p2.Merging(), z6.VnH());
    __ fsubr(z31.VnH(), p2.Merging(), z31.VnH(), z31.VnH());

    __ movprfx(z4, z30);
    __ ftmad(z4.VnH(), z4.VnH(), z4.VnH(), 2);

    __ movprfx(z25.VnD(), p6.Zeroing(), z2.VnD());
    __ scvtf(z25.VnD(), p6.Merging(), z25.VnS());

    __ movprfx(z0.VnD(), p3.Merging(), z16.VnD());
    __ scvtf(z0.VnD(), p3.Merging(), z0.VnD());

    __ movprfx(z19, z23);
    __ scvtf(z19.VnS(), p7.Merging(), z19.VnD());

    __ movprfx(z19, z4);
    __ scvtf(z19.VnH(), p4.Merging(), z19.VnD());

    __ movprfx(z13.VnD(), p4.Zeroing(), z6.VnD());
    __ ucvtf(z13.VnD(), p4.Merging(), z13.VnS());

    __ movprfx(z6.VnH(), p0.Zeroing(), z14.VnH());
    __ ucvtf(z6.VnH(), p0.Merging(), z6.VnH());

    __ movprfx(z19.VnS(), p4.Merging(), z12.VnS());
    __ ucvtf(z19.VnH(), p4.Merging(), z19.VnS());

    __ movprfx(z0.VnD(), p5.Zeroing(), z12.VnD());
    __ ucvtf(z0.VnH(), p5.Merging(), z0.VnD());

    // Floating-point matrix multiply (F32MM/F64MM) forms.
    __ movprfx(z30, z5);
    __ fmmla(z30.VnS(), z30.VnS(), z18.VnS());

    __ movprfx(z31, z5);
    __ fmmla(z31.VnD(), z31.VnD(), z18.VnD());
  }
  assm.FinalizeCode();

  // Every pair above should be rejected (can_take_movprfx == false).
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}
607
TEST(movprfx_negative_instructions) {
  // Test that CanTakeSVEMovprfx() rejects instructions that can never be
  // prefixed by movprfx, independently of register aliasing or lane sizes.
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
    static const size_t kPairCount = 13;
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    // The unpredicated (non-destructive) form of add cannot take movprfx.
    __ movprfx(z26, z11);
    __ add(z26.VnB(), z11.VnB(), z4.VnB());

    // The merging form can take movprfx, but the zeroing form cannot.
    __ movprfx(z29.VnB(), p3.Zeroing(), z7.VnB());
    __ cpy(z29.VnB(), p3.Zeroing(), -42);

    // Frecpx can take movprfx, but frecpe and frecps cannot.
    __ movprfx(z13, z15);
    __ frecpe(z13.VnD(), z26.VnD());

    __ movprfx(z19, z1);
    __ frecps(z19.VnD(), z1.VnD(), z12.VnD());

    // Similarly, frsqrte and frsqrts cannot take movprfx.
    __ movprfx(z6, z12);
    __ frsqrte(z6.VnS(), z12.VnS());

    __ movprfx(z29, z5);
    __ frsqrts(z29.VnH(), z5.VnH(), z20.VnH());

    // Ftmad can take movprfx, but ftsmul and ftssel cannot.
    __ movprfx(z1, z31);
    __ ftsmul(z1.VnD(), z31.VnD(), z16.VnD());

    __ movprfx(z8, z27);
    __ ftssel(z8.VnH(), z27.VnH(), z1.VnH());

    // This looks like a merging unary operation, but it's actually an alias of
    // sel, which isn't destructive.
    __ movprfx(z0, z18);
    __ mov(z0.VnS(), p6.Merging(), z18.VnS());

    // The merging form can take movprfx, but the zeroing form cannot.
    __ movprfx(z12.VnS(), p2.Merging(), z11.VnS());
    __ mov(z12.VnS(), p2.Zeroing(), -42);

    // Movprfx can never prefix itself, in any combination of its
    // unpredicated and predicated forms.
    __ movprfx(z13, z6);
    __ movprfx(z13, z2);

    __ movprfx(z3.VnD(), p5.Zeroing(), z8.VnD());
    __ movprfx(z3.VnD(), p5.Merging(), z8.VnD());

    __ movprfx(z1.VnD(), p3.Zeroing(), z14.VnD());
    __ movprfx(z1.VnD(), p3.Zeroing(), z18.VnD());
  }
  assm.FinalizeCode();

  // Every pair above should be rejected (can_take_movprfx == false).
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}
667
TEST(movprfx_negative_lane_size) {
  // Test that CanTakeSVEMovprfx() checks that the (predicated) movprfx lane
  // size is compatible with the prefixed instruction.
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
    static const size_t kPairCount = 63;
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    // In every pair below, the predicated movprfx uses a lane size that does
    // not match the one required by the prefixed instruction, so each pair
    // must be rejected. (Register aliasing is deliberately avoided here.)
    __ movprfx(z0.VnH(), p2.Zeroing(), z17.VnH());
    __ abs(z0.VnS(), p2.Merging(), z17.VnS());

    __ movprfx(z10.VnD(), p0.Zeroing(), z4.VnD());
    __ add(z10.VnS(), p0.Merging(), z10.VnS(), z2.VnS());

    __ movprfx(z25.VnS(), p4.Zeroing(), z26.VnS());
    __ and_(z25.VnB(), p4.Merging(), z25.VnB(), z27.VnB());

    __ movprfx(z26.VnD(), p5.Merging(), z23.VnD());
    __ asr(z26.VnB(), p5.Merging(), z26.VnB(), 3);

    __ movprfx(z25.VnS(), p7.Zeroing(), z14.VnS());
    __ asr(z25.VnH(), p7.Merging(), z25.VnH(), z14.VnH());

    __ movprfx(z12.VnS(), p7.Zeroing(), z23.VnS());
    __ asr(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnD());

    __ movprfx(z3.VnH(), p4.Zeroing(), z18.VnH());
    __ asr(z3.VnD(), p4.Merging(), z3.VnD(), z15.VnD());

    __ movprfx(z29.VnH(), p4.Merging(), z31.VnH());
    __ asrd(z29.VnB(), p4.Merging(), z29.VnB(), 3);

    __ movprfx(z31.VnH(), p5.Zeroing(), z14.VnH());
    __ asrr(z31.VnB(), p5.Merging(), z31.VnB(), z5.VnB());

    __ movprfx(z0.VnS(), p6.Zeroing(), z18.VnS());
    __ bic(z0.VnB(), p6.Merging(), z0.VnB(), z23.VnB());

    __ movprfx(z19.VnH(), p2.Zeroing(), z24.VnH());
    __ cls(z19.VnB(), p2.Merging(), z24.VnB());

    __ movprfx(z14.VnS(), p5.Zeroing(), z4.VnS());
    __ clz(z14.VnD(), p5.Merging(), z10.VnD());

    __ movprfx(z0.VnD(), p5.Merging(), z2.VnD());
    __ cnot(z0.VnH(), p5.Merging(), z2.VnH());

    __ movprfx(z0.VnB(), p3.Zeroing(), z19.VnB());
    __ cnt(z0.VnH(), p3.Merging(), z8.VnH());

    __ movprfx(z29.VnS(), p0.Merging(), z7.VnS());
    __ cpy(z29.VnD(), p0.Merging(), -42);

    __ movprfx(z13.VnB(), p2.Merging(), z31.VnB());
    __ cpy(z13.VnS(), p2.Merging(), w13);

    __ movprfx(z0.VnS(), p3.Merging(), z15.VnS());
    __ cpy(z0.VnH(), p3.Merging(), h0);

    __ movprfx(z2.VnD(), p6.Zeroing(), z26.VnD());
    __ eor(z2.VnB(), p6.Merging(), z2.VnB(), z26.VnB());

    __ movprfx(z7.VnS(), p7.Zeroing(), z30.VnS());
    __ lsl(z7.VnD(), p7.Merging(), z7.VnD(), 3);

    __ movprfx(z11.VnH(), p3.Merging(), z23.VnH());
    __ lsl(z11.VnB(), p3.Merging(), z11.VnB(), z21.VnB());

    __ movprfx(z31.VnS(), p7.Zeroing(), z21.VnS());
    __ lsl(z31.VnH(), p7.Merging(), z31.VnH(), z21.VnD());

    __ movprfx(z26.VnH(), p0.Merging(), z0.VnH());
    __ lsl(z26.VnD(), p0.Merging(), z26.VnD(), z24.VnD());

    __ movprfx(z1.VnS(), p2.Zeroing(), z6.VnS());
    __ lslr(z1.VnB(), p2.Merging(), z1.VnB(), z6.VnB());

    __ movprfx(z4.VnD(), p4.Zeroing(), z6.VnD());
    __ lsr(z4.VnH(), p4.Merging(), z4.VnH(), 3);

    __ movprfx(z27.VnH(), p0.Zeroing(), z29.VnH());
    __ lsr(z27.VnS(), p0.Merging(), z27.VnS(), z29.VnS());

    __ movprfx(z5.VnD(), p2.Zeroing(), z16.VnD());
    __ lsr(z5.VnH(), p2.Merging(), z5.VnH(), z2.VnD());

    __ movprfx(z27.VnB(), p4.Zeroing(), z5.VnB());
    __ lsr(z27.VnD(), p4.Merging(), z27.VnD(), z5.VnD());

    __ movprfx(z27.VnS(), p3.Merging(), z13.VnS());
    __ lsrr(z27.VnD(), p3.Merging(), z27.VnD(), z13.VnD());

    __ movprfx(z30.VnS(), p2.Zeroing(), z14.VnS());
    __ mad(z30.VnB(), p2.Merging(), z20.VnB(), z14.VnB());

    __ movprfx(z14.VnB(), p6.Merging(), z11.VnB());
    __ mla(z14.VnD(), p6.Merging(), z28.VnD(), z11.VnD());

    __ movprfx(z28.VnH(), p2.Zeroing(), z22.VnH());
    __ mls(z28.VnS(), p2.Merging(), z3.VnS(), z22.VnS());

    // Aliases of cpy.
    __ movprfx(z18.VnH(), p6.Zeroing(), z25.VnH());
    __ mov(z18.VnD(), p6.Merging(), -42);

    __ movprfx(z22.VnD(), p2.Zeroing(), z6.VnD());
    __ mov(z22.VnS(), p2.Merging(), w22);

    __ movprfx(z3.VnH(), p0.Zeroing(), z13.VnH());
    __ mov(z3.VnB(), p0.Merging(), b0);

    __ movprfx(z31.VnS(), p7.Zeroing(), z12.VnS());
    __ msb(z31.VnH(), p7.Merging(), z14.VnH(), z12.VnH());

    __ movprfx(z16.VnS(), p7.Zeroing(), z6.VnS());
    __ mul(z16.VnB(), p7.Merging(), z16.VnB(), z30.VnB());

    __ movprfx(z17.VnD(), p7.Merging(), z1.VnD());
    __ neg(z17.VnB(), p7.Merging(), z1.VnB());

    __ movprfx(z31.VnH(), p4.Zeroing(), z12.VnH());
    __ not_(z31.VnB(), p4.Merging(), z12.VnB());

    __ movprfx(z9.VnH(), p3.Zeroing(), z23.VnH());
    __ orr(z9.VnS(), p3.Merging(), z9.VnS(), z13.VnS());

    __ movprfx(z25.VnD(), p2.Zeroing(), z21.VnD());
    __ rbit(z25.VnS(), p2.Merging(), z21.VnS());

    __ movprfx(z26.VnH(), p3.Merging(), z13.VnH());
    __ revb(z26.VnD(), p3.Merging(), z13.VnD());

    __ movprfx(z8.VnH(), p5.Merging(), z20.VnH());
    __ revh(z8.VnS(), p5.Merging(), z0.VnS());

    __ movprfx(z22.VnH(), p6.Merging(), z15.VnH());
    __ revw(z22.VnD(), p6.Merging(), z10.VnD());

    __ movprfx(z1.VnD(), p3.Merging(), z15.VnD());
    __ sabd(z1.VnB(), p3.Merging(), z1.VnB(), z15.VnB());

    __ movprfx(z25.VnD(), p1.Zeroing(), z30.VnD());
    __ sdiv(z25.VnS(), p1.Merging(), z25.VnS(), z30.VnS());

    __ movprfx(z19.VnS(), p3.Zeroing(), z11.VnS());
    __ sdivr(z19.VnD(), p3.Merging(), z19.VnD(), z24.VnD());

    __ movprfx(z12.VnH(), p2.Merging(), z2.VnH());
    __ smax(z12.VnS(), p2.Merging(), z12.VnS(), z24.VnS());

    __ movprfx(z3.VnD(), p1.Merging(), z15.VnD());
    __ smin(z3.VnS(), p1.Merging(), z3.VnS(), z20.VnS());

    __ movprfx(z13.VnS(), p5.Merging(), z22.VnS());
    __ smulh(z13.VnB(), p5.Merging(), z13.VnB(), z27.VnB());

    __ movprfx(z11.VnH(), p5.Zeroing(), z25.VnH());
    __ sub(z11.VnB(), p5.Merging(), z11.VnB(), z7.VnB());

    __ movprfx(z3.VnB(), p6.Merging(), z13.VnB());
    __ subr(z3.VnS(), p6.Merging(), z3.VnS(), z13.VnS());

    __ movprfx(z26.VnH(), p5.Merging(), z1.VnH());
    __ sxtb(z26.VnS(), p5.Merging(), z17.VnS());

    __ movprfx(z11.VnB(), p7.Zeroing(), z26.VnB());
    __ sxth(z11.VnS(), p7.Merging(), z26.VnS());

    __ movprfx(z1.VnS(), p2.Merging(), z21.VnS());
    __ sxtw(z1.VnD(), p2.Merging(), z21.VnD());

    __ movprfx(z4.VnS(), p6.Zeroing(), z6.VnS());
    __ uabd(z4.VnH(), p6.Merging(), z4.VnH(), z6.VnH());

    __ movprfx(z26.VnB(), p2.Zeroing(), z11.VnB());
    __ udiv(z26.VnD(), p2.Merging(), z26.VnD(), z11.VnD());

    __ movprfx(z19.VnB(), p5.Merging(), z6.VnB());
    __ udivr(z19.VnS(), p5.Merging(), z19.VnS(), z9.VnS());

    __ movprfx(z16.VnB(), p4.Merging(), z6.VnB());
    __ umax(z16.VnH(), p4.Merging(), z16.VnH(), z6.VnH());

    __ movprfx(z1.VnD(), p0.Zeroing(), z4.VnD());
    __ umin(z1.VnS(), p0.Merging(), z1.VnS(), z28.VnS());

    __ movprfx(z25.VnD(), p7.Merging(), z4.VnD());
    __ umulh(z25.VnB(), p7.Merging(), z25.VnB(), z16.VnB());

    __ movprfx(z29.VnB(), p4.Merging(), z2.VnB());
    __ uxtb(z29.VnS(), p4.Merging(), z31.VnS());

    __ movprfx(z27.VnH(), p5.Merging(), z21.VnH());
    __ uxth(z27.VnD(), p5.Merging(), z1.VnD());

    __ movprfx(z29.VnB(), p2.Merging(), z7.VnB());
    __ uxtw(z29.VnD(), p2.Merging(), z7.VnD());
  }
  assm.FinalizeCode();

  // Every pair above should be rejected (can_take_movprfx == false).
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}
873
TEST(movprfx_negative_lane_size_fp) {
  // Test that CanTakeSVEMovprfx() checks that the (predicated) movprfx lane
  // size is compatible with the prefixed instruction.
  //
  // Each pair below emits a predicated movprfx followed by an FP instruction
  // whose governing lane size differs from the movprfx's lane size, so none
  // of these pairs form a valid movprfx combination.
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
    static const size_t kPairCount = 64;
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    __ movprfx(z29.VnD(), p5.Zeroing(), z8.VnD());
    __ fabd(z29.VnS(), p5.Merging(), z29.VnS(), z26.VnS());

    __ movprfx(z9.VnB(), p0.Zeroing(), z1.VnB());
    __ fabs(z9.VnS(), p0.Merging(), z15.VnS());

    __ movprfx(z24.VnD(), p0.Zeroing(), z8.VnD());
    __ fadd(z24.VnH(), p0.Merging(), z24.VnH(), 0.5);

    __ movprfx(z24.VnB(), p1.Zeroing(), z27.VnB());
    __ fadd(z24.VnH(), p1.Merging(), z24.VnH(), z27.VnH());

    __ movprfx(z14.VnH(), p7.Merging(), z12.VnH());
    __ fcadd(z14.VnD(), p7.Merging(), z14.VnD(), z12.VnD(), 90);

    __ movprfx(z10.VnB(), p6.Merging(), z11.VnB());
    __ fcpy(z10.VnH(), p6.Merging(), 1.25);

    // For conversions, the movprfx lane size must match the largest of the
    // source and destination lane sizes; these pairs deliberately do not.
    __ movprfx(z12.VnB(), p6.Merging(), z18.VnB());
    __ fcvt(z12.VnD(), p6.Merging(), z18.VnH());

    __ movprfx(z18.VnH(), p7.Zeroing(), z2.VnH());
    __ fcvt(z18.VnD(), p7.Merging(), z0.VnS());

    __ movprfx(z3.VnH(), p5.Merging(), z14.VnH());
    __ fcvt(z3.VnS(), p5.Merging(), z21.VnD());

    __ movprfx(z15.VnH(), p1.Zeroing(), z12.VnH());
    __ fcvt(z15.VnH(), p1.Merging(), z12.VnD());

    __ movprfx(z3.VnH(), p2.Merging(), z22.VnH());
    __ fcvtzs(z3.VnD(), p2.Merging(), z7.VnH());

    __ movprfx(z17.VnS(), p3.Merging(), z14.VnS());
    __ fcvtzs(z17.VnD(), p3.Merging(), z14.VnD());

    __ movprfx(z2.VnH(), p1.Zeroing(), z16.VnH());
    __ fcvtzs(z2.VnS(), p1.Merging(), z31.VnH());

    __ movprfx(z13.VnB(), p2.Merging(), z9.VnB());
    __ fcvtzs(z13.VnS(), p2.Merging(), z23.VnD());

    __ movprfx(z19.VnB(), p1.Merging(), z4.VnB());
    __ fcvtzu(z19.VnD(), p1.Merging(), z14.VnH());

    __ movprfx(z29.VnS(), p2.Merging(), z19.VnS());
    __ fcvtzu(z29.VnD(), p2.Merging(), z19.VnD());

    __ movprfx(z21.VnS(), p4.Zeroing(), z17.VnS());
    __ fcvtzu(z21.VnD(), p4.Merging(), z17.VnS());

    __ movprfx(z19.VnH(), p4.Zeroing(), z30.VnH());
    __ fcvtzu(z19.VnS(), p4.Merging(), z16.VnD());

    __ movprfx(z10.VnS(), p7.Zeroing(), z27.VnS());
    __ fdiv(z10.VnH(), p7.Merging(), z10.VnH(), z27.VnH());

    __ movprfx(z7.VnD(), p7.Zeroing(), z17.VnD());
    __ fdivr(z7.VnH(), p7.Merging(), z7.VnH(), z28.VnH());

    __ movprfx(z22.VnB(), p0.Merging(), z27.VnB());
    __ fmad(z22.VnH(), p0.Merging(), z27.VnH(), z15.VnH());

    __ movprfx(z14.VnD(), p1.Zeroing(), z11.VnD());
    __ fmax(z14.VnS(), p1.Merging(), z14.VnS(), 0.0);

    __ movprfx(z27.VnB(), p5.Merging(), z14.VnB());
    __ fmax(z27.VnD(), p5.Merging(), z27.VnD(), z14.VnD());

    __ movprfx(z31.VnH(), p7.Merging(), z24.VnH());
    __ fmaxnm(z31.VnD(), p7.Merging(), z31.VnD(), 0.0);

    __ movprfx(z11.VnD(), p7.Zeroing(), z25.VnD());
    __ fmaxnm(z11.VnS(), p7.Merging(), z11.VnS(), z28.VnS());

    __ movprfx(z31.VnD(), p6.Merging(), z19.VnD());
    __ fmin(z31.VnH(), p6.Merging(), z31.VnH(), 0.0);

    __ movprfx(z20.VnS(), p3.Zeroing(), z15.VnS());
    __ fmin(z20.VnH(), p3.Merging(), z20.VnH(), z8.VnH());

    __ movprfx(z6.VnS(), p0.Merging(), z30.VnS());
    __ fminnm(z6.VnH(), p0.Merging(), z6.VnH(), 0.0);

    __ movprfx(z1.VnH(), p1.Zeroing(), z14.VnH());
    __ fminnm(z1.VnS(), p1.Merging(), z1.VnS(), z14.VnS());

    __ movprfx(z13.VnB(), p3.Zeroing(), z21.VnB());
    __ fmla(z13.VnD(), p3.Merging(), z12.VnD(), z21.VnD());

    __ movprfx(z15.VnS(), p1.Zeroing(), z20.VnS());
    __ fmls(z15.VnH(), p1.Merging(), z28.VnH(), z20.VnH());

    __ movprfx(z19.VnD(), p3.Zeroing(), z31.VnD());
    __ fmov(z19.VnH(), p3.Merging(), 0.0);

    __ movprfx(z16.VnS(), p7.Merging(), z30.VnS());
    __ fmov(z16.VnH(), p7.Merging(), 2.5);

    __ movprfx(z21.VnB(), p1.Merging(), z28.VnB());
    __ fmsb(z21.VnH(), p1.Merging(), z30.VnH(), z28.VnH());

    __ movprfx(z21.VnS(), p1.Zeroing(), z19.VnS());
    __ fmul(z21.VnH(), p1.Merging(), z21.VnH(), 2.0);

    __ movprfx(z28.VnB(), p7.Zeroing(), z8.VnB());
    __ fmul(z28.VnS(), p7.Merging(), z28.VnS(), z26.VnS());

    __ movprfx(z2.VnB(), p4.Merging(), z31.VnB());
    __ fmulx(z2.VnH(), p4.Merging(), z2.VnH(), z31.VnH());

    __ movprfx(z6.VnB(), p2.Zeroing(), z0.VnB());
    __ fneg(z6.VnS(), p2.Merging(), z28.VnS());

    __ movprfx(z26.VnB(), p0.Zeroing(), z21.VnB());
    __ fnmad(z26.VnH(), p0.Merging(), z21.VnH(), z18.VnH());

    __ movprfx(z15.VnB(), p1.Zeroing(), z26.VnB());
    __ fnmla(z15.VnH(), p1.Merging(), z26.VnH(), z18.VnH());

    __ movprfx(z16.VnS(), p0.Merging(), z1.VnS());
    __ fnmls(z16.VnD(), p0.Merging(), z1.VnD(), z13.VnD());

    __ movprfx(z4.VnH(), p0.Zeroing(), z16.VnH());
    __ fnmsb(z4.VnS(), p0.Merging(), z30.VnS(), z3.VnS());

    // Note that frecpe and frecps _cannot_ take movprfx.
    __ movprfx(z9.VnH(), p0.Zeroing(), z21.VnH());
    __ frecpx(z9.VnS(), p0.Merging(), z14.VnS());

    __ movprfx(z6.VnH(), p2.Zeroing(), z28.VnH());
    __ frinta(z6.VnD(), p2.Merging(), z28.VnD());

    __ movprfx(z12.VnS(), p4.Zeroing(), z7.VnS());
    __ frinti(z12.VnH(), p4.Merging(), z7.VnH());

    __ movprfx(z6.VnB(), p5.Merging(), z20.VnB());
    __ frintm(z6.VnD(), p5.Merging(), z20.VnD());

    __ movprfx(z7.VnB(), p6.Merging(), z19.VnB());
    __ frintn(z7.VnH(), p6.Merging(), z11.VnH());

    __ movprfx(z12.VnD(), p2.Merging(), z31.VnD());
    __ frintp(z12.VnS(), p2.Merging(), z31.VnS());

    __ movprfx(z1.VnS(), p5.Merging(), z10.VnS());
    __ frintx(z1.VnD(), p5.Merging(), z0.VnD());

    __ movprfx(z6.VnH(), p0.Merging(), z12.VnH());
    __ frintz(z6.VnS(), p0.Merging(), z7.VnS());

    __ movprfx(z8.VnH(), p2.Merging(), z6.VnH());
    __ fscale(z8.VnD(), p2.Merging(), z8.VnD(), z6.VnD());

    __ movprfx(z20.VnH(), p2.Zeroing(), z2.VnH());
    __ fsqrt(z20.VnD(), p2.Merging(), z15.VnD());

    __ movprfx(z28.VnS(), p6.Zeroing(), z19.VnS());
    __ fsub(z28.VnD(), p6.Merging(), z28.VnD(), 1.0);

    __ movprfx(z6.VnB(), p0.Zeroing(), z12.VnB());
    __ fsub(z6.VnD(), p0.Merging(), z6.VnD(), z20.VnD());

    __ movprfx(z6.VnS(), p7.Zeroing(), z11.VnS());
    __ fsubr(z6.VnH(), p7.Merging(), z6.VnH(), 1.0);

    __ movprfx(z28.VnB(), p3.Merging(), z10.VnB());
    __ fsubr(z28.VnS(), p3.Merging(), z28.VnS(), z9.VnS());

    __ movprfx(z22.VnB(), p3.Zeroing(), z14.VnB());
    __ scvtf(z22.VnD(), p3.Merging(), z24.VnS());

    __ movprfx(z20.VnS(), p2.Merging(), z9.VnS());
    __ scvtf(z20.VnH(), p2.Merging(), z9.VnH());

    __ movprfx(z19.VnH(), p1.Merging(), z21.VnH());
    __ scvtf(z19.VnS(), p1.Merging(), z6.VnD());

    __ movprfx(z31.VnS(), p3.Merging(), z22.VnS());
    __ scvtf(z31.VnH(), p3.Merging(), z22.VnD());

    __ movprfx(z8.VnS(), p3.Merging(), z3.VnS());
    __ ucvtf(z8.VnD(), p3.Merging(), z1.VnS());

    __ movprfx(z0.VnB(), p0.Merging(), z23.VnB());
    __ ucvtf(z0.VnH(), p0.Merging(), z12.VnH());

    __ movprfx(z8.VnH(), p3.Zeroing(), z4.VnH());
    __ ucvtf(z8.VnH(), p3.Merging(), z4.VnS());

    __ movprfx(z20.VnH(), p2.Zeroing(), z10.VnH());
    __ ucvtf(z20.VnH(), p2.Merging(), z11.VnD());
  }
  assm.FinalizeCode();

  // `false`: these are negative cases, so no pair should be accepted as a
  // valid movprfx combination.
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}
1082
1083TEST(movprfx_negative_predication) {
1084  // Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears
1085  // before an unpredicated instruction.
1086  Assembler assm;
1087  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM);
1088  {
1089    // We have to use the Assembler directly to generate movprfx, so we need
1090    // to manually reserve space for the code we're about to emit.
1091    static const size_t kPairCount = 60;
1092    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
1093
1094    __ movprfx(z27.VnS(), p1.Zeroing(), z12.VnS());
1095    __ add(z27.VnS(), z27.VnS(), 42);
1096
1097    __ movprfx(z31.VnS(), p6.Zeroing(), z1.VnS());
1098    __ and_(z31.VnS(), z31.VnS(), 4);
1099
1100    __ movprfx(z27.VnS(), p5.Merging(), z24.VnS());
1101    __ bic(z27.VnS(), z27.VnS(), 4);
1102
1103    __ movprfx(z6.VnH(), p7.Merging(), z30.VnH());
1104    __ clasta(z6.VnH(), p7, z6.VnH(), z14.VnH());
1105
1106    __ movprfx(z11.VnB(), p6.Merging(), z5.VnB());
1107    __ clastb(z11.VnB(), p6, z11.VnB(), z29.VnB());
1108
1109    __ movprfx(z5.VnD(), p0.Merging(), z1.VnD());
1110    __ decd(z5.VnD(), SVE_MUL3);
1111
1112    __ movprfx(z11.VnH(), p7.Zeroing(), z28.VnH());
1113    __ dech(z11.VnH(), SVE_VL2);
1114
1115    __ movprfx(z14.VnS(), p5.Zeroing(), z6.VnS());
1116    __ decp(z14.VnS(), p5);
1117
1118    __ movprfx(z6.VnS(), p5.Merging(), z10.VnS());
1119    __ decw(z6.VnS(), SVE_ALL);
1120
1121    __ movprfx(z27.VnH(), p7.Zeroing(), z9.VnH());
1122    __ eon(z27.VnH(), z27.VnH(), 4);
1123
1124    __ movprfx(z3.VnS(), p3.Zeroing(), z2.VnS());
1125    __ eor(z3.VnS(), z3.VnS(), 4);
1126
1127    __ movprfx(z30.VnB(), p2.Zeroing(), z25.VnB());
1128    __ ext(z30.VnB(), z30.VnB(), z25.VnB(), 42);
1129
1130    __ movprfx(z22.VnD(), p0.Merging(), z0.VnD());
1131    __ incd(z22.VnD(), SVE_MUL3);
1132
1133    __ movprfx(z7.VnH(), p3.Merging(), z3.VnH());
1134    __ inch(z7.VnH(), SVE_VL2);
1135
1136    __ movprfx(z9.VnD(), p1.Zeroing(), z28.VnD());
1137    __ incp(z9.VnD(), p1);
1138
1139    __ movprfx(z30.VnS(), p3.Merging(), z4.VnS());
1140    __ incw(z30.VnS(), SVE_ALL);
1141
1142    __ movprfx(z30.VnB(), p7.Zeroing(), z21.VnB());
1143    __ insr(z30.VnB(), w30);
1144
1145    __ movprfx(z2.VnB(), p4.Zeroing(), z26.VnB());
1146    __ insr(z2.VnB(), b0);
1147
1148    __ movprfx(z27.VnS(), p5.Zeroing(), z5.VnS());
1149    __ mul(z27.VnS(), z27.VnS(), 42);
1150
1151    __ movprfx(z5.VnS(), p0.Merging(), z26.VnS());
1152    __ orn(z5.VnS(), z5.VnS(), 4);
1153
1154    __ movprfx(z5.VnS(), p0.Merging(), z26.VnS());
1155    __ orn(z5.VnS(), z5.VnS(), 4);
1156
1157    __ movprfx(z16.VnD(), p1.Merging(), z13.VnD());
1158    __ sdot(z16.VnD(), z11.VnH(), z7.VnH(), 1);
1159
1160    __ movprfx(z27.VnD(), p5.Merging(), z18.VnD());
1161    __ sdot(z27.VnD(), z18.VnH(), z0.VnH());
1162
1163    __ movprfx(z20.VnS(), p6.Merging(), z1.VnS());
1164    __ sdot(z20.VnS(), z10.VnB(), z1.VnB(), 1);
1165
1166    __ movprfx(z19.VnD(), p0.Zeroing(), z7.VnD());
1167    __ smax(z19.VnD(), z19.VnD(), 42);
1168
1169    __ movprfx(z15.VnD(), p1.Zeroing(), z7.VnD());
1170    __ smin(z15.VnD(), z15.VnD(), 42);
1171
1172    __ movprfx(z15.VnB(), p5.Merging(), z3.VnB());
1173    __ splice(z15.VnB(), p5, z15.VnB(), z3.VnB());
1174
1175    __ movprfx(z5.VnB(), p6.Zeroing(), z4.VnB());
1176    __ sqadd(z5.VnB(), z5.VnB(), 42);
1177
1178    __ movprfx(z16.VnD(), p0.Zeroing(), z18.VnD());
1179    __ sqdecd(z16.VnD(), SVE_MUL3);
1180
1181    __ movprfx(z7.VnH(), p3.Merging(), z28.VnH());
1182    __ sqdech(z7.VnH(), SVE_VL2);
1183
1184    __ movprfx(z7.VnS(), p2.Merging(), z13.VnS());
1185    __ sqdecp(z7.VnS(), p2);
1186
1187    __ movprfx(z22.VnS(), p7.Zeroing(), z20.VnS());
1188    __ sqdecw(z22.VnS(), SVE_ALL);
1189
1190    __ movprfx(z26.VnD(), p1.Zeroing(), z0.VnD());
1191    __ sqincd(z26.VnD(), SVE_MUL3);
1192
1193    __ movprfx(z15.VnH(), p7.Zeroing(), z27.VnH());
1194    __ sqinch(z15.VnH(), SVE_VL2);
1195
1196    __ movprfx(z4.VnD(), p7.Merging(), z13.VnD());
1197    __ sqincp(z4.VnD(), p7);
1198
1199    __ movprfx(z29.VnS(), p6.Merging(), z14.VnS());
1200    __ sqincw(z29.VnS(), SVE_ALL);
1201
1202    __ movprfx(z17.VnB(), p1.Merging(), z24.VnB());
1203    __ sqsub(z17.VnB(), z17.VnB(), 42);
1204
1205    __ movprfx(z26.VnS(), p5.Zeroing(), z19.VnS());
1206    __ sub(z26.VnS(), z26.VnS(), 42);
1207
1208    __ movprfx(z15.VnD(), p1.Merging(), z3.VnD());
1209    __ subr(z15.VnD(), z15.VnD(), 42);
1210
1211    __ movprfx(z4.VnD(), p2.Zeroing(), z14.VnD());
1212    __ udot(z4.VnD(), z15.VnH(), z7.VnH(), 1);
1213
1214    __ movprfx(z29.VnD(), p4.Zeroing(), z28.VnD());
1215    __ udot(z29.VnD(), z2.VnH(), z17.VnH());
1216
1217    __ movprfx(z7.VnS(), p6.Merging(), z3.VnS());
1218    __ udot(z7.VnS(), z14.VnB(), z1.VnB(), 1);
1219
1220    __ movprfx(z14.VnB(), p3.Merging(), z5.VnB());
1221    __ umax(z14.VnB(), z14.VnB(), 42);
1222
1223    __ movprfx(z4.VnD(), p1.Zeroing(), z2.VnD());
1224    __ umin(z4.VnD(), z4.VnD(), 42);
1225
1226    __ movprfx(z19.VnB(), p0.Zeroing(), z27.VnB());
1227    __ uqadd(z19.VnB(), z19.VnB(), 42);
1228
1229    __ movprfx(z24.VnD(), p7.Zeroing(), z11.VnD());
1230    __ uqdecd(z24.VnD(), SVE_MUL3);
1231
1232    __ movprfx(z24.VnH(), p4.Zeroing(), z18.VnH());
1233    __ uqdech(z24.VnH(), SVE_VL2);
1234
1235    __ movprfx(z31.VnS(), p5.Zeroing(), z2.VnS());
1236    __ uqdecp(z31.VnS(), p5);
1237
1238    __ movprfx(z19.VnS(), p6.Merging(), z21.VnS());
1239    __ uqdecw(z19.VnS(), SVE_ALL);
1240
1241    __ movprfx(z27.VnD(), p0.Merging(), z21.VnD());
1242    __ uqincd(z27.VnD(), SVE_MUL3);
1243
1244    __ movprfx(z13.VnH(), p4.Zeroing(), z12.VnH());
1245    __ uqinch(z13.VnH(), SVE_VL2);
1246
1247    __ movprfx(z0.VnD(), p4.Zeroing(), z1.VnD());
1248    __ uqincp(z0.VnD(), p4);
1249
1250    __ movprfx(z12.VnS(), p4.Merging(), z21.VnS());
1251    __ uqincw(z12.VnS(), SVE_ALL);
1252
1253    __ movprfx(z9.VnD(), p0.Zeroing(), z16.VnD());
1254    __ uqsub(z9.VnD(), z9.VnD(), 42);
1255
1256    __ movprfx(z22.VnS(), p0.Zeroing(), z5.VnS());
1257    __ smmla(z22.VnS(), z21.VnB(), z0.VnB());
1258
1259    __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS());
1260    __ ummla(z1.VnS(), z10.VnB(), z2.VnB());
1261
1262    __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS());
1263    __ usmmla(z30.VnS(), z29.VnB(), z18.VnB());
1264
1265    __ movprfx(z4.VnS(), p0.Zeroing(), z5.VnS());
1266    __ usdot(z4.VnS(), z3.VnB(), z4.VnB());
1267
1268    __ movprfx(z10.VnS(), p0.Zeroing(), z5.VnS());
1269    __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0);
1270
1271    __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS());
1272    __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1);
1273  }
1274  assm.FinalizeCode();
1275
1276  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
1277}
1278
TEST(movprfx_negative_predication_fp) {
  // Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears
  // before an unpredicated instruction.
  //
  // Every pair below emits a predicated movprfx followed by an unpredicated
  // FP instruction, so none of the pairs is a valid movprfx combination.
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE,
                                 CPUFeatures::kSVEF32MM,
                                 CPUFeatures::kSVEF64MM);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
    static const size_t kPairCount = 11;
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    __ movprfx(z10.VnH(), p3.Zeroing(), z3.VnH());
    __ fcmla(z10.VnH(), z22.VnH(), z3.VnH(), 2, 180);

    __ movprfx(z12.VnS(), p4.Merging(), z14.VnS());
    __ fcmla(z12.VnS(), z3.VnS(), z10.VnS(), 1, 270);

    __ movprfx(z16.VnD(), p3.Zeroing(), z24.VnD());
    __ fmla(z16.VnD(), z24.VnD(), z8.VnD(), 1);

    __ movprfx(z9.VnH(), p7.Zeroing(), z0.VnH());
    __ fmla(z9.VnH(), z8.VnH(), z0.VnH(), 7);

    __ movprfx(z23.VnS(), p5.Merging(), z5.VnS());
    __ fmla(z23.VnS(), z7.VnS(), z5.VnS(), 3);

    __ movprfx(z19.VnD(), p6.Zeroing(), z8.VnD());
    __ fmls(z19.VnD(), z27.VnD(), z13.VnD(), 1);

    __ movprfx(z25.VnH(), p7.Merging(), z24.VnH());
    __ fmls(z25.VnH(), z24.VnH(), z4.VnH(), 4);

    __ movprfx(z2.VnS(), p1.Zeroing(), z0.VnS());
    __ fmls(z2.VnS(), z9.VnS(), z0.VnS(), 3);

    // Note that ftsmul and ftssel cannot take movprfx.
    __ movprfx(z22.VnD(), p6.Merging(), z16.VnD());
    __ ftmad(z22.VnD(), z22.VnD(), z20.VnD(), 2);

    __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS());
    __ fmmla(z30.VnS(), z29.VnS(), z18.VnS());

    __ movprfx(z31.VnD(), p1.Merging(), z5.VnD());
    __ fmmla(z31.VnD(), z30.VnD(), z18.VnD());
  }
  assm.FinalizeCode();

  // `false`: these are negative cases, so no pair should be accepted as a
  // valid movprfx combination.
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}
1330
1331TEST(movprfx_positive) {
1332  Assembler assm;
1333  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM);
1334  {
1335    // We have to use the Assembler directly to generate movprfx, so we need
1336    // to manually reserve space for the code we're about to emit.
1337    static const size_t kPairCount = 123;
1338    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
1339
1340    __ movprfx(z17, z28);
1341    __ abs(z17.VnB(), p6.Merging(), z28.VnB());
1342
1343    __ movprfx(z9, z7);
1344    __ add(z9.VnB(), p5.Merging(), z9.VnB(), z29.VnB());
1345
1346    __ movprfx(z11, z0);
1347    __ add(z11.VnD(), z11.VnD(), 42);
1348
1349    __ movprfx(z8.VnS(), p3.Zeroing(), z28.VnS());
1350    __ and_(z8.VnS(), p3.Merging(), z8.VnS(), z31.VnS());
1351
1352    __ movprfx(z20, z23);
1353    __ and_(z20.VnS(), z20.VnS(), 4);
1354
1355    __ movprfx(z24.VnD(), p5.Merging(), z11.VnD());
1356    __ asr(z24.VnD(), p5.Merging(), z24.VnD(), 3);
1357
1358    __ movprfx(z1, z13);
1359    __ asr(z1.VnH(), p3.Merging(), z1.VnH(), z4.VnH());
1360
1361    __ movprfx(z0.VnB(), p7.Zeroing(), z28.VnB());
1362    __ asr(z0.VnB(), p7.Merging(), z0.VnB(), z28.VnD());
1363
1364    __ movprfx(z15, z5);
1365    __ asr(z15.VnD(), p3.Merging(), z15.VnD(), z5.VnD());
1366
1367    __ movprfx(z24.VnH(), p3.Merging(), z22.VnH());
1368    __ asrd(z24.VnH(), p3.Merging(), z24.VnH(), 3);
1369
1370    __ movprfx(z2.VnS(), p3.Zeroing(), z20.VnS());
1371    __ asrr(z2.VnS(), p3.Merging(), z2.VnS(), z15.VnS());
1372
1373    __ movprfx(z17.VnB(), p7.Merging(), z6.VnB());
1374    __ bic(z17.VnB(), p7.Merging(), z17.VnB(), z25.VnB());
1375
1376    __ movprfx(z31, z6);
1377    __ bic(z31.VnD(), z31.VnD(), 4);
1378
1379    __ movprfx(z20, z2);
1380    __ clasta(z20.VnB(), p4, z20.VnB(), z15.VnB());
1381
1382    __ movprfx(z27, z11);
1383    __ clastb(z27.VnB(), p5, z27.VnB(), z6.VnB());
1384
1385    __ movprfx(z3.VnS(), p7.Zeroing(), z17.VnS());
1386    __ cls(z3.VnS(), p7.Merging(), z0.VnS());
1387
1388    __ movprfx(z29.VnB(), p0.Zeroing(), z24.VnB());
1389    __ clz(z29.VnB(), p0.Merging(), z7.VnB());
1390
1391    __ movprfx(z2.VnH(), p7.Zeroing(), z29.VnH());
1392    __ cnot(z2.VnH(), p7.Merging(), z28.VnH());
1393
1394    __ movprfx(z23, z5);
1395    __ cnt(z23.VnH(), p0.Merging(), z12.VnH());
1396
1397    __ movprfx(z5, z3);
1398    __ cpy(z5.VnD(), p1.Merging(), -42);
1399
1400    __ movprfx(z0, z12);
1401    __ cpy(z0.VnB(), p1.Merging(), w0);
1402
1403    __ movprfx(z27, z8);
1404    __ cpy(z27.VnB(), p0.Merging(), b0);
1405
1406    __ movprfx(z20, z24);
1407    __ decd(z20.VnD(), SVE_MUL3);
1408
1409    __ movprfx(z5, z28);
1410    __ dech(z5.VnH(), SVE_VL2);
1411
1412    __ movprfx(z7, z3);
1413    __ decp(z7.VnD(), p2);
1414
1415    __ movprfx(z4, z7);
1416    __ decw(z4.VnS(), SVE_ALL);
1417
1418    __ movprfx(z3, z18);
1419    __ eon(z3.VnS(), z3.VnS(), 4);
1420
1421    __ movprfx(z4.VnD(), p0.Merging(), z10.VnD());
1422    __ eor(z4.VnD(), p0.Merging(), z4.VnD(), z10.VnD());
1423
1424    __ movprfx(z15, z18);
1425    __ eor(z15.VnH(), z15.VnH(), 4);
1426
1427    __ movprfx(z17, z30);
1428    __ ext(z17.VnB(), z17.VnB(), z18.VnB(), 2);
1429
1430    __ movprfx(z19, z28);
1431    __ incd(z19.VnD(), SVE_MUL3);
1432
1433    __ movprfx(z13, z7);
1434    __ inch(z13.VnH(), SVE_VL2);
1435
1436    __ movprfx(z14, z21);
1437    __ incp(z14.VnD(), p1);
1438
1439    __ movprfx(z26, z12);
1440    __ incw(z26.VnS(), SVE_ALL);
1441
1442    __ movprfx(z16, z2);
1443    __ insr(z16.VnB(), w16);
1444
1445    __ movprfx(z20, z26);
1446    __ insr(z20.VnB(), b0);
1447
1448    __ movprfx(z30.VnD(), p0.Merging(), z23.VnD());
1449    __ lsl(z30.VnD(), p0.Merging(), z30.VnD(), 3);
1450
1451    __ movprfx(z28.VnS(), p2.Zeroing(), z6.VnS());
1452    __ lsl(z28.VnS(), p2.Merging(), z28.VnS(), z6.VnS());
1453
1454    __ movprfx(z15.VnH(), p6.Zeroing(), z3.VnH());
1455    __ lsl(z15.VnH(), p6.Merging(), z15.VnH(), z3.VnD());
1456
1457    __ movprfx(z13.VnD(), p4.Zeroing(), z14.VnD());
1458    __ lsl(z13.VnD(), p4.Merging(), z13.VnD(), z25.VnD());
1459
1460    __ movprfx(z14, z5);
1461    __ lslr(z14.VnS(), p0.Merging(), z14.VnS(), z17.VnS());
1462
1463    __ movprfx(z21, z1);
1464    __ lsr(z21.VnH(), p5.Merging(), z21.VnH(), 3);
1465
1466    __ movprfx(z11.VnH(), p0.Zeroing(), z13.VnH());
1467    __ lsr(z11.VnH(), p0.Merging(), z11.VnH(), z9.VnH());
1468
1469    __ movprfx(z24, z29);
1470    __ lsr(z24.VnS(), p4.Merging(), z24.VnS(), z1.VnD());
1471
1472    __ movprfx(z1.VnD(), p6.Merging(), z9.VnD());
1473    __ lsr(z1.VnD(), p6.Merging(), z1.VnD(), z9.VnD());
1474
1475    __ movprfx(z22, z3);
1476    __ lsrr(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB());
1477
1478    __ movprfx(z24.VnB(), p2.Zeroing(), z5.VnB());
1479    __ mad(z24.VnB(), p2.Merging(), z5.VnB(), z10.VnB());
1480
1481    __ movprfx(z8, z4);
1482    __ mla(z8.VnS(), p6.Merging(), z4.VnS(), z26.VnS());
1483
1484    __ movprfx(z10, z8);
1485    __ mls(z10.VnS(), p4.Merging(), z23.VnS(), z16.VnS());
1486
1487    // Aliases of cpy.
1488    __ movprfx(z4.VnH(), p5.Zeroing(), z2.VnH());
1489    __ mov(z4.VnH(), p5.Merging(), -42);
1490
1491    __ movprfx(z2.VnB(), p3.Zeroing(), z24.VnB());
1492    __ mov(z2.VnB(), p3.Merging(), w2);
1493
1494    __ movprfx(z27, z13);
1495    __ mov(z27.VnD(), p3.Merging(), d0);
1496
1497    __ movprfx(z18.VnB(), p5.Zeroing(), z11.VnB());
1498    __ msb(z18.VnB(), p5.Merging(), z3.VnB(), z11.VnB());
1499
1500    __ movprfx(z29, z16);
1501    __ mul(z29.VnS(), p6.Merging(), z29.VnS(), z9.VnS());
1502
1503    __ movprfx(z21, z23);
1504    __ mul(z21.VnH(), z21.VnH(), 42);
1505
1506    __ movprfx(z7.VnS(), p4.Merging(), z14.VnS());
1507    __ neg(z7.VnS(), p4.Merging(), z14.VnS());
1508
1509    __ movprfx(z8.VnD(), p4.Zeroing(), z5.VnD());
1510    __ not_(z8.VnD(), p4.Merging(), z5.VnD());
1511
1512    __ movprfx(z14, z13);
1513    __ orn(z14.VnS(), z14.VnS(), 4);
1514
1515    __ movprfx(z14, z13);
1516    __ orn(z14.VnS(), z14.VnS(), 4);
1517
1518    __ movprfx(z27, z17);
1519    __ orr(z27.VnD(), p2.Merging(), z27.VnD(), z17.VnD());
1520
1521    __ movprfx(z13.VnH(), p2.Zeroing(), z27.VnH());
1522    __ rbit(z13.VnH(), p2.Merging(), z1.VnH());
1523
1524    __ movprfx(z1, z29);
1525    __ revb(z1.VnS(), p4.Merging(), z6.VnS());
1526
1527    __ movprfx(z18.VnD(), p2.Zeroing(), z10.VnD());
1528    __ revh(z18.VnD(), p2.Merging(), z16.VnD());
1529
1530    __ movprfx(z2.VnD(), p1.Merging(), z10.VnD());
1531    __ revw(z2.VnD(), p1.Merging(), z1.VnD());
1532
1533    __ movprfx(z28.VnS(), p7.Merging(), z11.VnS());
1534    __ sabd(z28.VnS(), p7.Merging(), z28.VnS(), z11.VnS());
1535
1536    __ movprfx(z22.VnS(), p0.Merging(), z20.VnS());
1537    __ sdiv(z22.VnS(), p0.Merging(), z22.VnS(), z6.VnS());
1538
1539    __ movprfx(z13.VnS(), p7.Merging(), z0.VnS());
1540    __ sdivr(z13.VnS(), p7.Merging(), z13.VnS(), z2.VnS());
1541
1542    __ movprfx(z0, z12);
1543    __ sdot(z0.VnD(), z10.VnH(), z12.VnH(), 1);
1544
1545    __ movprfx(z8, z15);
1546    __ sdot(z8.VnS(), z15.VnB(), z12.VnB());
1547
1548    __ movprfx(z13, z0);
1549    __ sdot(z13.VnS(), z10.VnB(), z0.VnB(), 1);
1550
1551    __ movprfx(z11, z13);
1552    __ smax(z11.VnB(), p5.Merging(), z11.VnB(), z24.VnB());
1553
1554    __ movprfx(z3, z17);
1555    __ smax(z3.VnD(), z3.VnD(), 42);
1556
1557    __ movprfx(z10, z29);
1558    __ smin(z10.VnD(), p4.Merging(), z10.VnD(), z29.VnD());
1559
1560    __ movprfx(z13, z29);
1561    __ smin(z13.VnD(), z13.VnD(), 42);
1562
1563    __ movprfx(z6, z17);
1564    __ smulh(z6.VnS(), p7.Merging(), z6.VnS(), z31.VnS());
1565
1566    __ movprfx(z19, z20);
1567    __ splice(z19.VnB(), p3, z19.VnB(), z20.VnB());
1568
1569    __ movprfx(z0, z3);
1570    __ sqadd(z0.VnD(), z0.VnD(), 42);
1571
1572    __ movprfx(z29, z5);
1573    __ sqdecd(z29.VnD(), SVE_MUL3);
1574
1575    __ movprfx(z25, z11);
1576    __ sqdech(z25.VnH(), SVE_VL2);
1577
1578    __ movprfx(z16, z9);
1579    __ sqdecp(z16.VnS(), p1);
1580
1581    __ movprfx(z8, z17);
1582    __ sqdecw(z8.VnS(), SVE_ALL);
1583
1584    __ movprfx(z4, z5);
1585    __ sqincd(z4.VnD(), SVE_MUL3);
1586
1587    __ movprfx(z0, z17);
1588    __ sqinch(z0.VnH(), SVE_VL2);
1589
1590    __ movprfx(z7, z27);
1591    __ sqincp(z7.VnS(), p6);
1592
1593    __ movprfx(z10, z9);
1594    __ sqincw(z10.VnS(), SVE_ALL);
1595
1596    __ movprfx(z31, z22);
1597    __ sqsub(z31.VnB(), z31.VnB(), 42);
1598
1599    __ movprfx(z12.VnH(), p7.Zeroing(), z23.VnH());
1600    __ sub(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnH());
1601
1602    __ movprfx(z10, z1);
1603    __ sub(z10.VnH(), z10.VnH(), 42);
1604
1605    __ movprfx(z15.VnB(), p0.Merging(), z0.VnB());
1606    __ subr(z15.VnB(), p0.Merging(), z15.VnB(), z0.VnB());
1607
1608    __ movprfx(z17, z2);
1609    __ subr(z17.VnH(), z17.VnH(), 42);
1610
1611    __ movprfx(z5, z3);
1612    __ sxtb(z5.VnD(), p6.Merging(), z20.VnD());
1613
1614    __ movprfx(z11, z17);
1615    __ sxth(z11.VnD(), p6.Merging(), z25.VnD());
1616
1617    __ movprfx(z26, z4);
1618    __ sxtw(z26.VnD(), p5.Merging(), z4.VnD());
1619
1620    __ movprfx(z15.VnD(), p0.Zeroing(), z8.VnD());
1621    __ uabd(z15.VnD(), p0.Merging(), z15.VnD(), z20.VnD());
1622
1623    __ movprfx(z21, z24);
1624    __ udiv(z21.VnD(), p3.Merging(), z21.VnD(), z24.VnD());
1625
1626    __ movprfx(z22, z10);
1627    __ udivr(z22.VnD(), p7.Merging(), z22.VnD(), z27.VnD());
1628
1629    __ movprfx(z27, z25);
1630    __ udot(z27.VnD(), z29.VnH(), z3.VnH(), 1);
1631
1632    __ movprfx(z29, z10);
1633    __ udot(z29.VnS(), z10.VnB(), z21.VnB());
1634
1635    __ movprfx(z18, z0);
1636    __ udot(z18.VnS(), z14.VnB(), z0.VnB(), 1);
1637
1638    __ movprfx(z6, z30);
1639    __ umax(z6.VnS(), p2.Merging(), z6.VnS(), z27.VnS());
1640
1641    __ movprfx(z31, z17);
1642    __ umax(z31.VnD(), z31.VnD(), 42);
1643
1644    __ movprfx(z27.VnS(), p0.Merging(), z20.VnS());
1645    __ umin(z27.VnS(), p0.Merging(), z27.VnS(), z8.VnS());
1646
1647    __ movprfx(z0, z11);
1648    __ umin(z0.VnH(), z0.VnH(), 42);
1649
1650    __ movprfx(z21, z17);
1651    __ umulh(z21.VnB(), p0.Merging(), z21.VnB(), z30.VnB());
1652
1653    __ movprfx(z9, z24);
1654    __ uqadd(z9.VnD(), z9.VnD(), 42);
1655
1656    __ movprfx(z18, z13);
1657    __ uqdecd(z18.VnD(), SVE_MUL3);
1658
1659    __ movprfx(z20, z23);
1660    __ uqdech(z20.VnH(), SVE_VL2);
1661
1662    __ movprfx(z12, z29);
1663    __ uqdecp(z12.VnS(), p7);
1664
1665    __ movprfx(z24, z25);
1666    __ uqdecw(z24.VnS(), SVE_ALL);
1667
1668    __ movprfx(z13, z1);
1669    __ uqincd(z13.VnD(), SVE_MUL3);
1670
1671    __ movprfx(z5, z19);
1672    __ uqinch(z5.VnH(), SVE_VL2);
1673
1674    __ movprfx(z6, z25);
1675    __ uqincp(z6.VnS(), p5);
1676
1677    __ movprfx(z12, z14);
1678    __ uqincw(z12.VnS(), SVE_ALL);
1679
1680    __ movprfx(z13, z6);
1681    __ uqsub(z13.VnH(), z13.VnH(), 42);
1682
1683    __ movprfx(z31, z3);
1684    __ uxtb(z31.VnS(), p0.Merging(), z3.VnS());
1685
1686    __ movprfx(z18.VnD(), p4.Merging(), z25.VnD());
1687    __ uxth(z18.VnD(), p4.Merging(), z25.VnD());
1688
1689    __ movprfx(z18.VnD(), p7.Merging(), z25.VnD());
1690    __ uxtw(z18.VnD(), p7.Merging(), z25.VnD());
1691
1692    __ movprfx(z22, z5);
1693    __ smmla(z22.VnS(), z21.VnB(), z0.VnB());
1694
1695    __ movprfx(z1, z5);
1696    __ ummla(z1.VnS(), z10.VnB(), z0.VnB());
1697
1698    __ movprfx(z30, z5);
1699    __ usmmla(z30.VnS(), z31.VnB(), z18.VnB());
1700
1701    __ movprfx(z4, z5);
1702    __ usdot(z4.VnS(), z3.VnB(), z3.VnB());
1703
1704    __ movprfx(z10, z5);
1705    __ usdot(z10.VnS(), z9.VnB(), z0.VnB(), 0);
1706
1707    __ movprfx(z1, z5);
1708    __ sudot(z1.VnS(), z10.VnB(), z2.VnB(), 1);
1709  }
1710  assm.FinalizeCode();
1711
1712  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true);
1713}
1714
1715TEST(movprfx_positive_fp) {
1716  Assembler assm;
1717  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE,
1718                                 CPUFeatures::kSVEF32MM,
1719                                 CPUFeatures::kSVEF64MM);
1720  {
1721    // We have to use the Assembler directly to generate movprfx, so we need
1722    // to manually reserve space for the code we're about to emit.
1723    static const size_t kPairCount = 75;
1724    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
1725
1726    __ movprfx(z18.VnS(), p6.Zeroing(), z20.VnS());
1727    __ fabd(z18.VnS(), p6.Merging(), z18.VnS(), z19.VnS());
1728
1729    __ movprfx(z28.VnD(), p4.Zeroing(), z24.VnD());
1730    __ fabs(z28.VnD(), p4.Merging(), z24.VnD());
1731
1732    __ movprfx(z12, z8);
1733    __ fadd(z12.VnS(), p2.Merging(), z12.VnS(), 0.5);
1734
1735    __ movprfx(z0.VnS(), p1.Merging(), z9.VnS());
1736    __ fadd(z0.VnS(), p1.Merging(), z0.VnS(), z9.VnS());
1737
1738    __ movprfx(z10.VnH(), p2.Merging(), z2.VnH());
1739    __ fcadd(z10.VnH(), p2.Merging(), z10.VnH(), z20.VnH(), 90);
1740
1741    __ movprfx(z21, z6);
1742    __ fcmla(z21.VnH(), z31.VnH(), z6.VnH(), 2, 180);
1743
1744    __ movprfx(z16, z6);
1745    __ fcmla(z16.VnS(), z11.VnS(), z6.VnS(), 1, 270);
1746
1747    __ movprfx(z15.VnH(), p6.Merging(), z16.VnH());
1748    __ fcpy(z15.VnH(), p6.Merging(), 1.25);
1749
1750    __ movprfx(z1, z14);
1751    __ fcvt(z1.VnD(), p2.Merging(), z4.VnH());
1752
1753    __ movprfx(z25.VnD(), p6.Merging(), z1.VnD());
1754    __ fcvt(z25.VnD(), p6.Merging(), z1.VnS());
1755
1756    __ movprfx(z18.VnS(), p2.Merging(), z2.VnS());
1757    __ fcvt(z18.VnH(), p2.Merging(), z7.VnS());
1758
1759    __ movprfx(z21.VnD(), p5.Zeroing(), z26.VnD());
1760    __ fcvt(z21.VnH(), p5.Merging(), z26.VnD());
1761
1762    __ movprfx(z12.VnD(), p1.Merging(), z18.VnD());
1763    __ fcvtzs(z12.VnD(), p1.Merging(), z18.VnH());
1764
1765    __ movprfx(z3.VnS(), p2.Merging(), z0.VnS());
1766    __ fcvtzs(z3.VnS(), p2.Merging(), z26.VnS());
1767
1768    __ movprfx(z21.VnS(), p4.Merging(), z7.VnS());
1769    __ fcvtzs(z21.VnS(), p4.Merging(), z7.VnH());
1770
1771    __ movprfx(z16.VnD(), p3.Zeroing(), z4.VnD());
1772    __ fcvtzs(z16.VnS(), p3.Merging(), z28.VnD());
1773
1774    __ movprfx(z31.VnD(), p4.Merging(), z1.VnD());
1775    __ fcvtzu(z31.VnD(), p4.Merging(), z1.VnH());
1776
1777    __ movprfx(z23.VnH(), p0.Zeroing(), z28.VnH());
1778    __ fcvtzu(z23.VnH(), p0.Merging(), z28.VnH());
1779
1780    __ movprfx(z2, z12);
1781    __ fcvtzu(z2.VnD(), p3.Merging(), z28.VnS());
1782
1783    __ movprfx(z4, z7);
1784    __ fcvtzu(z4.VnS(), p7.Merging(), z16.VnD());
1785
1786    __ movprfx(z13.VnS(), p3.Zeroing(), z23.VnS());
1787    __ fdiv(z13.VnS(), p3.Merging(), z13.VnS(), z23.VnS());
1788
1789    __ movprfx(z6.VnD(), p1.Zeroing(), z16.VnD());
1790    __ fdivr(z6.VnD(), p1.Merging(), z6.VnD(), z5.VnD());
1791
1792    __ movprfx(z31, z23);
1793    __ fmad(z31.VnS(), p5.Merging(), z23.VnS(), z11.VnS());
1794
1795    __ movprfx(z14.VnH(), p7.Merging(), z21.VnH());
1796    __ fmax(z14.VnH(), p7.Merging(), z14.VnH(), 0.0);
1797
1798    __ movprfx(z17.VnS(), p4.Merging(), z9.VnS());
1799    __ fmax(z17.VnS(), p4.Merging(), z17.VnS(), z9.VnS());
1800
1801    __ movprfx(z1.VnS(), p3.Zeroing(), z30.VnS());
1802    __ fmaxnm(z1.VnS(), p3.Merging(), z1.VnS(), 0.0);
1803
1804    __ movprfx(z10.VnD(), p1.Zeroing(), z17.VnD());
1805    __ fmaxnm(z10.VnD(), p1.Merging(), z10.VnD(), z17.VnD());
1806
1807    __ movprfx(z3, z13);
1808    __ fmin(z3.VnS(), p0.Merging(), z3.VnS(), 0.0);
1809
1810    __ movprfx(z15, z21);
1811    __ fmin(z15.VnS(), p4.Merging(), z15.VnS(), z21.VnS());
1812
1813    __ movprfx(z30.VnH(), p7.Zeroing(), z25.VnH());
1814    __ fminnm(z30.VnH(), p7.Merging(), z30.VnH(), 0.0);
1815
1816    __ movprfx(z31, z15);
1817    __ fminnm(z31.VnD(), p5.Merging(), z31.VnD(), z25.VnD());
1818
1819    __ movprfx(z27, z28);
1820    __ fmla(z27.VnD(), z28.VnD(), z12.VnD(), 1);
1821
1822    __ movprfx(z26.VnH(), p6.Zeroing(), z13.VnH());
1823    __ fmla(z26.VnH(), p6.Merging(), z13.VnH(), z7.VnH());
1824
1825    __ movprfx(z26, z10);
1826    __ fmla(z26.VnH(), z10.VnH(), z1.VnH(), 7);
1827
1828    __ movprfx(z0, z1);
1829    __ fmla(z0.VnS(), z25.VnS(), z1.VnS(), 3);
1830
1831    __ movprfx(z7, z3);
1832    __ fmls(z7.VnD(), z30.VnD(), z3.VnD(), 1);
1833
1834    __ movprfx(z1, z24);
1835    __ fmls(z1.VnD(), p5.Merging(), z20.VnD(), z24.VnD());
1836
1837    __ movprfx(z19, z18);
1838    __ fmls(z19.VnH(), z18.VnH(), z7.VnH(), 4);
1839
1840    __ movprfx(z0, z26);
1841    __ fmls(z0.VnS(), z17.VnS(), z4.VnS(), 3);
1842
1843    __ movprfx(z19.VnS(), p7.Zeroing(), z6.VnS());
1844    __ fmov(z19.VnS(), p7.Merging(), 0.0);
1845
1846    __ movprfx(z21, z15);
1847    __ fmov(z21.VnH(), p7.Merging(), 2.5);
1848
1849    __ movprfx(z23, z18);
1850    __ fmsb(z23.VnS(), p4.Merging(), z1.VnS(), z7.VnS());
1851
1852    __ movprfx(z8, z28);
1853    __ fmul(z8.VnS(), p4.Merging(), z8.VnS(), 2.0);
1854
1855    __ movprfx(z6.VnD(), p6.Merging(), z27.VnD());
1856    __ fmul(z6.VnD(), p6.Merging(), z6.VnD(), z27.VnD());
1857
1858    __ movprfx(z6.VnH(), p0.Merging(), z19.VnH());
1859    __ fmulx(z6.VnH(), p0.Merging(), z6.VnH(), z19.VnH());
1860
1861    __ movprfx(z5.VnH(), p0.Merging(), z1.VnH());
1862    __ fneg(z5.VnH(), p0.Merging(), z1.VnH());
1863
1864    __ movprfx(z22.VnD(), p4.Zeroing(), z24.VnD());
1865    __ fnmad(z22.VnD(), p4.Merging(), z24.VnD(), z12.VnD());
1866
1867    __ movprfx(z5.VnS(), p0.Merging(), z29.VnS());
1868    __ fnmla(z5.VnS(), p0.Merging(), z17.VnS(), z29.VnS());
1869
1870    __ movprfx(z5, z3);
1871    __ fnmls(z5.VnD(), p5.Merging(), z3.VnD(), z2.VnD());
1872
1873    __ movprfx(z9.VnD(), p2.Zeroing(), z7.VnD());
1874    __ fnmsb(z9.VnD(), p2.Merging(), z7.VnD(), z23.VnD());
1875
1876    // Note that frecpe and frecps _cannot_ take movprfx.
1877    __ movprfx(z12.VnH(), p1.Zeroing(), z17.VnH());
1878    __ frecpx(z12.VnH(), p1.Merging(), z4.VnH());
1879
1880    __ movprfx(z28.VnS(), p4.Zeroing(), z27.VnS());
1881    __ frinta(z28.VnS(), p4.Merging(), z24.VnS());
1882
1883    __ movprfx(z7.VnD(), p7.Merging(), z25.VnD());
1884    __ frinti(z7.VnD(), p7.Merging(), z25.VnD());
1885
1886    __ movprfx(z10, z21);
1887    __ frintm(z10.VnD(), p5.Merging(), z26.VnD());
1888
1889    __ movprfx(z25, z21);
1890    __ frintn(z25.VnH(), p4.Merging(), z1.VnH());
1891
1892    __ movprfx(z25, z9);
1893    __ frintp(z25.VnH(), p1.Merging(), z9.VnH());
1894
1895    __ movprfx(z30, z16);
1896    __ frintx(z30.VnS(), p1.Merging(), z16.VnS());
1897
1898    __ movprfx(z0.VnD(), p5.Merging(), z9.VnD());
1899    __ frintz(z0.VnD(), p5.Merging(), z23.VnD());
1900
1901    __ movprfx(z11.VnD(), p7.Merging(), z2.VnD());
1902    __ fscale(z11.VnD(), p7.Merging(), z11.VnD(), z2.VnD());
1903
1904    __ movprfx(z23.VnS(), p4.Merging(), z17.VnS());
1905    __ fsqrt(z23.VnS(), p4.Merging(), z10.VnS());
1906
1907    __ movprfx(z0.VnD(), p2.Merging(), z26.VnD());
1908    __ fsub(z0.VnD(), p2.Merging(), z0.VnD(), 1.0);
1909
1910    __ movprfx(z28.VnD(), p1.Zeroing(), z16.VnD());
1911    __ fsub(z28.VnD(), p1.Merging(), z28.VnD(), z16.VnD());
1912
1913    __ movprfx(z22, z27);
1914    __ fsubr(z22.VnD(), p4.Merging(), z22.VnD(), 1.0);
1915
1916    __ movprfx(z4.VnS(), p2.Merging(), z26.VnS());
1917    __ fsubr(z4.VnS(), p2.Merging(), z4.VnS(), z26.VnS());
1918
1919    // Note that ftsmul and ftssel _cannot_ take movprfx.
1920    __ movprfx(z10, z4);
1921    __ ftmad(z10.VnS(), z10.VnS(), z4.VnS(), 2);
1922
1923    __ movprfx(z2, z16);
1924    __ scvtf(z2.VnD(), p1.Merging(), z16.VnS());
1925
1926    __ movprfx(z10, z20);
1927    __ scvtf(z10.VnD(), p5.Merging(), z20.VnD());
1928
1929    __ movprfx(z29, z28);
1930    __ scvtf(z29.VnS(), p0.Merging(), z31.VnD());
1931
1932    __ movprfx(z26.VnD(), p3.Merging(), z13.VnD());
1933    __ scvtf(z26.VnH(), p3.Merging(), z5.VnD());
1934
1935    __ movprfx(z7.VnD(), p3.Zeroing(), z26.VnD());
1936    __ ucvtf(z7.VnD(), p3.Merging(), z26.VnS());
1937
1938    __ movprfx(z13, z17);
1939    __ ucvtf(z13.VnD(), p7.Merging(), z17.VnD());
1940
1941    __ movprfx(z24.VnD(), p1.Merging(), z31.VnD());
1942    __ ucvtf(z24.VnS(), p1.Merging(), z18.VnD());
1943
1944    __ movprfx(z17.VnD(), p4.Merging(), z22.VnD());
1945    __ ucvtf(z17.VnH(), p4.Merging(), z4.VnD());
1946
1947    __ movprfx(z30, z5);
1948    __ fmmla(z30.VnS(), z29.VnS(), z18.VnS());
1949
1950    __ movprfx(z31, z5);
1951    __ fmmla(z31.VnD(), z30.VnD(), z18.VnD());
1952  }
1953  assm.FinalizeCode();
1954
1955  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true);
1956}
1957
1958TEST(movprfx_positive_sve2) {
1959  Assembler assm;
1960  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
1961  {
1962    // We have to use the Assembler directly to generate movprfx, so we need
1963    // to manually reserve space for the code we're about to emit.
1964    static const size_t kPairCount = 145;
1965    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
1966
1967    __ movprfx(z25, z26);
1968    __ adclb(z25.VnS(), z17.VnS(), z24.VnS());
1969
1970    __ movprfx(z0, z1);
1971    __ adclt(z0.VnS(), z2.VnS(), z15.VnS());
1972
1973    __ movprfx(z3, z4);
1974    __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB());
1975
1976    __ movprfx(z6, z7);
1977    __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD());
1978
1979    __ movprfx(z18, z19);
1980    __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD());
1981
1982    __ movprfx(z7, z8);
1983    __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD());
1984
1985    __ movprfx(z21, z22);
1986    __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD());
1987
1988    __ movprfx(z5, z6);
1989    __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90);
1990
1991    __ movprfx(z7, z8);
1992    __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0);
1993
1994    __ movprfx(z7, z8);
1995    __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0);
1996
1997    __ movprfx(z7, z8);
1998    __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0);
1999
2000    __ movprfx(z19, z20);
2001    __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0);
2002
2003    __ movprfx(z19, z20);
2004    __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0);
2005
2006    __ movprfx(z19, z20);
2007    __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0);
2008
2009    __ movprfx(z10, z11);
2010    __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD());
2011
2012    __ movprfx(z3, z4);
2013    __ eorbt(z3.VnB(), z10.VnB(), z8.VnB());
2014
2015    __ movprfx(z20, z22);
2016    __ eortb(z20.VnB(), z21.VnB(), z15.VnB());
2017
2018    __ movprfx(z14, z15);
2019    __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD());
2020
2021    __ movprfx(z14.VnD(), p4.Merging(), z15.VnD());
2022    __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD());
2023
2024    __ movprfx(z15.VnH(), p0.Merging(), z16.VnH());
2025    __ flogb(z15.VnH(), p0.Merging(), z3.VnH());
2026
2027    __ movprfx(z2, z3);
2028    __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD());
2029
2030    __ movprfx(z22, z23);
2031    __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD());
2032
2033    __ movprfx(z1, z2);
2034    __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD());
2035
2036    __ movprfx(z16, z17);
2037    __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD());
2038
2039    __ movprfx(z16, z17);
2040    __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH());
2041
2042    __ movprfx(z16, z17);
2043    __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0);
2044
2045    __ movprfx(z18, z19);
2046    __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH());
2047
2048    __ movprfx(z18, z19);
2049    __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0);
2050
2051    __ movprfx(z16, z17);
2052    __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH());
2053
2054    __ movprfx(z16, z17);
2055    __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0);
2056
2057    __ movprfx(z3, z4);
2058    __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH());
2059
2060    __ movprfx(z3, z4);
2061    __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0);
2062
2063    __ movprfx(z2, z3);
2064    __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0);
2065
2066    __ movprfx(z2, z3);
2067    __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0);
2068
2069    __ movprfx(z2, z3);
2070    __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0);
2071
2072    __ movprfx(z2, z3);
2073    __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0);
2074
2075    __ movprfx(z2, z3);
2076    __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0);
2077
2078    __ movprfx(z2, z3);
2079    __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0);
2080
2081    __ movprfx(z17, z18);
2082    __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD());
2083
2084    __ movprfx(z13, z14);
2085    __ saba(z13.VnB(), z2.VnB(), z31.VnB());
2086
2087    __ movprfx(z13, z14);
2088    __ sabalb(z13.VnD(), z20.VnS(), z26.VnS());
2089
2090    __ movprfx(z14, z15);
2091    __ sabalt(z14.VnD(), z19.VnS(), z10.VnS());
2092
2093    __ movprfx(z19.VnD(), p5.Merging(), z20.VnD());
2094    __ sadalp(z19.VnD(), p5.Merging(), z9.VnS());
2095
2096    __ movprfx(z17, z18);
2097    __ sbclb(z17.VnS(), z10.VnS(), z8.VnS());
2098
2099    __ movprfx(z20, z21);
2100    __ sbclt(z20.VnS(), z0.VnS(), z13.VnS());
2101
2102    __ movprfx(z20.VnB(), p3.Merging(), z21.VnB());
2103    __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB());
2104
2105    __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
2106    __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB());
2107
2108    __ movprfx(z1.VnB(), p0.Merging(), z2.VnB());
2109    __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB());
2110
2111    __ movprfx(z5, z6);
2112    __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB());
2113
2114    __ movprfx(z27, z28);
2115    __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB());
2116
2117    __ movprfx(z1, z2);
2118    __ smlalb(z1.VnD(), z3.VnS(), z23.VnS());
2119
2120    __ movprfx(z1, z2);
2121    __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2122
2123    __ movprfx(z1, z2);
2124    __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2125
2126    __ movprfx(z1, z2);
2127    __ smlalt(z1.VnD(), z3.VnS(), z23.VnS());
2128
2129    __ movprfx(z1, z2);
2130    __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2131
2132    __ movprfx(z1, z2);
2133    __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2134
2135    __ movprfx(z1, z2);
2136    __ smlslb(z1.VnD(), z3.VnS(), z23.VnS());
2137
2138    __ movprfx(z1, z2);
2139    __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2140
2141    __ movprfx(z1, z2);
2142    __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2143
2144    __ movprfx(z1, z2);
2145    __ smlslt(z1.VnD(), z3.VnS(), z23.VnS());
2146
2147    __ movprfx(z1, z2);
2148    __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2149
2150    __ movprfx(z1, z2);
2151    __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2152
2153    __ movprfx(z29.VnB(), p1.Merging(), z30.VnB());
2154    __ sqabs(z29.VnB(), p1.Merging(), z18.VnB());
2155
2156    __ movprfx(z28.VnB(), p0.Merging(), z29.VnB());
2157    __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB());
2158
2159    __ movprfx(z20, z21);
2160    __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90);
2161
2162    __ movprfx(z6, z7);
2163    __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS());
2164
2165    __ movprfx(z6, z7);
2166    __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0);
2167
2168    __ movprfx(z6, z7);
2169    __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0);
2170
2171    __ movprfx(z23, z24);
2172    __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS());
2173
2174    __ movprfx(z11, z12);
2175    __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS());
2176
2177    __ movprfx(z11, z12);
2178    __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0);
2179
2180    __ movprfx(z11, z12);
2181    __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0);
2182
2183    __ movprfx(z16, z17);
2184    __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS());
2185
2186    __ movprfx(z16, z17);
2187    __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0);
2188
2189    __ movprfx(z16, z17);
2190    __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0);
2191
2192    __ movprfx(z26, z27);
2193    __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS());
2194
2195    __ movprfx(z21, z22);
2196    __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS());
2197
2198    __ movprfx(z21, z22);
2199    __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0);
2200
2201    __ movprfx(z21, z22);
2202    __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0);
2203
2204    __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
2205    __ sqneg(z21.VnB(), p0.Merging(), z17.VnB());
2206
2207    __ movprfx(z31, z0);
2208    __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0);
2209
2210    __ movprfx(z31, z0);
2211    __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0);
2212
2213    __ movprfx(z31, z0);
2214    __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0);
2215
2216    __ movprfx(z27, z28);
2217    __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB());
2218
2219    __ movprfx(z27, z28);
2220    __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0);
2221
2222    __ movprfx(z27, z28);
2223    __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0);
2224
2225    __ movprfx(z27, z28);
2226    __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0);
2227
2228    __ movprfx(z11, z12);
2229    __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB());
2230
2231    __ movprfx(z11, z12);
2232    __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0);
2233
2234    __ movprfx(z11, z12);
2235    __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0);
2236
2237    __ movprfx(z11, z12);
2238    __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0);
2239
2240    __ movprfx(z31.VnB(), p5.Merging(), z0.VnB());
2241    __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB());
2242
2243    __ movprfx(z25.VnB(), p6.Merging(), z26.VnB());
2244    __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB());
2245
2246    __ movprfx(z0.VnB(), p5.Merging(), z1.VnB());
2247    __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0);
2248
2249    __ movprfx(z0.VnB(), p5.Merging(), z1.VnB());
2250    __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB());
2251
2252    __ movprfx(z7.VnB(), p3.Merging(), z8.VnB());
2253    __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB());
2254
2255    __ movprfx(z10.VnB(), p1.Merging(), z11.VnB());
2256    __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0);
2257
2258    __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
2259    __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
2260
2261    __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
2262    __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
2263
2264    __ movprfx(z23.VnB(), p4.Merging(), z24.VnB());
2265    __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB());
2266
2267    __ movprfx(z31.VnB(), p7.Merging(), z0.VnB());
2268    __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB());
2269
2270    __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
2271    __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB());
2272
2273    __ movprfx(z12.VnB(), p0.Merging(), z13.VnB());
2274    __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1);
2275
2276    __ movprfx(z0, z1);
2277    __ srsra(z0.VnB(), z8.VnB(), 1);
2278
2279    __ movprfx(z0, z1);
2280    __ ssra(z0.VnB(), z8.VnB(), 1);
2281
2282    __ movprfx(z26.VnB(), p2.Merging(), z27.VnB());
2283    __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB());
2284
2285    __ movprfx(z23, z24);
2286    __ uaba(z23.VnB(), z22.VnB(), z20.VnB());
2287
2288    __ movprfx(z11, z12);
2289    __ uabalb(z11.VnD(), z25.VnS(), z12.VnS());
2290
2291    __ movprfx(z4, z5);
2292    __ uabalt(z4.VnD(), z2.VnS(), z31.VnS());
2293
2294    __ movprfx(z20.VnD(), p4.Merging(), z21.VnD());
2295    __ uadalp(z20.VnD(), p4.Merging(), z5.VnS());
2296
2297    __ movprfx(z21.VnB(), p2.Merging(), z22.VnB());
2298    __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB());
2299
2300    __ movprfx(z1.VnB(), p4.Merging(), z2.VnB());
2301    __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB());
2302
2303    __ movprfx(z18.VnB(), p0.Merging(), z19.VnB());
2304    __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB());
2305
2306    __ movprfx(z7, z8);
2307    __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB());
2308
2309    __ movprfx(z10, z11);
2310    __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB());
2311
2312    __ movprfx(z31, z0);
2313    __ umlalb(z31.VnD(), z9.VnS(), z21.VnS());
2314
2315    __ movprfx(z31, z0);
2316    __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0);
2317
2318    __ movprfx(z31, z0);
2319    __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0);
2320
2321    __ movprfx(z11, z12);
2322    __ umlalt(z11.VnD(), z5.VnS(), z22.VnS());
2323
2324    __ movprfx(z11, z12);
2325    __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0);
2326
2327    __ movprfx(z11, z12);
2328    __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0);
2329
2330    __ movprfx(z28, z29);
2331    __ umlslb(z28.VnD(), z13.VnS(), z9.VnS());
2332
2333    __ movprfx(z28, z29);
2334    __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0);
2335
2336    __ movprfx(z28, z29);
2337    __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0);
2338
2339    __ movprfx(z9, z10);
2340    __ umlslt(z9.VnD(), z12.VnS(), z30.VnS());
2341
2342    __ movprfx(z9, z10);
2343    __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0);
2344
2345    __ movprfx(z9, z10);
2346    __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0);
2347
2348    __ movprfx(z24.VnB(), p7.Merging(), z25.VnB());
2349    __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()),
2350
2351        __ movprfx(z20.VnB(), p1.Merging(), z21.VnB());
2352    __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB());
2353
2354    __ movprfx(z8.VnB(), p5.Merging(), z9.VnB());
2355    __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB());
2356
2357    __ movprfx(z29.VnB(), p7.Merging(), z30.VnB());
2358    __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0);
2359
2360    __ movprfx(z29.VnB(), p7.Merging(), z30.VnB());
2361    __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB());
2362
2363    __ movprfx(z12.VnB(), p1.Merging(), z13.VnB());
2364    __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB());
2365
2366    __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
2367    __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
2368
2369    __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
2370    __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
2371
2372    __ movprfx(z25.VnS(), p7.Merging(), z26.VnS());
2373    __ urecpe(z25.VnS(), p7.Merging(), z2.VnS());
2374
2375    __ movprfx(z29.VnB(), p4.Merging(), z30.VnB());
2376    __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB());
2377
2378    __ movprfx(z15.VnB(), p2.Merging(), z16.VnB());
2379    __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB());
2380
2381    __ movprfx(z27.VnB(), p1.Merging(), z28.VnB());
2382    __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB());
2383
2384    __ movprfx(z31.VnB(), p2.Merging(), z0.VnB());
2385    __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1);
2386
2387    __ movprfx(z4.VnS(), p3.Merging(), z5.VnS());
2388    __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS());
2389
2390    __ movprfx(z0, z1);
2391    __ ursra(z0.VnB(), z8.VnB(), 1);
2392
2393    __ movprfx(z25.VnB(), p4.Merging(), z26.VnB());
2394    __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB());
2395
2396    __ movprfx(z0, z1);
2397    __ usra(z0.VnB(), z8.VnB(), 1);
2398
2399    __ movprfx(z16, z17);
2400    __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1);
2401  }
2402  assm.FinalizeCode();
2403
2404  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true);
2405}
2406
// Emit movprfx followed by SVE2 (and SVEBitPerm) instructions that must NOT
// be prefixed by it. The checker is called with `false` (second argument),
// i.e. every pair emitted here is expected to be rejected as an illegal
// movprfx + instruction combination.
TEST(movprfx_negative_instructions_sve2) {
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE,
                                 CPUFeatures::kSVE2,
                                 CPUFeatures::kSVEBitPerm);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
    static const size_t kPairCount = 134;
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    __ movprfx(z29, z30);
    __ addhnb(z29.VnS(), z19.VnD(), z2.VnD());

    __ movprfx(z8, z9);
    __ addhnt(z8.VnS(), z12.VnD(), z6.VnD());

    __ movprfx(z18, z19);
    __ bdep(z18.VnB(), z10.VnB(), z0.VnB());

    __ movprfx(z6, z7);
    __ bext(z6.VnB(), z2.VnB(), z5.VnB());

    __ movprfx(z24, z25);
    __ bgrp(z24.VnB(), z9.VnB(), z5.VnB());

    __ movprfx(z1, z2);
    __ fcvtlt(z1.VnD(), p1.Merging(), z28.VnS());

    __ movprfx(z1, z2);
    __ fcvtlt(z1.VnS(), p1.Merging(), z28.VnH());

    __ movprfx(z4, z5);
    __ fcvtnt(z4.VnH(), p7.Merging(), z0.VnS());

    __ movprfx(z4, z5);
    __ fcvtnt(z4.VnS(), p7.Merging(), z0.VnD());

    __ movprfx(z27, z28);
    __ fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD());

    __ movprfx(z24, z25);
    __ histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS());

    __ movprfx(z22, z23);
    __ histseg(z22.VnB(), z14.VnB(), z8.VnB());

    // Loads and stores can never be prefixed by movprfx.
    __ movprfx(z21, z22);
    __ ldnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), x23));

    __ movprfx(z21, z22);
    __ ldnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23));

    __ movprfx(z10, z11);
    __ ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z23.VnD(), x6));

    __ movprfx(z30, z31);
    __ ldnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x11));

    __ movprfx(z30, z31);
    __ ldnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x11));

    __ movprfx(z7, z8);
    __ ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11));

    __ movprfx(z7, z8);
    __ ldnt1sb(z7.VnD(), p3.Zeroing(), SVEMemOperand(z18.VnD(), x11));

    __ movprfx(z17, z18);
    __ ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19));

    __ movprfx(z17, z18);
    __ ldnt1sh(z17.VnD(), p5.Zeroing(), SVEMemOperand(z31.VnD(), x19));

    __ movprfx(z3, z4);
    __ ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10));

    __ movprfx(z0, z1);
    __ ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1));

    __ movprfx(z0, z1);
    __ ldnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1));

    __ movprfx(z18, z19);
    __ match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB());

    __ movprfx(z15, z16);
    __ mul(z15.VnB(), z15.VnB(), z15.VnB());

    __ movprfx(z15, z16);
    __ mul(z15.VnH(), z15.VnH(), z1.VnH(), 0);

    __ movprfx(z15, z16);
    __ mul(z15.VnS(), z15.VnS(), z1.VnS(), 0);

    __ movprfx(z15, z16);
    __ mul(z15.VnD(), z15.VnD(), z1.VnD(), 0);

    __ movprfx(z20, z21);
    __ nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB());

    __ movprfx(z0, z1);
    __ pmul(z0.VnB(), z5.VnB(), z5.VnB());

    __ movprfx(z12, z13);
    __ pmullb(z12.VnD(), z21.VnS(), z12.VnS());

    __ movprfx(z31, z0);
    __ pmullt(z31.VnD(), z30.VnS(), z26.VnS());

    __ movprfx(z0, z1);
    __ raddhnb(z0.VnS(), z11.VnD(), z10.VnD());

    __ movprfx(z23, z24);
    __ raddhnt(z23.VnS(), z27.VnD(), z9.VnD());

    __ movprfx(z5, z6);
    __ rshrnb(z5.VnB(), z1.VnH(), 1);

    __ movprfx(z5, z6);
    __ rshrnt(z5.VnB(), z1.VnH(), 8);

    __ movprfx(z30, z31);
    __ rsubhnb(z30.VnS(), z29.VnD(), z11.VnD());

    __ movprfx(z25, z26);
    __ rsubhnt(z25.VnS(), z7.VnD(), z18.VnD());

    __ movprfx(z2, z3);
    __ sabdlb(z2.VnD(), z21.VnS(), z3.VnS());

    __ movprfx(z25, z26);
    __ sabdlt(z25.VnD(), z23.VnS(), z17.VnS());

    __ movprfx(z24, z25);
    __ saddlb(z24.VnD(), z30.VnS(), z16.VnS());

    __ movprfx(z15, z16);
    __ saddlbt(z15.VnD(), z6.VnS(), z18.VnS());

    __ movprfx(z21, z22);
    __ saddlt(z21.VnD(), z29.VnS(), z31.VnS());

    __ movprfx(z12, z13);
    __ saddwb(z12.VnD(), z8.VnD(), z8.VnS());

    __ movprfx(z24, z25);
    __ saddwt(z24.VnD(), z0.VnD(), z3.VnS());

    __ movprfx(z7, z8);
    __ shrnb(z7.VnB(), z4.VnH(), 1);

    __ movprfx(z21, z22);
    __ shrnt(z21.VnB(), z29.VnH(), 1);

    __ movprfx(z29, z30);
    __ sli(z29.VnB(), z7.VnB(), 0);

    __ movprfx(z23, z24);
    __ smulh(z23.VnB(), z23.VnB(), z3.VnB());

    __ movprfx(z10, z11);
    __ smullb(z10.VnD(), z4.VnS(), z4.VnS());

    __ movprfx(z10, z11);
    __ smullb(z10.VnS(), z4.VnH(), z4.VnH(), 0);

    __ movprfx(z10, z11);
    __ smullb(z10.VnD(), z4.VnS(), z4.VnS(), 0);

    __ movprfx(z31, z0);
    __ smullt(z31.VnD(), z26.VnS(), z5.VnS());

    __ movprfx(z31, z0);
    __ smullt(z31.VnS(), z26.VnH(), z5.VnH(), 0);

    __ movprfx(z31, z0);
    __ smullt(z31.VnD(), z26.VnS(), z5.VnS(), 0);

    __ movprfx(z4, z5);
    __ splice_con(z4.VnB(), p7.Merging(), z0.VnB(), z1.VnB());

    __ movprfx(z18, z19);
    __ sqdmulh(z18.VnB(), z25.VnB(), z1.VnB());

    __ movprfx(z18, z19);
    __ sqdmulh(z18.VnH(), z25.VnH(), z1.VnH(), 0);

    __ movprfx(z18, z19);
    __ sqdmulh(z18.VnS(), z25.VnS(), z1.VnS(), 0);

    __ movprfx(z18, z19);
    __ sqdmulh(z18.VnD(), z25.VnD(), z1.VnD(), 0);

    __ movprfx(z1, z2);
    __ sqdmullb(z1.VnD(), z31.VnS(), z21.VnS());

    __ movprfx(z1, z2);
    __ sqdmullb(z1.VnS(), z31.VnH(), z1.VnH(), 0);

    __ movprfx(z1, z2);
    __ sqdmullb(z1.VnD(), z31.VnS(), z1.VnS(), 0);

    __ movprfx(z2, z3);
    __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS());

    __ movprfx(z2, z3);
    __ sqdmullt(z2.VnS(), z1.VnH(), z5.VnH(), 0);

    __ movprfx(z2, z3);
    __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS(), 0);

    __ movprfx(z21, z22);
    __ sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB());

    __ movprfx(z21, z22);
    __ sqrdmulh(z21.VnH(), z21.VnH(), z2.VnH(), 0);

    __ movprfx(z21, z22);
    __ sqrdmulh(z21.VnS(), z21.VnS(), z2.VnS(), 0);

    __ movprfx(z21, z22);
    __ sqrdmulh(z21.VnD(), z21.VnD(), z2.VnD(), 0);

    __ movprfx(z1, z2);
    __ sqrshrnb(z1.VnB(), z1.VnH(), 1);

    __ movprfx(z24, z25);
    __ sqrshrnt(z24.VnB(), z19.VnH(), 8);

    __ movprfx(z23, z24);
    __ sqrshrunb(z23.VnB(), z28.VnH(), 1);

    __ movprfx(z9, z10);
    __ sqrshrunt(z9.VnB(), z15.VnH(), 8);

    __ movprfx(z25, z26);
    __ sqshrnb(z25.VnB(), z1.VnH(), 1);

    __ movprfx(z0, z1);
    __ sqshrnt(z0.VnB(), z25.VnH(), 8);

    __ movprfx(z25, z26);
    __ sqshrunb(z25.VnB(), z10.VnH(), 1);

    __ movprfx(z20, z21);
    __ sqshrunt(z20.VnB(), z3.VnH(), 8);

    __ movprfx(z2, z3);
    __ sqxtnb(z2.VnB(), z0.VnH());

    __ movprfx(z31, z0);
    __ sqxtnt(z31.VnB(), z18.VnH());

    __ movprfx(z28, z29);
    __ sqxtunb(z28.VnB(), z6.VnH());

    __ movprfx(z14, z15);
    __ sqxtunt(z14.VnB(), z31.VnH());

    __ movprfx(z6, z7);
    __ sri(z6.VnB(), z9.VnB(), 1);

    __ movprfx(z2, z3);
    __ sshllb(z2.VnH(), z20.VnB(), 0);

    __ movprfx(z27, z28);
    __ sshllt(z27.VnH(), z8.VnB(), 0);

    __ movprfx(z4, z5);
    __ ssublb(z4.VnD(), z23.VnS(), z7.VnS());

    __ movprfx(z6, z7);
    __ ssublbt(z6.VnD(), z28.VnS(), z12.VnS());

    __ movprfx(z12, z13);
    __ ssublt(z12.VnD(), z13.VnS(), z6.VnS());

    __ movprfx(z11, z12);
    __ ssubltb(z11.VnD(), z18.VnS(), z19.VnS());

    __ movprfx(z7, z8);
    __ ssubwb(z7.VnD(), z28.VnD(), z11.VnS());

    __ movprfx(z29, z30);
    __ ssubwt(z29.VnD(), z25.VnD(), z20.VnS());

    __ movprfx(z21, z22);
    __ stnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z1.VnS(), x23));

    __ movprfx(z21, z22);
    __ stnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23));

    __ movprfx(z10, z11);
    __ stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), x23));

    __ movprfx(z30, z31);
    __ stnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x6));

    __ movprfx(z30, z31);
    __ stnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x6));

    __ movprfx(z0, z1);
    __ stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1));

    __ movprfx(z0, z1);
    __ stnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1));

    __ movprfx(z31, z0);
    __ subhnb(z31.VnS(), z31.VnD(), z7.VnD());

    __ movprfx(z31, z0);
    __ subhnt(z31.VnS(), z22.VnD(), z27.VnD());

    __ movprfx(z24, z25);
    __ tbl(z24.VnB(), z29.VnB(), z30.VnB(), z0.VnB());

    __ movprfx(z22, z23);
    __ tbx(z22.VnB(), z15.VnB(), z19.VnB());

    __ movprfx(z1, z2);
    __ uabdlb(z1.VnD(), z26.VnS(), z12.VnS());

    __ movprfx(z25, z26);
    __ uabdlt(z25.VnD(), z29.VnS(), z14.VnS());

    __ movprfx(z3, z4);
    __ uaddlb(z3.VnD(), z5.VnS(), z2.VnS());

    __ movprfx(z15, z16);
    __ uaddlt(z15.VnD(), z28.VnS(), z20.VnS());

    __ movprfx(z31, z0);
    __ uaddwb(z31.VnD(), z8.VnD(), z25.VnS());

    __ movprfx(z17, z18);
    __ uaddwt(z17.VnD(), z15.VnD(), z2.VnS());

    __ movprfx(z12, z13);
    __ umulh(z12.VnB(), z12.VnB(), z17.VnB());

    __ movprfx(z12, z13);
    __ umullb(z12.VnD(), z5.VnS(), z2.VnS());

    __ movprfx(z12, z13);
    __ umullb(z12.VnS(), z5.VnH(), z2.VnH(), 0);

    __ movprfx(z12, z13);
    __ umullb(z12.VnD(), z5.VnS(), z2.VnS(), 0);

    __ movprfx(z24, z25);
    __ umullt(z24.VnD(), z6.VnS(), z6.VnS());

    __ movprfx(z24, z25);
    __ umullt(z24.VnS(), z6.VnH(), z1.VnH(), 0);

    __ movprfx(z24, z25);
    __ umullt(z24.VnD(), z6.VnS(), z1.VnS(), 0);

    __ movprfx(z30, z31);
    __ uqrshrnb(z30.VnB(), z25.VnH(), 1);

    __ movprfx(z3, z4);
    __ uqrshrnt(z3.VnB(), z25.VnH(), 8);

    __ movprfx(z17, z18);
    __ uqshrnb(z17.VnB(), z4.VnH(), 1);

    __ movprfx(z28, z29);
    __ uqshrnt(z28.VnB(), z18.VnH(), 8);

    __ movprfx(z28, z29);
    __ uqxtnb(z28.VnB(), z4.VnH());

    __ movprfx(z19, z20);
    __ uqxtnt(z19.VnB(), z7.VnH());

    __ movprfx(z8, z9);
    __ ushllb(z8.VnH(), z31.VnB(), 0);

    __ movprfx(z3, z4);
    __ ushllt(z3.VnH(), z21.VnB(), 0);

    __ movprfx(z25, z26);
    __ usublb(z25.VnD(), z9.VnS(), z17.VnS());

    __ movprfx(z5, z6);
    __ usublt(z5.VnD(), z11.VnS(), z15.VnS());

    __ movprfx(z10, z11);
    __ usubwb(z10.VnD(), z13.VnD(), z20.VnS());

    __ movprfx(z15, z16);
    __ usubwt(z15.VnD(), z8.VnD(), z23.VnS());

    // The `while*` forms here write a predicate register, so movprfx (which
    // prefixes a destructive Z-register operation) cannot apply to them.
    __ movprfx(z20, z21);
    __ whilege(p0.VnB(), w20, w29);

    __ movprfx(z24, z25);
    __ whilegt(p11.VnB(), w24, w3);

    __ movprfx(z20, z21);
    __ whilehi(p2.VnB(), x20, x8);

    __ movprfx(z22, z23);
    __ whilehs(p4.VnB(), w22, w9);

    __ movprfx(z25, z26);
    __ whilerw(p7.VnB(), x25, x27);

    __ movprfx(z14, z15);
    __ whilewr(p8.VnB(), x14, x14);
  }
  assm.FinalizeCode();

  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}
2824
// Check that a *predicated* movprfx is rejected before SVE2 instructions
// that only accept an unpredicated prefix. Every movprfx/instruction pair
// below is expected to be flagged as invalid, hence the `false` argument to
// CheckAndMaybeDisassembleMovprfxPairs at the end.
TEST(movprfx_negative_predication_sve2) {
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
    static const size_t kPairCount = 140;
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    // Each pair: a predicated movprfx (p0.Zeroing()) targeting the
    // destination register, followed by the SVE2 instruction under test.
    __ movprfx(z25.VnS(), p0.Zeroing(), z26.VnS());
    __ adclb(z25.VnS(), z17.VnS(), z24.VnS());

    __ movprfx(z0.VnS(), p0.Zeroing(), z1.VnS());
    __ adclt(z0.VnS(), z2.VnS(), z15.VnS());

    __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
    __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD());

    __ movprfx(z18.VnD(), p0.Zeroing(), z19.VnD());
    __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD());

    __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD());
    __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD());

    __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
    __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD());

    __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB());
    __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90);

    __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS());
    __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0);

    __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS());
    __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0);

    __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD());
    __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0);

    __ movprfx(z19.VnB(), p0.Zeroing(), z20.VnB());
    __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0);

    __ movprfx(z19.VnS(), p0.Zeroing(), z20.VnS());
    __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0);

    __ movprfx(z19.VnH(), p0.Zeroing(), z20.VnH());
    __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0);

    __ movprfx(z10.VnD(), p0.Zeroing(), z11.VnD());
    __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD());

    __ movprfx(z3.VnB(), p0.Zeroing(), z4.VnB());
    __ eorbt(z3.VnB(), z10.VnB(), z8.VnB());

    __ movprfx(z20.VnB(), p0.Zeroing(), z22.VnB());
    __ eortb(z20.VnB(), z21.VnB(), z15.VnB());

    __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD());
    __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD());

    __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
    __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD());

    __ movprfx(z22.VnD(), p0.Zeroing(), z23.VnD());
    __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD());

    __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
    __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD());

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH());

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0);

    __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS());
    __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH());

    __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS());
    __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0);

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH());

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0);

    __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS());
    __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH());

    __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS());
    __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0);

    __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH());
    __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0);

    __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS());
    __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0);

    __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
    __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0);

    __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH());
    __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0);

    __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS());
    __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0);

    __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
    __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0);

    __ movprfx(z17.VnD(), p0.Zeroing(), z18.VnD());
    __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD());

    __ movprfx(z13.VnB(), p0.Zeroing(), z14.VnB());
    __ saba(z13.VnB(), z2.VnB(), z31.VnB());

    __ movprfx(z13.VnD(), p0.Zeroing(), z14.VnD());
    __ sabalb(z13.VnD(), z20.VnS(), z26.VnS());

    __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD());
    __ sabalt(z14.VnD(), z19.VnS(), z10.VnS());

    __ movprfx(z17.VnS(), p0.Zeroing(), z18.VnS());
    __ sbclb(z17.VnS(), z10.VnS(), z8.VnS());

    __ movprfx(z20.VnS(), p0.Zeroing(), z21.VnS());
    __ sbclt(z20.VnS(), z0.VnS(), z13.VnS());

    __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB());
    __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB());

    __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB());
    __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlalb(z1.VnD(), z3.VnS(), z23.VnS());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0);

    __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
    __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0);

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlalt(z1.VnD(), z3.VnS(), z23.VnS());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0);

    __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
    __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0);

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlslb(z1.VnD(), z3.VnS(), z23.VnS());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0);

    __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
    __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0);

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlslt(z1.VnD(), z3.VnS(), z23.VnS());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0);

    __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
    __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0);

    __ movprfx(z20.VnB(), p0.Zeroing(), z21.VnB());
    __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90);

    __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
    __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS());

    __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
    __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0);

    __ movprfx(z6.VnS(), p0.Zeroing(), z7.VnS());
    __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0);

    __ movprfx(z23.VnD(), p0.Zeroing(), z24.VnD());
    __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS());

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS());

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0);

    __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
    __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0);

    __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
    __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS());

    __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
    __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0);

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0);

    __ movprfx(z26.VnD(), p0.Zeroing(), z27.VnD());
    __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS());

    __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
    __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS());

    __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
    __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0);

    __ movprfx(z21.VnS(), p0.Zeroing(), z22.VnS());
    __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0);

    __ movprfx(z31.VnB(), p0.Zeroing(), z0.VnB());
    __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0);

    __ movprfx(z31.VnH(), p0.Zeroing(), z0.VnH());
    __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0);

    __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS());
    __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0);

    __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB());
    __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB());

    __ movprfx(z27.VnH(), p0.Zeroing(), z28.VnH());
    __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0);

    __ movprfx(z27.VnS(), p0.Zeroing(), z28.VnS());
    __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0);

    __ movprfx(z27.VnD(), p0.Zeroing(), z28.VnD());
    __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0);

    __ movprfx(z11.VnB(), p0.Zeroing(), z12.VnB());
    __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB());

    __ movprfx(z11.VnH(), p0.Zeroing(), z12.VnH());
    __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0);

    __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
    __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0);

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0);

    __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
    __ srsra(z0.VnB(), z8.VnB(), 1);

    __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
    __ ssra(z0.VnB(), z8.VnB(), 1);

    __ movprfx(z23.VnB(), p0.Zeroing(), z24.VnB());
    __ uaba(z23.VnB(), z22.VnB(), z20.VnB());

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ uabalb(z11.VnD(), z25.VnS(), z12.VnS());

    __ movprfx(z4.VnD(), p0.Zeroing(), z5.VnD());
    __ uabalt(z4.VnD(), z2.VnS(), z31.VnS());

    __ movprfx(z7.VnB(), p0.Zeroing(), z8.VnB());
    __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB());

    __ movprfx(z10.VnB(), p0.Zeroing(), z11.VnB());
    __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB());

    __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD());
    __ umlalb(z31.VnD(), z9.VnS(), z21.VnS());

    __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD());
    __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0);

    __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS());
    __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0);

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ umlalt(z11.VnD(), z5.VnS(), z22.VnS());

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0);

    __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
    __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0);

    __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD());
    __ umlslb(z28.VnD(), z13.VnS(), z9.VnS());

    __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD());
    __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0);

    __ movprfx(z28.VnS(), p0.Zeroing(), z29.VnS());
    __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0);

    __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD());
    __ umlslt(z9.VnD(), z12.VnS(), z30.VnS());

    __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD());
    __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0);

    __ movprfx(z9.VnS(), p0.Zeroing(), z10.VnS());
    __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0);

    __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
    __ ursra(z0.VnB(), z8.VnB(), 1);

    __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
    __ usra(z0.VnB(), z8.VnB(), 1);

    __ movprfx(z16.VnB(), p0.Zeroing(), z17.VnB());
    __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1);
  }
  assm.FinalizeCode();

  // `false`: none of the pairs above should be accepted as a valid
  // movprfx + instruction combination.
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}
3147
3148TEST(movprfx_negative_aliasing_sve2) {
3149  Assembler assm;
3150  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
3151  {
3152    // We have to use the Assembler directly to generate movprfx, so we need
3153    // to manually reserve space for the code we're about to emit.
3154    static const size_t kPairCount = 140;
3155    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
3156
3157    __ movprfx(z25, z26);
3158    __ adclb(z25.VnS(), z17.VnS(), z25.VnS());
3159
3160    __ movprfx(z0, z1);
3161    __ adclt(z0.VnS(), z2.VnS(), z0.VnS());
3162
3163    __ movprfx(z3, z4);
3164    __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB());
3165
3166    __ movprfx(z6, z7);
3167    __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z6.VnD());
3168
3169    __ movprfx(z18, z19);
3170    __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z18.VnD());
3171
3172    __ movprfx(z7, z8);
3173    __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z7.VnD());
3174
3175    __ movprfx(z21, z22);
3176    __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z21.VnD());
3177
3178    __ movprfx(z5, z6);
3179    __ cadd(z5.VnB(), z5.VnB(), z5.VnB(), 90);
3180
3181    __ movprfx(z7, z8);
3182    __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0);
3183
3184    __ movprfx(z7, z8);
3185    __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0, 0);
3186
3187    __ movprfx(z7, z8);
3188    __ cdot(z7.VnD(), z7.VnH(), z0.VnH(), 0, 0);
3189
3190    __ movprfx(z19, z20);
3191    __ cmla(z19.VnB(), z19.VnB(), z2.VnB(), 0);
3192
3193    __ movprfx(z19, z20);
3194    __ cmla(z19.VnS(), z19.VnS(), z2.VnS(), 0, 0);
3195
3196    __ movprfx(z1, z20);
3197    __ cmla(z1.VnH(), z7.VnH(), z1.VnH(), 0, 0);
3198
3199    __ movprfx(z10, z11);
3200    __ eor3(z10.VnD(), z10.VnD(), z10.VnD(), z23.VnD());
3201
3202    __ movprfx(z3, z4);
3203    __ eorbt(z3.VnB(), z10.VnB(), z3.VnB());
3204
3205    __ movprfx(z20, z22);
3206    __ eortb(z20.VnB(), z21.VnB(), z20.VnB());
3207
3208    __ movprfx(z14, z15);
3209    __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z14.VnD());
3210
3211    __ movprfx(z14.VnD(), p4.Merging(), z15.VnD());
3212    __ fcvtx(z14.VnS(), p4.Merging(), z14.VnD());
3213
3214    __ movprfx(z15.VnH(), p0.Merging(), z16.VnH());
3215    __ flogb(z15.VnH(), p0.Merging(), z15.VnH());
3216
3217    __ movprfx(z2, z3);
3218    __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z2.VnD());
3219
3220    __ movprfx(z22, z23);
3221    __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z22.VnD());
3222
3223    __ movprfx(z1, z2);
3224    __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z1.VnD());
3225
3226    __ movprfx(z16, z17);
3227    __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z16.VnD());
3228
3229    __ movprfx(z16, z17);
3230    __ fmlalb(z16.VnS(), z18.VnH(), z16.VnH());
3231
3232    __ movprfx(z16, z17);
3233    __ fmlalb(z16.VnS(), z16.VnH(), z2.VnH(), 0);
3234
3235    __ movprfx(z18, z19);
3236    __ fmlalt(z18.VnS(), z13.VnH(), z18.VnH());
3237
3238    __ movprfx(z18, z19);
3239    __ fmlalt(z18.VnS(), z18.VnH(), z5.VnH(), 0);
3240
3241    __ movprfx(z16, z17);
3242    __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH());
3243
3244    __ movprfx(z16, z17);
3245    __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH(), 0);
3246
3247    __ movprfx(z3, z4);
3248    __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH());
3249
3250    __ movprfx(z3, z4);
3251    __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH(), 0);
3252
3253    __ movprfx(z2, z3);
3254    __ mla(z2.VnH(), z0.VnH(), z2.VnH(), 0);
3255
3256    __ movprfx(z2, z3);
3257    __ mla(z2.VnS(), z0.VnS(), z2.VnS(), 0);
3258
3259    __ movprfx(z2, z3);
3260    __ mla(z2.VnD(), z0.VnD(), z2.VnD(), 0);
3261
3262    __ movprfx(z2, z3);
3263    __ mls(z2.VnH(), z0.VnH(), z2.VnH(), 0);
3264
3265    __ movprfx(z2, z3);
3266    __ mls(z2.VnS(), z0.VnS(), z2.VnS(), 0);
3267
3268    __ movprfx(z2, z3);
3269    __ mls(z2.VnD(), z0.VnD(), z2.VnD(), 0);
3270
3271    __ movprfx(z17, z18);
3272    __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z17.VnD());
3273
3274    __ movprfx(z13, z14);
3275    __ saba(z13.VnB(), z2.VnB(), z13.VnB());
3276
3277    __ movprfx(z13, z14);
3278    __ sabalb(z13.VnD(), z13.VnS(), z26.VnS());
3279
3280    __ movprfx(z14, z15);
3281    __ sabalt(z14.VnD(), z14.VnS(), z10.VnS());
3282
3283    __ movprfx(z19.VnD(), p5.Merging(), z20.VnD());
3284    __ sadalp(z19.VnD(), p5.Merging(), z19.VnS());
3285
3286    __ movprfx(z17, z18);
3287    __ sbclb(z17.VnS(), z17.VnS(), z8.VnS());
3288
3289    __ movprfx(z20, z21);
3290    __ sbclt(z20.VnS(), z20.VnS(), z13.VnS());
3291
3292    __ movprfx(z20.VnB(), p3.Merging(), z21.VnB());
3293    __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z20.VnB());
3294
3295    __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
3296    __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z21.VnB());
3297
3298    __ movprfx(z1.VnB(), p0.Merging(), z2.VnB());
3299    __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z1.VnB());
3300
3301    __ movprfx(z5, z6);
3302    __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z5.VnB());
3303
3304    __ movprfx(z27, z28);
3305    __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z27.VnB());
3306
3307    __ movprfx(z1, z2);
3308    __ smlalb(z1.VnD(), z3.VnS(), z1.VnS());
3309
3310    __ movprfx(z1, z2);
3311    __ smlalb(z1.VnD(), z3.VnS(), z1.VnS(), 0);
3312
3313    __ movprfx(z1, z2);
3314    __ smlalb(z1.VnS(), z1.VnH(), z2.VnH(), 0);
3315
3316    __ movprfx(z1, z2);
3317    __ smlalt(z1.VnD(), z1.VnS(), z23.VnS());
3318
3319    __ movprfx(z1, z2);
3320    __ smlalt(z1.VnD(), z3.VnS(), z1.VnS(), 0);
3321
3322    __ movprfx(z1, z2);
3323    __ smlalt(z1.VnS(), z1.VnH(), z2.VnH(), 0);
3324
3325    __ movprfx(z1, z2);
3326    __ smlslb(z1.VnD(), z1.VnS(), z23.VnS());
3327
3328    __ movprfx(z1, z2);
3329    __ smlslb(z1.VnD(), z3.VnS(), z1.VnS(), 0);
3330
3331    __ movprfx(z1, z2);
3332    __ smlslb(z1.VnS(), z3.VnH(), z1.VnH(), 0);
3333
3334    __ movprfx(z1, z2);
3335    __ smlslt(z1.VnD(), z1.VnS(), z23.VnS());
3336
3337    __ movprfx(z1, z2);
3338    __ smlslt(z1.VnD(), z3.VnS(), z1.VnS(), 0);
3339
3340    __ movprfx(z1, z2);
3341    __ smlslt(z1.VnS(), z1.VnH(), z2.VnH(), 0);
3342
3343    __ movprfx(z29.VnB(), p1.Merging(), z30.VnB());
3344    __ sqabs(z29.VnB(), p1.Merging(), z29.VnB());
3345
3346    __ movprfx(z28.VnB(), p0.Merging(), z29.VnB());
3347    __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB());
3348
3349    __ movprfx(z20, z21);
3350    __ sqcadd(z20.VnB(), z20.VnB(), z20.VnB(), 90);
3351
3352    __ movprfx(z6, z7);
3353    __ sqdmlalb(z6.VnD(), z6.VnS(), z25.VnS());
3354
3355    __ movprfx(z6, z7);
3356    __ sqdmlalb(z6.VnD(), z6.VnS(), z2.VnS(), 0);
3357
3358    __ movprfx(z6, z7);
3359    __ sqdmlalb(z6.VnS(), z6.VnH(), z2.VnH(), 0);
3360
3361    __ movprfx(z23, z24);
3362    __ sqdmlalbt(z23.VnD(), z23.VnS(), z26.VnS());
3363
3364    __ movprfx(z11, z12);
3365    __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS());
3366
3367    __ movprfx(z11, z12);
3368    __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS(), 0);
3369
3370    __ movprfx(z1, z12);
3371    __ sqdmlalt(z1.VnS(), z0.VnH(), z1.VnH(), 0);
3372
3373    __ movprfx(z16, z17);
3374    __ sqdmlslb(z16.VnD(), z26.VnS(), z16.VnS());
3375
3376    __ movprfx(z16, z17);
3377    __ sqdmlslb(z16.VnD(), z16.VnS(), z2.VnS(), 0);
3378
3379    __ movprfx(z16, z17);
3380    __ sqdmlslb(z16.VnS(), z16.VnH(), z2.VnH(), 0);
3381
3382    __ movprfx(z26, z27);
3383    __ sqdmlslbt(z26.VnD(), z26.VnS(), z4.VnS());
3384
3385    __ movprfx(z21, z22);
3386    __ sqdmlslt(z21.VnD(), z23.VnS(), z21.VnS());
3387
3388    __ movprfx(z21, z22);
3389    __ sqdmlslt(z21.VnD(), z21.VnS(), z0.VnS(), 0);
3390
3391    __ movprfx(z1, z22);
3392    __ sqdmlslt(z21.VnS(), z23.VnH(), z1.VnH(), 0);
3393
3394    __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
3395    __ sqneg(z21.VnB(), p0.Merging(), z21.VnB());
3396
3397    __ movprfx(z31, z0);
3398    __ sqrdcmlah(z31.VnB(), z15.VnB(), z31.VnB(), 0);
3399
3400    __ movprfx(z31, z0);
3401    __ sqrdcmlah(z31.VnH(), z31.VnH(), z2.VnH(), 0, 0);
3402
3403    __ movprfx(z31, z0);
3404    __ sqrdcmlah(z31.VnS(), z31.VnS(), z2.VnS(), 0, 0);
3405
3406    __ movprfx(z27, z28);
3407    __ sqrdmlah(z27.VnB(), z27.VnB(), z19.VnB());
3408
3409    __ movprfx(z27, z28);
3410    __ sqrdmlah(z27.VnH(), z27.VnH(), z1.VnH(), 0);
3411
3412    __ movprfx(z27, z28);
3413    __ sqrdmlah(z27.VnS(), z27.VnS(), z1.VnS(), 0);
3414
3415    __ movprfx(z27, z28);
3416    __ sqrdmlah(z27.VnD(), z27.VnD(), z1.VnD(), 0);
3417
3418    __ movprfx(z11, z12);
3419    __ sqrdmlsh(z11.VnB(), z16.VnB(), z11.VnB());
3420
3421    __ movprfx(z11, z12);
3422    __ sqrdmlsh(z11.VnH(), z11.VnH(), z1.VnH(), 0);
3423
3424    __ movprfx(z11, z12);
3425    __ sqrdmlsh(z11.VnS(), z11.VnS(), z1.VnS(), 0);
3426
3427    __ movprfx(z11, z12);
3428    __ sqrdmlsh(z11.VnD(), z11.VnD(), z1.VnD(), 0);
3429
3430    __ movprfx(z31.VnB(), p5.Merging(), z0.VnB());
3431    __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z31.VnB());
3432
3433    __ movprfx(z25.VnB(), p6.Merging(), z26.VnB());
3434    __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z25.VnB());
3435
3436    __ movprfx(z0.VnB(), p5.Merging(), z1.VnB());
3437    __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z0.VnB());
3438
3439    __ movprfx(z7.VnB(), p3.Merging(), z8.VnB());
3440    __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z7.VnB());
3441
3442    __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
3443    __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB());
3444
3445    __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
3446    __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB());
3447
3448    __ movprfx(z23.VnB(), p4.Merging(), z24.VnB());
3449    __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z23.VnB());
3450
3451    __ movprfx(z31.VnB(), p7.Merging(), z0.VnB());
3452    __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z31.VnB());
3453
3454    __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
3455    __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB());
3456
3457    __ movprfx(z0, z1);
3458    __ srsra(z0.VnB(), z0.VnB(), 1);
3459
3460    __ movprfx(z0, z1);
3461    __ ssra(z0.VnB(), z0.VnB(), 1);
3462
3463    __ movprfx(z26.VnB(), p2.Merging(), z27.VnB());
3464    __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z26.VnB());
3465
3466    __ movprfx(z23, z24);
3467    __ uaba(z23.VnB(), z22.VnB(), z23.VnB());
3468
3469    __ movprfx(z11, z12);
3470    __ uabalb(z11.VnD(), z25.VnS(), z11.VnS());
3471
3472    __ movprfx(z4, z5);
3473    __ uabalt(z4.VnD(), z4.VnS(), z31.VnS());
3474
3475    __ movprfx(z20.VnD(), p4.Merging(), z21.VnD());
3476    __ uadalp(z20.VnD(), p4.Merging(), z20.VnS());
3477
3478    __ movprfx(z21.VnB(), p2.Merging(), z22.VnB());
3479    __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z21.VnB());
3480
3481    __ movprfx(z1.VnB(), p4.Merging(), z2.VnB());
3482    __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z1.VnB());
3483
3484    __ movprfx(z18.VnB(), p0.Merging(), z19.VnB());
3485    __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z18.VnB());
3486
3487    __ movprfx(z7, z8);
3488    __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z7.VnB());
3489
3490    __ movprfx(z10, z11);
3491    __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z10.VnB());
3492
3493    __ movprfx(z31, z0);
3494    __ umlalb(z31.VnD(), z9.VnS(), z31.VnS());
3495
3496    __ movprfx(z31, z0);
3497    __ umlalb(z31.VnD(), z31.VnS(), z1.VnS(), 0);
3498
3499    __ movprfx(z31, z0);
3500    __ umlalb(z31.VnS(), z31.VnH(), z1.VnH(), 0);
3501
3502    __ movprfx(z11, z12);
3503    __ umlalt(z11.VnD(), z11.VnS(), z22.VnS());
3504
3505    __ movprfx(z11, z12);
3506    __ umlalt(z11.VnD(), z11.VnS(), z2.VnS(), 0);
3507
3508    __ movprfx(z1, z12);
3509    __ umlalt(z1.VnS(), z5.VnH(), z1.VnH(), 0);
3510
3511    __ movprfx(z28, z29);
3512    __ umlslb(z28.VnD(), z28.VnS(), z9.VnS());
3513
3514    __ movprfx(z28, z29);
3515    __ umlslb(z28.VnD(), z28.VnS(), z1.VnS(), 0);
3516
3517    __ movprfx(z28, z29);
3518    __ umlslb(z28.VnS(), z28.VnH(), z1.VnH(), 0);
3519
3520    __ movprfx(z9, z10);
3521    __ umlslt(z9.VnD(), z9.VnS(), z30.VnS());
3522
3523    __ movprfx(z9, z10);
3524    __ umlslt(z9.VnD(), z9.VnS(), z0.VnS(), 0);
3525
3526    __ movprfx(z9, z10);
3527    __ umlslt(z9.VnS(), z9.VnH(), z0.VnH(), 0);
3528
3529    __ movprfx(z24.VnB(), p7.Merging(), z25.VnB());
3530    __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z24.VnB()),
3531
3532        __ movprfx(z20.VnB(), p1.Merging(), z21.VnB());
3533    __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z20.VnB());
3534
3535    __ movprfx(z8.VnB(), p5.Merging(), z9.VnB());
3536    __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z8.VnB());
3537
3538    __ movprfx(z29.VnB(), p7.Merging(), z30.VnB());
3539    __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z29.VnB());
3540
3541    __ movprfx(z12.VnB(), p1.Merging(), z13.VnB());
3542    __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB());
3543
3544    __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
3545    __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB());
3546
3547    __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
3548    __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB());
3549
3550    __ movprfx(z25.VnS(), p7.Merging(), z26.VnS());
3551    __ urecpe(z25.VnS(), p7.Merging(), z25.VnS());
3552
3553    __ movprfx(z29.VnB(), p4.Merging(), z30.VnB());
3554    __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z29.VnB());
3555
3556    __ movprfx(z15.VnB(), p2.Merging(), z16.VnB());
3557    __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z15.VnB());
3558
3559    __ movprfx(z27.VnB(), p1.Merging(), z28.VnB());
3560    __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z27.VnB());
3561
3562    __ movprfx(z4.VnS(), p3.Merging(), z5.VnS());
3563    __ ursqrte(z4.VnS(), p3.Merging(), z4.VnS());
3564
3565    __ movprfx(z0, z1);
3566    __ ursra(z0.VnB(), z0.VnB(), 1);
3567
3568    __ movprfx(z25.VnB(), p4.Merging(), z26.VnB());
3569    __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z25.VnB());
3570
3571    __ movprfx(z0, z1);
3572    __ usra(z0.VnB(), z0.VnB(), 1);
3573
3574    __ movprfx(z16, z17);
3575    __ xar(z16.VnB(), z16.VnB(), z16.VnB(), 1);
3576  }
3577  assm.FinalizeCode();
3578
3579  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
3580}
3581
3582TEST(movprfx_negative_lane_size_sve2) {
3583  Assembler assm;
3584  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
3585  {
3586    // We have to use the Assembler directly to generate movprfx, so we need
3587    // to manually reserve space for the code we're about to emit.
3588    static const size_t kPairCount = 140;
3589    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
3590
3591    __ movprfx(z14.VnS(), p4.Merging(), z15.VnS());
3592    __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD());
3593
3594    __ movprfx(z15.VnS(), p0.Merging(), z16.VnS());
3595    __ flogb(z15.VnH(), p0.Merging(), z3.VnH());
3596
3597    __ movprfx(z19.VnB(), p5.Merging(), z20.VnB());
3598    __ sadalp(z19.VnD(), p5.Merging(), z9.VnS());
3599
3600    __ movprfx(z20.VnH(), p3.Merging(), z21.VnH());
3601    __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB());
3602
3603    __ movprfx(z21.VnH(), p0.Merging(), z22.VnH());
3604    __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB());
3605
3606    __ movprfx(z1.VnS(), p0.Merging(), z2.VnS());
3607    __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB());
3608
3609    __ movprfx(z29.VnD(), p1.Merging(), z30.VnD());
3610    __ sqabs(z29.VnB(), p1.Merging(), z18.VnB());
3611
3612    __ movprfx(z28.VnH(), p0.Merging(), z29.VnH());
3613    __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB());
3614
3615    __ movprfx(z21.VnH(), p0.Merging(), z22.VnH());
3616    __ sqneg(z21.VnB(), p0.Merging(), z17.VnB());
3617
3618    __ movprfx(z31.VnS(), p5.Merging(), z0.VnS());
3619    __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB());
3620
3621    __ movprfx(z25.VnD(), p6.Merging(), z26.VnD());
3622    __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB());
3623
3624    __ movprfx(z0.VnH(), p5.Merging(), z1.VnH());
3625    __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0);
3626
3627    __ movprfx(z0.VnS(), p5.Merging(), z1.VnS());
3628    __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB());
3629
3630    __ movprfx(z7.VnD(), p3.Merging(), z8.VnD());
3631    __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB());
3632
3633    __ movprfx(z10.VnH(), p1.Merging(), z11.VnH());
3634    __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0);
3635
3636    __ movprfx(z16.VnH(), p7.Merging(), z17.VnH());
3637    __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
3638
3639    __ movprfx(z16.VnS(), p7.Merging(), z17.VnS());
3640    __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
3641
3642    __ movprfx(z23.VnD(), p4.Merging(), z24.VnD());
3643    __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB());
3644
3645    __ movprfx(z31.VnH(), p7.Merging(), z0.VnH());
3646    __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB());
3647
3648    __ movprfx(z16.VnH(), p7.Merging(), z17.VnH());
3649    __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB());
3650
3651    __ movprfx(z12.VnH(), p0.Merging(), z13.VnH());
3652    __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1);
3653
3654    __ movprfx(z26.VnH(), p2.Merging(), z27.VnH());
3655    __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB());
3656
3657    __ movprfx(z20.VnB(), p4.Merging(), z21.VnB());
3658    __ uadalp(z20.VnD(), p4.Merging(), z5.VnS());
3659
3660    __ movprfx(z21.VnH(), p2.Merging(), z22.VnH());
3661    __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB());
3662
3663    __ movprfx(z1.VnH(), p4.Merging(), z2.VnH());
3664    __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB());
3665
3666    __ movprfx(z18.VnH(), p0.Merging(), z19.VnH());
3667    __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB());
3668
3669    __ movprfx(z24.VnH(), p7.Merging(), z25.VnH());
3670    __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()),
3671
3672        __ movprfx(z20.VnS(), p1.Merging(), z21.VnS());
3673    __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB());
3674
3675    __ movprfx(z8.VnS(), p5.Merging(), z9.VnS());
3676    __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB());
3677
3678    __ movprfx(z29.VnS(), p7.Merging(), z30.VnS());
3679    __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0);
3680
3681    __ movprfx(z29.VnS(), p7.Merging(), z30.VnS());
3682    __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB());
3683
3684    __ movprfx(z12.VnS(), p1.Merging(), z13.VnS());
3685    __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB());
3686
3687    __ movprfx(z20.VnS(), p0.Merging(), z21.VnS());
3688    __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
3689
3690    __ movprfx(z20.VnS(), p0.Merging(), z21.VnS());
3691    __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
3692
3693    __ movprfx(z25.VnB(), p7.Merging(), z26.VnB());
3694    __ urecpe(z25.VnS(), p7.Merging(), z2.VnS());
3695
3696    __ movprfx(z29.VnD(), p4.Merging(), z30.VnD());
3697    __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB());
3698
3699    __ movprfx(z15.VnD(), p2.Merging(), z16.VnD());
3700    __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB());
3701
3702    __ movprfx(z27.VnD(), p1.Merging(), z28.VnD());
3703    __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB());
3704
3705    __ movprfx(z31.VnD(), p2.Merging(), z0.VnD());
3706    __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1);
3707
3708    __ movprfx(z4.VnH(), p3.Merging(), z5.VnH());
3709    __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS());
3710
3711    __ movprfx(z25.VnD(), p4.Merging(), z26.VnD());
3712    __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB());
3713  }
3714  assm.FinalizeCode();
3715
3716  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
3717}
3718
3719}  // namespace aarch64
3720}  // namespace vixl
3721