// Copyright 2017, VIXL authors // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // * Neither the name of ARM Limited nor the names of its contributors may be // used to endorse or promote products derived from this software without // specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include #include "test-runner.h" #include "test-utils.h" #include "aarch64/assembler-aarch64.h" #include "aarch64/instructions-aarch64.h" #include "aarch64/test-utils-aarch64.h" #define __ assm. 
#define TEST(name) TEST_(AARCH64_API_##name) namespace vixl { namespace aarch64 { class InstructionReporter : public DecoderVisitor { public: InstructionReporter() : DecoderVisitor(kNonConstVisitor) {} void Visit(Metadata* metadata, const Instruction* instr) VIXL_OVERRIDE { USE(instr); instr_form_ = (*metadata)["form"]; } std::string MoveForm() { return instr_form_; } private: std::string instr_form_; }; static void CheckAndMaybeDisassembleMovprfxPairs(const CodeBuffer* buffer, bool can_take_movprfx) { const Instruction* pair = buffer->GetStartAddress(); const Instruction* end = buffer->GetEndAddress(); bool any_failures = false; PrintDisassembler print_disasm(stdout); Decoder decoder; InstructionReporter reporter; decoder.AppendVisitor(&reporter); while (pair < end) { const Instruction* movprfx = pair; const Instruction* candidate = pair->GetNextInstruction(); const Instruction* next_pair = candidate->GetNextInstruction(); VIXL_ASSERT(candidate < end); Instr inst = candidate->GetInstructionBits(); decoder.Decode(reinterpret_cast(&inst)); std::string form = reporter.MoveForm(); bool failed = can_take_movprfx != candidate->CanTakeSVEMovprfx(form.c_str(), movprfx); any_failures = any_failures || failed; if (failed || Test::disassemble()) { printf("----\n"); if (failed) { printf("# ERROR: Expected %sCanTakeSVEMovprfx(movprfx):\n", can_take_movprfx ? "" : "!"); } print_disasm.DisassembleBuffer(pair, next_pair); } pair = next_pair; } // Abort only at the end, so we can see the individual failures. VIXL_CHECK(!any_failures); } TEST(movprfx_negative_aliasing) { // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not // alias an input to the prefixed instruction. Assembler assm; assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. 
static const size_t kPairCount = 79;
    // The guard is sized for kPairCount pairs, i.e. two instructions per pair.
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    // Each pair below is a movprfx followed by the instruction it prefixes.

    __ movprfx(z0.VnB(), p0.Merging(), z9.VnB());
    __ abs(z0.VnB(), p0.Merging(), z0.VnB());

    __ movprfx(z1, z17);
    __ add(z1.VnH(), p2.Merging(), z1.VnH(), z1.VnH());

    __ movprfx(z12, z13);
    __ and_(z12.VnD(), p5.Merging(), z12.VnD(), z12.VnD());

    __ movprfx(z2, z4);
    __ asr(z2.VnS(), p2.Merging(), z2.VnS(), z2.VnS());

    __ movprfx(z10, z18);
    __ asr(z10.VnH(), p2.Merging(), z10.VnH(), z10.VnD());

    __ movprfx(z17.VnD(), p5.Zeroing(), z20.VnD());
    __ asr(z17.VnD(), p5.Merging(), z17.VnD(), z17.VnD());

    __ movprfx(z22, z9);
    __ asrr(z22.VnH(), p1.Merging(), z22.VnH(), z22.VnH());

    __ movprfx(z0.VnS(), p6.Zeroing(), z6.VnS());
    __ bic(z0.VnS(), p6.Merging(), z0.VnS(), z0.VnS());

    __ movprfx(z12, z16);
    __ clasta(z12.VnD(), p5, z12.VnD(), z12.VnD());

    __ movprfx(z7, z15);
    __ clastb(z7.VnS(), p7, z7.VnS(), z7.VnS());

    __ movprfx(z10, z29);
    __ cls(z10.VnH(), p2.Merging(), z10.VnH());

    __ movprfx(z6, z13);
    __ clz(z6.VnB(), p4.Merging(), z6.VnB());

    __ movprfx(z14.VnS(), p6.Zeroing(), z3.VnS());
    __ cnot(z14.VnS(), p6.Merging(), z14.VnS());

    __ movprfx(z5.VnD(), p6.Merging(), z4.VnD());
    __ cnt(z5.VnD(), p6.Merging(), z5.VnD());

    __ movprfx(z19.VnB(), p6.Zeroing(), z4.VnB());
    __ eor(z19.VnB(), p6.Merging(), z19.VnB(), z19.VnB());

    __ movprfx(z27, z2);
    __ ext(z27.VnB(), z27.VnB(), z27.VnB(), 42);

    __ movprfx(z4.VnS(), p1.Zeroing(), z22.VnS());
    __ lsl(z4.VnS(), p1.Merging(), z4.VnS(), z4.VnS());

    __ movprfx(z4, z5);
    __ lsl(z4.VnB(), p5.Merging(), z4.VnB(), z4.VnD());

    __ movprfx(z11.VnD(), p4.Merging(), z29.VnD());
    __ lsl(z11.VnD(), p4.Merging(), z11.VnD(), z11.VnD());

    __ movprfx(z12.VnD(), p6.Merging(), z3.VnD());
    __ lslr(z12.VnD(), p6.Merging(), z12.VnD(), z12.VnD());

    __ movprfx(z7, z2);
    __ lsr(z7.VnB(), p4.Merging(), z7.VnB(), z7.VnB());

    __ movprfx(z25.VnH(), p6.Merging(), z28.VnH());
    __ lsr(z25.VnH(), p6.Merging(), z25.VnH(), z25.VnD());

    __ movprfx(z14.VnD(), p6.Merging(), z6.VnD());
    __ lsr(z14.VnD(), p6.Merging(), z14.VnD(), z14.VnD());

    __ movprfx(z26.VnH(), p6.Zeroing(), z27.VnH());
    __ lsrr(z26.VnH(), p6.Merging(), z26.VnH(), z26.VnH());

    __ movprfx(z17.VnS(), p4.Zeroing(), z29.VnS());
    __ mad(z17.VnS(), p4.Merging(), z17.VnS(), z23.VnS());

    __ movprfx(z7, z17);
    __ mad(z7.VnD(), p5.Merging(), z4.VnD(), z7.VnD());

    __ movprfx(z11, z7);
    __ mla(z11.VnS(), p1.Merging(), z11.VnS(), z27.VnS());

    __ movprfx(z7, z5);
    __ mla(z7.VnH(), p0.Merging(), z5.VnH(), z7.VnH());

    __ movprfx(z1.VnH(), p0.Merging(), z17.VnH());
    __ mls(z1.VnH(), p0.Merging(), z1.VnH(), z31.VnH());

    __ movprfx(z22.VnB(), p3.Merging(), z18.VnB());
    __ mls(z22.VnB(), p3.Merging(), z18.VnB(), z22.VnB());

    __ movprfx(z7.VnS(), p0.Merging(), z10.VnS());
    __ msb(z7.VnS(), p0.Merging(), z7.VnS(), z10.VnS());

    __ movprfx(z12, z6);
    __ msb(z12.VnH(), p7.Merging(), z6.VnH(), z12.VnH());

    __ movprfx(z8.VnB(), p4.Merging(), z3.VnB());
    __ mul(z8.VnB(), p4.Merging(), z8.VnB(), z8.VnB());

    __ movprfx(z9, z26);
    __ neg(z9.VnS(), p7.Merging(), z9.VnS());

    __ movprfx(z16, z8);
    __ not_(z16.VnH(), p6.Merging(), z16.VnH());

    __ movprfx(z25.VnH(), p5.Zeroing(), z11.VnH());
    __ orr(z25.VnH(), p5.Merging(), z25.VnH(), z25.VnH());

    __ movprfx(z17.VnH(), p1.Merging(), z22.VnH());
    __ rbit(z17.VnH(), p1.Merging(), z17.VnH());

    __ movprfx(z11, z25);
    __ revb(z11.VnD(), p6.Merging(), z11.VnD());

    __ movprfx(z13, z27);
    __ revh(z13.VnS(), p2.Merging(), z13.VnS());

    __ movprfx(z30.VnD(), p6.Merging(), z20.VnD());
    __ revw(z30.VnD(), p6.Merging(), z30.VnD());

    __ movprfx(z2.VnD(), p2.Merging(), z21.VnD());
    __ sabd(z2.VnD(), p2.Merging(), z2.VnD(), z2.VnD());

    __ movprfx(z0, z7);
    __ sdiv(z0.VnD(), p0.Merging(), z0.VnD(), z0.VnD());

    __ movprfx(z19, z28);
    __ sdivr(z19.VnS(), p1.Merging(), z19.VnS(), z19.VnS());

    __ movprfx(z5, z18);
    __ sdot(z5.VnS(), z18.VnB(), z5.VnB(), 1);

    __ movprfx(z15, z11);
    __ sdot(z15.VnD(), z2.VnH(), z15.VnH(), 1);

    __ movprfx(z30, z13);
    __ sdot(z30.VnD(), z30.VnH(), z13.VnH(), 1);

    __ movprfx(z8, z9);
    __ sdot(z8.VnS(), z8.VnB(), z9.VnB());

    __ movprfx(z23, z14);
    __ sdot(z23.VnS(), z14.VnB(), z23.VnB());

    __ movprfx(z26, z5);
    __ sdot(z26.VnS(), z26.VnB(), z5.VnB(), 1);

    __ movprfx(z14, z15);
    __ smax(z14.VnB(), p2.Merging(), z14.VnB(), z14.VnB());

    __ movprfx(z26.VnS(), p0.Merging(), z10.VnS());
    __ smin(z26.VnS(), p0.Merging(), z26.VnS(), z26.VnS());

    __ movprfx(z22, z18);
    __ smulh(z22.VnB(), p2.Merging(), z22.VnB(), z22.VnB());

    __ movprfx(z8, z19);
    __ splice(z8.VnD(), p2, z8.VnD(), z8.VnD());

    __ movprfx(z23.VnH(), p6.Zeroing(), z2.VnH());
    __ sub(z23.VnH(), p6.Merging(), z23.VnH(), z23.VnH());

    __ movprfx(z25.VnS(), p2.Merging(), z21.VnS());
    __ subr(z25.VnS(), p2.Merging(), z25.VnS(), z25.VnS());

    __ movprfx(z28, z31);
    __ sxtb(z28.VnS(), p6.Merging(), z28.VnS());

    __ movprfx(z14.VnD(), p6.Merging(), z17.VnD());
    __ sxth(z14.VnD(), p6.Merging(), z14.VnD());

    __ movprfx(z21.VnD(), p0.Zeroing(), z28.VnD());
    __ sxtw(z21.VnD(), p0.Merging(), z21.VnD());

    __ movprfx(z25, z30);
    __ uabd(z25.VnB(), p5.Merging(), z25.VnB(), z25.VnB());

    __ movprfx(z13.VnD(), p2.Merging(), z30.VnD());
    __ udiv(z13.VnD(), p2.Merging(), z13.VnD(), z13.VnD());

    __ movprfx(z19.VnD(), p4.Zeroing(), z6.VnD());
    __ udivr(z19.VnD(), p4.Merging(), z19.VnD(), z19.VnD());

    __ movprfx(z1, z20);
    __ udot(z1.VnS(), z18.VnB(), z1.VnB(), 1);

    __ movprfx(z8, z2);
    __ udot(z8.VnD(), z2.VnH(), z8.VnH(), 1);

    __ movprfx(z28, z10);
    __ udot(z28.VnD(), z28.VnH(), z7.VnH(), 1);

    __ movprfx(z21, z11);
    __ udot(z21.VnD(), z21.VnH(), z11.VnH());

    __ movprfx(z1, z22);
    __ udot(z1.VnD(), z10.VnH(), z1.VnH());

    __ movprfx(z8, z23);
    __ udot(z8.VnS(), z8.VnB(), z0.VnB(), 1);

    __ movprfx(z10.VnB(), p5.Zeroing(), z0.VnB());
    __ umax(z10.VnB(), p5.Merging(), z10.VnB(), z10.VnB());

    __ movprfx(z0.VnS(), p2.Zeroing(), z30.VnS());
    __ umin(z0.VnS(), p2.Merging(), z0.VnS(), z0.VnS());

    __ movprfx(z26.VnD(), p6.Zeroing(), z29.VnD());
    __ umulh(z26.VnD(), p6.Merging(), z26.VnD(), z26.VnD());

    __ movprfx(z23, z25);
    __ uxtb(z23.VnS(), p7.Merging(), z23.VnS());

    __ movprfx(z14.VnS(), p3.Zeroing(), z5.VnS());
    __ uxth(z14.VnS(), p3.Merging(), z14.VnS());

    __ movprfx(z14, z5);
    __ uxtw(z14.VnD(), p3.Merging(), z14.VnD());

    __ movprfx(z22, z5);
    __ smmla(z22.VnS(), z22.VnB(), z0.VnB());

    __ movprfx(z1, z5);
    __ ummla(z1.VnS(), z10.VnB(), z1.VnB());

    __ movprfx(z30, z5);
    __ usmmla(z30.VnS(), z30.VnB(), z18.VnB());

    __ movprfx(z4, z5);
    __ usdot(z4.VnS(), z3.VnB(), z4.VnB());

    __ movprfx(z10, z5);
    __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0);

    __ movprfx(z1, z5);
    __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1);
  }
  assm.FinalizeCode();

  // Passing `false` makes the checker report a failure for any pair where
  // CanTakeSVEMovprfx() returns true.
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}

TEST(movprfx_negative_aliasing_fp) {
  // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not
  // alias an input to the prefixed instruction.
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE,
                                 CPUFeatures::kSVEF32MM,
                                 CPUFeatures::kSVEF64MM);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
static const size_t kPairCount = 80;
    // The guard is sized for kPairCount pairs, i.e. two instructions per pair.
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    // Each pair below is a movprfx followed by the instruction it prefixes.

    __ movprfx(z17.VnS(), p1.Zeroing(), z12.VnS());
    __ fabd(z17.VnS(), p1.Merging(), z17.VnS(), z17.VnS());

    __ movprfx(z13, z23);
    __ fabs(z13.VnS(), p4.Merging(), z13.VnS());

    __ movprfx(z24.VnS(), p5.Merging(), z15.VnS());
    __ fadd(z24.VnS(), p5.Merging(), z24.VnS(), z24.VnS());

    __ movprfx(z28.VnD(), p5.Zeroing(), z14.VnD());
    __ fcadd(z28.VnD(), p5.Merging(), z28.VnD(), z28.VnD(), 90);

    __ movprfx(z5, z0);
    __ fcmla(z5.VnH(), z0.VnH(), z5.VnH(), 2, 180);

    __ movprfx(z10, z4);
    __ fcmla(z10.VnS(), z8.VnS(), z10.VnS(), 1, 270);

    __ movprfx(z12, z26);
    __ fcmla(z12.VnH(), z12.VnH(), z3.VnH(), 2, 180);

    __ movprfx(z8, z1);
    __ fcmla(z8.VnS(), z8.VnS(), z1.VnS(), 1, 270);

    __ movprfx(z16.VnD(), p0.Merging(), z13.VnD());
    __ fcvt(z16.VnD(), p0.Merging(), z16.VnH());

    __ movprfx(z12.VnD(), p7.Zeroing(), z13.VnD());
    __ fcvt(z12.VnD(), p7.Merging(), z12.VnS());

    __ movprfx(z14, z26);
    __ fcvt(z14.VnS(), p5.Merging(), z14.VnD());

    __ movprfx(z26, z2);
    __ fcvt(z26.VnH(), p7.Merging(), z26.VnD());

    __ movprfx(z25.VnD(), p2.Merging(), z13.VnD());
    __ fcvtzs(z25.VnD(), p2.Merging(), z25.VnH());

    __ movprfx(z31, z2);
    __ fcvtzs(z31.VnH(), p7.Merging(), z31.VnH());

    __ movprfx(z21.VnD(), p1.Merging(), z7.VnD());
    __ fcvtzs(z21.VnD(), p1.Merging(), z21.VnS());

    __ movprfx(z5, z17);
    __ fcvtzs(z5.VnS(), p5.Merging(), z5.VnD());

    __ movprfx(z19.VnD(), p1.Zeroing(), z16.VnD());
    __ fcvtzu(z19.VnD(), p1.Merging(), z19.VnH());

    __ movprfx(z2.VnH(), p7.Zeroing(), z28.VnH());
    __ fcvtzu(z2.VnH(), p7.Merging(), z2.VnH());

    __ movprfx(z21.VnD(), p7.Zeroing(), z27.VnD());
    __ fcvtzu(z21.VnD(), p7.Merging(), z21.VnS());

    __ movprfx(z22.VnD(), p4.Zeroing(), z8.VnD());
    __ fcvtzu(z22.VnS(), p4.Merging(), z22.VnD());

    __ movprfx(z0.VnS(), p5.Merging(), z5.VnS());
    __ fdiv(z0.VnS(), p5.Merging(), z0.VnS(), z0.VnS());

    __ movprfx(z12, z24);
    __ fdivr(z12.VnD(), p7.Merging(), z12.VnD(), z12.VnD());

    __ movprfx(z14.VnD(), p6.Zeroing(), z21.VnD());
    __ fmad(z14.VnD(), p6.Merging(), z14.VnD(), z3.VnD());

    __ movprfx(z2.VnS(), p5.Zeroing(), z10.VnS());
    __ fmad(z2.VnS(), p5.Merging(), z14.VnS(), z2.VnS());

    __ movprfx(z24, z5);
    __ fmax(z24.VnS(), p1.Merging(), z24.VnS(), z24.VnS());

    __ movprfx(z15.VnD(), p2.Merging(), z26.VnD());
    __ fmaxnm(z15.VnD(), p2.Merging(), z15.VnD(), z15.VnD());

    __ movprfx(z20, z22);
    __ fmin(z20.VnH(), p0.Merging(), z20.VnH(), z20.VnH());

    __ movprfx(z24.VnS(), p6.Zeroing(), z30.VnS());
    __ fminnm(z24.VnS(), p6.Merging(), z24.VnS(), z24.VnS());

    __ movprfx(z4, z24);
    __ fmla(z4.VnH(), z24.VnH(), z4.VnH(), 7);

    __ movprfx(z4, z7);
    __ fmla(z4.VnS(), z24.VnS(), z4.VnS(), 3);

    __ movprfx(z5, z28);
    __ fmla(z5.VnD(), z28.VnD(), z5.VnD(), 1);

    __ movprfx(z24, z2);
    __ fmla(z24.VnD(), z24.VnD(), z2.VnD(), 1);

    __ movprfx(z7, z21);
    __ fmla(z7.VnH(), p2.Merging(), z7.VnH(), z31.VnH());

    __ movprfx(z25.VnH(), p5.Zeroing(), z29.VnH());
    __ fmla(z25.VnH(), p5.Merging(), z29.VnH(), z25.VnH());

    __ movprfx(z31, z25);
    __ fmla(z31.VnH(), z31.VnH(), z2.VnH(), 7);

    __ movprfx(z15, z4);
    __ fmla(z15.VnS(), z15.VnS(), z4.VnS(), 3);

    __ movprfx(z7, z11);
    __ fmls(z7.VnH(), z11.VnH(), z7.VnH(), 4);

    __ movprfx(z3, z10);
    __ fmls(z3.VnS(), z10.VnS(), z3.VnS(), 3);

    __ movprfx(z5, z16);
    __ fmls(z5.VnD(), z16.VnD(), z5.VnD(), 1);

    __ movprfx(z31, z26);
    __ fmls(z31.VnD(), z31.VnD(), z8.VnD(), 1);

    __ movprfx(z5.VnH(), p3.Merging(), z2.VnH());
    __ fmls(z5.VnH(), p3.Merging(), z5.VnH(), z2.VnH());

    __ movprfx(z22.VnS(), p3.Zeroing(), z17.VnS());
    __ fmls(z22.VnS(), p3.Merging(), z21.VnS(), z22.VnS());

    __ movprfx(z17, z2);
    __ fmls(z17.VnH(), z17.VnH(), z2.VnH(), 4);

    __ movprfx(z28, z11);
    __ fmls(z28.VnS(), z28.VnS(), z0.VnS(), 3);

    __ movprfx(z15.VnD(), p1.Merging(), z31.VnD());
    __ fmsb(z15.VnD(), p1.Merging(), z15.VnD(), z31.VnD());

    __ movprfx(z21.VnD(), p0.Zeroing(), z5.VnD());
    __ fmsb(z21.VnD(), p0.Merging(), z19.VnD(), z21.VnD());

    __ movprfx(z0.VnH(), p3.Merging(), z31.VnH());
    __ fmul(z0.VnH(), p3.Merging(), z0.VnH(), z0.VnH());

    __ movprfx(z31.VnH(), p6.Merging(), z8.VnH());
    __ fmulx(z31.VnH(), p6.Merging(), z31.VnH(), z31.VnH());

    __ movprfx(z17.VnH(), p1.Zeroing(), z10.VnH());
    __ fneg(z17.VnH(), p1.Merging(), z17.VnH());

    __ movprfx(z22, z31);
    __ fnmad(z22.VnH(), p1.Merging(), z22.VnH(), z23.VnH());

    __ movprfx(z14.VnD(), p0.Zeroing(), z26.VnD());
    __ fnmad(z14.VnD(), p0.Merging(), z2.VnD(), z14.VnD());

    __ movprfx(z13.VnH(), p6.Zeroing(), z29.VnH());
    __ fnmla(z13.VnH(), p6.Merging(), z13.VnH(), z26.VnH());

    __ movprfx(z19.VnH(), p7.Zeroing(), z25.VnH());
    __ fnmla(z19.VnH(), p7.Merging(), z25.VnH(), z19.VnH());

    __ movprfx(z27.VnH(), p5.Merging(), z24.VnH());
    __ fnmls(z27.VnH(), p5.Merging(), z27.VnH(), z24.VnH());

    __ movprfx(z6.VnH(), p6.Zeroing(), z21.VnH());
    __ fnmls(z6.VnH(), p6.Merging(), z21.VnH(), z6.VnH());

    __ movprfx(z7.VnS(), p3.Merging(), z23.VnS());
    __ fnmsb(z7.VnS(), p3.Merging(), z7.VnS(), z23.VnS());

    __ movprfx(z29.VnH(), p2.Zeroing(), z24.VnH());
    __ fnmsb(z29.VnH(), p2.Merging(), z24.VnH(), z29.VnH());

    __ movprfx(z7.VnH(), p6.Merging(), z23.VnH());
    __ frecpx(z7.VnH(), p6.Merging(), z7.VnH());

    __ movprfx(z17.VnS(), p5.Zeroing(), z2.VnS());
    __ frinta(z17.VnS(), p5.Merging(), z17.VnS());

    __ movprfx(z0.VnS(), p2.Zeroing(), z7.VnS());
    __ frinti(z0.VnS(), p2.Merging(), z0.VnS());

    __ movprfx(z8.VnH(), p3.Merging(), z20.VnH());
    __ frintm(z8.VnH(), p3.Merging(), z8.VnH());

    __ movprfx(z3.VnD(), p2.Zeroing(), z20.VnD());
    __ frintn(z3.VnD(), p2.Merging(), z3.VnD());

    __ movprfx(z11, z3);
    __ frintp(z11.VnS(), p4.Merging(), z11.VnS());

    __ movprfx(z23, z29);
    __ frintx(z23.VnD(), p4.Merging(), z23.VnD());

    __ movprfx(z4.VnH(), p4.Zeroing(), z14.VnH());
    __ frintz(z4.VnH(), p4.Merging(), z4.VnH());

    __ movprfx(z18.VnH(), p3.Zeroing(), z0.VnH());
    __ fscale(z18.VnH(), p3.Merging(), z18.VnH(), z18.VnH());

    __ movprfx(z2.VnS(), p6.Zeroing(), z4.VnS());
    __ fsqrt(z2.VnS(), p6.Merging(), z2.VnS());

    __ movprfx(z14.VnD(), p4.Zeroing(), z31.VnD());
    __ fsub(z14.VnD(), p4.Merging(), z14.VnD(), z14.VnD());

    __ movprfx(z31.VnH(), p2.Merging(), z6.VnH());
    __ fsubr(z31.VnH(), p2.Merging(), z31.VnH(), z31.VnH());

    __ movprfx(z4, z30);
    __ ftmad(z4.VnH(), z4.VnH(), z4.VnH(), 2);

    __ movprfx(z25.VnD(), p6.Zeroing(), z2.VnD());
    __ scvtf(z25.VnD(), p6.Merging(), z25.VnS());

    __ movprfx(z0.VnD(), p3.Merging(), z16.VnD());
    __ scvtf(z0.VnD(), p3.Merging(), z0.VnD());

    __ movprfx(z19, z23);
    __ scvtf(z19.VnS(), p7.Merging(), z19.VnD());

    __ movprfx(z19, z4);
    __ scvtf(z19.VnH(), p4.Merging(), z19.VnD());

    __ movprfx(z13.VnD(), p4.Zeroing(), z6.VnD());
    __ ucvtf(z13.VnD(), p4.Merging(), z13.VnS());

    __ movprfx(z6.VnH(), p0.Zeroing(), z14.VnH());
    __ ucvtf(z6.VnH(), p0.Merging(), z6.VnH());

    __ movprfx(z19.VnS(), p4.Merging(), z12.VnS());
    __ ucvtf(z19.VnH(), p4.Merging(), z19.VnS());

    __ movprfx(z0.VnD(), p5.Zeroing(), z12.VnD());
    __ ucvtf(z0.VnH(), p5.Merging(), z0.VnD());

    __ movprfx(z30, z5);
    __ fmmla(z30.VnS(), z30.VnS(), z18.VnS());

    __ movprfx(z31, z5);
    __ fmmla(z31.VnD(), z31.VnD(), z18.VnD());
  }
  assm.FinalizeCode();

  // Passing `false` makes the checker report a failure for any pair where
  // CanTakeSVEMovprfx() returns true.
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}

TEST(movprfx_negative_instructions) {
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
    static const size_t kPairCount = 13;
    // The guard is sized for kPairCount pairs, i.e. two instructions per pair.
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    __ movprfx(z26, z11);
    __ add(z26.VnB(), z11.VnB(), z4.VnB());

    // The merging form can take movprfx, but the zeroing form cannot.
    __ movprfx(z29.VnB(), p3.Zeroing(), z7.VnB());
    __ cpy(z29.VnB(), p3.Zeroing(), -42);

    // Frecpx can take movprfx, but frecpe and frecps cannot.
__ movprfx(z13, z15);
    __ frecpe(z13.VnD(), z26.VnD());

    __ movprfx(z19, z1);
    __ frecps(z19.VnD(), z1.VnD(), z12.VnD());

    __ movprfx(z6, z12);
    __ frsqrte(z6.VnS(), z12.VnS());

    __ movprfx(z29, z5);
    __ frsqrts(z29.VnH(), z5.VnH(), z20.VnH());

    // Ftmad can take movprfx, but ftsmul and ftssel cannot.
    __ movprfx(z1, z31);
    __ ftsmul(z1.VnD(), z31.VnD(), z16.VnD());

    __ movprfx(z8, z27);
    __ ftssel(z8.VnH(), z27.VnH(), z1.VnH());

    // This looks like a merging unary operation, but it's actually an alias of
    // sel, which isn't destructive.
    __ movprfx(z0, z18);
    __ mov(z0.VnS(), p6.Merging(), z18.VnS());

    // The merging form can take movprfx, but the zeroing form cannot.
    __ movprfx(z12.VnS(), p2.Merging(), z11.VnS());
    __ mov(z12.VnS(), p2.Zeroing(), -42);

    // Movprfx can never prefix itself. The three pairs below each place one
    // movprfx directly after another.
    __ movprfx(z13, z6);
    __ movprfx(z13, z2);

    __ movprfx(z3.VnD(), p5.Zeroing(), z8.VnD());
    __ movprfx(z3.VnD(), p5.Merging(), z8.VnD());

    __ movprfx(z1.VnD(), p3.Zeroing(), z14.VnD());
    __ movprfx(z1.VnD(), p3.Zeroing(), z18.VnD());
  }
  assm.FinalizeCode();

  // Passing `false` makes the checker report a failure for any pair where
  // CanTakeSVEMovprfx() returns true.
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}

TEST(movprfx_negative_lane_size) {
  // Test that CanTakeSVEMovprfx() checks that the (predicated) movprfx lane
  // size is compatible with the prefixed instruction.
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
static const size_t kPairCount = 63;
    // The guard is sized for kPairCount pairs, i.e. two instructions per pair.
    CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);

    // Each pair below is a predicated movprfx followed by the instruction it
    // prefixes.

    __ movprfx(z0.VnH(), p2.Zeroing(), z17.VnH());
    __ abs(z0.VnS(), p2.Merging(), z17.VnS());

    __ movprfx(z10.VnD(), p0.Zeroing(), z4.VnD());
    __ add(z10.VnS(), p0.Merging(), z10.VnS(), z2.VnS());

    __ movprfx(z25.VnS(), p4.Zeroing(), z26.VnS());
    __ and_(z25.VnB(), p4.Merging(), z25.VnB(), z27.VnB());

    __ movprfx(z26.VnD(), p5.Merging(), z23.VnD());
    __ asr(z26.VnB(), p5.Merging(), z26.VnB(), 3);

    __ movprfx(z25.VnS(), p7.Zeroing(), z14.VnS());
    __ asr(z25.VnH(), p7.Merging(), z25.VnH(), z14.VnH());

    __ movprfx(z12.VnS(), p7.Zeroing(), z23.VnS());
    __ asr(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnD());

    __ movprfx(z3.VnH(), p4.Zeroing(), z18.VnH());
    __ asr(z3.VnD(), p4.Merging(), z3.VnD(), z15.VnD());

    __ movprfx(z29.VnH(), p4.Merging(), z31.VnH());
    __ asrd(z29.VnB(), p4.Merging(), z29.VnB(), 3);

    __ movprfx(z31.VnH(), p5.Zeroing(), z14.VnH());
    __ asrr(z31.VnB(), p5.Merging(), z31.VnB(), z5.VnB());

    __ movprfx(z0.VnS(), p6.Zeroing(), z18.VnS());
    __ bic(z0.VnB(), p6.Merging(), z0.VnB(), z23.VnB());

    __ movprfx(z19.VnH(), p2.Zeroing(), z24.VnH());
    __ cls(z19.VnB(), p2.Merging(), z24.VnB());

    __ movprfx(z14.VnS(), p5.Zeroing(), z4.VnS());
    __ clz(z14.VnD(), p5.Merging(), z10.VnD());

    __ movprfx(z0.VnD(), p5.Merging(), z2.VnD());
    __ cnot(z0.VnH(), p5.Merging(), z2.VnH());

    __ movprfx(z0.VnB(), p3.Zeroing(), z19.VnB());
    __ cnt(z0.VnH(), p3.Merging(), z8.VnH());

    __ movprfx(z29.VnS(), p0.Merging(), z7.VnS());
    __ cpy(z29.VnD(), p0.Merging(), -42);

    __ movprfx(z13.VnB(), p2.Merging(), z31.VnB());
    __ cpy(z13.VnS(), p2.Merging(), w13);

    __ movprfx(z0.VnS(), p3.Merging(), z15.VnS());
    __ cpy(z0.VnH(), p3.Merging(), h0);

    __ movprfx(z2.VnD(), p6.Zeroing(), z26.VnD());
    __ eor(z2.VnB(), p6.Merging(), z2.VnB(), z26.VnB());

    __ movprfx(z7.VnS(), p7.Zeroing(), z30.VnS());
    __ lsl(z7.VnD(), p7.Merging(), z7.VnD(), 3);

    __ movprfx(z11.VnH(), p3.Merging(), z23.VnH());
    __ lsl(z11.VnB(), p3.Merging(), z11.VnB(), z21.VnB());

    __ movprfx(z31.VnS(), p7.Zeroing(), z21.VnS());
    __ lsl(z31.VnH(), p7.Merging(), z31.VnH(), z21.VnD());

    __ movprfx(z26.VnH(), p0.Merging(), z0.VnH());
    __ lsl(z26.VnD(), p0.Merging(), z26.VnD(), z24.VnD());

    __ movprfx(z1.VnS(), p2.Zeroing(), z6.VnS());
    __ lslr(z1.VnB(), p2.Merging(), z1.VnB(), z6.VnB());

    __ movprfx(z4.VnD(), p4.Zeroing(), z6.VnD());
    __ lsr(z4.VnH(), p4.Merging(), z4.VnH(), 3);

    __ movprfx(z27.VnH(), p0.Zeroing(), z29.VnH());
    __ lsr(z27.VnS(), p0.Merging(), z27.VnS(), z29.VnS());

    __ movprfx(z5.VnD(), p2.Zeroing(), z16.VnD());
    __ lsr(z5.VnH(), p2.Merging(), z5.VnH(), z2.VnD());

    __ movprfx(z27.VnB(), p4.Zeroing(), z5.VnB());
    __ lsr(z27.VnD(), p4.Merging(), z27.VnD(), z5.VnD());

    __ movprfx(z27.VnS(), p3.Merging(), z13.VnS());
    __ lsrr(z27.VnD(), p3.Merging(), z27.VnD(), z13.VnD());

    __ movprfx(z30.VnS(), p2.Zeroing(), z14.VnS());
    __ mad(z30.VnB(), p2.Merging(), z20.VnB(), z14.VnB());

    __ movprfx(z14.VnB(), p6.Merging(), z11.VnB());
    __ mla(z14.VnD(), p6.Merging(), z28.VnD(), z11.VnD());

    __ movprfx(z28.VnH(), p2.Zeroing(), z22.VnH());
    __ mls(z28.VnS(), p2.Merging(), z3.VnS(), z22.VnS());

    // Aliases of cpy.
    __ movprfx(z18.VnH(), p6.Zeroing(), z25.VnH());
    __ mov(z18.VnD(), p6.Merging(), -42);

    __ movprfx(z22.VnD(), p2.Zeroing(), z6.VnD());
    __ mov(z22.VnS(), p2.Merging(), w22);

    __ movprfx(z3.VnH(), p0.Zeroing(), z13.VnH());
    __ mov(z3.VnB(), p0.Merging(), b0);

    __ movprfx(z31.VnS(), p7.Zeroing(), z12.VnS());
    __ msb(z31.VnH(), p7.Merging(), z14.VnH(), z12.VnH());

    __ movprfx(z16.VnS(), p7.Zeroing(), z6.VnS());
    __ mul(z16.VnB(), p7.Merging(), z16.VnB(), z30.VnB());

    __ movprfx(z17.VnD(), p7.Merging(), z1.VnD());
    __ neg(z17.VnB(), p7.Merging(), z1.VnB());

    __ movprfx(z31.VnH(), p4.Zeroing(), z12.VnH());
    __ not_(z31.VnB(), p4.Merging(), z12.VnB());

    __ movprfx(z9.VnH(), p3.Zeroing(), z23.VnH());
    __ orr(z9.VnS(), p3.Merging(), z9.VnS(), z13.VnS());

    __ movprfx(z25.VnD(), p2.Zeroing(), z21.VnD());
    __ rbit(z25.VnS(), p2.Merging(), z21.VnS());

    __ movprfx(z26.VnH(), p3.Merging(), z13.VnH());
    __ revb(z26.VnD(), p3.Merging(), z13.VnD());

    __ movprfx(z8.VnH(), p5.Merging(), z20.VnH());
    __ revh(z8.VnS(), p5.Merging(), z0.VnS());

    __ movprfx(z22.VnH(), p6.Merging(), z15.VnH());
    __ revw(z22.VnD(), p6.Merging(), z10.VnD());

    __ movprfx(z1.VnD(), p3.Merging(), z15.VnD());
    __ sabd(z1.VnB(), p3.Merging(), z1.VnB(), z15.VnB());

    __ movprfx(z25.VnD(), p1.Zeroing(), z30.VnD());
    __ sdiv(z25.VnS(), p1.Merging(), z25.VnS(), z30.VnS());

    __ movprfx(z19.VnS(), p3.Zeroing(), z11.VnS());
    __ sdivr(z19.VnD(), p3.Merging(), z19.VnD(), z24.VnD());

    __ movprfx(z12.VnH(), p2.Merging(), z2.VnH());
    __ smax(z12.VnS(), p2.Merging(), z12.VnS(), z24.VnS());

    __ movprfx(z3.VnD(), p1.Merging(), z15.VnD());
    __ smin(z3.VnS(), p1.Merging(), z3.VnS(), z20.VnS());

    __ movprfx(z13.VnS(), p5.Merging(), z22.VnS());
    __ smulh(z13.VnB(), p5.Merging(), z13.VnB(), z27.VnB());

    __ movprfx(z11.VnH(), p5.Zeroing(), z25.VnH());
    __ sub(z11.VnB(), p5.Merging(), z11.VnB(), z7.VnB());

    __ movprfx(z3.VnB(), p6.Merging(), z13.VnB());
    __ subr(z3.VnS(), p6.Merging(), z3.VnS(), z13.VnS());

    __ movprfx(z26.VnH(), p5.Merging(), z1.VnH());
    __ sxtb(z26.VnS(), p5.Merging(), z17.VnS());

    __ movprfx(z11.VnB(), p7.Zeroing(), z26.VnB());
    __ sxth(z11.VnS(), p7.Merging(), z26.VnS());

    __ movprfx(z1.VnS(), p2.Merging(), z21.VnS());
    __ sxtw(z1.VnD(), p2.Merging(), z21.VnD());

    __ movprfx(z4.VnS(), p6.Zeroing(), z6.VnS());
    __ uabd(z4.VnH(), p6.Merging(), z4.VnH(), z6.VnH());

    __ movprfx(z26.VnB(), p2.Zeroing(), z11.VnB());
    __ udiv(z26.VnD(), p2.Merging(), z26.VnD(), z11.VnD());

    __ movprfx(z19.VnB(), p5.Merging(), z6.VnB());
    __ udivr(z19.VnS(), p5.Merging(), z19.VnS(), z9.VnS());

    __ movprfx(z16.VnB(), p4.Merging(), z6.VnB());
    __ umax(z16.VnH(), p4.Merging(), z16.VnH(), z6.VnH());

    __ movprfx(z1.VnD(), p0.Zeroing(), z4.VnD());
    __ umin(z1.VnS(), p0.Merging(), z1.VnS(), z28.VnS());

    __ movprfx(z25.VnD(), p7.Merging(), z4.VnD());
    __ umulh(z25.VnB(), p7.Merging(), z25.VnB(), z16.VnB());

    __ movprfx(z29.VnB(), p4.Merging(), z2.VnB());
    __ uxtb(z29.VnS(), p4.Merging(), z31.VnS());

    __ movprfx(z27.VnH(), p5.Merging(), z21.VnH());
    __ uxth(z27.VnD(), p5.Merging(), z1.VnD());

    __ movprfx(z29.VnB(), p2.Merging(), z7.VnB());
    __ uxtw(z29.VnD(), p2.Merging(), z7.VnD());
  }
  assm.FinalizeCode();

  // Passing `false` makes the checker report a failure for any pair where
  // CanTakeSVEMovprfx() returns true.
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}

TEST(movprfx_negative_lane_size_fp) {
  // Test that CanTakeSVEMovprfx() checks that the (predicated) movprfx lane
  // size is compatible with the prefixed instruction.
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
static const size_t kPairCount = 64; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z29.VnD(), p5.Zeroing(), z8.VnD()); __ fabd(z29.VnS(), p5.Merging(), z29.VnS(), z26.VnS()); __ movprfx(z9.VnB(), p0.Zeroing(), z1.VnB()); __ fabs(z9.VnS(), p0.Merging(), z15.VnS()); __ movprfx(z24.VnD(), p0.Zeroing(), z8.VnD()); __ fadd(z24.VnH(), p0.Merging(), z24.VnH(), 0.5); __ movprfx(z24.VnB(), p1.Zeroing(), z27.VnB()); __ fadd(z24.VnH(), p1.Merging(), z24.VnH(), z27.VnH()); __ movprfx(z14.VnH(), p7.Merging(), z12.VnH()); __ fcadd(z14.VnD(), p7.Merging(), z14.VnD(), z12.VnD(), 90); __ movprfx(z10.VnB(), p6.Merging(), z11.VnB()); __ fcpy(z10.VnH(), p6.Merging(), 1.25); __ movprfx(z12.VnB(), p6.Merging(), z18.VnB()); __ fcvt(z12.VnD(), p6.Merging(), z18.VnH()); __ movprfx(z18.VnH(), p7.Zeroing(), z2.VnH()); __ fcvt(z18.VnD(), p7.Merging(), z0.VnS()); __ movprfx(z3.VnH(), p5.Merging(), z14.VnH()); __ fcvt(z3.VnS(), p5.Merging(), z21.VnD()); __ movprfx(z15.VnH(), p1.Zeroing(), z12.VnH()); __ fcvt(z15.VnH(), p1.Merging(), z12.VnD()); __ movprfx(z3.VnH(), p2.Merging(), z22.VnH()); __ fcvtzs(z3.VnD(), p2.Merging(), z7.VnH()); __ movprfx(z17.VnS(), p3.Merging(), z14.VnS()); __ fcvtzs(z17.VnD(), p3.Merging(), z14.VnD()); __ movprfx(z2.VnH(), p1.Zeroing(), z16.VnH()); __ fcvtzs(z2.VnS(), p1.Merging(), z31.VnH()); __ movprfx(z13.VnB(), p2.Merging(), z9.VnB()); __ fcvtzs(z13.VnS(), p2.Merging(), z23.VnD()); __ movprfx(z19.VnB(), p1.Merging(), z4.VnB()); __ fcvtzu(z19.VnD(), p1.Merging(), z14.VnH()); __ movprfx(z29.VnS(), p2.Merging(), z19.VnS()); __ fcvtzu(z29.VnD(), p2.Merging(), z19.VnD()); __ movprfx(z21.VnS(), p4.Zeroing(), z17.VnS()); __ fcvtzu(z21.VnD(), p4.Merging(), z17.VnS()); __ movprfx(z19.VnH(), p4.Zeroing(), z30.VnH()); __ fcvtzu(z19.VnS(), p4.Merging(), z16.VnD()); __ movprfx(z10.VnS(), p7.Zeroing(), z27.VnS()); __ fdiv(z10.VnH(), p7.Merging(), z10.VnH(), z27.VnH()); __ movprfx(z7.VnD(), p7.Zeroing(), z17.VnD()); __ fdivr(z7.VnH(), 
p7.Merging(), z7.VnH(), z28.VnH()); __ movprfx(z22.VnB(), p0.Merging(), z27.VnB()); __ fmad(z22.VnH(), p0.Merging(), z27.VnH(), z15.VnH()); __ movprfx(z14.VnD(), p1.Zeroing(), z11.VnD()); __ fmax(z14.VnS(), p1.Merging(), z14.VnS(), 0.0); __ movprfx(z27.VnB(), p5.Merging(), z14.VnB()); __ fmax(z27.VnD(), p5.Merging(), z27.VnD(), z14.VnD()); __ movprfx(z31.VnH(), p7.Merging(), z24.VnH()); __ fmaxnm(z31.VnD(), p7.Merging(), z31.VnD(), 0.0); __ movprfx(z11.VnD(), p7.Zeroing(), z25.VnD()); __ fmaxnm(z11.VnS(), p7.Merging(), z11.VnS(), z28.VnS()); __ movprfx(z31.VnD(), p6.Merging(), z19.VnD()); __ fmin(z31.VnH(), p6.Merging(), z31.VnH(), 0.0); __ movprfx(z20.VnS(), p3.Zeroing(), z15.VnS()); __ fmin(z20.VnH(), p3.Merging(), z20.VnH(), z8.VnH()); __ movprfx(z6.VnS(), p0.Merging(), z30.VnS()); __ fminnm(z6.VnH(), p0.Merging(), z6.VnH(), 0.0); __ movprfx(z1.VnH(), p1.Zeroing(), z14.VnH()); __ fminnm(z1.VnS(), p1.Merging(), z1.VnS(), z14.VnS()); __ movprfx(z13.VnB(), p3.Zeroing(), z21.VnB()); __ fmla(z13.VnD(), p3.Merging(), z12.VnD(), z21.VnD()); __ movprfx(z15.VnS(), p1.Zeroing(), z20.VnS()); __ fmls(z15.VnH(), p1.Merging(), z28.VnH(), z20.VnH()); __ movprfx(z19.VnD(), p3.Zeroing(), z31.VnD()); __ fmov(z19.VnH(), p3.Merging(), 0.0); __ movprfx(z16.VnS(), p7.Merging(), z30.VnS()); __ fmov(z16.VnH(), p7.Merging(), 2.5); __ movprfx(z21.VnB(), p1.Merging(), z28.VnB()); __ fmsb(z21.VnH(), p1.Merging(), z30.VnH(), z28.VnH()); __ movprfx(z21.VnS(), p1.Zeroing(), z19.VnS()); __ fmul(z21.VnH(), p1.Merging(), z21.VnH(), 2.0); __ movprfx(z28.VnB(), p7.Zeroing(), z8.VnB()); __ fmul(z28.VnS(), p7.Merging(), z28.VnS(), z26.VnS()); __ movprfx(z2.VnB(), p4.Merging(), z31.VnB()); __ fmulx(z2.VnH(), p4.Merging(), z2.VnH(), z31.VnH()); __ movprfx(z6.VnB(), p2.Zeroing(), z0.VnB()); __ fneg(z6.VnS(), p2.Merging(), z28.VnS()); __ movprfx(z26.VnB(), p0.Zeroing(), z21.VnB()); __ fnmad(z26.VnH(), p0.Merging(), z21.VnH(), z18.VnH()); __ movprfx(z15.VnB(), p1.Zeroing(), z26.VnB()); __ 
fnmla(z15.VnH(), p1.Merging(), z26.VnH(), z18.VnH()); __ movprfx(z16.VnS(), p0.Merging(), z1.VnS()); __ fnmls(z16.VnD(), p0.Merging(), z1.VnD(), z13.VnD()); __ movprfx(z4.VnH(), p0.Zeroing(), z16.VnH()); __ fnmsb(z4.VnS(), p0.Merging(), z30.VnS(), z3.VnS()); // Note that frecpe and frecps _cannot_ take movprfx. __ movprfx(z9.VnH(), p0.Zeroing(), z21.VnH()); __ frecpx(z9.VnS(), p0.Merging(), z14.VnS()); __ movprfx(z6.VnH(), p2.Zeroing(), z28.VnH()); __ frinta(z6.VnD(), p2.Merging(), z28.VnD()); __ movprfx(z12.VnS(), p4.Zeroing(), z7.VnS()); __ frinti(z12.VnH(), p4.Merging(), z7.VnH()); __ movprfx(z6.VnB(), p5.Merging(), z20.VnB()); __ frintm(z6.VnD(), p5.Merging(), z20.VnD()); __ movprfx(z7.VnB(), p6.Merging(), z19.VnB()); __ frintn(z7.VnH(), p6.Merging(), z11.VnH()); __ movprfx(z12.VnD(), p2.Merging(), z31.VnD()); __ frintp(z12.VnS(), p2.Merging(), z31.VnS()); __ movprfx(z1.VnS(), p5.Merging(), z10.VnS()); __ frintx(z1.VnD(), p5.Merging(), z0.VnD()); __ movprfx(z6.VnH(), p0.Merging(), z12.VnH()); __ frintz(z6.VnS(), p0.Merging(), z7.VnS()); __ movprfx(z8.VnH(), p2.Merging(), z6.VnH()); __ fscale(z8.VnD(), p2.Merging(), z8.VnD(), z6.VnD()); __ movprfx(z20.VnH(), p2.Zeroing(), z2.VnH()); __ fsqrt(z20.VnD(), p2.Merging(), z15.VnD()); __ movprfx(z28.VnS(), p6.Zeroing(), z19.VnS()); __ fsub(z28.VnD(), p6.Merging(), z28.VnD(), 1.0); __ movprfx(z6.VnB(), p0.Zeroing(), z12.VnB()); __ fsub(z6.VnD(), p0.Merging(), z6.VnD(), z20.VnD()); __ movprfx(z6.VnS(), p7.Zeroing(), z11.VnS()); __ fsubr(z6.VnH(), p7.Merging(), z6.VnH(), 1.0); __ movprfx(z28.VnB(), p3.Merging(), z10.VnB()); __ fsubr(z28.VnS(), p3.Merging(), z28.VnS(), z9.VnS()); __ movprfx(z22.VnB(), p3.Zeroing(), z14.VnB()); __ scvtf(z22.VnD(), p3.Merging(), z24.VnS()); __ movprfx(z20.VnS(), p2.Merging(), z9.VnS()); __ scvtf(z20.VnH(), p2.Merging(), z9.VnH()); __ movprfx(z19.VnH(), p1.Merging(), z21.VnH()); __ scvtf(z19.VnS(), p1.Merging(), z6.VnD()); __ movprfx(z31.VnS(), p3.Merging(), z22.VnS()); __ scvtf(z31.VnH(), 
p3.Merging(), z22.VnD()); __ movprfx(z8.VnS(), p3.Merging(), z3.VnS()); __ ucvtf(z8.VnD(), p3.Merging(), z1.VnS()); __ movprfx(z0.VnB(), p0.Merging(), z23.VnB()); __ ucvtf(z0.VnH(), p0.Merging(), z12.VnH()); __ movprfx(z8.VnH(), p3.Zeroing(), z4.VnH()); __ ucvtf(z8.VnH(), p3.Merging(), z4.VnS()); __ movprfx(z20.VnH(), p2.Zeroing(), z10.VnH()); __ ucvtf(z20.VnH(), p2.Merging(), z11.VnD()); } assm.FinalizeCode(); CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); } TEST(movprfx_negative_predication) { // Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears // before an unpredicated instruction. Assembler assm; assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. static const size_t kPairCount = 60; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z27.VnS(), p1.Zeroing(), z12.VnS()); __ add(z27.VnS(), z27.VnS(), 42); __ movprfx(z31.VnS(), p6.Zeroing(), z1.VnS()); __ and_(z31.VnS(), z31.VnS(), 4); __ movprfx(z27.VnS(), p5.Merging(), z24.VnS()); __ bic(z27.VnS(), z27.VnS(), 4); __ movprfx(z6.VnH(), p7.Merging(), z30.VnH()); __ clasta(z6.VnH(), p7, z6.VnH(), z14.VnH()); __ movprfx(z11.VnB(), p6.Merging(), z5.VnB()); __ clastb(z11.VnB(), p6, z11.VnB(), z29.VnB()); __ movprfx(z5.VnD(), p0.Merging(), z1.VnD()); __ decd(z5.VnD(), SVE_MUL3); __ movprfx(z11.VnH(), p7.Zeroing(), z28.VnH()); __ dech(z11.VnH(), SVE_VL2); __ movprfx(z14.VnS(), p5.Zeroing(), z6.VnS()); __ decp(z14.VnS(), p5); __ movprfx(z6.VnS(), p5.Merging(), z10.VnS()); __ decw(z6.VnS(), SVE_ALL); __ movprfx(z27.VnH(), p7.Zeroing(), z9.VnH()); __ eon(z27.VnH(), z27.VnH(), 4); __ movprfx(z3.VnS(), p3.Zeroing(), z2.VnS()); __ eor(z3.VnS(), z3.VnS(), 4); __ movprfx(z30.VnB(), p2.Zeroing(), z25.VnB()); __ ext(z30.VnB(), z30.VnB(), z25.VnB(), 42); __ movprfx(z22.VnD(), p0.Merging(), z0.VnD()); __ 
incd(z22.VnD(), SVE_MUL3); __ movprfx(z7.VnH(), p3.Merging(), z3.VnH()); __ inch(z7.VnH(), SVE_VL2); __ movprfx(z9.VnD(), p1.Zeroing(), z28.VnD()); __ incp(z9.VnD(), p1); __ movprfx(z30.VnS(), p3.Merging(), z4.VnS()); __ incw(z30.VnS(), SVE_ALL); __ movprfx(z30.VnB(), p7.Zeroing(), z21.VnB()); __ insr(z30.VnB(), w30); __ movprfx(z2.VnB(), p4.Zeroing(), z26.VnB()); __ insr(z2.VnB(), b0); __ movprfx(z27.VnS(), p5.Zeroing(), z5.VnS()); __ mul(z27.VnS(), z27.VnS(), 42); __ movprfx(z5.VnS(), p0.Merging(), z26.VnS()); __ orn(z5.VnS(), z5.VnS(), 4); __ movprfx(z5.VnS(), p0.Merging(), z26.VnS()); __ orn(z5.VnS(), z5.VnS(), 4); __ movprfx(z16.VnD(), p1.Merging(), z13.VnD()); __ sdot(z16.VnD(), z11.VnH(), z7.VnH(), 1); __ movprfx(z27.VnD(), p5.Merging(), z18.VnD()); __ sdot(z27.VnD(), z18.VnH(), z0.VnH()); __ movprfx(z20.VnS(), p6.Merging(), z1.VnS()); __ sdot(z20.VnS(), z10.VnB(), z1.VnB(), 1); __ movprfx(z19.VnD(), p0.Zeroing(), z7.VnD()); __ smax(z19.VnD(), z19.VnD(), 42); __ movprfx(z15.VnD(), p1.Zeroing(), z7.VnD()); __ smin(z15.VnD(), z15.VnD(), 42); __ movprfx(z15.VnB(), p5.Merging(), z3.VnB()); __ splice(z15.VnB(), p5, z15.VnB(), z3.VnB()); __ movprfx(z5.VnB(), p6.Zeroing(), z4.VnB()); __ sqadd(z5.VnB(), z5.VnB(), 42); __ movprfx(z16.VnD(), p0.Zeroing(), z18.VnD()); __ sqdecd(z16.VnD(), SVE_MUL3); __ movprfx(z7.VnH(), p3.Merging(), z28.VnH()); __ sqdech(z7.VnH(), SVE_VL2); __ movprfx(z7.VnS(), p2.Merging(), z13.VnS()); __ sqdecp(z7.VnS(), p2); __ movprfx(z22.VnS(), p7.Zeroing(), z20.VnS()); __ sqdecw(z22.VnS(), SVE_ALL); __ movprfx(z26.VnD(), p1.Zeroing(), z0.VnD()); __ sqincd(z26.VnD(), SVE_MUL3); __ movprfx(z15.VnH(), p7.Zeroing(), z27.VnH()); __ sqinch(z15.VnH(), SVE_VL2); __ movprfx(z4.VnD(), p7.Merging(), z13.VnD()); __ sqincp(z4.VnD(), p7); __ movprfx(z29.VnS(), p6.Merging(), z14.VnS()); __ sqincw(z29.VnS(), SVE_ALL); __ movprfx(z17.VnB(), p1.Merging(), z24.VnB()); __ sqsub(z17.VnB(), z17.VnB(), 42); __ movprfx(z26.VnS(), p5.Zeroing(), z19.VnS()); __ 
sub(z26.VnS(), z26.VnS(), 42); __ movprfx(z15.VnD(), p1.Merging(), z3.VnD()); __ subr(z15.VnD(), z15.VnD(), 42); __ movprfx(z4.VnD(), p2.Zeroing(), z14.VnD()); __ udot(z4.VnD(), z15.VnH(), z7.VnH(), 1); __ movprfx(z29.VnD(), p4.Zeroing(), z28.VnD()); __ udot(z29.VnD(), z2.VnH(), z17.VnH()); __ movprfx(z7.VnS(), p6.Merging(), z3.VnS()); __ udot(z7.VnS(), z14.VnB(), z1.VnB(), 1); __ movprfx(z14.VnB(), p3.Merging(), z5.VnB()); __ umax(z14.VnB(), z14.VnB(), 42); __ movprfx(z4.VnD(), p1.Zeroing(), z2.VnD()); __ umin(z4.VnD(), z4.VnD(), 42); __ movprfx(z19.VnB(), p0.Zeroing(), z27.VnB()); __ uqadd(z19.VnB(), z19.VnB(), 42); __ movprfx(z24.VnD(), p7.Zeroing(), z11.VnD()); __ uqdecd(z24.VnD(), SVE_MUL3); __ movprfx(z24.VnH(), p4.Zeroing(), z18.VnH()); __ uqdech(z24.VnH(), SVE_VL2); __ movprfx(z31.VnS(), p5.Zeroing(), z2.VnS()); __ uqdecp(z31.VnS(), p5); __ movprfx(z19.VnS(), p6.Merging(), z21.VnS()); __ uqdecw(z19.VnS(), SVE_ALL); __ movprfx(z27.VnD(), p0.Merging(), z21.VnD()); __ uqincd(z27.VnD(), SVE_MUL3); __ movprfx(z13.VnH(), p4.Zeroing(), z12.VnH()); __ uqinch(z13.VnH(), SVE_VL2); __ movprfx(z0.VnD(), p4.Zeroing(), z1.VnD()); __ uqincp(z0.VnD(), p4); __ movprfx(z12.VnS(), p4.Merging(), z21.VnS()); __ uqincw(z12.VnS(), SVE_ALL); __ movprfx(z9.VnD(), p0.Zeroing(), z16.VnD()); __ uqsub(z9.VnD(), z9.VnD(), 42); __ movprfx(z22.VnS(), p0.Zeroing(), z5.VnS()); __ smmla(z22.VnS(), z21.VnB(), z0.VnB()); __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS()); __ ummla(z1.VnS(), z10.VnB(), z2.VnB()); __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS()); __ usmmla(z30.VnS(), z29.VnB(), z18.VnB()); __ movprfx(z4.VnS(), p0.Zeroing(), z5.VnS()); __ usdot(z4.VnS(), z3.VnB(), z4.VnB()); __ movprfx(z10.VnS(), p0.Zeroing(), z5.VnS()); __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0); __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS()); __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1); } assm.FinalizeCode(); CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); } TEST(movprfx_negative_predication_fp) { // Test 
that CanTakeSVEMovprfx() is false when a predicated movprfx appears // before an unpredicated instruction. Assembler assm; assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEF32MM, CPUFeatures::kSVEF64MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. static const size_t kPairCount = 11; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z10.VnH(), p3.Zeroing(), z3.VnH()); __ fcmla(z10.VnH(), z22.VnH(), z3.VnH(), 2, 180); __ movprfx(z12.VnS(), p4.Merging(), z14.VnS()); __ fcmla(z12.VnS(), z3.VnS(), z10.VnS(), 1, 270); __ movprfx(z16.VnD(), p3.Zeroing(), z24.VnD()); __ fmla(z16.VnD(), z24.VnD(), z8.VnD(), 1); __ movprfx(z9.VnH(), p7.Zeroing(), z0.VnH()); __ fmla(z9.VnH(), z8.VnH(), z0.VnH(), 7); __ movprfx(z23.VnS(), p5.Merging(), z5.VnS()); __ fmla(z23.VnS(), z7.VnS(), z5.VnS(), 3); __ movprfx(z19.VnD(), p6.Zeroing(), z8.VnD()); __ fmls(z19.VnD(), z27.VnD(), z13.VnD(), 1); __ movprfx(z25.VnH(), p7.Merging(), z24.VnH()); __ fmls(z25.VnH(), z24.VnH(), z4.VnH(), 4); __ movprfx(z2.VnS(), p1.Zeroing(), z0.VnS()); __ fmls(z2.VnS(), z9.VnS(), z0.VnS(), 3); // Note that ftsmul and ftssel cannot take movprfx. __ movprfx(z22.VnD(), p6.Merging(), z16.VnD()); __ ftmad(z22.VnD(), z22.VnD(), z20.VnD(), 2); __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS()); __ fmmla(z30.VnS(), z29.VnS(), z18.VnS()); __ movprfx(z31.VnD(), p1.Merging(), z5.VnD()); __ fmmla(z31.VnD(), z30.VnD(), z18.VnD()); } assm.FinalizeCode(); CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); } TEST(movprfx_positive) { Assembler assm; assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. 
static const size_t kPairCount = 123; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z17, z28); __ abs(z17.VnB(), p6.Merging(), z28.VnB()); __ movprfx(z9, z7); __ add(z9.VnB(), p5.Merging(), z9.VnB(), z29.VnB()); __ movprfx(z11, z0); __ add(z11.VnD(), z11.VnD(), 42); __ movprfx(z8.VnS(), p3.Zeroing(), z28.VnS()); __ and_(z8.VnS(), p3.Merging(), z8.VnS(), z31.VnS()); __ movprfx(z20, z23); __ and_(z20.VnS(), z20.VnS(), 4); __ movprfx(z24.VnD(), p5.Merging(), z11.VnD()); __ asr(z24.VnD(), p5.Merging(), z24.VnD(), 3); __ movprfx(z1, z13); __ asr(z1.VnH(), p3.Merging(), z1.VnH(), z4.VnH()); __ movprfx(z0.VnB(), p7.Zeroing(), z28.VnB()); __ asr(z0.VnB(), p7.Merging(), z0.VnB(), z28.VnD()); __ movprfx(z15, z5); __ asr(z15.VnD(), p3.Merging(), z15.VnD(), z5.VnD()); __ movprfx(z24.VnH(), p3.Merging(), z22.VnH()); __ asrd(z24.VnH(), p3.Merging(), z24.VnH(), 3); __ movprfx(z2.VnS(), p3.Zeroing(), z20.VnS()); __ asrr(z2.VnS(), p3.Merging(), z2.VnS(), z15.VnS()); __ movprfx(z17.VnB(), p7.Merging(), z6.VnB()); __ bic(z17.VnB(), p7.Merging(), z17.VnB(), z25.VnB()); __ movprfx(z31, z6); __ bic(z31.VnD(), z31.VnD(), 4); __ movprfx(z20, z2); __ clasta(z20.VnB(), p4, z20.VnB(), z15.VnB()); __ movprfx(z27, z11); __ clastb(z27.VnB(), p5, z27.VnB(), z6.VnB()); __ movprfx(z3.VnS(), p7.Zeroing(), z17.VnS()); __ cls(z3.VnS(), p7.Merging(), z0.VnS()); __ movprfx(z29.VnB(), p0.Zeroing(), z24.VnB()); __ clz(z29.VnB(), p0.Merging(), z7.VnB()); __ movprfx(z2.VnH(), p7.Zeroing(), z29.VnH()); __ cnot(z2.VnH(), p7.Merging(), z28.VnH()); __ movprfx(z23, z5); __ cnt(z23.VnH(), p0.Merging(), z12.VnH()); __ movprfx(z5, z3); __ cpy(z5.VnD(), p1.Merging(), -42); __ movprfx(z0, z12); __ cpy(z0.VnB(), p1.Merging(), w0); __ movprfx(z27, z8); __ cpy(z27.VnB(), p0.Merging(), b0); __ movprfx(z20, z24); __ decd(z20.VnD(), SVE_MUL3); __ movprfx(z5, z28); __ dech(z5.VnH(), SVE_VL2); __ movprfx(z7, z3); __ decp(z7.VnD(), p2); __ movprfx(z4, z7); __ decw(z4.VnS(), SVE_ALL); __ 
movprfx(z3, z18); __ eon(z3.VnS(), z3.VnS(), 4); __ movprfx(z4.VnD(), p0.Merging(), z10.VnD()); __ eor(z4.VnD(), p0.Merging(), z4.VnD(), z10.VnD()); __ movprfx(z15, z18); __ eor(z15.VnH(), z15.VnH(), 4); __ movprfx(z17, z30); __ ext(z17.VnB(), z17.VnB(), z18.VnB(), 2); __ movprfx(z19, z28); __ incd(z19.VnD(), SVE_MUL3); __ movprfx(z13, z7); __ inch(z13.VnH(), SVE_VL2); __ movprfx(z14, z21); __ incp(z14.VnD(), p1); __ movprfx(z26, z12); __ incw(z26.VnS(), SVE_ALL); __ movprfx(z16, z2); __ insr(z16.VnB(), w16); __ movprfx(z20, z26); __ insr(z20.VnB(), b0); __ movprfx(z30.VnD(), p0.Merging(), z23.VnD()); __ lsl(z30.VnD(), p0.Merging(), z30.VnD(), 3); __ movprfx(z28.VnS(), p2.Zeroing(), z6.VnS()); __ lsl(z28.VnS(), p2.Merging(), z28.VnS(), z6.VnS()); __ movprfx(z15.VnH(), p6.Zeroing(), z3.VnH()); __ lsl(z15.VnH(), p6.Merging(), z15.VnH(), z3.VnD()); __ movprfx(z13.VnD(), p4.Zeroing(), z14.VnD()); __ lsl(z13.VnD(), p4.Merging(), z13.VnD(), z25.VnD()); __ movprfx(z14, z5); __ lslr(z14.VnS(), p0.Merging(), z14.VnS(), z17.VnS()); __ movprfx(z21, z1); __ lsr(z21.VnH(), p5.Merging(), z21.VnH(), 3); __ movprfx(z11.VnH(), p0.Zeroing(), z13.VnH()); __ lsr(z11.VnH(), p0.Merging(), z11.VnH(), z9.VnH()); __ movprfx(z24, z29); __ lsr(z24.VnS(), p4.Merging(), z24.VnS(), z1.VnD()); __ movprfx(z1.VnD(), p6.Merging(), z9.VnD()); __ lsr(z1.VnD(), p6.Merging(), z1.VnD(), z9.VnD()); __ movprfx(z22, z3); __ lsrr(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB()); __ movprfx(z24.VnB(), p2.Zeroing(), z5.VnB()); __ mad(z24.VnB(), p2.Merging(), z5.VnB(), z10.VnB()); __ movprfx(z8, z4); __ mla(z8.VnS(), p6.Merging(), z4.VnS(), z26.VnS()); __ movprfx(z10, z8); __ mls(z10.VnS(), p4.Merging(), z23.VnS(), z16.VnS()); // Aliases of cpy. 
__ movprfx(z4.VnH(), p5.Zeroing(), z2.VnH()); __ mov(z4.VnH(), p5.Merging(), -42); __ movprfx(z2.VnB(), p3.Zeroing(), z24.VnB()); __ mov(z2.VnB(), p3.Merging(), w2); __ movprfx(z27, z13); __ mov(z27.VnD(), p3.Merging(), d0); __ movprfx(z18.VnB(), p5.Zeroing(), z11.VnB()); __ msb(z18.VnB(), p5.Merging(), z3.VnB(), z11.VnB()); __ movprfx(z29, z16); __ mul(z29.VnS(), p6.Merging(), z29.VnS(), z9.VnS()); __ movprfx(z21, z23); __ mul(z21.VnH(), z21.VnH(), 42); __ movprfx(z7.VnS(), p4.Merging(), z14.VnS()); __ neg(z7.VnS(), p4.Merging(), z14.VnS()); __ movprfx(z8.VnD(), p4.Zeroing(), z5.VnD()); __ not_(z8.VnD(), p4.Merging(), z5.VnD()); __ movprfx(z14, z13); __ orn(z14.VnS(), z14.VnS(), 4); __ movprfx(z14, z13); __ orn(z14.VnS(), z14.VnS(), 4); __ movprfx(z27, z17); __ orr(z27.VnD(), p2.Merging(), z27.VnD(), z17.VnD()); __ movprfx(z13.VnH(), p2.Zeroing(), z27.VnH()); __ rbit(z13.VnH(), p2.Merging(), z1.VnH()); __ movprfx(z1, z29); __ revb(z1.VnS(), p4.Merging(), z6.VnS()); __ movprfx(z18.VnD(), p2.Zeroing(), z10.VnD()); __ revh(z18.VnD(), p2.Merging(), z16.VnD()); __ movprfx(z2.VnD(), p1.Merging(), z10.VnD()); __ revw(z2.VnD(), p1.Merging(), z1.VnD()); __ movprfx(z28.VnS(), p7.Merging(), z11.VnS()); __ sabd(z28.VnS(), p7.Merging(), z28.VnS(), z11.VnS()); __ movprfx(z22.VnS(), p0.Merging(), z20.VnS()); __ sdiv(z22.VnS(), p0.Merging(), z22.VnS(), z6.VnS()); __ movprfx(z13.VnS(), p7.Merging(), z0.VnS()); __ sdivr(z13.VnS(), p7.Merging(), z13.VnS(), z2.VnS()); __ movprfx(z0, z12); __ sdot(z0.VnD(), z10.VnH(), z12.VnH(), 1); __ movprfx(z8, z15); __ sdot(z8.VnS(), z15.VnB(), z12.VnB()); __ movprfx(z13, z0); __ sdot(z13.VnS(), z10.VnB(), z0.VnB(), 1); __ movprfx(z11, z13); __ smax(z11.VnB(), p5.Merging(), z11.VnB(), z24.VnB()); __ movprfx(z3, z17); __ smax(z3.VnD(), z3.VnD(), 42); __ movprfx(z10, z29); __ smin(z10.VnD(), p4.Merging(), z10.VnD(), z29.VnD()); __ movprfx(z13, z29); __ smin(z13.VnD(), z13.VnD(), 42); __ movprfx(z6, z17); __ smulh(z6.VnS(), p7.Merging(), z6.VnS(), 
z31.VnS()); __ movprfx(z19, z20); __ splice(z19.VnB(), p3, z19.VnB(), z20.VnB()); __ movprfx(z0, z3); __ sqadd(z0.VnD(), z0.VnD(), 42); __ movprfx(z29, z5); __ sqdecd(z29.VnD(), SVE_MUL3); __ movprfx(z25, z11); __ sqdech(z25.VnH(), SVE_VL2); __ movprfx(z16, z9); __ sqdecp(z16.VnS(), p1); __ movprfx(z8, z17); __ sqdecw(z8.VnS(), SVE_ALL); __ movprfx(z4, z5); __ sqincd(z4.VnD(), SVE_MUL3); __ movprfx(z0, z17); __ sqinch(z0.VnH(), SVE_VL2); __ movprfx(z7, z27); __ sqincp(z7.VnS(), p6); __ movprfx(z10, z9); __ sqincw(z10.VnS(), SVE_ALL); __ movprfx(z31, z22); __ sqsub(z31.VnB(), z31.VnB(), 42); __ movprfx(z12.VnH(), p7.Zeroing(), z23.VnH()); __ sub(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnH()); __ movprfx(z10, z1); __ sub(z10.VnH(), z10.VnH(), 42); __ movprfx(z15.VnB(), p0.Merging(), z0.VnB()); __ subr(z15.VnB(), p0.Merging(), z15.VnB(), z0.VnB()); __ movprfx(z17, z2); __ subr(z17.VnH(), z17.VnH(), 42); __ movprfx(z5, z3); __ sxtb(z5.VnD(), p6.Merging(), z20.VnD()); __ movprfx(z11, z17); __ sxth(z11.VnD(), p6.Merging(), z25.VnD()); __ movprfx(z26, z4); __ sxtw(z26.VnD(), p5.Merging(), z4.VnD()); __ movprfx(z15.VnD(), p0.Zeroing(), z8.VnD()); __ uabd(z15.VnD(), p0.Merging(), z15.VnD(), z20.VnD()); __ movprfx(z21, z24); __ udiv(z21.VnD(), p3.Merging(), z21.VnD(), z24.VnD()); __ movprfx(z22, z10); __ udivr(z22.VnD(), p7.Merging(), z22.VnD(), z27.VnD()); __ movprfx(z27, z25); __ udot(z27.VnD(), z29.VnH(), z3.VnH(), 1); __ movprfx(z29, z10); __ udot(z29.VnS(), z10.VnB(), z21.VnB()); __ movprfx(z18, z0); __ udot(z18.VnS(), z14.VnB(), z0.VnB(), 1); __ movprfx(z6, z30); __ umax(z6.VnS(), p2.Merging(), z6.VnS(), z27.VnS()); __ movprfx(z31, z17); __ umax(z31.VnD(), z31.VnD(), 42); __ movprfx(z27.VnS(), p0.Merging(), z20.VnS()); __ umin(z27.VnS(), p0.Merging(), z27.VnS(), z8.VnS()); __ movprfx(z0, z11); __ umin(z0.VnH(), z0.VnH(), 42); __ movprfx(z21, z17); __ umulh(z21.VnB(), p0.Merging(), z21.VnB(), z30.VnB()); __ movprfx(z9, z24); __ uqadd(z9.VnD(), z9.VnD(), 42); __ 
movprfx(z18, z13); __ uqdecd(z18.VnD(), SVE_MUL3); __ movprfx(z20, z23); __ uqdech(z20.VnH(), SVE_VL2); __ movprfx(z12, z29); __ uqdecp(z12.VnS(), p7); __ movprfx(z24, z25); __ uqdecw(z24.VnS(), SVE_ALL); __ movprfx(z13, z1); __ uqincd(z13.VnD(), SVE_MUL3); __ movprfx(z5, z19); __ uqinch(z5.VnH(), SVE_VL2); __ movprfx(z6, z25); __ uqincp(z6.VnS(), p5); __ movprfx(z12, z14); __ uqincw(z12.VnS(), SVE_ALL); __ movprfx(z13, z6); __ uqsub(z13.VnH(), z13.VnH(), 42); __ movprfx(z31, z3); __ uxtb(z31.VnS(), p0.Merging(), z3.VnS()); __ movprfx(z18.VnD(), p4.Merging(), z25.VnD()); __ uxth(z18.VnD(), p4.Merging(), z25.VnD()); __ movprfx(z18.VnD(), p7.Merging(), z25.VnD()); __ uxtw(z18.VnD(), p7.Merging(), z25.VnD()); __ movprfx(z22, z5); __ smmla(z22.VnS(), z21.VnB(), z0.VnB()); __ movprfx(z1, z5); __ ummla(z1.VnS(), z10.VnB(), z0.VnB()); __ movprfx(z30, z5); __ usmmla(z30.VnS(), z31.VnB(), z18.VnB()); __ movprfx(z4, z5); __ usdot(z4.VnS(), z3.VnB(), z3.VnB()); __ movprfx(z10, z5); __ usdot(z10.VnS(), z9.VnB(), z0.VnB(), 0); __ movprfx(z1, z5); __ sudot(z1.VnS(), z10.VnB(), z2.VnB(), 1); } assm.FinalizeCode(); CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); } TEST(movprfx_positive_fp) { Assembler assm; assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEF32MM, CPUFeatures::kSVEF64MM); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. 
static const size_t kPairCount = 75; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z18.VnS(), p6.Zeroing(), z20.VnS()); __ fabd(z18.VnS(), p6.Merging(), z18.VnS(), z19.VnS()); __ movprfx(z28.VnD(), p4.Zeroing(), z24.VnD()); __ fabs(z28.VnD(), p4.Merging(), z24.VnD()); __ movprfx(z12, z8); __ fadd(z12.VnS(), p2.Merging(), z12.VnS(), 0.5); __ movprfx(z0.VnS(), p1.Merging(), z9.VnS()); __ fadd(z0.VnS(), p1.Merging(), z0.VnS(), z9.VnS()); __ movprfx(z10.VnH(), p2.Merging(), z2.VnH()); __ fcadd(z10.VnH(), p2.Merging(), z10.VnH(), z20.VnH(), 90); __ movprfx(z21, z6); __ fcmla(z21.VnH(), z31.VnH(), z6.VnH(), 2, 180); __ movprfx(z16, z6); __ fcmla(z16.VnS(), z11.VnS(), z6.VnS(), 1, 270); __ movprfx(z15.VnH(), p6.Merging(), z16.VnH()); __ fcpy(z15.VnH(), p6.Merging(), 1.25); __ movprfx(z1, z14); __ fcvt(z1.VnD(), p2.Merging(), z4.VnH()); __ movprfx(z25.VnD(), p6.Merging(), z1.VnD()); __ fcvt(z25.VnD(), p6.Merging(), z1.VnS()); __ movprfx(z18.VnS(), p2.Merging(), z2.VnS()); __ fcvt(z18.VnH(), p2.Merging(), z7.VnS()); __ movprfx(z21.VnD(), p5.Zeroing(), z26.VnD()); __ fcvt(z21.VnH(), p5.Merging(), z26.VnD()); __ movprfx(z12.VnD(), p1.Merging(), z18.VnD()); __ fcvtzs(z12.VnD(), p1.Merging(), z18.VnH()); __ movprfx(z3.VnS(), p2.Merging(), z0.VnS()); __ fcvtzs(z3.VnS(), p2.Merging(), z26.VnS()); __ movprfx(z21.VnS(), p4.Merging(), z7.VnS()); __ fcvtzs(z21.VnS(), p4.Merging(), z7.VnH()); __ movprfx(z16.VnD(), p3.Zeroing(), z4.VnD()); __ fcvtzs(z16.VnS(), p3.Merging(), z28.VnD()); __ movprfx(z31.VnD(), p4.Merging(), z1.VnD()); __ fcvtzu(z31.VnD(), p4.Merging(), z1.VnH()); __ movprfx(z23.VnH(), p0.Zeroing(), z28.VnH()); __ fcvtzu(z23.VnH(), p0.Merging(), z28.VnH()); __ movprfx(z2, z12); __ fcvtzu(z2.VnD(), p3.Merging(), z28.VnS()); __ movprfx(z4, z7); __ fcvtzu(z4.VnS(), p7.Merging(), z16.VnD()); __ movprfx(z13.VnS(), p3.Zeroing(), z23.VnS()); __ fdiv(z13.VnS(), p3.Merging(), z13.VnS(), z23.VnS()); __ movprfx(z6.VnD(), p1.Zeroing(), z16.VnD()); __ 
fdivr(z6.VnD(), p1.Merging(), z6.VnD(), z5.VnD()); __ movprfx(z31, z23); __ fmad(z31.VnS(), p5.Merging(), z23.VnS(), z11.VnS()); __ movprfx(z14.VnH(), p7.Merging(), z21.VnH()); __ fmax(z14.VnH(), p7.Merging(), z14.VnH(), 0.0); __ movprfx(z17.VnS(), p4.Merging(), z9.VnS()); __ fmax(z17.VnS(), p4.Merging(), z17.VnS(), z9.VnS()); __ movprfx(z1.VnS(), p3.Zeroing(), z30.VnS()); __ fmaxnm(z1.VnS(), p3.Merging(), z1.VnS(), 0.0); __ movprfx(z10.VnD(), p1.Zeroing(), z17.VnD()); __ fmaxnm(z10.VnD(), p1.Merging(), z10.VnD(), z17.VnD()); __ movprfx(z3, z13); __ fmin(z3.VnS(), p0.Merging(), z3.VnS(), 0.0); __ movprfx(z15, z21); __ fmin(z15.VnS(), p4.Merging(), z15.VnS(), z21.VnS()); __ movprfx(z30.VnH(), p7.Zeroing(), z25.VnH()); __ fminnm(z30.VnH(), p7.Merging(), z30.VnH(), 0.0); __ movprfx(z31, z15); __ fminnm(z31.VnD(), p5.Merging(), z31.VnD(), z25.VnD()); __ movprfx(z27, z28); __ fmla(z27.VnD(), z28.VnD(), z12.VnD(), 1); __ movprfx(z26.VnH(), p6.Zeroing(), z13.VnH()); __ fmla(z26.VnH(), p6.Merging(), z13.VnH(), z7.VnH()); __ movprfx(z26, z10); __ fmla(z26.VnH(), z10.VnH(), z1.VnH(), 7); __ movprfx(z0, z1); __ fmla(z0.VnS(), z25.VnS(), z1.VnS(), 3); __ movprfx(z7, z3); __ fmls(z7.VnD(), z30.VnD(), z3.VnD(), 1); __ movprfx(z1, z24); __ fmls(z1.VnD(), p5.Merging(), z20.VnD(), z24.VnD()); __ movprfx(z19, z18); __ fmls(z19.VnH(), z18.VnH(), z7.VnH(), 4); __ movprfx(z0, z26); __ fmls(z0.VnS(), z17.VnS(), z4.VnS(), 3); __ movprfx(z19.VnS(), p7.Zeroing(), z6.VnS()); __ fmov(z19.VnS(), p7.Merging(), 0.0); __ movprfx(z21, z15); __ fmov(z21.VnH(), p7.Merging(), 2.5); __ movprfx(z23, z18); __ fmsb(z23.VnS(), p4.Merging(), z1.VnS(), z7.VnS()); __ movprfx(z8, z28); __ fmul(z8.VnS(), p4.Merging(), z8.VnS(), 2.0); __ movprfx(z6.VnD(), p6.Merging(), z27.VnD()); __ fmul(z6.VnD(), p6.Merging(), z6.VnD(), z27.VnD()); __ movprfx(z6.VnH(), p0.Merging(), z19.VnH()); __ fmulx(z6.VnH(), p0.Merging(), z6.VnH(), z19.VnH()); __ movprfx(z5.VnH(), p0.Merging(), z1.VnH()); __ fneg(z5.VnH(), p0.Merging(), 
z1.VnH()); __ movprfx(z22.VnD(), p4.Zeroing(), z24.VnD()); __ fnmad(z22.VnD(), p4.Merging(), z24.VnD(), z12.VnD()); __ movprfx(z5.VnS(), p0.Merging(), z29.VnS()); __ fnmla(z5.VnS(), p0.Merging(), z17.VnS(), z29.VnS()); __ movprfx(z5, z3); __ fnmls(z5.VnD(), p5.Merging(), z3.VnD(), z2.VnD()); __ movprfx(z9.VnD(), p2.Zeroing(), z7.VnD()); __ fnmsb(z9.VnD(), p2.Merging(), z7.VnD(), z23.VnD()); // Note that frecpe and frecps _cannot_ take movprfx. __ movprfx(z12.VnH(), p1.Zeroing(), z17.VnH()); __ frecpx(z12.VnH(), p1.Merging(), z4.VnH()); __ movprfx(z28.VnS(), p4.Zeroing(), z27.VnS()); __ frinta(z28.VnS(), p4.Merging(), z24.VnS()); __ movprfx(z7.VnD(), p7.Merging(), z25.VnD()); __ frinti(z7.VnD(), p7.Merging(), z25.VnD()); __ movprfx(z10, z21); __ frintm(z10.VnD(), p5.Merging(), z26.VnD()); __ movprfx(z25, z21); __ frintn(z25.VnH(), p4.Merging(), z1.VnH()); __ movprfx(z25, z9); __ frintp(z25.VnH(), p1.Merging(), z9.VnH()); __ movprfx(z30, z16); __ frintx(z30.VnS(), p1.Merging(), z16.VnS()); __ movprfx(z0.VnD(), p5.Merging(), z9.VnD()); __ frintz(z0.VnD(), p5.Merging(), z23.VnD()); __ movprfx(z11.VnD(), p7.Merging(), z2.VnD()); __ fscale(z11.VnD(), p7.Merging(), z11.VnD(), z2.VnD()); __ movprfx(z23.VnS(), p4.Merging(), z17.VnS()); __ fsqrt(z23.VnS(), p4.Merging(), z10.VnS()); __ movprfx(z0.VnD(), p2.Merging(), z26.VnD()); __ fsub(z0.VnD(), p2.Merging(), z0.VnD(), 1.0); __ movprfx(z28.VnD(), p1.Zeroing(), z16.VnD()); __ fsub(z28.VnD(), p1.Merging(), z28.VnD(), z16.VnD()); __ movprfx(z22, z27); __ fsubr(z22.VnD(), p4.Merging(), z22.VnD(), 1.0); __ movprfx(z4.VnS(), p2.Merging(), z26.VnS()); __ fsubr(z4.VnS(), p2.Merging(), z4.VnS(), z26.VnS()); // Note that ftsmul and ftssel _cannot_ take movprfx. 
__ movprfx(z10, z4); __ ftmad(z10.VnS(), z10.VnS(), z4.VnS(), 2); __ movprfx(z2, z16); __ scvtf(z2.VnD(), p1.Merging(), z16.VnS()); __ movprfx(z10, z20); __ scvtf(z10.VnD(), p5.Merging(), z20.VnD()); __ movprfx(z29, z28); __ scvtf(z29.VnS(), p0.Merging(), z31.VnD()); __ movprfx(z26.VnD(), p3.Merging(), z13.VnD()); __ scvtf(z26.VnH(), p3.Merging(), z5.VnD()); __ movprfx(z7.VnD(), p3.Zeroing(), z26.VnD()); __ ucvtf(z7.VnD(), p3.Merging(), z26.VnS()); __ movprfx(z13, z17); __ ucvtf(z13.VnD(), p7.Merging(), z17.VnD()); __ movprfx(z24.VnD(), p1.Merging(), z31.VnD()); __ ucvtf(z24.VnS(), p1.Merging(), z18.VnD()); __ movprfx(z17.VnD(), p4.Merging(), z22.VnD()); __ ucvtf(z17.VnH(), p4.Merging(), z4.VnD()); __ movprfx(z30, z5); __ fmmla(z30.VnS(), z29.VnS(), z18.VnS()); __ movprfx(z31, z5); __ fmmla(z31.VnD(), z30.VnD(), z18.VnD()); } assm.FinalizeCode(); CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); } TEST(movprfx_positive_sve2) { Assembler assm; assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. 
static const size_t kPairCount = 145; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z25, z26); __ adclb(z25.VnS(), z17.VnS(), z24.VnS()); __ movprfx(z0, z1); __ adclt(z0.VnS(), z2.VnS(), z15.VnS()); __ movprfx(z3, z4); __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB()); __ movprfx(z6, z7); __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()); __ movprfx(z18, z19); __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()); __ movprfx(z7, z8); __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()); __ movprfx(z21, z22); __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()); __ movprfx(z5, z6); __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90); __ movprfx(z7, z8); __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0); __ movprfx(z7, z8); __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0); __ movprfx(z7, z8); __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0); __ movprfx(z19, z20); __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0); __ movprfx(z19, z20); __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0); __ movprfx(z19, z20); __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0); __ movprfx(z10, z11); __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()); __ movprfx(z3, z4); __ eorbt(z3.VnB(), z10.VnB(), z8.VnB()); __ movprfx(z20, z22); __ eortb(z20.VnB(), z21.VnB(), z15.VnB()); __ movprfx(z14, z15); __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()); __ movprfx(z14.VnD(), p4.Merging(), z15.VnD()); __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD()); __ movprfx(z15.VnH(), p0.Merging(), z16.VnH()); __ flogb(z15.VnH(), p0.Merging(), z3.VnH()); __ movprfx(z2, z3); __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()); __ movprfx(z22, z23); __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()); __ movprfx(z1, z2); __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()); __ movprfx(z16, z17); __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()); __ movprfx(z16, z17); __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH()); __ movprfx(z16, z17); __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0); __ movprfx(z18, z19); __ 
fmlalt(z18.VnS(), z13.VnH(), z5.VnH()); __ movprfx(z18, z19); __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0); __ movprfx(z16, z17); __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH()); __ movprfx(z16, z17); __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0); __ movprfx(z3, z4); __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH()); __ movprfx(z3, z4); __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0); __ movprfx(z2, z3); __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0); __ movprfx(z2, z3); __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0); __ movprfx(z2, z3); __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0); __ movprfx(z2, z3); __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0); __ movprfx(z2, z3); __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0); __ movprfx(z2, z3); __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0); __ movprfx(z17, z18); __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()); __ movprfx(z13, z14); __ saba(z13.VnB(), z2.VnB(), z31.VnB()); __ movprfx(z13, z14); __ sabalb(z13.VnD(), z20.VnS(), z26.VnS()); __ movprfx(z14, z15); __ sabalt(z14.VnD(), z19.VnS(), z10.VnS()); __ movprfx(z19.VnD(), p5.Merging(), z20.VnD()); __ sadalp(z19.VnD(), p5.Merging(), z9.VnS()); __ movprfx(z17, z18); __ sbclb(z17.VnS(), z10.VnS(), z8.VnS()); __ movprfx(z20, z21); __ sbclt(z20.VnS(), z0.VnS(), z13.VnS()); __ movprfx(z20.VnB(), p3.Merging(), z21.VnB()); __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()); __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()); __ movprfx(z1.VnB(), p0.Merging(), z2.VnB()); __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()); __ movprfx(z5, z6); __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()); __ movprfx(z27, z28); __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()); __ movprfx(z1, z2); __ smlalb(z1.VnD(), z3.VnS(), z23.VnS()); __ movprfx(z1, z2); __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0); __ movprfx(z1, z2); __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0); __ movprfx(z1, z2); __ smlalt(z1.VnD(), z3.VnS(), z23.VnS()); __ movprfx(z1, z2); __ smlalt(z1.VnD(), z3.VnS(), 
z2.VnS(), 0); __ movprfx(z1, z2); __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0); __ movprfx(z1, z2); __ smlslb(z1.VnD(), z3.VnS(), z23.VnS()); __ movprfx(z1, z2); __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0); __ movprfx(z1, z2); __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0); __ movprfx(z1, z2); __ smlslt(z1.VnD(), z3.VnS(), z23.VnS()); __ movprfx(z1, z2); __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0); __ movprfx(z1, z2); __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0); __ movprfx(z29.VnB(), p1.Merging(), z30.VnB()); __ sqabs(z29.VnB(), p1.Merging(), z18.VnB()); __ movprfx(z28.VnB(), p0.Merging(), z29.VnB()); __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()); __ movprfx(z20, z21); __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90); __ movprfx(z6, z7); __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()); __ movprfx(z6, z7); __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0); __ movprfx(z6, z7); __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0); __ movprfx(z23, z24); __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()); __ movprfx(z11, z12); __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS()); __ movprfx(z11, z12); __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0); __ movprfx(z11, z12); __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0); __ movprfx(z16, z17); __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()); __ movprfx(z16, z17); __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0); __ movprfx(z16, z17); __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0); __ movprfx(z26, z27); __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()); __ movprfx(z21, z22); __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()); __ movprfx(z21, z22); __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0); __ movprfx(z21, z22); __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0); __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); __ sqneg(z21.VnB(), p0.Merging(), z17.VnB()); __ movprfx(z31, z0); __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0); __ movprfx(z31, z0); __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0); __ movprfx(z31, z0); __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0); __ 
movprfx(z27, z28); __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()); __ movprfx(z27, z28); __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0); __ movprfx(z27, z28); __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0); __ movprfx(z27, z28); __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0); __ movprfx(z11, z12); __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()); __ movprfx(z11, z12); __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0); __ movprfx(z11, z12); __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0); __ movprfx(z11, z12); __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0); __ movprfx(z31.VnB(), p5.Merging(), z0.VnB()); __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()); __ movprfx(z25.VnB(), p6.Merging(), z26.VnB()); __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()); __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0); __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB()); __ movprfx(z7.VnB(), p3.Merging(), z8.VnB()); __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()); __ movprfx(z10.VnB(), p1.Merging(), z11.VnB()); __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0); __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); __ movprfx(z23.VnB(), p4.Merging(), z24.VnB()); __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()); __ movprfx(z31.VnB(), p7.Merging(), z0.VnB()); __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB()); __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()); __ movprfx(z12.VnB(), p0.Merging(), z13.VnB()); __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1); __ movprfx(z0, z1); __ srsra(z0.VnB(), z8.VnB(), 1); __ movprfx(z0, z1); __ ssra(z0.VnB(), z8.VnB(), 1); __ movprfx(z26.VnB(), p2.Merging(), z27.VnB()); __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()); __ 
movprfx(z23, z24); __ uaba(z23.VnB(), z22.VnB(), z20.VnB()); __ movprfx(z11, z12); __ uabalb(z11.VnD(), z25.VnS(), z12.VnS()); __ movprfx(z4, z5); __ uabalt(z4.VnD(), z2.VnS(), z31.VnS()); __ movprfx(z20.VnD(), p4.Merging(), z21.VnD()); __ uadalp(z20.VnD(), p4.Merging(), z5.VnS()); __ movprfx(z21.VnB(), p2.Merging(), z22.VnB()); __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()); __ movprfx(z1.VnB(), p4.Merging(), z2.VnB()); __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()); __ movprfx(z18.VnB(), p0.Merging(), z19.VnB()); __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()); __ movprfx(z7, z8); __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()); __ movprfx(z10, z11); __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()); __ movprfx(z31, z0); __ umlalb(z31.VnD(), z9.VnS(), z21.VnS()); __ movprfx(z31, z0); __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0); __ movprfx(z31, z0); __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0); __ movprfx(z11, z12); __ umlalt(z11.VnD(), z5.VnS(), z22.VnS()); __ movprfx(z11, z12); __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0); __ movprfx(z11, z12); __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0); __ movprfx(z28, z29); __ umlslb(z28.VnD(), z13.VnS(), z9.VnS()); __ movprfx(z28, z29); __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0); __ movprfx(z28, z29); __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0); __ movprfx(z9, z10); __ umlslt(z9.VnD(), z12.VnS(), z30.VnS()); __ movprfx(z9, z10); __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0); __ movprfx(z9, z10); __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0); __ movprfx(z24.VnB(), p7.Merging(), z25.VnB()); __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), __ movprfx(z20.VnB(), p1.Merging(), z21.VnB()); __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()); __ movprfx(z8.VnB(), p5.Merging(), z9.VnB()); __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()); __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0); __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); __ 
uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB()); __ movprfx(z12.VnB(), p1.Merging(), z13.VnB()); __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB()); __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); __ movprfx(z25.VnS(), p7.Merging(), z26.VnS()); __ urecpe(z25.VnS(), p7.Merging(), z2.VnS()); __ movprfx(z29.VnB(), p4.Merging(), z30.VnB()); __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()); __ movprfx(z15.VnB(), p2.Merging(), z16.VnB()); __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()); __ movprfx(z27.VnB(), p1.Merging(), z28.VnB()); __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()); __ movprfx(z31.VnB(), p2.Merging(), z0.VnB()); __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1); __ movprfx(z4.VnS(), p3.Merging(), z5.VnS()); __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS()); __ movprfx(z0, z1); __ ursra(z0.VnB(), z8.VnB(), 1); __ movprfx(z25.VnB(), p4.Merging(), z26.VnB()); __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()); __ movprfx(z0, z1); __ usra(z0.VnB(), z8.VnB(), 1); __ movprfx(z16, z17); __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1); } assm.FinalizeCode(); CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); } TEST(movprfx_negative_instructions_sve2) { Assembler assm; assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. 
static const size_t kPairs = 134;
    // Two instructions per pair: the movprfx and the instruction after it.
    const size_t reserved_bytes = kPairs * 2 * kInstructionSize;
    CodeBufferCheckScope reservation(&assm, reserved_bytes);

    __ movprfx(z29, z30);
    __ addhnb(z29.VnS(), z19.VnD(), z2.VnD());

    __ movprfx(z8, z9);
    __ addhnt(z8.VnS(), z12.VnD(), z6.VnD());

    __ movprfx(z18, z19);
    __ bdep(z18.VnB(), z10.VnB(), z0.VnB());

    __ movprfx(z6, z7);
    __ bext(z6.VnB(), z2.VnB(), z5.VnB());

    __ movprfx(z24, z25);
    __ bgrp(z24.VnB(), z9.VnB(), z5.VnB());

    __ movprfx(z1, z2);
    __ fcvtlt(z1.VnD(), p1.Merging(), z28.VnS());

    __ movprfx(z1, z2);
    __ fcvtlt(z1.VnS(), p1.Merging(), z28.VnH());

    __ movprfx(z4, z5);
    __ fcvtnt(z4.VnH(), p7.Merging(), z0.VnS());

    __ movprfx(z4, z5);
    __ fcvtnt(z4.VnS(), p7.Merging(), z0.VnD());

    __ movprfx(z27, z28);
    __ fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD());

    __ movprfx(z24, z25);
    __ histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS());

    __ movprfx(z22, z23);
    __ histseg(z22.VnB(), z14.VnB(), z8.VnB());

    __ movprfx(z21, z22);
    __ ldnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), x23));

    __ movprfx(z21, z22);
    __ ldnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23));

    __ movprfx(z10, z11);
    __ ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z23.VnD(), x6));

    __ movprfx(z30, z31);
    __ ldnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x11));

    __ movprfx(z30, z31);
    __ ldnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x11));

    __ movprfx(z7, z8);
    __ ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11));

    __ movprfx(z7, z8);
    __ ldnt1sb(z7.VnD(), p3.Zeroing(), SVEMemOperand(z18.VnD(), x11));

    __ movprfx(z17, z18);
    __ ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19));

    __ movprfx(z17, z18);
    __ ldnt1sh(z17.VnD(), p5.Zeroing(), SVEMemOperand(z31.VnD(), x19));

    __ movprfx(z3, z4);
    __ ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10));

    __ movprfx(z0, z1);
    __ ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1));

    __ movprfx(z0, z1);
    __ ldnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1));

    __ movprfx(z18, z19);
    __ match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB());

    __ movprfx(z15, z16);
    __ mul(z15.VnB(), z15.VnB(), z15.VnB());

    __ movprfx(z15, z16);
    __ mul(z15.VnH(), z15.VnH(), z1.VnH(), 0);

    __ movprfx(z15, z16);
    __ mul(z15.VnS(), z15.VnS(), z1.VnS(), 0);

    __ movprfx(z15, z16);
    __ mul(z15.VnD(), z15.VnD(), z1.VnD(), 0);

    __ movprfx(z20, z21);
    __ nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB());

    __ movprfx(z0, z1);
    __ pmul(z0.VnB(), z5.VnB(), z5.VnB());

    __ movprfx(z12, z13);
    __ pmullb(z12.VnD(), z21.VnS(), z12.VnS());

    __ movprfx(z31, z0);
    __ pmullt(z31.VnD(), z30.VnS(), z26.VnS());

    __ movprfx(z0, z1);
    __ raddhnb(z0.VnS(), z11.VnD(), z10.VnD());

    __ movprfx(z23, z24);
    __ raddhnt(z23.VnS(), z27.VnD(), z9.VnD());

    __ movprfx(z5, z6);
    __ rshrnb(z5.VnB(), z1.VnH(), 1);

    __ movprfx(z5, z6);
    __ rshrnt(z5.VnB(), z1.VnH(), 8);

    __ movprfx(z30, z31);
    __ rsubhnb(z30.VnS(), z29.VnD(), z11.VnD());

    __ movprfx(z25, z26);
    __ rsubhnt(z25.VnS(), z7.VnD(), z18.VnD());

    __ movprfx(z2, z3);
    __ sabdlb(z2.VnD(), z21.VnS(), z3.VnS());

    __ movprfx(z25, z26);
    __ sabdlt(z25.VnD(), z23.VnS(), z17.VnS());

    __ movprfx(z24, z25);
    __ saddlb(z24.VnD(), z30.VnS(), z16.VnS());

    __ movprfx(z15, z16);
    __ saddlbt(z15.VnD(), z6.VnS(), z18.VnS());

    __ movprfx(z21, z22);
    __ saddlt(z21.VnD(), z29.VnS(), z31.VnS());

    __ movprfx(z12, z13);
    __ saddwb(z12.VnD(), z8.VnD(), z8.VnS());

    __ movprfx(z24, z25);
    __ saddwt(z24.VnD(), z0.VnD(), z3.VnS());

    __ movprfx(z7, z8);
    __ shrnb(z7.VnB(), z4.VnH(), 1);

    __ movprfx(z21, z22);
    __ shrnt(z21.VnB(), z29.VnH(), 1);

    __ movprfx(z29, z30);
    __ sli(z29.VnB(), z7.VnB(), 0);

    __ movprfx(z23, z24);
    __ smulh(z23.VnB(), z23.VnB(), z3.VnB());

    __ movprfx(z10, z11);
    __ smullb(z10.VnD(), z4.VnS(), z4.VnS());

    __ movprfx(z10, z11);
    __ smullb(z10.VnS(), z4.VnH(), z4.VnH(), 0);

    __ movprfx(z10, z11);
    __ smullb(z10.VnD(), z4.VnS(), z4.VnS(), 0);

    __ movprfx(z31, z0);
    __ smullt(z31.VnD(), z26.VnS(), z5.VnS());

    __ movprfx(z31, z0);
    __ smullt(z31.VnS(), z26.VnH(), z5.VnH(), 0);

    __ movprfx(z31, z0);
    __ smullt(z31.VnD(), z26.VnS(), z5.VnS(), 0);

    __ movprfx(z4, z5);
    __ splice_con(z4.VnB(), p7.Merging(), z0.VnB(), z1.VnB());

    __ movprfx(z18, z19);
    __ sqdmulh(z18.VnB(), z25.VnB(), z1.VnB());

    __ movprfx(z18, z19);
    __ sqdmulh(z18.VnH(), z25.VnH(), z1.VnH(), 0);

    __ movprfx(z18, z19);
    __ sqdmulh(z18.VnS(), z25.VnS(), z1.VnS(), 0);

    __ movprfx(z18, z19);
    __ sqdmulh(z18.VnD(), z25.VnD(), z1.VnD(), 0);

    __ movprfx(z1, z2);
    __ sqdmullb(z1.VnD(), z31.VnS(), z21.VnS());

    __ movprfx(z1, z2);
    __ sqdmullb(z1.VnS(), z31.VnH(), z1.VnH(), 0);

    __ movprfx(z1, z2);
    __ sqdmullb(z1.VnD(), z31.VnS(), z1.VnS(), 0);

    __ movprfx(z2, z3);
    __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS());

    __ movprfx(z2, z3);
    __ sqdmullt(z2.VnS(), z1.VnH(), z5.VnH(), 0);

    __ movprfx(z2, z3);
    __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS(), 0);

    __ movprfx(z21, z22);
    __ sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB());

    __ movprfx(z21, z22);
    __ sqrdmulh(z21.VnH(), z21.VnH(), z2.VnH(), 0);

    __ movprfx(z21, z22);
    __ sqrdmulh(z21.VnS(), z21.VnS(), z2.VnS(), 0);

    __ movprfx(z21, z22);
    __ sqrdmulh(z21.VnD(), z21.VnD(), z2.VnD(), 0);

    __ movprfx(z1, z2);
    __ sqrshrnb(z1.VnB(), z1.VnH(), 1);

    __ movprfx(z24, z25);
    __ sqrshrnt(z24.VnB(), z19.VnH(), 8);

    __ movprfx(z23, z24);
    __ sqrshrunb(z23.VnB(), z28.VnH(), 1);

    __ movprfx(z9, z10);
    __ sqrshrunt(z9.VnB(), z15.VnH(), 8);

    __ movprfx(z25, z26);
    __ sqshrnb(z25.VnB(), z1.VnH(), 1);

    __ movprfx(z0, z1);
    __ sqshrnt(z0.VnB(), z25.VnH(), 8);

    __ movprfx(z25, z26);
    __ sqshrunb(z25.VnB(), z10.VnH(), 1);

    __ movprfx(z20, z21);
    __ sqshrunt(z20.VnB(), z3.VnH(), 8);

    __ movprfx(z2, z3);
    __ sqxtnb(z2.VnB(), z0.VnH());

    __ movprfx(z31, z0);
    __ sqxtnt(z31.VnB(), z18.VnH());

    __ movprfx(z28, z29);
    __ sqxtunb(z28.VnB(), z6.VnH());

    __ movprfx(z14, z15);
    __ sqxtunt(z14.VnB(), z31.VnH());

    __ movprfx(z6, z7);
    __ sri(z6.VnB(), z9.VnB(), 1);

    __ movprfx(z2, z3);
    __ sshllb(z2.VnH(), z20.VnB(), 0);

    __ movprfx(z27, z28);
    __ sshllt(z27.VnH(), z8.VnB(), 0);

    __ movprfx(z4, z5);
    __ ssublb(z4.VnD(), z23.VnS(), z7.VnS());

    __ movprfx(z6, z7);
    __ ssublbt(z6.VnD(), z28.VnS(), z12.VnS());

    __ movprfx(z12, z13);
    __ ssublt(z12.VnD(), z13.VnS(), z6.VnS());

    __ movprfx(z11, z12);
    __ ssubltb(z11.VnD(), z18.VnS(), z19.VnS());

    __ movprfx(z7, z8);
    __ ssubwb(z7.VnD(), z28.VnD(), z11.VnS());

    __ movprfx(z29, z30);
    __ ssubwt(z29.VnD(), z25.VnD(), z20.VnS());

    __ movprfx(z21, z22);
    __ stnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z1.VnS(), x23));

    __ movprfx(z21, z22);
    __ stnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23));

    __ movprfx(z10, z11);
    __ stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), x23));

    __ movprfx(z30, z31);
    __ stnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x6));

    __ movprfx(z30, z31);
    __ stnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x6));

    __ movprfx(z0, z1);
    __ stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1));

    __ movprfx(z0, z1);
    __ stnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1));

    __ movprfx(z31, z0);
    __ subhnb(z31.VnS(), z31.VnD(), z7.VnD());

    __ movprfx(z31, z0);
    __ subhnt(z31.VnS(), z22.VnD(), z27.VnD());

    __ movprfx(z24, z25);
    __ tbl(z24.VnB(), z29.VnB(), z30.VnB(), z0.VnB());

    __ movprfx(z22, z23);
    __ tbx(z22.VnB(), z15.VnB(), z19.VnB());

    __ movprfx(z1, z2);
    __ uabdlb(z1.VnD(), z26.VnS(), z12.VnS());

    __ movprfx(z25, z26);
    __ uabdlt(z25.VnD(), z29.VnS(), z14.VnS());

    __ movprfx(z3, z4);
    __ uaddlb(z3.VnD(), z5.VnS(), z2.VnS());

    __ movprfx(z15, z16);
    __ uaddlt(z15.VnD(), z28.VnS(), z20.VnS());

    __ movprfx(z31, z0);
    __ uaddwb(z31.VnD(), z8.VnD(), z25.VnS());

    __ movprfx(z17, z18);
    __ uaddwt(z17.VnD(), z15.VnD(), z2.VnS());

    __ movprfx(z12, z13);
    __ umulh(z12.VnB(), z12.VnB(), z17.VnB());

    __ movprfx(z12, z13);
    __ umullb(z12.VnD(), z5.VnS(), z2.VnS());

    __ movprfx(z12, z13);
    __ umullb(z12.VnS(), z5.VnH(), z2.VnH(), 0);

    __ movprfx(z12, z13);
    __ umullb(z12.VnD(), z5.VnS(), z2.VnS(), 0);

    __ movprfx(z24, z25);
    __ umullt(z24.VnD(), z6.VnS(), z6.VnS());

    __ movprfx(z24, z25);
    __ umullt(z24.VnS(), z6.VnH(), z1.VnH(), 0);

    __ movprfx(z24, z25);
    __ umullt(z24.VnD(), z6.VnS(), z1.VnS(), 0);

    __ movprfx(z30, z31);
    __ uqrshrnb(z30.VnB(), z25.VnH(), 1);

    __ movprfx(z3, z4);
    __ uqrshrnt(z3.VnB(), z25.VnH(), 8);

    __ movprfx(z17, z18);
    __ uqshrnb(z17.VnB(), z4.VnH(), 1);

    __ movprfx(z28, z29);
    __ uqshrnt(z28.VnB(), z18.VnH(), 8);

    __ movprfx(z28, z29);
    __ uqxtnb(z28.VnB(), z4.VnH());

    __ movprfx(z19, z20);
    __ uqxtnt(z19.VnB(), z7.VnH());

    __ movprfx(z8, z9);
    __ ushllb(z8.VnH(), z31.VnB(), 0);

    __ movprfx(z3, z4);
    __ ushllt(z3.VnH(), z21.VnB(), 0);

    __ movprfx(z25, z26);
    __ usublb(z25.VnD(), z9.VnS(), z17.VnS());

    __ movprfx(z5, z6);
    __ usublt(z5.VnD(), z11.VnS(), z15.VnS());

    __ movprfx(z10, z11);
    __ usubwb(z10.VnD(), z13.VnD(), z20.VnS());

    __ movprfx(z15, z16);
    __ usubwt(z15.VnD(), z8.VnD(), z23.VnS());

    __ movprfx(z20, z21);
    __ whilege(p0.VnB(), w20, w29);

    __ movprfx(z24, z25);
    __ whilegt(p11.VnB(), w24, w3);

    __ movprfx(z20, z21);
    __ whilehi(p2.VnB(), x20, x8);

    __ movprfx(z22, z23);
    __ whilehs(p4.VnB(), w22, w9);

    __ movprfx(z25, z26);
    __ whilerw(p7.VnB(), x25, x27);

    __ movprfx(z14, z15);
    __ whilewr(p8.VnB(), x14, x14);
  }
  assm.FinalizeCode();

  // Passing `false`: every pair above must be rejected by CanTakeSVEMovprfx().
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}

TEST(movprfx_negative_predication_sve2) {
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve space for the code we're about to emit.
static const size_t kPairs = 140;
    // Two instructions per pair: the movprfx and the instruction after it.
    const size_t reserved_bytes = kPairs * 2 * kInstructionSize;
    CodeBufferCheckScope reservation(&assm, reserved_bytes);

    __ movprfx(z25.VnS(), p0.Zeroing(), z26.VnS());
    __ adclb(z25.VnS(), z17.VnS(), z24.VnS());

    __ movprfx(z0.VnS(), p0.Zeroing(), z1.VnS());
    __ adclt(z0.VnS(), z2.VnS(), z15.VnS());

    __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
    __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD());

    __ movprfx(z18.VnD(), p0.Zeroing(), z19.VnD());
    __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD());

    __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD());
    __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD());

    __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
    __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD());

    __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB());
    __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90);

    __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS());
    __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0);

    __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS());
    __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0);

    __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD());
    __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0);

    __ movprfx(z19.VnB(), p0.Zeroing(), z20.VnB());
    __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0);

    __ movprfx(z19.VnS(), p0.Zeroing(), z20.VnS());
    __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0);

    __ movprfx(z19.VnH(), p0.Zeroing(), z20.VnH());
    __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0);

    __ movprfx(z10.VnD(), p0.Zeroing(), z11.VnD());
    __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD());

    __ movprfx(z3.VnB(), p0.Zeroing(), z4.VnB());
    __ eorbt(z3.VnB(), z10.VnB(), z8.VnB());

    __ movprfx(z20.VnB(), p0.Zeroing(), z22.VnB());
    __ eortb(z20.VnB(), z21.VnB(), z15.VnB());

    __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD());
    __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD());

    __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
    __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD());

    __ movprfx(z22.VnD(), p0.Zeroing(), z23.VnD());
    __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD());

    __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
    __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD());

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH());

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0);

    __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS());
    __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH());

    __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS());
    __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0);

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH());

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0);

    __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS());
    __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH());

    __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS());
    __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0);

    __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH());
    __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0);

    __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS());
    __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0);

    __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
    __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0);

    __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH());
    __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0);

    __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS());
    __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0);

    __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
    __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0);

    __ movprfx(z17.VnD(), p0.Zeroing(), z18.VnD());
    __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD());

    __ movprfx(z13.VnB(), p0.Zeroing(), z14.VnB());
    __ saba(z13.VnB(), z2.VnB(), z31.VnB());

    __ movprfx(z13.VnD(), p0.Zeroing(), z14.VnD());
    __ sabalb(z13.VnD(), z20.VnS(), z26.VnS());

    __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD());
    __ sabalt(z14.VnD(), z19.VnS(), z10.VnS());

    __ movprfx(z17.VnS(), p0.Zeroing(), z18.VnS());
    __ sbclb(z17.VnS(), z10.VnS(), z8.VnS());

    __ movprfx(z20.VnS(), p0.Zeroing(), z21.VnS());
    __ sbclt(z20.VnS(), z0.VnS(), z13.VnS());

    __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB());
    __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB());

    __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB());
    __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlalb(z1.VnD(), z3.VnS(), z23.VnS());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0);

    __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
    __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0);

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlalt(z1.VnD(), z3.VnS(), z23.VnS());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0);

    __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
    __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0);

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlslb(z1.VnD(), z3.VnS(), z23.VnS());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0);

    __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
    __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0);

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlslt(z1.VnD(), z3.VnS(), z23.VnS());

    __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
    __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0);

    __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
    __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0);

    __ movprfx(z20.VnB(), p0.Zeroing(), z21.VnB());
    __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90);

    __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
    __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS());

    __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
    __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0);

    __ movprfx(z6.VnS(), p0.Zeroing(), z7.VnS());
    __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0);

    __ movprfx(z23.VnD(), p0.Zeroing(), z24.VnD());
    __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS());

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS());

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0);

    __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
    __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0);

    __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
    __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS());

    __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
    __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0);

    __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
    __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0);

    __ movprfx(z26.VnD(), p0.Zeroing(), z27.VnD());
    __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS());

    __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
    __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS());

    __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
    __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0);

    __ movprfx(z21.VnS(), p0.Zeroing(), z22.VnS());
    __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0);

    __ movprfx(z31.VnB(), p0.Zeroing(), z0.VnB());
    __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0);

    __ movprfx(z31.VnH(), p0.Zeroing(), z0.VnH());
    __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0);

    __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS());
    __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0);

    __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB());
    __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB());

    __ movprfx(z27.VnH(), p0.Zeroing(), z28.VnH());
    __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0);

    __ movprfx(z27.VnS(), p0.Zeroing(), z28.VnS());
    __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0);

    __ movprfx(z27.VnD(), p0.Zeroing(), z28.VnD());
    __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0);

    __ movprfx(z11.VnB(), p0.Zeroing(), z12.VnB());
    __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB());

    __ movprfx(z11.VnH(), p0.Zeroing(), z12.VnH());
    __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0);

    __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
    __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0);

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0);

    __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
    __ srsra(z0.VnB(), z8.VnB(), 1);

    __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
    __ ssra(z0.VnB(), z8.VnB(), 1);

    __ movprfx(z23.VnB(), p0.Zeroing(), z24.VnB());
    __ uaba(z23.VnB(), z22.VnB(), z20.VnB());

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ uabalb(z11.VnD(), z25.VnS(), z12.VnS());

    __ movprfx(z4.VnD(), p0.Zeroing(), z5.VnD());
    __ uabalt(z4.VnD(), z2.VnS(), z31.VnS());

    __ movprfx(z7.VnB(), p0.Zeroing(), z8.VnB());
    __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB());

    __ movprfx(z10.VnB(), p0.Zeroing(), z11.VnB());
    __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB());

    __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD());
    __ umlalb(z31.VnD(), z9.VnS(), z21.VnS());

    __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD());
    __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0);

    __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS());
    __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0);

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ umlalt(z11.VnD(), z5.VnS(), z22.VnS());

    __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
    __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0);

    __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
    __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0);

    __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD());
    __ umlslb(z28.VnD(), z13.VnS(), z9.VnS());

    __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD());
    __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0);

    __ movprfx(z28.VnS(), p0.Zeroing(), z29.VnS());
    __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0);

    __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD());
    __ umlslt(z9.VnD(), z12.VnS(), z30.VnS());

    __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD());
    __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0);

    __ movprfx(z9.VnS(), p0.Zeroing(), z10.VnS());
    __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0);

    __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
    __ ursra(z0.VnB(), z8.VnB(), 1);

    __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
    __ usra(z0.VnB(), z8.VnB(), 1);

    __ movprfx(z16.VnB(), p0.Zeroing(), z17.VnB());
    __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1);
  }
  assm.FinalizeCode();

  // Passing `false`: every pair above must be rejected by CanTakeSVEMovprfx().
  CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
}

TEST(movprfx_negative_aliasing_sve2) {
  Assembler assm;
  assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
  {
    // We have to use the Assembler directly to generate movprfx, so we need
    // to manually reserve
space for the code we're about to emit. static const size_t kPairCount = 140; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z25, z26); __ adclb(z25.VnS(), z17.VnS(), z25.VnS()); __ movprfx(z0, z1); __ adclt(z0.VnS(), z2.VnS(), z0.VnS()); __ movprfx(z3, z4); __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB()); __ movprfx(z6, z7); __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z6.VnD()); __ movprfx(z18, z19); __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z18.VnD()); __ movprfx(z7, z8); __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z7.VnD()); __ movprfx(z21, z22); __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z21.VnD()); __ movprfx(z5, z6); __ cadd(z5.VnB(), z5.VnB(), z5.VnB(), 90); __ movprfx(z7, z8); __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0); __ movprfx(z7, z8); __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0, 0); __ movprfx(z7, z8); __ cdot(z7.VnD(), z7.VnH(), z0.VnH(), 0, 0); __ movprfx(z19, z20); __ cmla(z19.VnB(), z19.VnB(), z2.VnB(), 0); __ movprfx(z19, z20); __ cmla(z19.VnS(), z19.VnS(), z2.VnS(), 0, 0); __ movprfx(z1, z20); __ cmla(z1.VnH(), z7.VnH(), z1.VnH(), 0, 0); __ movprfx(z10, z11); __ eor3(z10.VnD(), z10.VnD(), z10.VnD(), z23.VnD()); __ movprfx(z3, z4); __ eorbt(z3.VnB(), z10.VnB(), z3.VnB()); __ movprfx(z20, z22); __ eortb(z20.VnB(), z21.VnB(), z20.VnB()); __ movprfx(z14, z15); __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z14.VnD()); __ movprfx(z14.VnD(), p4.Merging(), z15.VnD()); __ fcvtx(z14.VnS(), p4.Merging(), z14.VnD()); __ movprfx(z15.VnH(), p0.Merging(), z16.VnH()); __ flogb(z15.VnH(), p0.Merging(), z15.VnH()); __ movprfx(z2, z3); __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z2.VnD()); __ movprfx(z22, z23); __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z22.VnD()); __ movprfx(z1, z2); __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z1.VnD()); __ movprfx(z16, z17); __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z16.VnD()); __ movprfx(z16, z17); __ fmlalb(z16.VnS(), z18.VnH(), z16.VnH()); __ movprfx(z16, z17); __ fmlalb(z16.VnS(), z16.VnH(), z2.VnH(), 
0); __ movprfx(z18, z19); __ fmlalt(z18.VnS(), z13.VnH(), z18.VnH()); __ movprfx(z18, z19); __ fmlalt(z18.VnS(), z18.VnH(), z5.VnH(), 0); __ movprfx(z16, z17); __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH()); __ movprfx(z16, z17); __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH(), 0); __ movprfx(z3, z4); __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH()); __ movprfx(z3, z4); __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH(), 0); __ movprfx(z2, z3); __ mla(z2.VnH(), z0.VnH(), z2.VnH(), 0); __ movprfx(z2, z3); __ mla(z2.VnS(), z0.VnS(), z2.VnS(), 0); __ movprfx(z2, z3); __ mla(z2.VnD(), z0.VnD(), z2.VnD(), 0); __ movprfx(z2, z3); __ mls(z2.VnH(), z0.VnH(), z2.VnH(), 0); __ movprfx(z2, z3); __ mls(z2.VnS(), z0.VnS(), z2.VnS(), 0); __ movprfx(z2, z3); __ mls(z2.VnD(), z0.VnD(), z2.VnD(), 0); __ movprfx(z17, z18); __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z17.VnD()); __ movprfx(z13, z14); __ saba(z13.VnB(), z2.VnB(), z13.VnB()); __ movprfx(z13, z14); __ sabalb(z13.VnD(), z13.VnS(), z26.VnS()); __ movprfx(z14, z15); __ sabalt(z14.VnD(), z14.VnS(), z10.VnS()); __ movprfx(z19.VnD(), p5.Merging(), z20.VnD()); __ sadalp(z19.VnD(), p5.Merging(), z19.VnS()); __ movprfx(z17, z18); __ sbclb(z17.VnS(), z17.VnS(), z8.VnS()); __ movprfx(z20, z21); __ sbclt(z20.VnS(), z20.VnS(), z13.VnS()); __ movprfx(z20.VnB(), p3.Merging(), z21.VnB()); __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z20.VnB()); __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z21.VnB()); __ movprfx(z1.VnB(), p0.Merging(), z2.VnB()); __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z1.VnB()); __ movprfx(z5, z6); __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z5.VnB()); __ movprfx(z27, z28); __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z27.VnB()); __ movprfx(z1, z2); __ smlalb(z1.VnD(), z3.VnS(), z1.VnS()); __ movprfx(z1, z2); __ smlalb(z1.VnD(), z3.VnS(), z1.VnS(), 0); __ movprfx(z1, z2); __ smlalb(z1.VnS(), z1.VnH(), z2.VnH(), 0); __ movprfx(z1, z2); __ smlalt(z1.VnD(), z1.VnS(), z23.VnS()); __ movprfx(z1, z2); __ 
smlalt(z1.VnD(), z3.VnS(), z1.VnS(), 0); __ movprfx(z1, z2); __ smlalt(z1.VnS(), z1.VnH(), z2.VnH(), 0); __ movprfx(z1, z2); __ smlslb(z1.VnD(), z1.VnS(), z23.VnS()); __ movprfx(z1, z2); __ smlslb(z1.VnD(), z3.VnS(), z1.VnS(), 0); __ movprfx(z1, z2); __ smlslb(z1.VnS(), z3.VnH(), z1.VnH(), 0); __ movprfx(z1, z2); __ smlslt(z1.VnD(), z1.VnS(), z23.VnS()); __ movprfx(z1, z2); __ smlslt(z1.VnD(), z3.VnS(), z1.VnS(), 0); __ movprfx(z1, z2); __ smlslt(z1.VnS(), z1.VnH(), z2.VnH(), 0); __ movprfx(z29.VnB(), p1.Merging(), z30.VnB()); __ sqabs(z29.VnB(), p1.Merging(), z29.VnB()); __ movprfx(z28.VnB(), p0.Merging(), z29.VnB()); __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()); __ movprfx(z20, z21); __ sqcadd(z20.VnB(), z20.VnB(), z20.VnB(), 90); __ movprfx(z6, z7); __ sqdmlalb(z6.VnD(), z6.VnS(), z25.VnS()); __ movprfx(z6, z7); __ sqdmlalb(z6.VnD(), z6.VnS(), z2.VnS(), 0); __ movprfx(z6, z7); __ sqdmlalb(z6.VnS(), z6.VnH(), z2.VnH(), 0); __ movprfx(z23, z24); __ sqdmlalbt(z23.VnD(), z23.VnS(), z26.VnS()); __ movprfx(z11, z12); __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS()); __ movprfx(z11, z12); __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS(), 0); __ movprfx(z1, z12); __ sqdmlalt(z1.VnS(), z0.VnH(), z1.VnH(), 0); __ movprfx(z16, z17); __ sqdmlslb(z16.VnD(), z26.VnS(), z16.VnS()); __ movprfx(z16, z17); __ sqdmlslb(z16.VnD(), z16.VnS(), z2.VnS(), 0); __ movprfx(z16, z17); __ sqdmlslb(z16.VnS(), z16.VnH(), z2.VnH(), 0); __ movprfx(z26, z27); __ sqdmlslbt(z26.VnD(), z26.VnS(), z4.VnS()); __ movprfx(z21, z22); __ sqdmlslt(z21.VnD(), z23.VnS(), z21.VnS()); __ movprfx(z21, z22); __ sqdmlslt(z21.VnD(), z21.VnS(), z0.VnS(), 0); __ movprfx(z1, z22); __ sqdmlslt(z21.VnS(), z23.VnH(), z1.VnH(), 0); __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); __ sqneg(z21.VnB(), p0.Merging(), z21.VnB()); __ movprfx(z31, z0); __ sqrdcmlah(z31.VnB(), z15.VnB(), z31.VnB(), 0); __ movprfx(z31, z0); __ sqrdcmlah(z31.VnH(), z31.VnH(), z2.VnH(), 0, 0); __ movprfx(z31, z0); __ sqrdcmlah(z31.VnS(), z31.VnS(), 
z2.VnS(), 0, 0); __ movprfx(z27, z28); __ sqrdmlah(z27.VnB(), z27.VnB(), z19.VnB()); __ movprfx(z27, z28); __ sqrdmlah(z27.VnH(), z27.VnH(), z1.VnH(), 0); __ movprfx(z27, z28); __ sqrdmlah(z27.VnS(), z27.VnS(), z1.VnS(), 0); __ movprfx(z27, z28); __ sqrdmlah(z27.VnD(), z27.VnD(), z1.VnD(), 0); __ movprfx(z11, z12); __ sqrdmlsh(z11.VnB(), z16.VnB(), z11.VnB()); __ movprfx(z11, z12); __ sqrdmlsh(z11.VnH(), z11.VnH(), z1.VnH(), 0); __ movprfx(z11, z12); __ sqrdmlsh(z11.VnS(), z11.VnS(), z1.VnS(), 0); __ movprfx(z11, z12); __ sqrdmlsh(z11.VnD(), z11.VnD(), z1.VnD(), 0); __ movprfx(z31.VnB(), p5.Merging(), z0.VnB()); __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z31.VnB()); __ movprfx(z25.VnB(), p6.Merging(), z26.VnB()); __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z25.VnB()); __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z0.VnB()); __ movprfx(z7.VnB(), p3.Merging(), z8.VnB()); __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z7.VnB()); __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); __ movprfx(z23.VnB(), p4.Merging(), z24.VnB()); __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z23.VnB()); __ movprfx(z31.VnB(), p7.Merging(), z0.VnB()); __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z31.VnB()); __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); __ movprfx(z0, z1); __ srsra(z0.VnB(), z0.VnB(), 1); __ movprfx(z0, z1); __ ssra(z0.VnB(), z0.VnB(), 1); __ movprfx(z26.VnB(), p2.Merging(), z27.VnB()); __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z26.VnB()); __ movprfx(z23, z24); __ uaba(z23.VnB(), z22.VnB(), z23.VnB()); __ movprfx(z11, z12); __ uabalb(z11.VnD(), z25.VnS(), z11.VnS()); __ movprfx(z4, z5); __ uabalt(z4.VnD(), z4.VnS(), z31.VnS()); __ movprfx(z20.VnD(), p4.Merging(), z21.VnD()); __ uadalp(z20.VnD(), p4.Merging(), 
z20.VnS()); __ movprfx(z21.VnB(), p2.Merging(), z22.VnB()); __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z21.VnB()); __ movprfx(z1.VnB(), p4.Merging(), z2.VnB()); __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z1.VnB()); __ movprfx(z18.VnB(), p0.Merging(), z19.VnB()); __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z18.VnB()); __ movprfx(z7, z8); __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z7.VnB()); __ movprfx(z10, z11); __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z10.VnB()); __ movprfx(z31, z0); __ umlalb(z31.VnD(), z9.VnS(), z31.VnS()); __ movprfx(z31, z0); __ umlalb(z31.VnD(), z31.VnS(), z1.VnS(), 0); __ movprfx(z31, z0); __ umlalb(z31.VnS(), z31.VnH(), z1.VnH(), 0); __ movprfx(z11, z12); __ umlalt(z11.VnD(), z11.VnS(), z22.VnS()); __ movprfx(z11, z12); __ umlalt(z11.VnD(), z11.VnS(), z2.VnS(), 0); __ movprfx(z1, z12); __ umlalt(z1.VnS(), z5.VnH(), z1.VnH(), 0); __ movprfx(z28, z29); __ umlslb(z28.VnD(), z28.VnS(), z9.VnS()); __ movprfx(z28, z29); __ umlslb(z28.VnD(), z28.VnS(), z1.VnS(), 0); __ movprfx(z28, z29); __ umlslb(z28.VnS(), z28.VnH(), z1.VnH(), 0); __ movprfx(z9, z10); __ umlslt(z9.VnD(), z9.VnS(), z30.VnS()); __ movprfx(z9, z10); __ umlslt(z9.VnD(), z9.VnS(), z0.VnS(), 0); __ movprfx(z9, z10); __ umlslt(z9.VnS(), z9.VnH(), z0.VnH(), 0); __ movprfx(z24.VnB(), p7.Merging(), z25.VnB()); __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z24.VnB()), __ movprfx(z20.VnB(), p1.Merging(), z21.VnB()); __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z20.VnB()); __ movprfx(z8.VnB(), p5.Merging(), z9.VnB()); __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z8.VnB()); __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z29.VnB()); __ movprfx(z12.VnB(), p1.Merging(), z13.VnB()); __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB()); __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB()); __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), 
z20.VnB()); __ movprfx(z25.VnS(), p7.Merging(), z26.VnS()); __ urecpe(z25.VnS(), p7.Merging(), z25.VnS()); __ movprfx(z29.VnB(), p4.Merging(), z30.VnB()); __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z29.VnB()); __ movprfx(z15.VnB(), p2.Merging(), z16.VnB()); __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z15.VnB()); __ movprfx(z27.VnB(), p1.Merging(), z28.VnB()); __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z27.VnB()); __ movprfx(z4.VnS(), p3.Merging(), z5.VnS()); __ ursqrte(z4.VnS(), p3.Merging(), z4.VnS()); __ movprfx(z0, z1); __ ursra(z0.VnB(), z0.VnB(), 1); __ movprfx(z25.VnB(), p4.Merging(), z26.VnB()); __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z25.VnB()); __ movprfx(z0, z1); __ usra(z0.VnB(), z0.VnB(), 1); __ movprfx(z16, z17); __ xar(z16.VnB(), z16.VnB(), z16.VnB(), 1); } assm.FinalizeCode(); CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); } TEST(movprfx_negative_lane_size_sve2) { Assembler assm; assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); { // We have to use the Assembler directly to generate movprfx, so we need // to manually reserve space for the code we're about to emit. 
static const size_t kPairCount = 140; CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); __ movprfx(z14.VnS(), p4.Merging(), z15.VnS()); __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD()); __ movprfx(z15.VnS(), p0.Merging(), z16.VnS()); __ flogb(z15.VnH(), p0.Merging(), z3.VnH()); __ movprfx(z19.VnB(), p5.Merging(), z20.VnB()); __ sadalp(z19.VnD(), p5.Merging(), z9.VnS()); __ movprfx(z20.VnH(), p3.Merging(), z21.VnH()); __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()); __ movprfx(z21.VnH(), p0.Merging(), z22.VnH()); __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()); __ movprfx(z1.VnS(), p0.Merging(), z2.VnS()); __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()); __ movprfx(z29.VnD(), p1.Merging(), z30.VnD()); __ sqabs(z29.VnB(), p1.Merging(), z18.VnB()); __ movprfx(z28.VnH(), p0.Merging(), z29.VnH()); __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()); __ movprfx(z21.VnH(), p0.Merging(), z22.VnH()); __ sqneg(z21.VnB(), p0.Merging(), z17.VnB()); __ movprfx(z31.VnS(), p5.Merging(), z0.VnS()); __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()); __ movprfx(z25.VnD(), p6.Merging(), z26.VnD()); __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()); __ movprfx(z0.VnH(), p5.Merging(), z1.VnH()); __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0); __ movprfx(z0.VnS(), p5.Merging(), z1.VnS()); __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB()); __ movprfx(z7.VnD(), p3.Merging(), z8.VnD()); __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()); __ movprfx(z10.VnH(), p1.Merging(), z11.VnH()); __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0); __ movprfx(z16.VnH(), p7.Merging(), z17.VnH()); __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); __ movprfx(z16.VnS(), p7.Merging(), z17.VnS()); __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); __ movprfx(z23.VnD(), p4.Merging(), z24.VnD()); __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()); __ movprfx(z31.VnH(), p7.Merging(), z0.VnH()); __ srshl(z31.VnB(), p7.Merging(), 
z31.VnB(), z3.VnB()); __ movprfx(z16.VnH(), p7.Merging(), z17.VnH()); __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()); __ movprfx(z12.VnH(), p0.Merging(), z13.VnH()); __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1); __ movprfx(z26.VnH(), p2.Merging(), z27.VnH()); __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()); __ movprfx(z20.VnB(), p4.Merging(), z21.VnB()); __ uadalp(z20.VnD(), p4.Merging(), z5.VnS()); __ movprfx(z21.VnH(), p2.Merging(), z22.VnH()); __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()); __ movprfx(z1.VnH(), p4.Merging(), z2.VnH()); __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()); __ movprfx(z18.VnH(), p0.Merging(), z19.VnH()); __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()); __ movprfx(z24.VnH(), p7.Merging(), z25.VnH()); __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), __ movprfx(z20.VnS(), p1.Merging(), z21.VnS()); __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()); __ movprfx(z8.VnS(), p5.Merging(), z9.VnS()); __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()); __ movprfx(z29.VnS(), p7.Merging(), z30.VnS()); __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0); __ movprfx(z29.VnS(), p7.Merging(), z30.VnS()); __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB()); __ movprfx(z12.VnS(), p1.Merging(), z13.VnS()); __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB()); __ movprfx(z20.VnS(), p0.Merging(), z21.VnS()); __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); __ movprfx(z20.VnS(), p0.Merging(), z21.VnS()); __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); __ movprfx(z25.VnB(), p7.Merging(), z26.VnB()); __ urecpe(z25.VnS(), p7.Merging(), z2.VnS()); __ movprfx(z29.VnD(), p4.Merging(), z30.VnD()); __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()); __ movprfx(z15.VnD(), p2.Merging(), z16.VnD()); __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()); __ movprfx(z27.VnD(), p1.Merging(), z28.VnD()); __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()); __ movprfx(z31.VnD(), 
p2.Merging(), z0.VnD()); __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1); __ movprfx(z4.VnH(), p3.Merging(), z5.VnH()); __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS()); __ movprfx(z25.VnD(), p4.Merging(), z26.VnD()); __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()); } assm.FinalizeCode(); CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); } } // namespace aarch64 } // namespace vixl