1b8021494Sopenharmony_ci// Copyright 2017, VIXL authors 2b8021494Sopenharmony_ci// All rights reserved. 3b8021494Sopenharmony_ci// 4b8021494Sopenharmony_ci// Redistribution and use in source and binary forms, with or without 5b8021494Sopenharmony_ci// modification, are permitted provided that the following conditions are met: 6b8021494Sopenharmony_ci// 7b8021494Sopenharmony_ci// * Redistributions of source code must retain the above copyright notice, 8b8021494Sopenharmony_ci// this list of conditions and the following disclaimer. 9b8021494Sopenharmony_ci// * Redistributions in binary form must reproduce the above copyright notice, 10b8021494Sopenharmony_ci// this list of conditions and the following disclaimer in the documentation 11b8021494Sopenharmony_ci// and/or other materials provided with the distribution. 12b8021494Sopenharmony_ci// * Neither the name of ARM Limited nor the names of its contributors may be 13b8021494Sopenharmony_ci// used to endorse or promote products derived from this software without 14b8021494Sopenharmony_ci// specific prior written permission. 15b8021494Sopenharmony_ci// 16b8021494Sopenharmony_ci// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17b8021494Sopenharmony_ci// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18b8021494Sopenharmony_ci// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19b8021494Sopenharmony_ci// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20b8021494Sopenharmony_ci// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21b8021494Sopenharmony_ci// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22b8021494Sopenharmony_ci// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23b8021494Sopenharmony_ci// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24b8021494Sopenharmony_ci// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25b8021494Sopenharmony_ci// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26b8021494Sopenharmony_ci 27b8021494Sopenharmony_ci#include <cstdio> 28b8021494Sopenharmony_ci#include <cstring> 29b8021494Sopenharmony_ci#include <string> 30b8021494Sopenharmony_ci 31b8021494Sopenharmony_ci#include "test-runner.h" 32b8021494Sopenharmony_ci#include "test-utils.h" 33b8021494Sopenharmony_ci 34b8021494Sopenharmony_ci#include "aarch64/assembler-aarch64.h" 35b8021494Sopenharmony_ci#include "aarch64/instructions-aarch64.h" 36b8021494Sopenharmony_ci#include "aarch64/test-utils-aarch64.h" 37b8021494Sopenharmony_ci 38b8021494Sopenharmony_ci#define __ assm. 39b8021494Sopenharmony_ci#define TEST(name) TEST_(AARCH64_API_##name) 40b8021494Sopenharmony_ci 41b8021494Sopenharmony_cinamespace vixl { 42b8021494Sopenharmony_cinamespace aarch64 { 43b8021494Sopenharmony_ci 44b8021494Sopenharmony_ciclass InstructionReporter : public DecoderVisitor { 45b8021494Sopenharmony_ci public: 46b8021494Sopenharmony_ci InstructionReporter() : DecoderVisitor(kNonConstVisitor) {} 47b8021494Sopenharmony_ci 48b8021494Sopenharmony_ci void Visit(Metadata* metadata, const Instruction* instr) VIXL_OVERRIDE { 49b8021494Sopenharmony_ci USE(instr); 50b8021494Sopenharmony_ci instr_form_ = (*metadata)["form"]; 51b8021494Sopenharmony_ci } 52b8021494Sopenharmony_ci 53b8021494Sopenharmony_ci std::string MoveForm() { return instr_form_; } 54b8021494Sopenharmony_ci 55b8021494Sopenharmony_ci private: 56b8021494Sopenharmony_ci std::string instr_form_; 57b8021494Sopenharmony_ci}; 58b8021494Sopenharmony_ci 59b8021494Sopenharmony_cistatic void CheckAndMaybeDisassembleMovprfxPairs(const CodeBuffer* buffer, 60b8021494Sopenharmony_ci bool can_take_movprfx) { 61b8021494Sopenharmony_ci const Instruction* pair = buffer->GetStartAddress<Instruction*>(); 62b8021494Sopenharmony_ci const Instruction* end = buffer->GetEndAddress<Instruction*>(); 63b8021494Sopenharmony_ci bool any_failures = false; 64b8021494Sopenharmony_ci PrintDisassembler print_disasm(stdout); 65b8021494Sopenharmony_ci Decoder decoder; 66b8021494Sopenharmony_ci InstructionReporter reporter; 67b8021494Sopenharmony_ci decoder.AppendVisitor(&reporter); 68b8021494Sopenharmony_ci 69b8021494Sopenharmony_ci while (pair < end) { 70b8021494Sopenharmony_ci const Instruction* movprfx = pair; 71b8021494Sopenharmony_ci const Instruction* candidate = pair->GetNextInstruction(); 72b8021494Sopenharmony_ci const Instruction* next_pair = candidate->GetNextInstruction(); 73b8021494Sopenharmony_ci VIXL_ASSERT(candidate < end); 74b8021494Sopenharmony_ci 75b8021494Sopenharmony_ci Instr inst = candidate->GetInstructionBits(); 76b8021494Sopenharmony_ci decoder.Decode(reinterpret_cast<Instruction*>(&inst)); 77b8021494Sopenharmony_ci std::string form = reporter.MoveForm(); 78b8021494Sopenharmony_ci bool failed = 79b8021494Sopenharmony_ci can_take_movprfx != candidate->CanTakeSVEMovprfx(form.c_str(), movprfx); 80b8021494Sopenharmony_ci any_failures = any_failures || failed; 81b8021494Sopenharmony_ci 82b8021494Sopenharmony_ci if (failed || Test::disassemble()) { 83b8021494Sopenharmony_ci printf("----\n"); 84b8021494Sopenharmony_ci if (failed) { 85b8021494Sopenharmony_ci printf("# ERROR: Expected %sCanTakeSVEMovprfx(movprfx):\n", 86b8021494Sopenharmony_ci can_take_movprfx ? "" : "!"); 87b8021494Sopenharmony_ci } 88b8021494Sopenharmony_ci print_disasm.DisassembleBuffer(pair, next_pair); 89b8021494Sopenharmony_ci } 90b8021494Sopenharmony_ci 91b8021494Sopenharmony_ci pair = next_pair; 92b8021494Sopenharmony_ci } 93b8021494Sopenharmony_ci // Abort only at the end, so we can see the individual failures. 94b8021494Sopenharmony_ci VIXL_CHECK(!any_failures); 95b8021494Sopenharmony_ci} 96b8021494Sopenharmony_ci 97b8021494Sopenharmony_ciTEST(movprfx_negative_aliasing) { 98b8021494Sopenharmony_ci // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not 99b8021494Sopenharmony_ci // alias an input to the prefixed instruction. 100b8021494Sopenharmony_ci Assembler assm; 101b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); 102b8021494Sopenharmony_ci { 103b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 104b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 105b8021494Sopenharmony_ci static const size_t kPairCount = 79; 106b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 107b8021494Sopenharmony_ci 108b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p0.Merging(), z9.VnB()); 109b8021494Sopenharmony_ci __ abs(z0.VnB(), p0.Merging(), z0.VnB()); 110b8021494Sopenharmony_ci 111b8021494Sopenharmony_ci __ movprfx(z1, z17); 112b8021494Sopenharmony_ci __ add(z1.VnH(), p2.Merging(), z1.VnH(), z1.VnH()); 113b8021494Sopenharmony_ci 114b8021494Sopenharmony_ci __ movprfx(z12, z13); 115b8021494Sopenharmony_ci __ and_(z12.VnD(), p5.Merging(), z12.VnD(), z12.VnD()); 116b8021494Sopenharmony_ci 117b8021494Sopenharmony_ci __ movprfx(z2, z4); 118b8021494Sopenharmony_ci __ asr(z2.VnS(), p2.Merging(), z2.VnS(), z2.VnS()); 119b8021494Sopenharmony_ci 120b8021494Sopenharmony_ci __ movprfx(z10, z18); 121b8021494Sopenharmony_ci __ asr(z10.VnH(), p2.Merging(), z10.VnH(), z10.VnD()); 122b8021494Sopenharmony_ci 123b8021494Sopenharmony_ci __ movprfx(z17.VnD(), p5.Zeroing(), z20.VnD()); 124b8021494Sopenharmony_ci __ asr(z17.VnD(), p5.Merging(), z17.VnD(), z17.VnD()); 125b8021494Sopenharmony_ci 126b8021494Sopenharmony_ci __ movprfx(z22, z9); 127b8021494Sopenharmony_ci __ asrr(z22.VnH(), p1.Merging(), z22.VnH(), z22.VnH()); 128b8021494Sopenharmony_ci 129b8021494Sopenharmony_ci __ movprfx(z0.VnS(), p6.Zeroing(), z6.VnS()); 130b8021494Sopenharmony_ci __ bic(z0.VnS(), p6.Merging(), z0.VnS(), z0.VnS()); 131b8021494Sopenharmony_ci 132b8021494Sopenharmony_ci __ movprfx(z12, z16); 133b8021494Sopenharmony_ci __ clasta(z12.VnD(), p5, z12.VnD(), z12.VnD()); 134b8021494Sopenharmony_ci 135b8021494Sopenharmony_ci __ movprfx(z7, z15); 136b8021494Sopenharmony_ci __ clastb(z7.VnS(), p7, z7.VnS(), z7.VnS()); 137b8021494Sopenharmony_ci 138b8021494Sopenharmony_ci __ movprfx(z10, z29); 139b8021494Sopenharmony_ci __ cls(z10.VnH(), p2.Merging(), z10.VnH()); 140b8021494Sopenharmony_ci 141b8021494Sopenharmony_ci __ movprfx(z6, z13); 142b8021494Sopenharmony_ci __ clz(z6.VnB(), p4.Merging(), z6.VnB()); 143b8021494Sopenharmony_ci 144b8021494Sopenharmony_ci __ movprfx(z14.VnS(), p6.Zeroing(), z3.VnS()); 145b8021494Sopenharmony_ci __ cnot(z14.VnS(), p6.Merging(), z14.VnS()); 146b8021494Sopenharmony_ci 147b8021494Sopenharmony_ci __ movprfx(z5.VnD(), p6.Merging(), z4.VnD()); 148b8021494Sopenharmony_ci __ cnt(z5.VnD(), p6.Merging(), z5.VnD()); 149b8021494Sopenharmony_ci 150b8021494Sopenharmony_ci __ movprfx(z19.VnB(), p6.Zeroing(), z4.VnB()); 151b8021494Sopenharmony_ci __ eor(z19.VnB(), p6.Merging(), z19.VnB(), z19.VnB()); 152b8021494Sopenharmony_ci 153b8021494Sopenharmony_ci __ movprfx(z27, z2); 154b8021494Sopenharmony_ci __ ext(z27.VnB(), z27.VnB(), z27.VnB(), 42); 155b8021494Sopenharmony_ci 156b8021494Sopenharmony_ci __ movprfx(z4.VnS(), p1.Zeroing(), z22.VnS()); 157b8021494Sopenharmony_ci __ lsl(z4.VnS(), p1.Merging(), z4.VnS(), z4.VnS()); 158b8021494Sopenharmony_ci 159b8021494Sopenharmony_ci __ movprfx(z4, z5); 160b8021494Sopenharmony_ci __ lsl(z4.VnB(), p5.Merging(), z4.VnB(), z4.VnD()); 161b8021494Sopenharmony_ci 162b8021494Sopenharmony_ci __ movprfx(z11.VnD(), p4.Merging(), z29.VnD()); 163b8021494Sopenharmony_ci __ lsl(z11.VnD(), p4.Merging(), z11.VnD(), z11.VnD()); 164b8021494Sopenharmony_ci 165b8021494Sopenharmony_ci __ movprfx(z12.VnD(), p6.Merging(), z3.VnD()); 166b8021494Sopenharmony_ci __ lslr(z12.VnD(), p6.Merging(), z12.VnD(), z12.VnD()); 167b8021494Sopenharmony_ci 168b8021494Sopenharmony_ci __ movprfx(z7, z2); 169b8021494Sopenharmony_ci __ lsr(z7.VnB(), p4.Merging(), z7.VnB(), z7.VnB()); 170b8021494Sopenharmony_ci 171b8021494Sopenharmony_ci __ movprfx(z25.VnH(), p6.Merging(), z28.VnH()); 172b8021494Sopenharmony_ci __ lsr(z25.VnH(), p6.Merging(), z25.VnH(), z25.VnD()); 173b8021494Sopenharmony_ci 174b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p6.Merging(), z6.VnD()); 175b8021494Sopenharmony_ci __ lsr(z14.VnD(), p6.Merging(), z14.VnD(), z14.VnD()); 176b8021494Sopenharmony_ci 177b8021494Sopenharmony_ci __ movprfx(z26.VnH(), p6.Zeroing(), z27.VnH()); 178b8021494Sopenharmony_ci __ lsrr(z26.VnH(), p6.Merging(), z26.VnH(), z26.VnH()); 179b8021494Sopenharmony_ci 180b8021494Sopenharmony_ci __ movprfx(z17.VnS(), p4.Zeroing(), z29.VnS()); 181b8021494Sopenharmony_ci __ mad(z17.VnS(), p4.Merging(), z17.VnS(), z23.VnS()); 182b8021494Sopenharmony_ci 183b8021494Sopenharmony_ci __ movprfx(z7, z17); 184b8021494Sopenharmony_ci __ mad(z7.VnD(), p5.Merging(), z4.VnD(), z7.VnD()); 185b8021494Sopenharmony_ci 186b8021494Sopenharmony_ci __ movprfx(z11, z7); 187b8021494Sopenharmony_ci __ mla(z11.VnS(), p1.Merging(), z11.VnS(), z27.VnS()); 188b8021494Sopenharmony_ci 189b8021494Sopenharmony_ci __ movprfx(z7, z5); 190b8021494Sopenharmony_ci __ mla(z7.VnH(), p0.Merging(), z5.VnH(), z7.VnH()); 191b8021494Sopenharmony_ci 192b8021494Sopenharmony_ci __ movprfx(z1.VnH(), p0.Merging(), z17.VnH()); 193b8021494Sopenharmony_ci __ mls(z1.VnH(), p0.Merging(), z1.VnH(), z31.VnH()); 194b8021494Sopenharmony_ci 195b8021494Sopenharmony_ci __ movprfx(z22.VnB(), p3.Merging(), z18.VnB()); 196b8021494Sopenharmony_ci __ mls(z22.VnB(), p3.Merging(), z18.VnB(), z22.VnB()); 197b8021494Sopenharmony_ci 198b8021494Sopenharmony_ci __ movprfx(z7.VnS(), p0.Merging(), z10.VnS()); 199b8021494Sopenharmony_ci __ msb(z7.VnS(), p0.Merging(), z7.VnS(), z10.VnS()); 200b8021494Sopenharmony_ci 201b8021494Sopenharmony_ci __ movprfx(z12, z6); 202b8021494Sopenharmony_ci __ msb(z12.VnH(), p7.Merging(), z6.VnH(), z12.VnH()); 203b8021494Sopenharmony_ci 204b8021494Sopenharmony_ci __ movprfx(z8.VnB(), p4.Merging(), z3.VnB()); 205b8021494Sopenharmony_ci __ mul(z8.VnB(), p4.Merging(), z8.VnB(), z8.VnB()); 206b8021494Sopenharmony_ci 207b8021494Sopenharmony_ci __ movprfx(z9, z26); 208b8021494Sopenharmony_ci __ neg(z9.VnS(), p7.Merging(), z9.VnS()); 209b8021494Sopenharmony_ci 210b8021494Sopenharmony_ci __ movprfx(z16, z8); 211b8021494Sopenharmony_ci __ not_(z16.VnH(), p6.Merging(), z16.VnH()); 212b8021494Sopenharmony_ci 213b8021494Sopenharmony_ci __ movprfx(z25.VnH(), p5.Zeroing(), z11.VnH()); 214b8021494Sopenharmony_ci __ orr(z25.VnH(), p5.Merging(), z25.VnH(), z25.VnH()); 215b8021494Sopenharmony_ci 216b8021494Sopenharmony_ci __ movprfx(z17.VnH(), p1.Merging(), z22.VnH()); 217b8021494Sopenharmony_ci __ rbit(z17.VnH(), p1.Merging(), z17.VnH()); 218b8021494Sopenharmony_ci 219b8021494Sopenharmony_ci __ movprfx(z11, z25); 220b8021494Sopenharmony_ci __ revb(z11.VnD(), p6.Merging(), z11.VnD()); 221b8021494Sopenharmony_ci 222b8021494Sopenharmony_ci __ movprfx(z13, z27); 223b8021494Sopenharmony_ci __ revh(z13.VnS(), p2.Merging(), z13.VnS()); 224b8021494Sopenharmony_ci 225b8021494Sopenharmony_ci __ movprfx(z30.VnD(), p6.Merging(), z20.VnD()); 226b8021494Sopenharmony_ci __ revw(z30.VnD(), p6.Merging(), z30.VnD()); 227b8021494Sopenharmony_ci 228b8021494Sopenharmony_ci __ movprfx(z2.VnD(), p2.Merging(), z21.VnD()); 229b8021494Sopenharmony_ci __ sabd(z2.VnD(), p2.Merging(), z2.VnD(), z2.VnD()); 230b8021494Sopenharmony_ci 231b8021494Sopenharmony_ci __ movprfx(z0, z7); 232b8021494Sopenharmony_ci __ sdiv(z0.VnD(), p0.Merging(), z0.VnD(), z0.VnD()); 233b8021494Sopenharmony_ci 234b8021494Sopenharmony_ci __ movprfx(z19, z28); 235b8021494Sopenharmony_ci __ sdivr(z19.VnS(), p1.Merging(), z19.VnS(), z19.VnS()); 236b8021494Sopenharmony_ci 237b8021494Sopenharmony_ci __ movprfx(z5, z18); 238b8021494Sopenharmony_ci __ sdot(z5.VnS(), z18.VnB(), z5.VnB(), 1); 239b8021494Sopenharmony_ci 240b8021494Sopenharmony_ci __ movprfx(z15, z11); 241b8021494Sopenharmony_ci __ sdot(z15.VnD(), z2.VnH(), z15.VnH(), 1); 242b8021494Sopenharmony_ci 243b8021494Sopenharmony_ci __ movprfx(z30, z13); 244b8021494Sopenharmony_ci __ sdot(z30.VnD(), z30.VnH(), z13.VnH(), 1); 245b8021494Sopenharmony_ci 246b8021494Sopenharmony_ci __ movprfx(z8, z9); 247b8021494Sopenharmony_ci __ sdot(z8.VnS(), z8.VnB(), z9.VnB()); 248b8021494Sopenharmony_ci 249b8021494Sopenharmony_ci __ movprfx(z23, z14); 250b8021494Sopenharmony_ci __ sdot(z23.VnS(), z14.VnB(), z23.VnB()); 251b8021494Sopenharmony_ci 252b8021494Sopenharmony_ci __ movprfx(z26, z5); 253b8021494Sopenharmony_ci __ sdot(z26.VnS(), z26.VnB(), z5.VnB(), 1); 254b8021494Sopenharmony_ci 255b8021494Sopenharmony_ci __ movprfx(z14, z15); 256b8021494Sopenharmony_ci __ smax(z14.VnB(), p2.Merging(), z14.VnB(), z14.VnB()); 257b8021494Sopenharmony_ci 258b8021494Sopenharmony_ci __ movprfx(z26.VnS(), p0.Merging(), z10.VnS()); 259b8021494Sopenharmony_ci __ smin(z26.VnS(), p0.Merging(), z26.VnS(), z26.VnS()); 260b8021494Sopenharmony_ci 261b8021494Sopenharmony_ci __ movprfx(z22, z18); 262b8021494Sopenharmony_ci __ smulh(z22.VnB(), p2.Merging(), z22.VnB(), z22.VnB()); 263b8021494Sopenharmony_ci 264b8021494Sopenharmony_ci __ movprfx(z8, z19); 265b8021494Sopenharmony_ci __ splice(z8.VnD(), p2, z8.VnD(), z8.VnD()); 266b8021494Sopenharmony_ci 267b8021494Sopenharmony_ci __ movprfx(z23.VnH(), p6.Zeroing(), z2.VnH()); 268b8021494Sopenharmony_ci __ sub(z23.VnH(), p6.Merging(), z23.VnH(), z23.VnH()); 269b8021494Sopenharmony_ci 270b8021494Sopenharmony_ci __ movprfx(z25.VnS(), p2.Merging(), z21.VnS()); 271b8021494Sopenharmony_ci __ subr(z25.VnS(), p2.Merging(), z25.VnS(), z25.VnS()); 272b8021494Sopenharmony_ci 273b8021494Sopenharmony_ci __ movprfx(z28, z31); 274b8021494Sopenharmony_ci __ sxtb(z28.VnS(), p6.Merging(), z28.VnS()); 275b8021494Sopenharmony_ci 276b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p6.Merging(), z17.VnD()); 277b8021494Sopenharmony_ci __ sxth(z14.VnD(), p6.Merging(), z14.VnD()); 278b8021494Sopenharmony_ci 279b8021494Sopenharmony_ci __ movprfx(z21.VnD(), p0.Zeroing(), z28.VnD()); 280b8021494Sopenharmony_ci __ sxtw(z21.VnD(), p0.Merging(), z21.VnD()); 281b8021494Sopenharmony_ci 282b8021494Sopenharmony_ci __ movprfx(z25, z30); 283b8021494Sopenharmony_ci __ uabd(z25.VnB(), p5.Merging(), z25.VnB(), z25.VnB()); 284b8021494Sopenharmony_ci 285b8021494Sopenharmony_ci __ movprfx(z13.VnD(), p2.Merging(), z30.VnD()); 286b8021494Sopenharmony_ci __ udiv(z13.VnD(), p2.Merging(), z13.VnD(), z13.VnD()); 287b8021494Sopenharmony_ci 288b8021494Sopenharmony_ci __ movprfx(z19.VnD(), p4.Zeroing(), z6.VnD()); 289b8021494Sopenharmony_ci __ udivr(z19.VnD(), p4.Merging(), z19.VnD(), z19.VnD()); 290b8021494Sopenharmony_ci 291b8021494Sopenharmony_ci __ movprfx(z1, z20); 292b8021494Sopenharmony_ci __ udot(z1.VnS(), z18.VnB(), z1.VnB(), 1); 293b8021494Sopenharmony_ci 294b8021494Sopenharmony_ci __ movprfx(z8, z2); 295b8021494Sopenharmony_ci __ udot(z8.VnD(), z2.VnH(), z8.VnH(), 1); 296b8021494Sopenharmony_ci 297b8021494Sopenharmony_ci __ movprfx(z28, z10); 298b8021494Sopenharmony_ci __ udot(z28.VnD(), z28.VnH(), z7.VnH(), 1); 299b8021494Sopenharmony_ci 300b8021494Sopenharmony_ci __ movprfx(z21, z11); 301b8021494Sopenharmony_ci __ udot(z21.VnD(), z21.VnH(), z11.VnH()); 302b8021494Sopenharmony_ci 303b8021494Sopenharmony_ci __ movprfx(z1, z22); 304b8021494Sopenharmony_ci __ udot(z1.VnD(), z10.VnH(), z1.VnH()); 305b8021494Sopenharmony_ci 306b8021494Sopenharmony_ci __ movprfx(z8, z23); 307b8021494Sopenharmony_ci __ udot(z8.VnS(), z8.VnB(), z0.VnB(), 1); 308b8021494Sopenharmony_ci 309b8021494Sopenharmony_ci __ movprfx(z10.VnB(), p5.Zeroing(), z0.VnB()); 310b8021494Sopenharmony_ci __ umax(z10.VnB(), p5.Merging(), z10.VnB(), z10.VnB()); 311b8021494Sopenharmony_ci 312b8021494Sopenharmony_ci __ movprfx(z0.VnS(), p2.Zeroing(), z30.VnS()); 313b8021494Sopenharmony_ci __ umin(z0.VnS(), p2.Merging(), z0.VnS(), z0.VnS()); 314b8021494Sopenharmony_ci 315b8021494Sopenharmony_ci __ movprfx(z26.VnD(), p6.Zeroing(), z29.VnD()); 316b8021494Sopenharmony_ci __ umulh(z26.VnD(), p6.Merging(), z26.VnD(), z26.VnD()); 317b8021494Sopenharmony_ci 318b8021494Sopenharmony_ci __ movprfx(z23, z25); 319b8021494Sopenharmony_ci __ uxtb(z23.VnS(), p7.Merging(), z23.VnS()); 320b8021494Sopenharmony_ci 321b8021494Sopenharmony_ci __ movprfx(z14.VnS(), p3.Zeroing(), z5.VnS()); 322b8021494Sopenharmony_ci __ uxth(z14.VnS(), p3.Merging(), z14.VnS()); 323b8021494Sopenharmony_ci 324b8021494Sopenharmony_ci __ movprfx(z14, z5); 325b8021494Sopenharmony_ci __ uxtw(z14.VnD(), p3.Merging(), z14.VnD()); 326b8021494Sopenharmony_ci 327b8021494Sopenharmony_ci __ movprfx(z22, z5); 328b8021494Sopenharmony_ci __ smmla(z22.VnS(), z22.VnB(), z0.VnB()); 329b8021494Sopenharmony_ci 330b8021494Sopenharmony_ci __ movprfx(z1, z5); 331b8021494Sopenharmony_ci __ ummla(z1.VnS(), z10.VnB(), z1.VnB()); 332b8021494Sopenharmony_ci 333b8021494Sopenharmony_ci __ movprfx(z30, z5); 334b8021494Sopenharmony_ci __ usmmla(z30.VnS(), z30.VnB(), z18.VnB()); 335b8021494Sopenharmony_ci 336b8021494Sopenharmony_ci __ movprfx(z4, z5); 337b8021494Sopenharmony_ci __ usdot(z4.VnS(), z3.VnB(), z4.VnB()); 338b8021494Sopenharmony_ci 339b8021494Sopenharmony_ci __ movprfx(z10, z5); 340b8021494Sopenharmony_ci __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0); 341b8021494Sopenharmony_ci 342b8021494Sopenharmony_ci __ movprfx(z1, z5); 343b8021494Sopenharmony_ci __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1); 344b8021494Sopenharmony_ci } 345b8021494Sopenharmony_ci assm.FinalizeCode(); 346b8021494Sopenharmony_ci 347b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 348b8021494Sopenharmony_ci} 349b8021494Sopenharmony_ci 350b8021494Sopenharmony_ciTEST(movprfx_negative_aliasing_fp) { 351b8021494Sopenharmony_ci // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not 352b8021494Sopenharmony_ci // alias an input to the prefixed instruction. 353b8021494Sopenharmony_ci Assembler assm; 354b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, 355b8021494Sopenharmony_ci CPUFeatures::kSVEF32MM, 356b8021494Sopenharmony_ci CPUFeatures::kSVEF64MM); 357b8021494Sopenharmony_ci { 358b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 359b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 360b8021494Sopenharmony_ci static const size_t kPairCount = 80; 361b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 362b8021494Sopenharmony_ci 363b8021494Sopenharmony_ci __ movprfx(z17.VnS(), p1.Zeroing(), z12.VnS()); 364b8021494Sopenharmony_ci __ fabd(z17.VnS(), p1.Merging(), z17.VnS(), z17.VnS()); 365b8021494Sopenharmony_ci 366b8021494Sopenharmony_ci __ movprfx(z13, z23); 367b8021494Sopenharmony_ci __ fabs(z13.VnS(), p4.Merging(), z13.VnS()); 368b8021494Sopenharmony_ci 369b8021494Sopenharmony_ci __ movprfx(z24.VnS(), p5.Merging(), z15.VnS()); 370b8021494Sopenharmony_ci __ fadd(z24.VnS(), p5.Merging(), z24.VnS(), z24.VnS()); 371b8021494Sopenharmony_ci 372b8021494Sopenharmony_ci __ movprfx(z28.VnD(), p5.Zeroing(), z14.VnD()); 373b8021494Sopenharmony_ci __ fcadd(z28.VnD(), p5.Merging(), z28.VnD(), z28.VnD(), 90); 374b8021494Sopenharmony_ci 375b8021494Sopenharmony_ci __ movprfx(z5, z0); 376b8021494Sopenharmony_ci __ fcmla(z5.VnH(), z0.VnH(), z5.VnH(), 2, 180); 377b8021494Sopenharmony_ci 378b8021494Sopenharmony_ci __ movprfx(z10, z4); 379b8021494Sopenharmony_ci __ fcmla(z10.VnS(), z8.VnS(), z10.VnS(), 1, 270); 380b8021494Sopenharmony_ci 381b8021494Sopenharmony_ci __ movprfx(z12, z26); 382b8021494Sopenharmony_ci __ fcmla(z12.VnH(), z12.VnH(), z3.VnH(), 2, 180); 383b8021494Sopenharmony_ci 384b8021494Sopenharmony_ci __ movprfx(z8, z1); 385b8021494Sopenharmony_ci __ fcmla(z8.VnS(), z8.VnS(), z1.VnS(), 1, 270); 386b8021494Sopenharmony_ci 387b8021494Sopenharmony_ci __ movprfx(z16.VnD(), p0.Merging(), z13.VnD()); 388b8021494Sopenharmony_ci __ fcvt(z16.VnD(), p0.Merging(), z16.VnH()); 389b8021494Sopenharmony_ci 390b8021494Sopenharmony_ci __ movprfx(z12.VnD(), p7.Zeroing(), z13.VnD()); 391b8021494Sopenharmony_ci __ fcvt(z12.VnD(), p7.Merging(), z12.VnS()); 392b8021494Sopenharmony_ci 393b8021494Sopenharmony_ci __ movprfx(z14, z26); 394b8021494Sopenharmony_ci __ fcvt(z14.VnS(), p5.Merging(), z14.VnD()); 395b8021494Sopenharmony_ci 396b8021494Sopenharmony_ci __ movprfx(z26, z2); 397b8021494Sopenharmony_ci __ fcvt(z26.VnH(), p7.Merging(), z26.VnD()); 398b8021494Sopenharmony_ci 399b8021494Sopenharmony_ci __ movprfx(z25.VnD(), p2.Merging(), z13.VnD()); 400b8021494Sopenharmony_ci __ fcvtzs(z25.VnD(), p2.Merging(), z25.VnH()); 401b8021494Sopenharmony_ci 402b8021494Sopenharmony_ci __ movprfx(z31, z2); 403b8021494Sopenharmony_ci __ fcvtzs(z31.VnH(), p7.Merging(), z31.VnH()); 404b8021494Sopenharmony_ci 405b8021494Sopenharmony_ci __ movprfx(z21.VnD(), p1.Merging(), z7.VnD()); 406b8021494Sopenharmony_ci __ fcvtzs(z21.VnD(), p1.Merging(), z21.VnS()); 407b8021494Sopenharmony_ci 408b8021494Sopenharmony_ci __ movprfx(z5, z17); 409b8021494Sopenharmony_ci __ fcvtzs(z5.VnS(), p5.Merging(), z5.VnD()); 410b8021494Sopenharmony_ci 411b8021494Sopenharmony_ci __ movprfx(z19.VnD(), p1.Zeroing(), z16.VnD()); 412b8021494Sopenharmony_ci __ fcvtzu(z19.VnD(), p1.Merging(), z19.VnH()); 413b8021494Sopenharmony_ci 414b8021494Sopenharmony_ci __ movprfx(z2.VnH(), p7.Zeroing(), z28.VnH()); 415b8021494Sopenharmony_ci __ fcvtzu(z2.VnH(), p7.Merging(), z2.VnH()); 416b8021494Sopenharmony_ci 417b8021494Sopenharmony_ci __ movprfx(z21.VnD(), p7.Zeroing(), z27.VnD()); 418b8021494Sopenharmony_ci __ fcvtzu(z21.VnD(), p7.Merging(), z21.VnS()); 419b8021494Sopenharmony_ci 420b8021494Sopenharmony_ci __ movprfx(z22.VnD(), p4.Zeroing(), z8.VnD()); 421b8021494Sopenharmony_ci __ fcvtzu(z22.VnS(), p4.Merging(), z22.VnD()); 422b8021494Sopenharmony_ci 423b8021494Sopenharmony_ci __ movprfx(z0.VnS(), p5.Merging(), z5.VnS()); 424b8021494Sopenharmony_ci __ fdiv(z0.VnS(), p5.Merging(), z0.VnS(), z0.VnS()); 425b8021494Sopenharmony_ci 426b8021494Sopenharmony_ci __ movprfx(z12, z24); 427b8021494Sopenharmony_ci __ fdivr(z12.VnD(), p7.Merging(), z12.VnD(), z12.VnD()); 428b8021494Sopenharmony_ci 429b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p6.Zeroing(), z21.VnD()); 430b8021494Sopenharmony_ci __ fmad(z14.VnD(), p6.Merging(), z14.VnD(), z3.VnD()); 431b8021494Sopenharmony_ci 432b8021494Sopenharmony_ci __ movprfx(z2.VnS(), p5.Zeroing(), z10.VnS()); 433b8021494Sopenharmony_ci __ fmad(z2.VnS(), p5.Merging(), z14.VnS(), z2.VnS()); 434b8021494Sopenharmony_ci 435b8021494Sopenharmony_ci __ movprfx(z24, z5); 436b8021494Sopenharmony_ci __ fmax(z24.VnS(), p1.Merging(), z24.VnS(), z24.VnS()); 437b8021494Sopenharmony_ci 438b8021494Sopenharmony_ci __ movprfx(z15.VnD(), p2.Merging(), z26.VnD()); 439b8021494Sopenharmony_ci __ fmaxnm(z15.VnD(), p2.Merging(), z15.VnD(), z15.VnD()); 440b8021494Sopenharmony_ci 441b8021494Sopenharmony_ci __ movprfx(z20, z22); 442b8021494Sopenharmony_ci __ fmin(z20.VnH(), p0.Merging(), z20.VnH(), z20.VnH()); 443b8021494Sopenharmony_ci 444b8021494Sopenharmony_ci __ movprfx(z24.VnS(), p6.Zeroing(), z30.VnS()); 445b8021494Sopenharmony_ci __ fminnm(z24.VnS(), p6.Merging(), z24.VnS(), z24.VnS()); 446b8021494Sopenharmony_ci 447b8021494Sopenharmony_ci __ movprfx(z4, z24); 448b8021494Sopenharmony_ci __ fmla(z4.VnH(), z24.VnH(), z4.VnH(), 7); 449b8021494Sopenharmony_ci 450b8021494Sopenharmony_ci __ movprfx(z4, z7); 451b8021494Sopenharmony_ci __ fmla(z4.VnS(), z24.VnS(), z4.VnS(), 3); 452b8021494Sopenharmony_ci 453b8021494Sopenharmony_ci __ movprfx(z5, z28); 454b8021494Sopenharmony_ci __ fmla(z5.VnD(), z28.VnD(), z5.VnD(), 1); 455b8021494Sopenharmony_ci 456b8021494Sopenharmony_ci __ movprfx(z24, z2); 457b8021494Sopenharmony_ci __ fmla(z24.VnD(), z24.VnD(), z2.VnD(), 1); 458b8021494Sopenharmony_ci 459b8021494Sopenharmony_ci __ movprfx(z7, z21); 460b8021494Sopenharmony_ci __ fmla(z7.VnH(), p2.Merging(), z7.VnH(), z31.VnH()); 461b8021494Sopenharmony_ci 462b8021494Sopenharmony_ci __ movprfx(z25.VnH(), p5.Zeroing(), z29.VnH()); 463b8021494Sopenharmony_ci __ fmla(z25.VnH(), p5.Merging(), z29.VnH(), z25.VnH()); 464b8021494Sopenharmony_ci 465b8021494Sopenharmony_ci __ movprfx(z31, z25); 466b8021494Sopenharmony_ci __ fmla(z31.VnH(), z31.VnH(), z2.VnH(), 7); 467b8021494Sopenharmony_ci 468b8021494Sopenharmony_ci __ movprfx(z15, z4); 469b8021494Sopenharmony_ci __ fmla(z15.VnS(), z15.VnS(), z4.VnS(), 3); 470b8021494Sopenharmony_ci 471b8021494Sopenharmony_ci __ movprfx(z7, z11); 472b8021494Sopenharmony_ci __ fmls(z7.VnH(), z11.VnH(), z7.VnH(), 4); 473b8021494Sopenharmony_ci 474b8021494Sopenharmony_ci __ movprfx(z3, z10); 475b8021494Sopenharmony_ci __ fmls(z3.VnS(), z10.VnS(), z3.VnS(), 3); 476b8021494Sopenharmony_ci 477b8021494Sopenharmony_ci __ movprfx(z5, z16); 478b8021494Sopenharmony_ci __ fmls(z5.VnD(), z16.VnD(), z5.VnD(), 1); 479b8021494Sopenharmony_ci 480b8021494Sopenharmony_ci __ movprfx(z31, z26); 481b8021494Sopenharmony_ci __ fmls(z31.VnD(), z31.VnD(), z8.VnD(), 1); 482b8021494Sopenharmony_ci 483b8021494Sopenharmony_ci __ movprfx(z5.VnH(), p3.Merging(), z2.VnH()); 484b8021494Sopenharmony_ci __ fmls(z5.VnH(), p3.Merging(), z5.VnH(), z2.VnH()); 485b8021494Sopenharmony_ci 486b8021494Sopenharmony_ci __ movprfx(z22.VnS(), p3.Zeroing(), z17.VnS()); 487b8021494Sopenharmony_ci __ fmls(z22.VnS(), p3.Merging(), z21.VnS(), z22.VnS()); 488b8021494Sopenharmony_ci 489b8021494Sopenharmony_ci __ movprfx(z17, z2); 490b8021494Sopenharmony_ci __ fmls(z17.VnH(), z17.VnH(), z2.VnH(), 4); 491b8021494Sopenharmony_ci 492b8021494Sopenharmony_ci __ movprfx(z28, z11); 493b8021494Sopenharmony_ci __ fmls(z28.VnS(), z28.VnS(), z0.VnS(), 3); 494b8021494Sopenharmony_ci 495b8021494Sopenharmony_ci __ movprfx(z15.VnD(), p1.Merging(), z31.VnD()); 496b8021494Sopenharmony_ci __ fmsb(z15.VnD(), p1.Merging(), z15.VnD(), z31.VnD()); 497b8021494Sopenharmony_ci 498b8021494Sopenharmony_ci __ movprfx(z21.VnD(), p0.Zeroing(), z5.VnD()); 499b8021494Sopenharmony_ci __ fmsb(z21.VnD(), p0.Merging(), z19.VnD(), z21.VnD()); 500b8021494Sopenharmony_ci 501b8021494Sopenharmony_ci __ movprfx(z0.VnH(), p3.Merging(), z31.VnH()); 502b8021494Sopenharmony_ci __ fmul(z0.VnH(), p3.Merging(), z0.VnH(), z0.VnH()); 503b8021494Sopenharmony_ci 504b8021494Sopenharmony_ci __ movprfx(z31.VnH(), p6.Merging(), z8.VnH()); 505b8021494Sopenharmony_ci __ fmulx(z31.VnH(), p6.Merging(), z31.VnH(), z31.VnH()); 506b8021494Sopenharmony_ci 507b8021494Sopenharmony_ci __ movprfx(z17.VnH(), p1.Zeroing(), z10.VnH()); 508b8021494Sopenharmony_ci __ fneg(z17.VnH(), p1.Merging(), z17.VnH()); 509b8021494Sopenharmony_ci 510b8021494Sopenharmony_ci __ movprfx(z22, z31); 511b8021494Sopenharmony_ci __ fnmad(z22.VnH(), p1.Merging(), z22.VnH(), z23.VnH()); 512b8021494Sopenharmony_ci 513b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p0.Zeroing(), z26.VnD()); 514b8021494Sopenharmony_ci __ fnmad(z14.VnD(), p0.Merging(), z2.VnD(), z14.VnD()); 515b8021494Sopenharmony_ci 516b8021494Sopenharmony_ci __ movprfx(z13.VnH(), p6.Zeroing(), z29.VnH()); 517b8021494Sopenharmony_ci __ fnmla(z13.VnH(), p6.Merging(), z13.VnH(), z26.VnH()); 518b8021494Sopenharmony_ci 519b8021494Sopenharmony_ci __ movprfx(z19.VnH(), p7.Zeroing(), z25.VnH()); 520b8021494Sopenharmony_ci __ fnmla(z19.VnH(), p7.Merging(), z25.VnH(), z19.VnH()); 521b8021494Sopenharmony_ci 522b8021494Sopenharmony_ci __ movprfx(z27.VnH(), p5.Merging(), z24.VnH()); 523b8021494Sopenharmony_ci __ fnmls(z27.VnH(), p5.Merging(), z27.VnH(), z24.VnH()); 524b8021494Sopenharmony_ci 525b8021494Sopenharmony_ci __ movprfx(z6.VnH(), p6.Zeroing(), z21.VnH()); 526b8021494Sopenharmony_ci __ fnmls(z6.VnH(), p6.Merging(), z21.VnH(), z6.VnH()); 527b8021494Sopenharmony_ci 528b8021494Sopenharmony_ci __ movprfx(z7.VnS(), p3.Merging(), z23.VnS()); 529b8021494Sopenharmony_ci __ fnmsb(z7.VnS(), p3.Merging(), z7.VnS(), z23.VnS()); 530b8021494Sopenharmony_ci 531b8021494Sopenharmony_ci __ movprfx(z29.VnH(), p2.Zeroing(), z24.VnH()); 532b8021494Sopenharmony_ci __ fnmsb(z29.VnH(), p2.Merging(), z24.VnH(), z29.VnH()); 533b8021494Sopenharmony_ci 534b8021494Sopenharmony_ci __ movprfx(z7.VnH(), p6.Merging(), z23.VnH()); 535b8021494Sopenharmony_ci __ frecpx(z7.VnH(), p6.Merging(), z7.VnH()); 536b8021494Sopenharmony_ci 537b8021494Sopenharmony_ci __ movprfx(z17.VnS(), p5.Zeroing(), z2.VnS()); 538b8021494Sopenharmony_ci __ frinta(z17.VnS(), p5.Merging(), z17.VnS()); 539b8021494Sopenharmony_ci 540b8021494Sopenharmony_ci __ movprfx(z0.VnS(), p2.Zeroing(), z7.VnS()); 541b8021494Sopenharmony_ci __ frinti(z0.VnS(), p2.Merging(), z0.VnS()); 542b8021494Sopenharmony_ci 543b8021494Sopenharmony_ci __ movprfx(z8.VnH(), p3.Merging(), z20.VnH()); 544b8021494Sopenharmony_ci __ frintm(z8.VnH(), p3.Merging(), z8.VnH()); 545b8021494Sopenharmony_ci 546b8021494Sopenharmony_ci __ movprfx(z3.VnD(), p2.Zeroing(), z20.VnD()); 547b8021494Sopenharmony_ci __ frintn(z3.VnD(), p2.Merging(), z3.VnD()); 548b8021494Sopenharmony_ci 549b8021494Sopenharmony_ci __ movprfx(z11, z3); 550b8021494Sopenharmony_ci __ frintp(z11.VnS(), p4.Merging(), z11.VnS()); 551b8021494Sopenharmony_ci 552b8021494Sopenharmony_ci __ movprfx(z23, z29); 553b8021494Sopenharmony_ci __ frintx(z23.VnD(), p4.Merging(), z23.VnD()); 554b8021494Sopenharmony_ci 555b8021494Sopenharmony_ci __ movprfx(z4.VnH(), p4.Zeroing(), z14.VnH()); 556b8021494Sopenharmony_ci __ frintz(z4.VnH(), p4.Merging(), z4.VnH()); 557b8021494Sopenharmony_ci 558b8021494Sopenharmony_ci __ movprfx(z18.VnH(), p3.Zeroing(), z0.VnH()); 559b8021494Sopenharmony_ci __ fscale(z18.VnH(), p3.Merging(), z18.VnH(), z18.VnH()); 560b8021494Sopenharmony_ci 561b8021494Sopenharmony_ci __ movprfx(z2.VnS(), p6.Zeroing(), z4.VnS()); 562b8021494Sopenharmony_ci __ fsqrt(z2.VnS(), p6.Merging(), z2.VnS()); 563b8021494Sopenharmony_ci 564b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p4.Zeroing(), z31.VnD()); 565b8021494Sopenharmony_ci __ fsub(z14.VnD(), p4.Merging(), z14.VnD(), z14.VnD()); 566b8021494Sopenharmony_ci 567b8021494Sopenharmony_ci __ movprfx(z31.VnH(), p2.Merging(), z6.VnH()); 568b8021494Sopenharmony_ci __ fsubr(z31.VnH(), p2.Merging(), z31.VnH(), z31.VnH()); 569b8021494Sopenharmony_ci 570b8021494Sopenharmony_ci __ movprfx(z4, z30); 571b8021494Sopenharmony_ci __ ftmad(z4.VnH(), z4.VnH(), z4.VnH(), 2); 572b8021494Sopenharmony_ci 573b8021494Sopenharmony_ci __ movprfx(z25.VnD(), p6.Zeroing(), z2.VnD()); 574b8021494Sopenharmony_ci __ scvtf(z25.VnD(), p6.Merging(), z25.VnS()); 575b8021494Sopenharmony_ci 576b8021494Sopenharmony_ci __ movprfx(z0.VnD(), p3.Merging(), z16.VnD()); 577b8021494Sopenharmony_ci __ scvtf(z0.VnD(), p3.Merging(), z0.VnD()); 578b8021494Sopenharmony_ci 579b8021494Sopenharmony_ci __ movprfx(z19, z23); 580b8021494Sopenharmony_ci __ scvtf(z19.VnS(), p7.Merging(), z19.VnD()); 581b8021494Sopenharmony_ci 582b8021494Sopenharmony_ci __ movprfx(z19, z4); 583b8021494Sopenharmony_ci __ scvtf(z19.VnH(), p4.Merging(), z19.VnD()); 584b8021494Sopenharmony_ci 585b8021494Sopenharmony_ci __ movprfx(z13.VnD(), p4.Zeroing(), z6.VnD()); 586b8021494Sopenharmony_ci __ ucvtf(z13.VnD(), p4.Merging(), z13.VnS()); 587b8021494Sopenharmony_ci 588b8021494Sopenharmony_ci __ movprfx(z6.VnH(), p0.Zeroing(), z14.VnH()); 589b8021494Sopenharmony_ci __ ucvtf(z6.VnH(), p0.Merging(), z6.VnH()); 590b8021494Sopenharmony_ci 591b8021494Sopenharmony_ci __ movprfx(z19.VnS(), p4.Merging(), z12.VnS()); 592b8021494Sopenharmony_ci __ ucvtf(z19.VnH(), p4.Merging(), z19.VnS()); 593b8021494Sopenharmony_ci 594b8021494Sopenharmony_ci __ movprfx(z0.VnD(), p5.Zeroing(), z12.VnD()); 595b8021494Sopenharmony_ci __ ucvtf(z0.VnH(), p5.Merging(), z0.VnD()); 596b8021494Sopenharmony_ci 597b8021494Sopenharmony_ci __ movprfx(z30, z5); 598b8021494Sopenharmony_ci __ fmmla(z30.VnS(), z30.VnS(), z18.VnS()); 599b8021494Sopenharmony_ci 600b8021494Sopenharmony_ci __ movprfx(z31, z5); 601b8021494Sopenharmony_ci __ fmmla(z31.VnD(), z31.VnD(), z18.VnD()); 602b8021494Sopenharmony_ci } 603b8021494Sopenharmony_ci assm.FinalizeCode(); 604b8021494Sopenharmony_ci 605b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 606b8021494Sopenharmony_ci} 607b8021494Sopenharmony_ci 608b8021494Sopenharmony_ciTEST(movprfx_negative_instructions) { 609b8021494Sopenharmony_ci Assembler assm; 610b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); 611b8021494Sopenharmony_ci { 612b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 613b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 614b8021494Sopenharmony_ci static const size_t kPairCount = 13; 615b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 616b8021494Sopenharmony_ci 617b8021494Sopenharmony_ci __ movprfx(z26, z11); 618b8021494Sopenharmony_ci __ add(z26.VnB(), z11.VnB(), z4.VnB()); 619b8021494Sopenharmony_ci 620b8021494Sopenharmony_ci // The merging form can take movprfx, but the zeroing form cannot. 621b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p3.Zeroing(), z7.VnB()); 622b8021494Sopenharmony_ci __ cpy(z29.VnB(), p3.Zeroing(), -42); 623b8021494Sopenharmony_ci 624b8021494Sopenharmony_ci // Frecpx can take movprfx, but frecpe and frecps cannot. 625b8021494Sopenharmony_ci __ movprfx(z13, z15); 626b8021494Sopenharmony_ci __ frecpe(z13.VnD(), z26.VnD()); 627b8021494Sopenharmony_ci 628b8021494Sopenharmony_ci __ movprfx(z19, z1); 629b8021494Sopenharmony_ci __ frecps(z19.VnD(), z1.VnD(), z12.VnD()); 630b8021494Sopenharmony_ci 631b8021494Sopenharmony_ci __ movprfx(z6, z12); 632b8021494Sopenharmony_ci __ frsqrte(z6.VnS(), z12.VnS()); 633b8021494Sopenharmony_ci 634b8021494Sopenharmony_ci __ movprfx(z29, z5); 635b8021494Sopenharmony_ci __ frsqrts(z29.VnH(), z5.VnH(), z20.VnH()); 636b8021494Sopenharmony_ci 637b8021494Sopenharmony_ci // Ftmad can take movprfx, but ftsmul and ftssel cannot. 638b8021494Sopenharmony_ci __ movprfx(z1, z31); 639b8021494Sopenharmony_ci __ ftsmul(z1.VnD(), z31.VnD(), z16.VnD()); 640b8021494Sopenharmony_ci 641b8021494Sopenharmony_ci __ movprfx(z8, z27); 642b8021494Sopenharmony_ci __ ftssel(z8.VnH(), z27.VnH(), z1.VnH()); 643b8021494Sopenharmony_ci 644b8021494Sopenharmony_ci // This looks like a merging unary operation, but it's actually an alias of 645b8021494Sopenharmony_ci // sel, which isn't destructive. 646b8021494Sopenharmony_ci __ movprfx(z0, z18); 647b8021494Sopenharmony_ci __ mov(z0.VnS(), p6.Merging(), z18.VnS()); 648b8021494Sopenharmony_ci 649b8021494Sopenharmony_ci // The merging form can take movprfx, but the zeroing form cannot. 650b8021494Sopenharmony_ci __ movprfx(z12.VnS(), p2.Merging(), z11.VnS()); 651b8021494Sopenharmony_ci __ mov(z12.VnS(), p2.Zeroing(), -42); 652b8021494Sopenharmony_ci 653b8021494Sopenharmony_ci __ movprfx(z13, z6); 654b8021494Sopenharmony_ci __ movprfx(z13, z2); 655b8021494Sopenharmony_ci 656b8021494Sopenharmony_ci // Movprfx can never prefix itself. 657b8021494Sopenharmony_ci __ movprfx(z3.VnD(), p5.Zeroing(), z8.VnD()); 658b8021494Sopenharmony_ci __ movprfx(z3.VnD(), p5.Merging(), z8.VnD()); 659b8021494Sopenharmony_ci 660b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p3.Zeroing(), z14.VnD()); 661b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p3.Zeroing(), z18.VnD()); 662b8021494Sopenharmony_ci } 663b8021494Sopenharmony_ci assm.FinalizeCode(); 664b8021494Sopenharmony_ci 665b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 666b8021494Sopenharmony_ci} 667b8021494Sopenharmony_ci 668b8021494Sopenharmony_ciTEST(movprfx_negative_lane_size) { 669b8021494Sopenharmony_ci // Test that CanTakeSVEMovprfx() checks that the (predicated) movprfx lane 670b8021494Sopenharmony_ci // size is compatible with the prefixed instruction. 671b8021494Sopenharmony_ci Assembler assm; 672b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); 673b8021494Sopenharmony_ci { 674b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 675b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 676b8021494Sopenharmony_ci static const size_t kPairCount = 63; 677b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 678b8021494Sopenharmony_ci 679b8021494Sopenharmony_ci __ movprfx(z0.VnH(), p2.Zeroing(), z17.VnH()); 680b8021494Sopenharmony_ci __ abs(z0.VnS(), p2.Merging(), z17.VnS()); 681b8021494Sopenharmony_ci 682b8021494Sopenharmony_ci __ movprfx(z10.VnD(), p0.Zeroing(), z4.VnD()); 683b8021494Sopenharmony_ci __ add(z10.VnS(), p0.Merging(), z10.VnS(), z2.VnS()); 684b8021494Sopenharmony_ci 685b8021494Sopenharmony_ci __ movprfx(z25.VnS(), p4.Zeroing(), z26.VnS()); 686b8021494Sopenharmony_ci __ and_(z25.VnB(), p4.Merging(), z25.VnB(), z27.VnB()); 687b8021494Sopenharmony_ci 688b8021494Sopenharmony_ci __ movprfx(z26.VnD(), p5.Merging(), z23.VnD()); 689b8021494Sopenharmony_ci __ asr(z26.VnB(), p5.Merging(), z26.VnB(), 3); 690b8021494Sopenharmony_ci 691b8021494Sopenharmony_ci __ movprfx(z25.VnS(), p7.Zeroing(), z14.VnS()); 692b8021494Sopenharmony_ci __ asr(z25.VnH(), p7.Merging(), z25.VnH(), z14.VnH()); 693b8021494Sopenharmony_ci 694b8021494Sopenharmony_ci __ movprfx(z12.VnS(), p7.Zeroing(), z23.VnS()); 695b8021494Sopenharmony_ci __ asr(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnD()); 696b8021494Sopenharmony_ci 697b8021494Sopenharmony_ci __ movprfx(z3.VnH(), p4.Zeroing(), z18.VnH()); 698b8021494Sopenharmony_ci __ asr(z3.VnD(), p4.Merging(), z3.VnD(), z15.VnD()); 699b8021494Sopenharmony_ci 700b8021494Sopenharmony_ci __ movprfx(z29.VnH(), p4.Merging(), z31.VnH()); 701b8021494Sopenharmony_ci __ asrd(z29.VnB(), p4.Merging(), z29.VnB(), 3); 702b8021494Sopenharmony_ci 703b8021494Sopenharmony_ci __ movprfx(z31.VnH(), p5.Zeroing(), z14.VnH()); 704b8021494Sopenharmony_ci __ asrr(z31.VnB(), p5.Merging(), z31.VnB(), z5.VnB()); 705b8021494Sopenharmony_ci 706b8021494Sopenharmony_ci __ movprfx(z0.VnS(), p6.Zeroing(), z18.VnS()); 707b8021494Sopenharmony_ci __ bic(z0.VnB(), p6.Merging(), z0.VnB(), z23.VnB()); 708b8021494Sopenharmony_ci 709b8021494Sopenharmony_ci __ movprfx(z19.VnH(), p2.Zeroing(), z24.VnH()); 710b8021494Sopenharmony_ci __ cls(z19.VnB(), p2.Merging(), z24.VnB()); 711b8021494Sopenharmony_ci 712b8021494Sopenharmony_ci __ movprfx(z14.VnS(), p5.Zeroing(), z4.VnS()); 713b8021494Sopenharmony_ci __ clz(z14.VnD(), p5.Merging(), z10.VnD()); 714b8021494Sopenharmony_ci 715b8021494Sopenharmony_ci __ movprfx(z0.VnD(), p5.Merging(), z2.VnD()); 716b8021494Sopenharmony_ci __ cnot(z0.VnH(), p5.Merging(), z2.VnH()); 717b8021494Sopenharmony_ci 718b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p3.Zeroing(), z19.VnB()); 719b8021494Sopenharmony_ci __ cnt(z0.VnH(), p3.Merging(), z8.VnH()); 720b8021494Sopenharmony_ci 721b8021494Sopenharmony_ci __ movprfx(z29.VnS(), p0.Merging(), z7.VnS()); 722b8021494Sopenharmony_ci __ cpy(z29.VnD(), p0.Merging(), -42); 723b8021494Sopenharmony_ci 724b8021494Sopenharmony_ci __ movprfx(z13.VnB(), p2.Merging(), z31.VnB()); 725b8021494Sopenharmony_ci __ cpy(z13.VnS(), p2.Merging(), w13); 726b8021494Sopenharmony_ci 727b8021494Sopenharmony_ci __ movprfx(z0.VnS(), p3.Merging(), z15.VnS()); 728b8021494Sopenharmony_ci __ cpy(z0.VnH(), p3.Merging(), h0); 729b8021494Sopenharmony_ci 730b8021494Sopenharmony_ci __ movprfx(z2.VnD(), p6.Zeroing(), z26.VnD()); 731b8021494Sopenharmony_ci __ eor(z2.VnB(), p6.Merging(), z2.VnB(), z26.VnB()); 732b8021494Sopenharmony_ci 733b8021494Sopenharmony_ci __ movprfx(z7.VnS(), p7.Zeroing(), z30.VnS()); 734b8021494Sopenharmony_ci __ lsl(z7.VnD(), p7.Merging(), z7.VnD(), 3); 735b8021494Sopenharmony_ci 736b8021494Sopenharmony_ci __ movprfx(z11.VnH(), p3.Merging(), z23.VnH()); 737b8021494Sopenharmony_ci __ lsl(z11.VnB(), p3.Merging(), z11.VnB(), z21.VnB()); 738b8021494Sopenharmony_ci 739b8021494Sopenharmony_ci __ movprfx(z31.VnS(), p7.Zeroing(), z21.VnS()); 740b8021494Sopenharmony_ci __ lsl(z31.VnH(), p7.Merging(), z31.VnH(), z21.VnD()); 741b8021494Sopenharmony_ci 742b8021494Sopenharmony_ci __ movprfx(z26.VnH(), p0.Merging(), z0.VnH()); 743b8021494Sopenharmony_ci __ lsl(z26.VnD(), p0.Merging(), z26.VnD(), z24.VnD()); 744b8021494Sopenharmony_ci 745b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p2.Zeroing(), z6.VnS()); 746b8021494Sopenharmony_ci __ lslr(z1.VnB(), p2.Merging(), z1.VnB(), z6.VnB()); 747b8021494Sopenharmony_ci 748b8021494Sopenharmony_ci __ movprfx(z4.VnD(), p4.Zeroing(), z6.VnD()); 749b8021494Sopenharmony_ci __ lsr(z4.VnH(), p4.Merging(), z4.VnH(), 3); 750b8021494Sopenharmony_ci 751b8021494Sopenharmony_ci __ movprfx(z27.VnH(), p0.Zeroing(), z29.VnH()); 752b8021494Sopenharmony_ci __ lsr(z27.VnS(), p0.Merging(), z27.VnS(), z29.VnS()); 753b8021494Sopenharmony_ci 754b8021494Sopenharmony_ci __ movprfx(z5.VnD(), p2.Zeroing(), z16.VnD()); 755b8021494Sopenharmony_ci __ lsr(z5.VnH(), p2.Merging(), z5.VnH(), z2.VnD()); 756b8021494Sopenharmony_ci 757b8021494Sopenharmony_ci __ movprfx(z27.VnB(), p4.Zeroing(), z5.VnB()); 758b8021494Sopenharmony_ci __ lsr(z27.VnD(), p4.Merging(), z27.VnD(), z5.VnD()); 759b8021494Sopenharmony_ci 760b8021494Sopenharmony_ci __ movprfx(z27.VnS(), p3.Merging(), z13.VnS()); 761b8021494Sopenharmony_ci __ lsrr(z27.VnD(), p3.Merging(), z27.VnD(), z13.VnD()); 762b8021494Sopenharmony_ci 763b8021494Sopenharmony_ci __ movprfx(z30.VnS(), p2.Zeroing(), z14.VnS()); 764b8021494Sopenharmony_ci __ mad(z30.VnB(), p2.Merging(), z20.VnB(), z14.VnB()); 765b8021494Sopenharmony_ci 766b8021494Sopenharmony_ci __ movprfx(z14.VnB(), p6.Merging(), z11.VnB()); 767b8021494Sopenharmony_ci __ mla(z14.VnD(), p6.Merging(), z28.VnD(), z11.VnD()); 768b8021494Sopenharmony_ci 769b8021494Sopenharmony_ci __ movprfx(z28.VnH(), p2.Zeroing(), z22.VnH()); 770b8021494Sopenharmony_ci __ mls(z28.VnS(), p2.Merging(), z3.VnS(), z22.VnS()); 771b8021494Sopenharmony_ci 772b8021494Sopenharmony_ci // Aliases of cpy. 773b8021494Sopenharmony_ci __ movprfx(z18.VnH(), p6.Zeroing(), z25.VnH()); 774b8021494Sopenharmony_ci __ mov(z18.VnD(), p6.Merging(), -42); 775b8021494Sopenharmony_ci 776b8021494Sopenharmony_ci __ movprfx(z22.VnD(), p2.Zeroing(), z6.VnD()); 777b8021494Sopenharmony_ci __ mov(z22.VnS(), p2.Merging(), w22); 778b8021494Sopenharmony_ci 779b8021494Sopenharmony_ci __ movprfx(z3.VnH(), p0.Zeroing(), z13.VnH()); 780b8021494Sopenharmony_ci __ mov(z3.VnB(), p0.Merging(), b0); 781b8021494Sopenharmony_ci 782b8021494Sopenharmony_ci __ movprfx(z31.VnS(), p7.Zeroing(), z12.VnS()); 783b8021494Sopenharmony_ci __ msb(z31.VnH(), p7.Merging(), z14.VnH(), z12.VnH()); 784b8021494Sopenharmony_ci 785b8021494Sopenharmony_ci __ movprfx(z16.VnS(), p7.Zeroing(), z6.VnS()); 786b8021494Sopenharmony_ci __ mul(z16.VnB(), p7.Merging(), z16.VnB(), z30.VnB()); 787b8021494Sopenharmony_ci 788b8021494Sopenharmony_ci __ movprfx(z17.VnD(), p7.Merging(), z1.VnD()); 789b8021494Sopenharmony_ci __ neg(z17.VnB(), p7.Merging(), z1.VnB()); 790b8021494Sopenharmony_ci 791b8021494Sopenharmony_ci __ movprfx(z31.VnH(), p4.Zeroing(), z12.VnH()); 792b8021494Sopenharmony_ci __ not_(z31.VnB(), p4.Merging(), z12.VnB()); 793b8021494Sopenharmony_ci 794b8021494Sopenharmony_ci __ movprfx(z9.VnH(), p3.Zeroing(), z23.VnH()); 795b8021494Sopenharmony_ci __ orr(z9.VnS(), p3.Merging(), z9.VnS(), z13.VnS()); 796b8021494Sopenharmony_ci 797b8021494Sopenharmony_ci __ movprfx(z25.VnD(), p2.Zeroing(), z21.VnD()); 798b8021494Sopenharmony_ci __ rbit(z25.VnS(), p2.Merging(), z21.VnS()); 799b8021494Sopenharmony_ci 800b8021494Sopenharmony_ci __ movprfx(z26.VnH(), p3.Merging(), z13.VnH()); 801b8021494Sopenharmony_ci __ revb(z26.VnD(), p3.Merging(), z13.VnD()); 802b8021494Sopenharmony_ci 803b8021494Sopenharmony_ci __ movprfx(z8.VnH(), p5.Merging(), z20.VnH()); 804b8021494Sopenharmony_ci __ revh(z8.VnS(), p5.Merging(), z0.VnS()); 805b8021494Sopenharmony_ci 806b8021494Sopenharmony_ci __ movprfx(z22.VnH(), p6.Merging(), z15.VnH()); 807b8021494Sopenharmony_ci __ revw(z22.VnD(), p6.Merging(), z10.VnD()); 808b8021494Sopenharmony_ci 809b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p3.Merging(), z15.VnD()); 810b8021494Sopenharmony_ci __ sabd(z1.VnB(), p3.Merging(), z1.VnB(), z15.VnB()); 811b8021494Sopenharmony_ci 812b8021494Sopenharmony_ci __ movprfx(z25.VnD(), p1.Zeroing(), z30.VnD()); 813b8021494Sopenharmony_ci __ sdiv(z25.VnS(), p1.Merging(), z25.VnS(), z30.VnS()); 814b8021494Sopenharmony_ci 815b8021494Sopenharmony_ci __ movprfx(z19.VnS(), p3.Zeroing(), z11.VnS()); 816b8021494Sopenharmony_ci __ sdivr(z19.VnD(), p3.Merging(), z19.VnD(), z24.VnD()); 817b8021494Sopenharmony_ci 818b8021494Sopenharmony_ci __ movprfx(z12.VnH(), p2.Merging(), z2.VnH()); 819b8021494Sopenharmony_ci __ smax(z12.VnS(), p2.Merging(), z12.VnS(), z24.VnS()); 820b8021494Sopenharmony_ci 821b8021494Sopenharmony_ci __ movprfx(z3.VnD(), p1.Merging(), z15.VnD()); 822b8021494Sopenharmony_ci __ smin(z3.VnS(), p1.Merging(), z3.VnS(), z20.VnS()); 823b8021494Sopenharmony_ci 824b8021494Sopenharmony_ci __ movprfx(z13.VnS(), p5.Merging(), z22.VnS()); 825b8021494Sopenharmony_ci __ smulh(z13.VnB(), p5.Merging(), z13.VnB(), z27.VnB()); 826b8021494Sopenharmony_ci 827b8021494Sopenharmony_ci __ movprfx(z11.VnH(), p5.Zeroing(), z25.VnH()); 828b8021494Sopenharmony_ci __ sub(z11.VnB(), p5.Merging(), z11.VnB(), z7.VnB()); 829b8021494Sopenharmony_ci 830b8021494Sopenharmony_ci __ movprfx(z3.VnB(), p6.Merging(), z13.VnB()); 831b8021494Sopenharmony_ci __ subr(z3.VnS(), p6.Merging(), z3.VnS(), z13.VnS()); 832b8021494Sopenharmony_ci 833b8021494Sopenharmony_ci __ movprfx(z26.VnH(), p5.Merging(), z1.VnH()); 834b8021494Sopenharmony_ci __ sxtb(z26.VnS(), p5.Merging(), z17.VnS()); 835b8021494Sopenharmony_ci 836b8021494Sopenharmony_ci __ movprfx(z11.VnB(), p7.Zeroing(), z26.VnB()); 837b8021494Sopenharmony_ci __ sxth(z11.VnS(), p7.Merging(), z26.VnS()); 838b8021494Sopenharmony_ci 839b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p2.Merging(), z21.VnS()); 840b8021494Sopenharmony_ci __ sxtw(z1.VnD(), p2.Merging(), z21.VnD()); 841b8021494Sopenharmony_ci 842b8021494Sopenharmony_ci __ movprfx(z4.VnS(), p6.Zeroing(), z6.VnS()); 843b8021494Sopenharmony_ci __ uabd(z4.VnH(), p6.Merging(), z4.VnH(), z6.VnH()); 844b8021494Sopenharmony_ci 845b8021494Sopenharmony_ci __ movprfx(z26.VnB(), p2.Zeroing(), z11.VnB()); 846b8021494Sopenharmony_ci __ udiv(z26.VnD(), p2.Merging(), z26.VnD(), z11.VnD()); 847b8021494Sopenharmony_ci 848b8021494Sopenharmony_ci __ movprfx(z19.VnB(), p5.Merging(), z6.VnB()); 849b8021494Sopenharmony_ci __ udivr(z19.VnS(), p5.Merging(), z19.VnS(), z9.VnS()); 850b8021494Sopenharmony_ci 851b8021494Sopenharmony_ci __ movprfx(z16.VnB(), p4.Merging(), z6.VnB()); 852b8021494Sopenharmony_ci __ umax(z16.VnH(), p4.Merging(), z16.VnH(), z6.VnH()); 853b8021494Sopenharmony_ci 854b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z4.VnD()); 855b8021494Sopenharmony_ci __ umin(z1.VnS(), p0.Merging(), z1.VnS(), z28.VnS()); 856b8021494Sopenharmony_ci 857b8021494Sopenharmony_ci __ movprfx(z25.VnD(), p7.Merging(), z4.VnD()); 858b8021494Sopenharmony_ci __ umulh(z25.VnB(), p7.Merging(), z25.VnB(), z16.VnB()); 859b8021494Sopenharmony_ci 860b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p4.Merging(), z2.VnB()); 861b8021494Sopenharmony_ci __ uxtb(z29.VnS(), p4.Merging(), z31.VnS()); 862b8021494Sopenharmony_ci 863b8021494Sopenharmony_ci __ movprfx(z27.VnH(), p5.Merging(), z21.VnH()); 864b8021494Sopenharmony_ci __ uxth(z27.VnD(), p5.Merging(), z1.VnD()); 865b8021494Sopenharmony_ci 866b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p2.Merging(), z7.VnB()); 867b8021494Sopenharmony_ci __ uxtw(z29.VnD(), p2.Merging(), z7.VnD()); 868b8021494Sopenharmony_ci } 869b8021494Sopenharmony_ci assm.FinalizeCode(); 870b8021494Sopenharmony_ci 871b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 872b8021494Sopenharmony_ci} 873b8021494Sopenharmony_ci 874b8021494Sopenharmony_ciTEST(movprfx_negative_lane_size_fp) { 875b8021494Sopenharmony_ci // Test that CanTakeSVEMovprfx() checks that the (predicated) movprfx lane 876b8021494Sopenharmony_ci // size is compatible with the prefixed instruction. 877b8021494Sopenharmony_ci Assembler assm; 878b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); 879b8021494Sopenharmony_ci { 880b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 881b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 882b8021494Sopenharmony_ci static const size_t kPairCount = 64; 883b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 884b8021494Sopenharmony_ci 885b8021494Sopenharmony_ci __ movprfx(z29.VnD(), p5.Zeroing(), z8.VnD()); 886b8021494Sopenharmony_ci __ fabd(z29.VnS(), p5.Merging(), z29.VnS(), z26.VnS()); 887b8021494Sopenharmony_ci 888b8021494Sopenharmony_ci __ movprfx(z9.VnB(), p0.Zeroing(), z1.VnB()); 889b8021494Sopenharmony_ci __ fabs(z9.VnS(), p0.Merging(), z15.VnS()); 890b8021494Sopenharmony_ci 891b8021494Sopenharmony_ci __ movprfx(z24.VnD(), p0.Zeroing(), z8.VnD()); 892b8021494Sopenharmony_ci __ fadd(z24.VnH(), p0.Merging(), z24.VnH(), 0.5); 893b8021494Sopenharmony_ci 894b8021494Sopenharmony_ci __ movprfx(z24.VnB(), p1.Zeroing(), z27.VnB()); 895b8021494Sopenharmony_ci __ fadd(z24.VnH(), p1.Merging(), z24.VnH(), z27.VnH()); 896b8021494Sopenharmony_ci 897b8021494Sopenharmony_ci __ movprfx(z14.VnH(), p7.Merging(), z12.VnH()); 898b8021494Sopenharmony_ci __ fcadd(z14.VnD(), p7.Merging(), z14.VnD(), z12.VnD(), 90); 899b8021494Sopenharmony_ci 900b8021494Sopenharmony_ci __ movprfx(z10.VnB(), p6.Merging(), z11.VnB()); 901b8021494Sopenharmony_ci __ fcpy(z10.VnH(), p6.Merging(), 1.25); 902b8021494Sopenharmony_ci 903b8021494Sopenharmony_ci __ movprfx(z12.VnB(), p6.Merging(), z18.VnB()); 904b8021494Sopenharmony_ci __ fcvt(z12.VnD(), p6.Merging(), z18.VnH()); 905b8021494Sopenharmony_ci 906b8021494Sopenharmony_ci __ movprfx(z18.VnH(), p7.Zeroing(), z2.VnH()); 907b8021494Sopenharmony_ci __ fcvt(z18.VnD(), p7.Merging(), z0.VnS()); 908b8021494Sopenharmony_ci 909b8021494Sopenharmony_ci __ movprfx(z3.VnH(), p5.Merging(), z14.VnH()); 910b8021494Sopenharmony_ci __ fcvt(z3.VnS(), p5.Merging(), z21.VnD()); 911b8021494Sopenharmony_ci 912b8021494Sopenharmony_ci __ movprfx(z15.VnH(), p1.Zeroing(), z12.VnH()); 913b8021494Sopenharmony_ci __ fcvt(z15.VnH(), p1.Merging(), z12.VnD()); 914b8021494Sopenharmony_ci 915b8021494Sopenharmony_ci __ movprfx(z3.VnH(), p2.Merging(), z22.VnH()); 916b8021494Sopenharmony_ci __ fcvtzs(z3.VnD(), p2.Merging(), z7.VnH()); 917b8021494Sopenharmony_ci 918b8021494Sopenharmony_ci __ movprfx(z17.VnS(), p3.Merging(), z14.VnS()); 919b8021494Sopenharmony_ci __ fcvtzs(z17.VnD(), p3.Merging(), z14.VnD()); 920b8021494Sopenharmony_ci 921b8021494Sopenharmony_ci __ movprfx(z2.VnH(), p1.Zeroing(), z16.VnH()); 922b8021494Sopenharmony_ci __ fcvtzs(z2.VnS(), p1.Merging(), z31.VnH()); 923b8021494Sopenharmony_ci 924b8021494Sopenharmony_ci __ movprfx(z13.VnB(), p2.Merging(), z9.VnB()); 925b8021494Sopenharmony_ci __ fcvtzs(z13.VnS(), p2.Merging(), z23.VnD()); 926b8021494Sopenharmony_ci 927b8021494Sopenharmony_ci __ movprfx(z19.VnB(), p1.Merging(), z4.VnB()); 928b8021494Sopenharmony_ci __ fcvtzu(z19.VnD(), p1.Merging(), z14.VnH()); 929b8021494Sopenharmony_ci 930b8021494Sopenharmony_ci __ movprfx(z29.VnS(), p2.Merging(), z19.VnS()); 931b8021494Sopenharmony_ci __ fcvtzu(z29.VnD(), p2.Merging(), z19.VnD()); 932b8021494Sopenharmony_ci 933b8021494Sopenharmony_ci __ movprfx(z21.VnS(), p4.Zeroing(), z17.VnS()); 934b8021494Sopenharmony_ci __ fcvtzu(z21.VnD(), p4.Merging(), z17.VnS()); 935b8021494Sopenharmony_ci 936b8021494Sopenharmony_ci __ movprfx(z19.VnH(), p4.Zeroing(), z30.VnH()); 937b8021494Sopenharmony_ci __ fcvtzu(z19.VnS(), p4.Merging(), z16.VnD()); 938b8021494Sopenharmony_ci 939b8021494Sopenharmony_ci __ movprfx(z10.VnS(), p7.Zeroing(), z27.VnS()); 940b8021494Sopenharmony_ci __ fdiv(z10.VnH(), p7.Merging(), z10.VnH(), z27.VnH()); 941b8021494Sopenharmony_ci 942b8021494Sopenharmony_ci __ movprfx(z7.VnD(), p7.Zeroing(), z17.VnD()); 943b8021494Sopenharmony_ci __ fdivr(z7.VnH(), p7.Merging(), z7.VnH(), z28.VnH()); 944b8021494Sopenharmony_ci 945b8021494Sopenharmony_ci __ movprfx(z22.VnB(), p0.Merging(), z27.VnB()); 946b8021494Sopenharmony_ci __ fmad(z22.VnH(), p0.Merging(), z27.VnH(), z15.VnH()); 947b8021494Sopenharmony_ci 948b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p1.Zeroing(), z11.VnD()); 949b8021494Sopenharmony_ci __ fmax(z14.VnS(), p1.Merging(), z14.VnS(), 0.0); 950b8021494Sopenharmony_ci 951b8021494Sopenharmony_ci __ movprfx(z27.VnB(), p5.Merging(), z14.VnB()); 952b8021494Sopenharmony_ci __ fmax(z27.VnD(), p5.Merging(), z27.VnD(), z14.VnD()); 953b8021494Sopenharmony_ci 954b8021494Sopenharmony_ci __ movprfx(z31.VnH(), p7.Merging(), z24.VnH()); 955b8021494Sopenharmony_ci __ fmaxnm(z31.VnD(), p7.Merging(), z31.VnD(), 0.0); 956b8021494Sopenharmony_ci 957b8021494Sopenharmony_ci __ movprfx(z11.VnD(), p7.Zeroing(), z25.VnD()); 958b8021494Sopenharmony_ci __ fmaxnm(z11.VnS(), p7.Merging(), z11.VnS(), z28.VnS()); 959b8021494Sopenharmony_ci 960b8021494Sopenharmony_ci __ movprfx(z31.VnD(), p6.Merging(), z19.VnD()); 961b8021494Sopenharmony_ci __ fmin(z31.VnH(), p6.Merging(), z31.VnH(), 0.0); 962b8021494Sopenharmony_ci 963b8021494Sopenharmony_ci __ movprfx(z20.VnS(), p3.Zeroing(), z15.VnS()); 964b8021494Sopenharmony_ci __ fmin(z20.VnH(), p3.Merging(), z20.VnH(), z8.VnH()); 965b8021494Sopenharmony_ci 966b8021494Sopenharmony_ci __ movprfx(z6.VnS(), p0.Merging(), z30.VnS()); 967b8021494Sopenharmony_ci __ fminnm(z6.VnH(), p0.Merging(), z6.VnH(), 0.0); 968b8021494Sopenharmony_ci 969b8021494Sopenharmony_ci __ movprfx(z1.VnH(), p1.Zeroing(), z14.VnH()); 970b8021494Sopenharmony_ci __ fminnm(z1.VnS(), p1.Merging(), z1.VnS(), z14.VnS()); 971b8021494Sopenharmony_ci 972b8021494Sopenharmony_ci __ movprfx(z13.VnB(), p3.Zeroing(), z21.VnB()); 973b8021494Sopenharmony_ci __ fmla(z13.VnD(), p3.Merging(), z12.VnD(), z21.VnD()); 974b8021494Sopenharmony_ci 975b8021494Sopenharmony_ci __ movprfx(z15.VnS(), p1.Zeroing(), z20.VnS()); 976b8021494Sopenharmony_ci __ fmls(z15.VnH(), p1.Merging(), z28.VnH(), z20.VnH()); 977b8021494Sopenharmony_ci 978b8021494Sopenharmony_ci __ movprfx(z19.VnD(), p3.Zeroing(), z31.VnD()); 979b8021494Sopenharmony_ci __ fmov(z19.VnH(), p3.Merging(), 0.0); 980b8021494Sopenharmony_ci 981b8021494Sopenharmony_ci __ movprfx(z16.VnS(), p7.Merging(), z30.VnS()); 982b8021494Sopenharmony_ci __ fmov(z16.VnH(), p7.Merging(), 2.5); 983b8021494Sopenharmony_ci 984b8021494Sopenharmony_ci __ movprfx(z21.VnB(), p1.Merging(), z28.VnB()); 985b8021494Sopenharmony_ci __ fmsb(z21.VnH(), p1.Merging(), z30.VnH(), z28.VnH()); 986b8021494Sopenharmony_ci 987b8021494Sopenharmony_ci __ movprfx(z21.VnS(), p1.Zeroing(), z19.VnS()); 988b8021494Sopenharmony_ci __ fmul(z21.VnH(), p1.Merging(), z21.VnH(), 2.0); 989b8021494Sopenharmony_ci 990b8021494Sopenharmony_ci __ movprfx(z28.VnB(), p7.Zeroing(), z8.VnB()); 991b8021494Sopenharmony_ci __ fmul(z28.VnS(), p7.Merging(), z28.VnS(), z26.VnS()); 992b8021494Sopenharmony_ci 993b8021494Sopenharmony_ci __ movprfx(z2.VnB(), p4.Merging(), z31.VnB()); 994b8021494Sopenharmony_ci __ fmulx(z2.VnH(), p4.Merging(), z2.VnH(), z31.VnH()); 995b8021494Sopenharmony_ci 996b8021494Sopenharmony_ci __ movprfx(z6.VnB(), p2.Zeroing(), z0.VnB()); 997b8021494Sopenharmony_ci __ fneg(z6.VnS(), p2.Merging(), z28.VnS()); 998b8021494Sopenharmony_ci 999b8021494Sopenharmony_ci __ movprfx(z26.VnB(), p0.Zeroing(), z21.VnB()); 1000b8021494Sopenharmony_ci __ fnmad(z26.VnH(), p0.Merging(), z21.VnH(), z18.VnH()); 1001b8021494Sopenharmony_ci 1002b8021494Sopenharmony_ci __ movprfx(z15.VnB(), p1.Zeroing(), z26.VnB()); 1003b8021494Sopenharmony_ci __ fnmla(z15.VnH(), p1.Merging(), z26.VnH(), z18.VnH()); 1004b8021494Sopenharmony_ci 1005b8021494Sopenharmony_ci __ movprfx(z16.VnS(), p0.Merging(), z1.VnS()); 1006b8021494Sopenharmony_ci __ fnmls(z16.VnD(), p0.Merging(), z1.VnD(), z13.VnD()); 1007b8021494Sopenharmony_ci 1008b8021494Sopenharmony_ci __ movprfx(z4.VnH(), p0.Zeroing(), z16.VnH()); 1009b8021494Sopenharmony_ci __ fnmsb(z4.VnS(), p0.Merging(), z30.VnS(), z3.VnS()); 1010b8021494Sopenharmony_ci 1011b8021494Sopenharmony_ci // Note that frecpe and frecps _cannot_ take movprfx. 1012b8021494Sopenharmony_ci __ movprfx(z9.VnH(), p0.Zeroing(), z21.VnH()); 1013b8021494Sopenharmony_ci __ frecpx(z9.VnS(), p0.Merging(), z14.VnS()); 1014b8021494Sopenharmony_ci 1015b8021494Sopenharmony_ci __ movprfx(z6.VnH(), p2.Zeroing(), z28.VnH()); 1016b8021494Sopenharmony_ci __ frinta(z6.VnD(), p2.Merging(), z28.VnD()); 1017b8021494Sopenharmony_ci 1018b8021494Sopenharmony_ci __ movprfx(z12.VnS(), p4.Zeroing(), z7.VnS()); 1019b8021494Sopenharmony_ci __ frinti(z12.VnH(), p4.Merging(), z7.VnH()); 1020b8021494Sopenharmony_ci 1021b8021494Sopenharmony_ci __ movprfx(z6.VnB(), p5.Merging(), z20.VnB()); 1022b8021494Sopenharmony_ci __ frintm(z6.VnD(), p5.Merging(), z20.VnD()); 1023b8021494Sopenharmony_ci 1024b8021494Sopenharmony_ci __ movprfx(z7.VnB(), p6.Merging(), z19.VnB()); 1025b8021494Sopenharmony_ci __ frintn(z7.VnH(), p6.Merging(), z11.VnH()); 1026b8021494Sopenharmony_ci 1027b8021494Sopenharmony_ci __ movprfx(z12.VnD(), p2.Merging(), z31.VnD()); 1028b8021494Sopenharmony_ci __ frintp(z12.VnS(), p2.Merging(), z31.VnS()); 1029b8021494Sopenharmony_ci 1030b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p5.Merging(), z10.VnS()); 1031b8021494Sopenharmony_ci __ frintx(z1.VnD(), p5.Merging(), z0.VnD()); 1032b8021494Sopenharmony_ci 1033b8021494Sopenharmony_ci __ movprfx(z6.VnH(), p0.Merging(), z12.VnH()); 1034b8021494Sopenharmony_ci __ frintz(z6.VnS(), p0.Merging(), z7.VnS()); 1035b8021494Sopenharmony_ci 1036b8021494Sopenharmony_ci __ movprfx(z8.VnH(), p2.Merging(), z6.VnH()); 1037b8021494Sopenharmony_ci __ fscale(z8.VnD(), p2.Merging(), z8.VnD(), z6.VnD()); 1038b8021494Sopenharmony_ci 1039b8021494Sopenharmony_ci __ movprfx(z20.VnH(), p2.Zeroing(), z2.VnH()); 1040b8021494Sopenharmony_ci __ fsqrt(z20.VnD(), p2.Merging(), z15.VnD()); 1041b8021494Sopenharmony_ci 1042b8021494Sopenharmony_ci __ movprfx(z28.VnS(), p6.Zeroing(), z19.VnS()); 1043b8021494Sopenharmony_ci __ fsub(z28.VnD(), p6.Merging(), z28.VnD(), 1.0); 1044b8021494Sopenharmony_ci 1045b8021494Sopenharmony_ci __ movprfx(z6.VnB(), p0.Zeroing(), z12.VnB()); 1046b8021494Sopenharmony_ci __ fsub(z6.VnD(), p0.Merging(), z6.VnD(), z20.VnD()); 1047b8021494Sopenharmony_ci 1048b8021494Sopenharmony_ci __ movprfx(z6.VnS(), p7.Zeroing(), z11.VnS()); 1049b8021494Sopenharmony_ci __ fsubr(z6.VnH(), p7.Merging(), z6.VnH(), 1.0); 1050b8021494Sopenharmony_ci 1051b8021494Sopenharmony_ci __ movprfx(z28.VnB(), p3.Merging(), z10.VnB()); 1052b8021494Sopenharmony_ci __ fsubr(z28.VnS(), p3.Merging(), z28.VnS(), z9.VnS()); 1053b8021494Sopenharmony_ci 1054b8021494Sopenharmony_ci __ movprfx(z22.VnB(), p3.Zeroing(), z14.VnB()); 1055b8021494Sopenharmony_ci __ scvtf(z22.VnD(), p3.Merging(), z24.VnS()); 1056b8021494Sopenharmony_ci 1057b8021494Sopenharmony_ci __ movprfx(z20.VnS(), p2.Merging(), z9.VnS()); 1058b8021494Sopenharmony_ci __ scvtf(z20.VnH(), p2.Merging(), z9.VnH()); 1059b8021494Sopenharmony_ci 1060b8021494Sopenharmony_ci __ movprfx(z19.VnH(), p1.Merging(), z21.VnH()); 1061b8021494Sopenharmony_ci __ scvtf(z19.VnS(), p1.Merging(), z6.VnD()); 1062b8021494Sopenharmony_ci 1063b8021494Sopenharmony_ci __ movprfx(z31.VnS(), p3.Merging(), z22.VnS()); 1064b8021494Sopenharmony_ci __ scvtf(z31.VnH(), p3.Merging(), z22.VnD()); 1065b8021494Sopenharmony_ci 1066b8021494Sopenharmony_ci __ movprfx(z8.VnS(), p3.Merging(), z3.VnS()); 1067b8021494Sopenharmony_ci __ ucvtf(z8.VnD(), p3.Merging(), z1.VnS()); 1068b8021494Sopenharmony_ci 1069b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p0.Merging(), z23.VnB()); 1070b8021494Sopenharmony_ci __ ucvtf(z0.VnH(), p0.Merging(), z12.VnH()); 1071b8021494Sopenharmony_ci 1072b8021494Sopenharmony_ci __ movprfx(z8.VnH(), p3.Zeroing(), z4.VnH()); 1073b8021494Sopenharmony_ci __ ucvtf(z8.VnH(), p3.Merging(), z4.VnS()); 1074b8021494Sopenharmony_ci 1075b8021494Sopenharmony_ci __ movprfx(z20.VnH(), p2.Zeroing(), z10.VnH()); 1076b8021494Sopenharmony_ci __ ucvtf(z20.VnH(), p2.Merging(), z11.VnD()); 1077b8021494Sopenharmony_ci } 1078b8021494Sopenharmony_ci assm.FinalizeCode(); 1079b8021494Sopenharmony_ci 1080b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 1081b8021494Sopenharmony_ci} 1082b8021494Sopenharmony_ci 1083b8021494Sopenharmony_ciTEST(movprfx_negative_predication) { 1084b8021494Sopenharmony_ci // Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears 1085b8021494Sopenharmony_ci // before an unpredicated instruction. 1086b8021494Sopenharmony_ci Assembler assm; 1087b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); 1088b8021494Sopenharmony_ci { 1089b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 1090b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 1091b8021494Sopenharmony_ci static const size_t kPairCount = 60; 1092b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1093b8021494Sopenharmony_ci 1094b8021494Sopenharmony_ci __ movprfx(z27.VnS(), p1.Zeroing(), z12.VnS()); 1095b8021494Sopenharmony_ci __ add(z27.VnS(), z27.VnS(), 42); 1096b8021494Sopenharmony_ci 1097b8021494Sopenharmony_ci __ movprfx(z31.VnS(), p6.Zeroing(), z1.VnS()); 1098b8021494Sopenharmony_ci __ and_(z31.VnS(), z31.VnS(), 4); 1099b8021494Sopenharmony_ci 1100b8021494Sopenharmony_ci __ movprfx(z27.VnS(), p5.Merging(), z24.VnS()); 1101b8021494Sopenharmony_ci __ bic(z27.VnS(), z27.VnS(), 4); 1102b8021494Sopenharmony_ci 1103b8021494Sopenharmony_ci __ movprfx(z6.VnH(), p7.Merging(), z30.VnH()); 1104b8021494Sopenharmony_ci __ clasta(z6.VnH(), p7, z6.VnH(), z14.VnH()); 1105b8021494Sopenharmony_ci 1106b8021494Sopenharmony_ci __ movprfx(z11.VnB(), p6.Merging(), z5.VnB()); 1107b8021494Sopenharmony_ci __ clastb(z11.VnB(), p6, z11.VnB(), z29.VnB()); 1108b8021494Sopenharmony_ci 1109b8021494Sopenharmony_ci __ movprfx(z5.VnD(), p0.Merging(), z1.VnD()); 1110b8021494Sopenharmony_ci __ decd(z5.VnD(), SVE_MUL3); 1111b8021494Sopenharmony_ci 1112b8021494Sopenharmony_ci __ movprfx(z11.VnH(), p7.Zeroing(), z28.VnH()); 1113b8021494Sopenharmony_ci __ dech(z11.VnH(), SVE_VL2); 1114b8021494Sopenharmony_ci 1115b8021494Sopenharmony_ci __ movprfx(z14.VnS(), p5.Zeroing(), z6.VnS()); 1116b8021494Sopenharmony_ci __ decp(z14.VnS(), p5); 1117b8021494Sopenharmony_ci 1118b8021494Sopenharmony_ci __ movprfx(z6.VnS(), p5.Merging(), z10.VnS()); 1119b8021494Sopenharmony_ci __ decw(z6.VnS(), SVE_ALL); 1120b8021494Sopenharmony_ci 1121b8021494Sopenharmony_ci __ movprfx(z27.VnH(), p7.Zeroing(), z9.VnH()); 1122b8021494Sopenharmony_ci __ eon(z27.VnH(), z27.VnH(), 4); 1123b8021494Sopenharmony_ci 1124b8021494Sopenharmony_ci __ movprfx(z3.VnS(), p3.Zeroing(), z2.VnS()); 1125b8021494Sopenharmony_ci __ eor(z3.VnS(), z3.VnS(), 4); 1126b8021494Sopenharmony_ci 1127b8021494Sopenharmony_ci __ movprfx(z30.VnB(), p2.Zeroing(), z25.VnB()); 1128b8021494Sopenharmony_ci __ ext(z30.VnB(), z30.VnB(), z25.VnB(), 42); 1129b8021494Sopenharmony_ci 1130b8021494Sopenharmony_ci __ movprfx(z22.VnD(), p0.Merging(), z0.VnD()); 1131b8021494Sopenharmony_ci __ incd(z22.VnD(), SVE_MUL3); 1132b8021494Sopenharmony_ci 1133b8021494Sopenharmony_ci __ movprfx(z7.VnH(), p3.Merging(), z3.VnH()); 1134b8021494Sopenharmony_ci __ inch(z7.VnH(), SVE_VL2); 1135b8021494Sopenharmony_ci 1136b8021494Sopenharmony_ci __ movprfx(z9.VnD(), p1.Zeroing(), z28.VnD()); 1137b8021494Sopenharmony_ci __ incp(z9.VnD(), p1); 1138b8021494Sopenharmony_ci 1139b8021494Sopenharmony_ci __ movprfx(z30.VnS(), p3.Merging(), z4.VnS()); 1140b8021494Sopenharmony_ci __ incw(z30.VnS(), SVE_ALL); 1141b8021494Sopenharmony_ci 1142b8021494Sopenharmony_ci __ movprfx(z30.VnB(), p7.Zeroing(), z21.VnB()); 1143b8021494Sopenharmony_ci __ insr(z30.VnB(), w30); 1144b8021494Sopenharmony_ci 1145b8021494Sopenharmony_ci __ movprfx(z2.VnB(), p4.Zeroing(), z26.VnB()); 1146b8021494Sopenharmony_ci __ insr(z2.VnB(), b0); 1147b8021494Sopenharmony_ci 1148b8021494Sopenharmony_ci __ movprfx(z27.VnS(), p5.Zeroing(), z5.VnS()); 1149b8021494Sopenharmony_ci __ mul(z27.VnS(), z27.VnS(), 42); 1150b8021494Sopenharmony_ci 1151b8021494Sopenharmony_ci __ movprfx(z5.VnS(), p0.Merging(), z26.VnS()); 1152b8021494Sopenharmony_ci __ orn(z5.VnS(), z5.VnS(), 4); 1153b8021494Sopenharmony_ci 1154b8021494Sopenharmony_ci __ movprfx(z5.VnS(), p0.Merging(), z26.VnS()); 1155b8021494Sopenharmony_ci __ orn(z5.VnS(), z5.VnS(), 4); 1156b8021494Sopenharmony_ci 1157b8021494Sopenharmony_ci __ movprfx(z16.VnD(), p1.Merging(), z13.VnD()); 1158b8021494Sopenharmony_ci __ sdot(z16.VnD(), z11.VnH(), z7.VnH(), 1); 1159b8021494Sopenharmony_ci 1160b8021494Sopenharmony_ci __ movprfx(z27.VnD(), p5.Merging(), z18.VnD()); 1161b8021494Sopenharmony_ci __ sdot(z27.VnD(), z18.VnH(), z0.VnH()); 1162b8021494Sopenharmony_ci 1163b8021494Sopenharmony_ci __ movprfx(z20.VnS(), p6.Merging(), z1.VnS()); 1164b8021494Sopenharmony_ci __ sdot(z20.VnS(), z10.VnB(), z1.VnB(), 1); 1165b8021494Sopenharmony_ci 1166b8021494Sopenharmony_ci __ movprfx(z19.VnD(), p0.Zeroing(), z7.VnD()); 1167b8021494Sopenharmony_ci __ smax(z19.VnD(), z19.VnD(), 42); 1168b8021494Sopenharmony_ci 1169b8021494Sopenharmony_ci __ movprfx(z15.VnD(), p1.Zeroing(), z7.VnD()); 1170b8021494Sopenharmony_ci __ smin(z15.VnD(), z15.VnD(), 42); 1171b8021494Sopenharmony_ci 1172b8021494Sopenharmony_ci __ movprfx(z15.VnB(), p5.Merging(), z3.VnB()); 1173b8021494Sopenharmony_ci __ splice(z15.VnB(), p5, z15.VnB(), z3.VnB()); 1174b8021494Sopenharmony_ci 1175b8021494Sopenharmony_ci __ movprfx(z5.VnB(), p6.Zeroing(), z4.VnB()); 1176b8021494Sopenharmony_ci __ sqadd(z5.VnB(), z5.VnB(), 42); 1177b8021494Sopenharmony_ci 1178b8021494Sopenharmony_ci __ movprfx(z16.VnD(), p0.Zeroing(), z18.VnD()); 1179b8021494Sopenharmony_ci __ sqdecd(z16.VnD(), SVE_MUL3); 1180b8021494Sopenharmony_ci 1181b8021494Sopenharmony_ci __ movprfx(z7.VnH(), p3.Merging(), z28.VnH()); 1182b8021494Sopenharmony_ci __ sqdech(z7.VnH(), SVE_VL2); 1183b8021494Sopenharmony_ci 1184b8021494Sopenharmony_ci __ movprfx(z7.VnS(), p2.Merging(), z13.VnS()); 1185b8021494Sopenharmony_ci __ sqdecp(z7.VnS(), p2); 1186b8021494Sopenharmony_ci 1187b8021494Sopenharmony_ci __ movprfx(z22.VnS(), p7.Zeroing(), z20.VnS()); 1188b8021494Sopenharmony_ci __ sqdecw(z22.VnS(), SVE_ALL); 1189b8021494Sopenharmony_ci 1190b8021494Sopenharmony_ci __ movprfx(z26.VnD(), p1.Zeroing(), z0.VnD()); 1191b8021494Sopenharmony_ci __ sqincd(z26.VnD(), SVE_MUL3); 1192b8021494Sopenharmony_ci 1193b8021494Sopenharmony_ci __ movprfx(z15.VnH(), p7.Zeroing(), z27.VnH()); 1194b8021494Sopenharmony_ci __ sqinch(z15.VnH(), SVE_VL2); 1195b8021494Sopenharmony_ci 1196b8021494Sopenharmony_ci __ movprfx(z4.VnD(), p7.Merging(), z13.VnD()); 1197b8021494Sopenharmony_ci __ sqincp(z4.VnD(), p7); 1198b8021494Sopenharmony_ci 1199b8021494Sopenharmony_ci __ movprfx(z29.VnS(), p6.Merging(), z14.VnS()); 1200b8021494Sopenharmony_ci __ sqincw(z29.VnS(), SVE_ALL); 1201b8021494Sopenharmony_ci 1202b8021494Sopenharmony_ci __ movprfx(z17.VnB(), p1.Merging(), z24.VnB()); 1203b8021494Sopenharmony_ci __ sqsub(z17.VnB(), z17.VnB(), 42); 1204b8021494Sopenharmony_ci 1205b8021494Sopenharmony_ci __ movprfx(z26.VnS(), p5.Zeroing(), z19.VnS()); 1206b8021494Sopenharmony_ci __ sub(z26.VnS(), z26.VnS(), 42); 1207b8021494Sopenharmony_ci 1208b8021494Sopenharmony_ci __ movprfx(z15.VnD(), p1.Merging(), z3.VnD()); 1209b8021494Sopenharmony_ci __ subr(z15.VnD(), z15.VnD(), 42); 1210b8021494Sopenharmony_ci 1211b8021494Sopenharmony_ci __ movprfx(z4.VnD(), p2.Zeroing(), z14.VnD()); 1212b8021494Sopenharmony_ci __ udot(z4.VnD(), z15.VnH(), z7.VnH(), 1); 1213b8021494Sopenharmony_ci 1214b8021494Sopenharmony_ci __ movprfx(z29.VnD(), p4.Zeroing(), z28.VnD()); 1215b8021494Sopenharmony_ci __ udot(z29.VnD(), z2.VnH(), z17.VnH()); 1216b8021494Sopenharmony_ci 1217b8021494Sopenharmony_ci __ movprfx(z7.VnS(), p6.Merging(), z3.VnS()); 1218b8021494Sopenharmony_ci __ udot(z7.VnS(), z14.VnB(), z1.VnB(), 1); 1219b8021494Sopenharmony_ci 1220b8021494Sopenharmony_ci __ movprfx(z14.VnB(), p3.Merging(), z5.VnB()); 1221b8021494Sopenharmony_ci __ umax(z14.VnB(), z14.VnB(), 42); 1222b8021494Sopenharmony_ci 1223b8021494Sopenharmony_ci __ movprfx(z4.VnD(), p1.Zeroing(), z2.VnD()); 1224b8021494Sopenharmony_ci __ umin(z4.VnD(), z4.VnD(), 42); 1225b8021494Sopenharmony_ci 1226b8021494Sopenharmony_ci __ movprfx(z19.VnB(), p0.Zeroing(), z27.VnB()); 1227b8021494Sopenharmony_ci __ uqadd(z19.VnB(), z19.VnB(), 42); 1228b8021494Sopenharmony_ci 1229b8021494Sopenharmony_ci __ movprfx(z24.VnD(), p7.Zeroing(), z11.VnD()); 1230b8021494Sopenharmony_ci __ uqdecd(z24.VnD(), SVE_MUL3); 1231b8021494Sopenharmony_ci 1232b8021494Sopenharmony_ci __ movprfx(z24.VnH(), p4.Zeroing(), z18.VnH()); 1233b8021494Sopenharmony_ci __ uqdech(z24.VnH(), SVE_VL2); 1234b8021494Sopenharmony_ci 1235b8021494Sopenharmony_ci __ movprfx(z31.VnS(), p5.Zeroing(), z2.VnS()); 1236b8021494Sopenharmony_ci __ uqdecp(z31.VnS(), p5); 1237b8021494Sopenharmony_ci 1238b8021494Sopenharmony_ci __ movprfx(z19.VnS(), p6.Merging(), z21.VnS()); 1239b8021494Sopenharmony_ci __ uqdecw(z19.VnS(), SVE_ALL); 1240b8021494Sopenharmony_ci 1241b8021494Sopenharmony_ci __ movprfx(z27.VnD(), p0.Merging(), z21.VnD()); 1242b8021494Sopenharmony_ci __ uqincd(z27.VnD(), SVE_MUL3); 1243b8021494Sopenharmony_ci 1244b8021494Sopenharmony_ci __ movprfx(z13.VnH(), p4.Zeroing(), z12.VnH()); 1245b8021494Sopenharmony_ci __ uqinch(z13.VnH(), SVE_VL2); 1246b8021494Sopenharmony_ci 1247b8021494Sopenharmony_ci __ movprfx(z0.VnD(), p4.Zeroing(), z1.VnD()); 1248b8021494Sopenharmony_ci __ uqincp(z0.VnD(), p4); 1249b8021494Sopenharmony_ci 1250b8021494Sopenharmony_ci __ movprfx(z12.VnS(), p4.Merging(), z21.VnS()); 1251b8021494Sopenharmony_ci __ uqincw(z12.VnS(), SVE_ALL); 1252b8021494Sopenharmony_ci 1253b8021494Sopenharmony_ci __ movprfx(z9.VnD(), p0.Zeroing(), z16.VnD()); 1254b8021494Sopenharmony_ci __ uqsub(z9.VnD(), z9.VnD(), 42); 1255b8021494Sopenharmony_ci 1256b8021494Sopenharmony_ci __ movprfx(z22.VnS(), p0.Zeroing(), z5.VnS()); 1257b8021494Sopenharmony_ci __ smmla(z22.VnS(), z21.VnB(), z0.VnB()); 1258b8021494Sopenharmony_ci 1259b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS()); 1260b8021494Sopenharmony_ci __ ummla(z1.VnS(), z10.VnB(), z2.VnB()); 1261b8021494Sopenharmony_ci 1262b8021494Sopenharmony_ci __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS()); 1263b8021494Sopenharmony_ci __ usmmla(z30.VnS(), z29.VnB(), z18.VnB()); 1264b8021494Sopenharmony_ci 1265b8021494Sopenharmony_ci __ movprfx(z4.VnS(), p0.Zeroing(), z5.VnS()); 1266b8021494Sopenharmony_ci __ usdot(z4.VnS(), z3.VnB(), z4.VnB()); 1267b8021494Sopenharmony_ci 1268b8021494Sopenharmony_ci __ movprfx(z10.VnS(), p0.Zeroing(), z5.VnS()); 1269b8021494Sopenharmony_ci __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0); 1270b8021494Sopenharmony_ci 1271b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS()); 1272b8021494Sopenharmony_ci __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1); 1273b8021494Sopenharmony_ci } 1274b8021494Sopenharmony_ci assm.FinalizeCode(); 1275b8021494Sopenharmony_ci 1276b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 1277b8021494Sopenharmony_ci} 1278b8021494Sopenharmony_ci 1279b8021494Sopenharmony_ciTEST(movprfx_negative_predication_fp) { 1280b8021494Sopenharmony_ci // Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears 1281b8021494Sopenharmony_ci // before an unpredicated instruction. 1282b8021494Sopenharmony_ci Assembler assm; 1283b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, 1284b8021494Sopenharmony_ci CPUFeatures::kSVEF32MM, 1285b8021494Sopenharmony_ci CPUFeatures::kSVEF64MM); 1286b8021494Sopenharmony_ci { 1287b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 1288b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 1289b8021494Sopenharmony_ci static const size_t kPairCount = 11; 1290b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1291b8021494Sopenharmony_ci 1292b8021494Sopenharmony_ci __ movprfx(z10.VnH(), p3.Zeroing(), z3.VnH()); 1293b8021494Sopenharmony_ci __ fcmla(z10.VnH(), z22.VnH(), z3.VnH(), 2, 180); 1294b8021494Sopenharmony_ci 1295b8021494Sopenharmony_ci __ movprfx(z12.VnS(), p4.Merging(), z14.VnS()); 1296b8021494Sopenharmony_ci __ fcmla(z12.VnS(), z3.VnS(), z10.VnS(), 1, 270); 1297b8021494Sopenharmony_ci 1298b8021494Sopenharmony_ci __ movprfx(z16.VnD(), p3.Zeroing(), z24.VnD()); 1299b8021494Sopenharmony_ci __ fmla(z16.VnD(), z24.VnD(), z8.VnD(), 1); 1300b8021494Sopenharmony_ci 1301b8021494Sopenharmony_ci __ movprfx(z9.VnH(), p7.Zeroing(), z0.VnH()); 1302b8021494Sopenharmony_ci __ fmla(z9.VnH(), z8.VnH(), z0.VnH(), 7); 1303b8021494Sopenharmony_ci 1304b8021494Sopenharmony_ci __ movprfx(z23.VnS(), p5.Merging(), z5.VnS()); 1305b8021494Sopenharmony_ci __ fmla(z23.VnS(), z7.VnS(), z5.VnS(), 3); 1306b8021494Sopenharmony_ci 1307b8021494Sopenharmony_ci __ movprfx(z19.VnD(), p6.Zeroing(), z8.VnD()); 1308b8021494Sopenharmony_ci __ fmls(z19.VnD(), z27.VnD(), z13.VnD(), 1); 1309b8021494Sopenharmony_ci 1310b8021494Sopenharmony_ci __ movprfx(z25.VnH(), p7.Merging(), z24.VnH()); 1311b8021494Sopenharmony_ci __ fmls(z25.VnH(), z24.VnH(), z4.VnH(), 4); 1312b8021494Sopenharmony_ci 1313b8021494Sopenharmony_ci __ movprfx(z2.VnS(), p1.Zeroing(), z0.VnS()); 1314b8021494Sopenharmony_ci __ fmls(z2.VnS(), z9.VnS(), z0.VnS(), 3); 1315b8021494Sopenharmony_ci 1316b8021494Sopenharmony_ci // Note that ftsmul and ftssel cannot take movprfx. 1317b8021494Sopenharmony_ci __ movprfx(z22.VnD(), p6.Merging(), z16.VnD()); 1318b8021494Sopenharmony_ci __ ftmad(z22.VnD(), z22.VnD(), z20.VnD(), 2); 1319b8021494Sopenharmony_ci 1320b8021494Sopenharmony_ci __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS()); 1321b8021494Sopenharmony_ci __ fmmla(z30.VnS(), z29.VnS(), z18.VnS()); 1322b8021494Sopenharmony_ci 1323b8021494Sopenharmony_ci __ movprfx(z31.VnD(), p1.Merging(), z5.VnD()); 1324b8021494Sopenharmony_ci __ fmmla(z31.VnD(), z30.VnD(), z18.VnD()); 1325b8021494Sopenharmony_ci } 1326b8021494Sopenharmony_ci assm.FinalizeCode(); 1327b8021494Sopenharmony_ci 1328b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 1329b8021494Sopenharmony_ci} 1330b8021494Sopenharmony_ci 1331b8021494Sopenharmony_ciTEST(movprfx_positive) { 1332b8021494Sopenharmony_ci Assembler assm; 1333b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); 1334b8021494Sopenharmony_ci { 1335b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 1336b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 1337b8021494Sopenharmony_ci static const size_t kPairCount = 123; 1338b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1339b8021494Sopenharmony_ci 1340b8021494Sopenharmony_ci __ movprfx(z17, z28); 1341b8021494Sopenharmony_ci __ abs(z17.VnB(), p6.Merging(), z28.VnB()); 1342b8021494Sopenharmony_ci 1343b8021494Sopenharmony_ci __ movprfx(z9, z7); 1344b8021494Sopenharmony_ci __ add(z9.VnB(), p5.Merging(), z9.VnB(), z29.VnB()); 1345b8021494Sopenharmony_ci 1346b8021494Sopenharmony_ci __ movprfx(z11, z0); 1347b8021494Sopenharmony_ci __ add(z11.VnD(), z11.VnD(), 42); 1348b8021494Sopenharmony_ci 1349b8021494Sopenharmony_ci __ movprfx(z8.VnS(), p3.Zeroing(), z28.VnS()); 1350b8021494Sopenharmony_ci __ and_(z8.VnS(), p3.Merging(), z8.VnS(), z31.VnS()); 1351b8021494Sopenharmony_ci 1352b8021494Sopenharmony_ci __ movprfx(z20, z23); 1353b8021494Sopenharmony_ci __ and_(z20.VnS(), z20.VnS(), 4); 1354b8021494Sopenharmony_ci 1355b8021494Sopenharmony_ci __ movprfx(z24.VnD(), p5.Merging(), z11.VnD()); 1356b8021494Sopenharmony_ci __ asr(z24.VnD(), p5.Merging(), z24.VnD(), 3); 1357b8021494Sopenharmony_ci 1358b8021494Sopenharmony_ci __ movprfx(z1, z13); 1359b8021494Sopenharmony_ci __ asr(z1.VnH(), p3.Merging(), z1.VnH(), z4.VnH()); 1360b8021494Sopenharmony_ci 1361b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p7.Zeroing(), z28.VnB()); 1362b8021494Sopenharmony_ci __ asr(z0.VnB(), p7.Merging(), z0.VnB(), z28.VnD()); 1363b8021494Sopenharmony_ci 1364b8021494Sopenharmony_ci __ movprfx(z15, z5); 1365b8021494Sopenharmony_ci __ asr(z15.VnD(), p3.Merging(), z15.VnD(), z5.VnD()); 1366b8021494Sopenharmony_ci 1367b8021494Sopenharmony_ci __ movprfx(z24.VnH(), p3.Merging(), z22.VnH()); 1368b8021494Sopenharmony_ci __ asrd(z24.VnH(), p3.Merging(), z24.VnH(), 3); 1369b8021494Sopenharmony_ci 1370b8021494Sopenharmony_ci __ movprfx(z2.VnS(), p3.Zeroing(), z20.VnS()); 1371b8021494Sopenharmony_ci __ asrr(z2.VnS(), p3.Merging(), z2.VnS(), z15.VnS()); 1372b8021494Sopenharmony_ci 1373b8021494Sopenharmony_ci __ movprfx(z17.VnB(), p7.Merging(), z6.VnB()); 1374b8021494Sopenharmony_ci __ bic(z17.VnB(), p7.Merging(), z17.VnB(), z25.VnB()); 1375b8021494Sopenharmony_ci 1376b8021494Sopenharmony_ci __ movprfx(z31, z6); 1377b8021494Sopenharmony_ci __ bic(z31.VnD(), z31.VnD(), 4); 1378b8021494Sopenharmony_ci 1379b8021494Sopenharmony_ci __ movprfx(z20, z2); 1380b8021494Sopenharmony_ci __ clasta(z20.VnB(), p4, z20.VnB(), z15.VnB()); 1381b8021494Sopenharmony_ci 1382b8021494Sopenharmony_ci __ movprfx(z27, z11); 1383b8021494Sopenharmony_ci __ clastb(z27.VnB(), p5, z27.VnB(), z6.VnB()); 1384b8021494Sopenharmony_ci 1385b8021494Sopenharmony_ci __ movprfx(z3.VnS(), p7.Zeroing(), z17.VnS()); 1386b8021494Sopenharmony_ci __ cls(z3.VnS(), p7.Merging(), z0.VnS()); 1387b8021494Sopenharmony_ci 1388b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p0.Zeroing(), z24.VnB()); 1389b8021494Sopenharmony_ci __ clz(z29.VnB(), p0.Merging(), z7.VnB()); 1390b8021494Sopenharmony_ci 1391b8021494Sopenharmony_ci __ movprfx(z2.VnH(), p7.Zeroing(), z29.VnH()); 1392b8021494Sopenharmony_ci __ cnot(z2.VnH(), p7.Merging(), z28.VnH()); 1393b8021494Sopenharmony_ci 1394b8021494Sopenharmony_ci __ movprfx(z23, z5); 1395b8021494Sopenharmony_ci __ cnt(z23.VnH(), p0.Merging(), z12.VnH()); 1396b8021494Sopenharmony_ci 1397b8021494Sopenharmony_ci __ movprfx(z5, z3); 1398b8021494Sopenharmony_ci __ cpy(z5.VnD(), p1.Merging(), -42); 1399b8021494Sopenharmony_ci 1400b8021494Sopenharmony_ci __ movprfx(z0, z12); 1401b8021494Sopenharmony_ci __ cpy(z0.VnB(), p1.Merging(), w0); 1402b8021494Sopenharmony_ci 1403b8021494Sopenharmony_ci __ movprfx(z27, z8); 1404b8021494Sopenharmony_ci __ cpy(z27.VnB(), p0.Merging(), b0); 1405b8021494Sopenharmony_ci 1406b8021494Sopenharmony_ci __ movprfx(z20, z24); 1407b8021494Sopenharmony_ci __ decd(z20.VnD(), SVE_MUL3); 1408b8021494Sopenharmony_ci 1409b8021494Sopenharmony_ci __ movprfx(z5, z28); 1410b8021494Sopenharmony_ci __ dech(z5.VnH(), SVE_VL2); 1411b8021494Sopenharmony_ci 1412b8021494Sopenharmony_ci __ movprfx(z7, z3); 1413b8021494Sopenharmony_ci __ decp(z7.VnD(), p2); 1414b8021494Sopenharmony_ci 1415b8021494Sopenharmony_ci __ movprfx(z4, z7); 1416b8021494Sopenharmony_ci __ decw(z4.VnS(), SVE_ALL); 1417b8021494Sopenharmony_ci 1418b8021494Sopenharmony_ci __ movprfx(z3, z18); 1419b8021494Sopenharmony_ci __ eon(z3.VnS(), z3.VnS(), 4); 1420b8021494Sopenharmony_ci 1421b8021494Sopenharmony_ci __ movprfx(z4.VnD(), p0.Merging(), z10.VnD()); 1422b8021494Sopenharmony_ci __ eor(z4.VnD(), p0.Merging(), z4.VnD(), z10.VnD()); 1423b8021494Sopenharmony_ci 1424b8021494Sopenharmony_ci __ movprfx(z15, z18); 1425b8021494Sopenharmony_ci __ eor(z15.VnH(), z15.VnH(), 4); 1426b8021494Sopenharmony_ci 1427b8021494Sopenharmony_ci __ movprfx(z17, z30); 1428b8021494Sopenharmony_ci __ ext(z17.VnB(), z17.VnB(), z18.VnB(), 2); 1429b8021494Sopenharmony_ci 1430b8021494Sopenharmony_ci __ movprfx(z19, z28); 1431b8021494Sopenharmony_ci __ incd(z19.VnD(), SVE_MUL3); 1432b8021494Sopenharmony_ci 1433b8021494Sopenharmony_ci __ movprfx(z13, z7); 1434b8021494Sopenharmony_ci __ inch(z13.VnH(), SVE_VL2); 1435b8021494Sopenharmony_ci 1436b8021494Sopenharmony_ci __ movprfx(z14, z21); 1437b8021494Sopenharmony_ci __ incp(z14.VnD(), p1); 1438b8021494Sopenharmony_ci 1439b8021494Sopenharmony_ci __ movprfx(z26, z12); 1440b8021494Sopenharmony_ci __ incw(z26.VnS(), SVE_ALL); 1441b8021494Sopenharmony_ci 1442b8021494Sopenharmony_ci __ movprfx(z16, z2); 1443b8021494Sopenharmony_ci __ insr(z16.VnB(), w16); 1444b8021494Sopenharmony_ci 1445b8021494Sopenharmony_ci __ movprfx(z20, z26); 1446b8021494Sopenharmony_ci __ insr(z20.VnB(), b0); 1447b8021494Sopenharmony_ci 1448b8021494Sopenharmony_ci __ movprfx(z30.VnD(), p0.Merging(), z23.VnD()); 1449b8021494Sopenharmony_ci __ lsl(z30.VnD(), p0.Merging(), z30.VnD(), 3); 1450b8021494Sopenharmony_ci 1451b8021494Sopenharmony_ci __ movprfx(z28.VnS(), p2.Zeroing(), z6.VnS()); 1452b8021494Sopenharmony_ci __ lsl(z28.VnS(), p2.Merging(), z28.VnS(), z6.VnS()); 1453b8021494Sopenharmony_ci 1454b8021494Sopenharmony_ci __ movprfx(z15.VnH(), p6.Zeroing(), z3.VnH()); 1455b8021494Sopenharmony_ci __ lsl(z15.VnH(), p6.Merging(), z15.VnH(), z3.VnD()); 1456b8021494Sopenharmony_ci 1457b8021494Sopenharmony_ci __ movprfx(z13.VnD(), p4.Zeroing(), z14.VnD()); 1458b8021494Sopenharmony_ci __ lsl(z13.VnD(), p4.Merging(), z13.VnD(), z25.VnD()); 1459b8021494Sopenharmony_ci 1460b8021494Sopenharmony_ci __ movprfx(z14, z5); 1461b8021494Sopenharmony_ci __ lslr(z14.VnS(), p0.Merging(), z14.VnS(), z17.VnS()); 1462b8021494Sopenharmony_ci 1463b8021494Sopenharmony_ci __ movprfx(z21, z1); 1464b8021494Sopenharmony_ci __ lsr(z21.VnH(), p5.Merging(), z21.VnH(), 3); 1465b8021494Sopenharmony_ci 1466b8021494Sopenharmony_ci __ movprfx(z11.VnH(), p0.Zeroing(), z13.VnH()); 1467b8021494Sopenharmony_ci __ lsr(z11.VnH(), p0.Merging(), z11.VnH(), z9.VnH()); 1468b8021494Sopenharmony_ci 1469b8021494Sopenharmony_ci __ movprfx(z24, z29); 1470b8021494Sopenharmony_ci __ lsr(z24.VnS(), p4.Merging(), z24.VnS(), z1.VnD()); 1471b8021494Sopenharmony_ci 1472b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p6.Merging(), z9.VnD()); 1473b8021494Sopenharmony_ci __ lsr(z1.VnD(), p6.Merging(), z1.VnD(), z9.VnD()); 1474b8021494Sopenharmony_ci 1475b8021494Sopenharmony_ci __ movprfx(z22, z3); 1476b8021494Sopenharmony_ci __ lsrr(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB()); 1477b8021494Sopenharmony_ci 1478b8021494Sopenharmony_ci __ movprfx(z24.VnB(), p2.Zeroing(), z5.VnB()); 1479b8021494Sopenharmony_ci __ mad(z24.VnB(), p2.Merging(), z5.VnB(), z10.VnB()); 1480b8021494Sopenharmony_ci 1481b8021494Sopenharmony_ci __ movprfx(z8, z4); 1482b8021494Sopenharmony_ci __ mla(z8.VnS(), p6.Merging(), z4.VnS(), z26.VnS()); 1483b8021494Sopenharmony_ci 1484b8021494Sopenharmony_ci __ movprfx(z10, z8); 1485b8021494Sopenharmony_ci __ mls(z10.VnS(), p4.Merging(), z23.VnS(), z16.VnS()); 1486b8021494Sopenharmony_ci 1487b8021494Sopenharmony_ci // Aliases of cpy. 1488b8021494Sopenharmony_ci __ movprfx(z4.VnH(), p5.Zeroing(), z2.VnH()); 1489b8021494Sopenharmony_ci __ mov(z4.VnH(), p5.Merging(), -42); 1490b8021494Sopenharmony_ci 1491b8021494Sopenharmony_ci __ movprfx(z2.VnB(), p3.Zeroing(), z24.VnB()); 1492b8021494Sopenharmony_ci __ mov(z2.VnB(), p3.Merging(), w2); 1493b8021494Sopenharmony_ci 1494b8021494Sopenharmony_ci __ movprfx(z27, z13); 1495b8021494Sopenharmony_ci __ mov(z27.VnD(), p3.Merging(), d0); 1496b8021494Sopenharmony_ci 1497b8021494Sopenharmony_ci __ movprfx(z18.VnB(), p5.Zeroing(), z11.VnB()); 1498b8021494Sopenharmony_ci __ msb(z18.VnB(), p5.Merging(), z3.VnB(), z11.VnB()); 1499b8021494Sopenharmony_ci 1500b8021494Sopenharmony_ci __ movprfx(z29, z16); 1501b8021494Sopenharmony_ci __ mul(z29.VnS(), p6.Merging(), z29.VnS(), z9.VnS()); 1502b8021494Sopenharmony_ci 1503b8021494Sopenharmony_ci __ movprfx(z21, z23); 1504b8021494Sopenharmony_ci __ mul(z21.VnH(), z21.VnH(), 42); 1505b8021494Sopenharmony_ci 1506b8021494Sopenharmony_ci __ movprfx(z7.VnS(), p4.Merging(), z14.VnS()); 1507b8021494Sopenharmony_ci __ neg(z7.VnS(), p4.Merging(), z14.VnS()); 1508b8021494Sopenharmony_ci 1509b8021494Sopenharmony_ci __ movprfx(z8.VnD(), p4.Zeroing(), z5.VnD()); 1510b8021494Sopenharmony_ci __ not_(z8.VnD(), p4.Merging(), z5.VnD()); 1511b8021494Sopenharmony_ci 1512b8021494Sopenharmony_ci __ movprfx(z14, z13); 1513b8021494Sopenharmony_ci __ orn(z14.VnS(), z14.VnS(), 4); 1514b8021494Sopenharmony_ci 1515b8021494Sopenharmony_ci __ movprfx(z14, z13); 1516b8021494Sopenharmony_ci __ orn(z14.VnS(), z14.VnS(), 4); 1517b8021494Sopenharmony_ci 1518b8021494Sopenharmony_ci __ movprfx(z27, z17); 1519b8021494Sopenharmony_ci __ orr(z27.VnD(), p2.Merging(), z27.VnD(), z17.VnD()); 1520b8021494Sopenharmony_ci 1521b8021494Sopenharmony_ci __ movprfx(z13.VnH(), p2.Zeroing(), z27.VnH()); 1522b8021494Sopenharmony_ci __ rbit(z13.VnH(), p2.Merging(), z1.VnH()); 1523b8021494Sopenharmony_ci 1524b8021494Sopenharmony_ci __ movprfx(z1, z29); 1525b8021494Sopenharmony_ci __ revb(z1.VnS(), p4.Merging(), z6.VnS()); 1526b8021494Sopenharmony_ci 1527b8021494Sopenharmony_ci __ movprfx(z18.VnD(), p2.Zeroing(), z10.VnD()); 1528b8021494Sopenharmony_ci __ revh(z18.VnD(), p2.Merging(), z16.VnD()); 1529b8021494Sopenharmony_ci 1530b8021494Sopenharmony_ci __ movprfx(z2.VnD(), p1.Merging(), z10.VnD()); 1531b8021494Sopenharmony_ci __ revw(z2.VnD(), p1.Merging(), z1.VnD()); 1532b8021494Sopenharmony_ci 1533b8021494Sopenharmony_ci __ movprfx(z28.VnS(), p7.Merging(), z11.VnS()); 1534b8021494Sopenharmony_ci __ sabd(z28.VnS(), p7.Merging(), z28.VnS(), z11.VnS()); 1535b8021494Sopenharmony_ci 1536b8021494Sopenharmony_ci __ movprfx(z22.VnS(), p0.Merging(), z20.VnS()); 1537b8021494Sopenharmony_ci __ sdiv(z22.VnS(), p0.Merging(), z22.VnS(), z6.VnS()); 1538b8021494Sopenharmony_ci 1539b8021494Sopenharmony_ci __ movprfx(z13.VnS(), p7.Merging(), z0.VnS()); 1540b8021494Sopenharmony_ci __ sdivr(z13.VnS(), p7.Merging(), z13.VnS(), z2.VnS()); 1541b8021494Sopenharmony_ci 1542b8021494Sopenharmony_ci __ movprfx(z0, z12); 1543b8021494Sopenharmony_ci __ sdot(z0.VnD(), z10.VnH(), z12.VnH(), 1); 1544b8021494Sopenharmony_ci 1545b8021494Sopenharmony_ci __ movprfx(z8, z15); 1546b8021494Sopenharmony_ci __ sdot(z8.VnS(), z15.VnB(), z12.VnB()); 1547b8021494Sopenharmony_ci 1548b8021494Sopenharmony_ci __ movprfx(z13, z0); 1549b8021494Sopenharmony_ci __ sdot(z13.VnS(), z10.VnB(), z0.VnB(), 1); 1550b8021494Sopenharmony_ci 1551b8021494Sopenharmony_ci __ movprfx(z11, z13); 1552b8021494Sopenharmony_ci __ smax(z11.VnB(), p5.Merging(), z11.VnB(), z24.VnB()); 1553b8021494Sopenharmony_ci 1554b8021494Sopenharmony_ci __ movprfx(z3, z17); 1555b8021494Sopenharmony_ci __ smax(z3.VnD(), z3.VnD(), 42); 1556b8021494Sopenharmony_ci 1557b8021494Sopenharmony_ci __ movprfx(z10, z29); 1558b8021494Sopenharmony_ci __ smin(z10.VnD(), p4.Merging(), z10.VnD(), z29.VnD()); 1559b8021494Sopenharmony_ci 1560b8021494Sopenharmony_ci __ movprfx(z13, z29); 1561b8021494Sopenharmony_ci __ smin(z13.VnD(), z13.VnD(), 42); 1562b8021494Sopenharmony_ci 1563b8021494Sopenharmony_ci __ movprfx(z6, z17); 1564b8021494Sopenharmony_ci __ smulh(z6.VnS(), p7.Merging(), z6.VnS(), z31.VnS()); 1565b8021494Sopenharmony_ci 1566b8021494Sopenharmony_ci __ movprfx(z19, z20); 1567b8021494Sopenharmony_ci __ splice(z19.VnB(), p3, z19.VnB(), z20.VnB()); 1568b8021494Sopenharmony_ci 1569b8021494Sopenharmony_ci __ movprfx(z0, z3); 1570b8021494Sopenharmony_ci __ sqadd(z0.VnD(), z0.VnD(), 42); 1571b8021494Sopenharmony_ci 1572b8021494Sopenharmony_ci __ movprfx(z29, z5); 1573b8021494Sopenharmony_ci __ sqdecd(z29.VnD(), SVE_MUL3); 1574b8021494Sopenharmony_ci 1575b8021494Sopenharmony_ci __ movprfx(z25, z11); 1576b8021494Sopenharmony_ci __ sqdech(z25.VnH(), SVE_VL2); 1577b8021494Sopenharmony_ci 1578b8021494Sopenharmony_ci __ movprfx(z16, z9); 1579b8021494Sopenharmony_ci __ sqdecp(z16.VnS(), p1); 1580b8021494Sopenharmony_ci 1581b8021494Sopenharmony_ci __ movprfx(z8, z17); 1582b8021494Sopenharmony_ci __ sqdecw(z8.VnS(), SVE_ALL); 1583b8021494Sopenharmony_ci 1584b8021494Sopenharmony_ci __ movprfx(z4, z5); 1585b8021494Sopenharmony_ci __ sqincd(z4.VnD(), SVE_MUL3); 1586b8021494Sopenharmony_ci 1587b8021494Sopenharmony_ci __ movprfx(z0, z17); 1588b8021494Sopenharmony_ci __ sqinch(z0.VnH(), SVE_VL2); 1589b8021494Sopenharmony_ci 1590b8021494Sopenharmony_ci __ movprfx(z7, z27); 1591b8021494Sopenharmony_ci __ sqincp(z7.VnS(), p6); 1592b8021494Sopenharmony_ci 1593b8021494Sopenharmony_ci __ movprfx(z10, z9); 1594b8021494Sopenharmony_ci __ sqincw(z10.VnS(), SVE_ALL); 1595b8021494Sopenharmony_ci 1596b8021494Sopenharmony_ci __ movprfx(z31, z22); 1597b8021494Sopenharmony_ci __ sqsub(z31.VnB(), z31.VnB(), 42); 1598b8021494Sopenharmony_ci 1599b8021494Sopenharmony_ci __ movprfx(z12.VnH(), p7.Zeroing(), z23.VnH()); 1600b8021494Sopenharmony_ci __ sub(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnH()); 1601b8021494Sopenharmony_ci 1602b8021494Sopenharmony_ci __ movprfx(z10, z1); 1603b8021494Sopenharmony_ci __ sub(z10.VnH(), z10.VnH(), 42); 1604b8021494Sopenharmony_ci 1605b8021494Sopenharmony_ci __ movprfx(z15.VnB(), p0.Merging(), z0.VnB()); 1606b8021494Sopenharmony_ci __ subr(z15.VnB(), p0.Merging(), z15.VnB(), z0.VnB()); 1607b8021494Sopenharmony_ci 1608b8021494Sopenharmony_ci __ movprfx(z17, z2); 1609b8021494Sopenharmony_ci __ subr(z17.VnH(), z17.VnH(), 42); 1610b8021494Sopenharmony_ci 1611b8021494Sopenharmony_ci __ movprfx(z5, z3); 1612b8021494Sopenharmony_ci __ sxtb(z5.VnD(), p6.Merging(), z20.VnD()); 1613b8021494Sopenharmony_ci 1614b8021494Sopenharmony_ci __ movprfx(z11, z17); 1615b8021494Sopenharmony_ci __ sxth(z11.VnD(), p6.Merging(), z25.VnD()); 1616b8021494Sopenharmony_ci 1617b8021494Sopenharmony_ci __ movprfx(z26, z4); 1618b8021494Sopenharmony_ci __ sxtw(z26.VnD(), p5.Merging(), z4.VnD()); 1619b8021494Sopenharmony_ci 1620b8021494Sopenharmony_ci __ movprfx(z15.VnD(), p0.Zeroing(), z8.VnD()); 1621b8021494Sopenharmony_ci __ uabd(z15.VnD(), p0.Merging(), z15.VnD(), z20.VnD()); 1622b8021494Sopenharmony_ci 1623b8021494Sopenharmony_ci __ movprfx(z21, z24); 1624b8021494Sopenharmony_ci __ udiv(z21.VnD(), p3.Merging(), z21.VnD(), z24.VnD()); 1625b8021494Sopenharmony_ci 1626b8021494Sopenharmony_ci __ movprfx(z22, z10); 1627b8021494Sopenharmony_ci __ udivr(z22.VnD(), p7.Merging(), z22.VnD(), z27.VnD()); 1628b8021494Sopenharmony_ci 1629b8021494Sopenharmony_ci __ movprfx(z27, z25); 1630b8021494Sopenharmony_ci __ udot(z27.VnD(), z29.VnH(), z3.VnH(), 1); 1631b8021494Sopenharmony_ci 1632b8021494Sopenharmony_ci __ movprfx(z29, z10); 1633b8021494Sopenharmony_ci __ udot(z29.VnS(), z10.VnB(), z21.VnB()); 1634b8021494Sopenharmony_ci 1635b8021494Sopenharmony_ci __ movprfx(z18, z0); 1636b8021494Sopenharmony_ci __ udot(z18.VnS(), z14.VnB(), z0.VnB(), 1); 1637b8021494Sopenharmony_ci 1638b8021494Sopenharmony_ci __ movprfx(z6, z30); 1639b8021494Sopenharmony_ci __ umax(z6.VnS(), p2.Merging(), z6.VnS(), z27.VnS()); 1640b8021494Sopenharmony_ci 1641b8021494Sopenharmony_ci __ movprfx(z31, z17); 1642b8021494Sopenharmony_ci __ umax(z31.VnD(), z31.VnD(), 42); 1643b8021494Sopenharmony_ci 1644b8021494Sopenharmony_ci __ movprfx(z27.VnS(), p0.Merging(), z20.VnS()); 1645b8021494Sopenharmony_ci __ umin(z27.VnS(), p0.Merging(), z27.VnS(), z8.VnS()); 1646b8021494Sopenharmony_ci 1647b8021494Sopenharmony_ci __ movprfx(z0, z11); 1648b8021494Sopenharmony_ci __ umin(z0.VnH(), z0.VnH(), 42); 1649b8021494Sopenharmony_ci 1650b8021494Sopenharmony_ci __ movprfx(z21, z17); 1651b8021494Sopenharmony_ci __ umulh(z21.VnB(), p0.Merging(), z21.VnB(), z30.VnB()); 1652b8021494Sopenharmony_ci 1653b8021494Sopenharmony_ci __ movprfx(z9, z24); 1654b8021494Sopenharmony_ci __ uqadd(z9.VnD(), z9.VnD(), 42); 1655b8021494Sopenharmony_ci 1656b8021494Sopenharmony_ci __ movprfx(z18, z13); 1657b8021494Sopenharmony_ci __ uqdecd(z18.VnD(), SVE_MUL3); 1658b8021494Sopenharmony_ci 1659b8021494Sopenharmony_ci __ movprfx(z20, z23); 1660b8021494Sopenharmony_ci __ uqdech(z20.VnH(), SVE_VL2); 1661b8021494Sopenharmony_ci 1662b8021494Sopenharmony_ci __ movprfx(z12, z29); 1663b8021494Sopenharmony_ci __ uqdecp(z12.VnS(), p7); 1664b8021494Sopenharmony_ci 1665b8021494Sopenharmony_ci __ movprfx(z24, z25); 1666b8021494Sopenharmony_ci __ uqdecw(z24.VnS(), SVE_ALL); 1667b8021494Sopenharmony_ci 1668b8021494Sopenharmony_ci __ movprfx(z13, z1); 1669b8021494Sopenharmony_ci __ uqincd(z13.VnD(), SVE_MUL3); 1670b8021494Sopenharmony_ci 1671b8021494Sopenharmony_ci __ movprfx(z5, z19); 1672b8021494Sopenharmony_ci __ uqinch(z5.VnH(), SVE_VL2); 1673b8021494Sopenharmony_ci 1674b8021494Sopenharmony_ci __ movprfx(z6, z25); 1675b8021494Sopenharmony_ci __ uqincp(z6.VnS(), p5); 1676b8021494Sopenharmony_ci 1677b8021494Sopenharmony_ci __ movprfx(z12, z14); 1678b8021494Sopenharmony_ci __ uqincw(z12.VnS(), SVE_ALL); 1679b8021494Sopenharmony_ci 1680b8021494Sopenharmony_ci __ movprfx(z13, z6); 1681b8021494Sopenharmony_ci __ uqsub(z13.VnH(), z13.VnH(), 42); 1682b8021494Sopenharmony_ci 1683b8021494Sopenharmony_ci __ movprfx(z31, z3); 1684b8021494Sopenharmony_ci __ uxtb(z31.VnS(), p0.Merging(), z3.VnS()); 1685b8021494Sopenharmony_ci 1686b8021494Sopenharmony_ci __ movprfx(z18.VnD(), p4.Merging(), z25.VnD()); 1687b8021494Sopenharmony_ci __ uxth(z18.VnD(), p4.Merging(), z25.VnD()); 1688b8021494Sopenharmony_ci 1689b8021494Sopenharmony_ci __ movprfx(z18.VnD(), p7.Merging(), z25.VnD()); 1690b8021494Sopenharmony_ci __ uxtw(z18.VnD(), p7.Merging(), z25.VnD()); 1691b8021494Sopenharmony_ci 1692b8021494Sopenharmony_ci __ movprfx(z22, z5); 1693b8021494Sopenharmony_ci __ smmla(z22.VnS(), z21.VnB(), z0.VnB()); 1694b8021494Sopenharmony_ci 1695b8021494Sopenharmony_ci __ movprfx(z1, z5); 1696b8021494Sopenharmony_ci __ ummla(z1.VnS(), z10.VnB(), z0.VnB()); 1697b8021494Sopenharmony_ci 1698b8021494Sopenharmony_ci __ movprfx(z30, z5); 1699b8021494Sopenharmony_ci __ usmmla(z30.VnS(), z31.VnB(), z18.VnB()); 1700b8021494Sopenharmony_ci 1701b8021494Sopenharmony_ci __ movprfx(z4, z5); 1702b8021494Sopenharmony_ci __ usdot(z4.VnS(), z3.VnB(), z3.VnB()); 1703b8021494Sopenharmony_ci 1704b8021494Sopenharmony_ci __ movprfx(z10, z5); 1705b8021494Sopenharmony_ci __ usdot(z10.VnS(), z9.VnB(), z0.VnB(), 0); 1706b8021494Sopenharmony_ci 1707b8021494Sopenharmony_ci __ movprfx(z1, z5); 1708b8021494Sopenharmony_ci __ sudot(z1.VnS(), z10.VnB(), z2.VnB(), 1); 1709b8021494Sopenharmony_ci } 1710b8021494Sopenharmony_ci assm.FinalizeCode(); 1711b8021494Sopenharmony_ci 1712b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); 1713b8021494Sopenharmony_ci} 1714b8021494Sopenharmony_ci 1715b8021494Sopenharmony_ciTEST(movprfx_positive_fp) { 1716b8021494Sopenharmony_ci Assembler assm; 1717b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, 1718b8021494Sopenharmony_ci CPUFeatures::kSVEF32MM, 1719b8021494Sopenharmony_ci CPUFeatures::kSVEF64MM); 1720b8021494Sopenharmony_ci { 1721b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 1722b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 1723b8021494Sopenharmony_ci static const size_t kPairCount = 75; 1724b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1725b8021494Sopenharmony_ci 1726b8021494Sopenharmony_ci __ movprfx(z18.VnS(), p6.Zeroing(), z20.VnS()); 1727b8021494Sopenharmony_ci __ fabd(z18.VnS(), p6.Merging(), z18.VnS(), z19.VnS()); 1728b8021494Sopenharmony_ci 1729b8021494Sopenharmony_ci __ movprfx(z28.VnD(), p4.Zeroing(), z24.VnD()); 1730b8021494Sopenharmony_ci __ fabs(z28.VnD(), p4.Merging(), z24.VnD()); 1731b8021494Sopenharmony_ci 1732b8021494Sopenharmony_ci __ movprfx(z12, z8); 1733b8021494Sopenharmony_ci __ fadd(z12.VnS(), p2.Merging(), z12.VnS(), 0.5); 1734b8021494Sopenharmony_ci 1735b8021494Sopenharmony_ci __ movprfx(z0.VnS(), p1.Merging(), z9.VnS()); 1736b8021494Sopenharmony_ci __ fadd(z0.VnS(), p1.Merging(), z0.VnS(), z9.VnS()); 1737b8021494Sopenharmony_ci 1738b8021494Sopenharmony_ci __ movprfx(z10.VnH(), p2.Merging(), z2.VnH()); 1739b8021494Sopenharmony_ci __ fcadd(z10.VnH(), p2.Merging(), z10.VnH(), z20.VnH(), 90); 1740b8021494Sopenharmony_ci 1741b8021494Sopenharmony_ci __ movprfx(z21, z6); 1742b8021494Sopenharmony_ci __ fcmla(z21.VnH(), z31.VnH(), z6.VnH(), 2, 180); 1743b8021494Sopenharmony_ci 1744b8021494Sopenharmony_ci __ movprfx(z16, z6); 1745b8021494Sopenharmony_ci __ fcmla(z16.VnS(), z11.VnS(), z6.VnS(), 1, 270); 1746b8021494Sopenharmony_ci 1747b8021494Sopenharmony_ci __ movprfx(z15.VnH(), p6.Merging(), z16.VnH()); 1748b8021494Sopenharmony_ci __ fcpy(z15.VnH(), p6.Merging(), 1.25); 1749b8021494Sopenharmony_ci 1750b8021494Sopenharmony_ci __ movprfx(z1, z14); 1751b8021494Sopenharmony_ci __ fcvt(z1.VnD(), p2.Merging(), z4.VnH()); 1752b8021494Sopenharmony_ci 1753b8021494Sopenharmony_ci __ movprfx(z25.VnD(), p6.Merging(), z1.VnD()); 1754b8021494Sopenharmony_ci __ fcvt(z25.VnD(), p6.Merging(), z1.VnS()); 1755b8021494Sopenharmony_ci 1756b8021494Sopenharmony_ci __ movprfx(z18.VnS(), p2.Merging(), z2.VnS()); 1757b8021494Sopenharmony_ci __ fcvt(z18.VnH(), p2.Merging(), z7.VnS()); 1758b8021494Sopenharmony_ci 1759b8021494Sopenharmony_ci __ movprfx(z21.VnD(), p5.Zeroing(), z26.VnD()); 1760b8021494Sopenharmony_ci __ fcvt(z21.VnH(), p5.Merging(), z26.VnD()); 1761b8021494Sopenharmony_ci 1762b8021494Sopenharmony_ci __ movprfx(z12.VnD(), p1.Merging(), z18.VnD()); 1763b8021494Sopenharmony_ci __ fcvtzs(z12.VnD(), p1.Merging(), z18.VnH()); 1764b8021494Sopenharmony_ci 1765b8021494Sopenharmony_ci __ movprfx(z3.VnS(), p2.Merging(), z0.VnS()); 1766b8021494Sopenharmony_ci __ fcvtzs(z3.VnS(), p2.Merging(), z26.VnS()); 1767b8021494Sopenharmony_ci 1768b8021494Sopenharmony_ci __ movprfx(z21.VnS(), p4.Merging(), z7.VnS()); 1769b8021494Sopenharmony_ci __ fcvtzs(z21.VnS(), p4.Merging(), z7.VnH()); 1770b8021494Sopenharmony_ci 1771b8021494Sopenharmony_ci __ movprfx(z16.VnD(), p3.Zeroing(), z4.VnD()); 1772b8021494Sopenharmony_ci __ fcvtzs(z16.VnS(), p3.Merging(), z28.VnD()); 1773b8021494Sopenharmony_ci 1774b8021494Sopenharmony_ci __ movprfx(z31.VnD(), p4.Merging(), z1.VnD()); 1775b8021494Sopenharmony_ci __ fcvtzu(z31.VnD(), p4.Merging(), z1.VnH()); 1776b8021494Sopenharmony_ci 1777b8021494Sopenharmony_ci __ movprfx(z23.VnH(), p0.Zeroing(), z28.VnH()); 1778b8021494Sopenharmony_ci __ fcvtzu(z23.VnH(), p0.Merging(), z28.VnH()); 1779b8021494Sopenharmony_ci 1780b8021494Sopenharmony_ci __ movprfx(z2, z12); 1781b8021494Sopenharmony_ci __ fcvtzu(z2.VnD(), p3.Merging(), z28.VnS()); 1782b8021494Sopenharmony_ci 1783b8021494Sopenharmony_ci __ movprfx(z4, z7); 1784b8021494Sopenharmony_ci __ fcvtzu(z4.VnS(), p7.Merging(), z16.VnD()); 1785b8021494Sopenharmony_ci 1786b8021494Sopenharmony_ci __ movprfx(z13.VnS(), p3.Zeroing(), z23.VnS()); 1787b8021494Sopenharmony_ci __ fdiv(z13.VnS(), p3.Merging(), z13.VnS(), z23.VnS()); 1788b8021494Sopenharmony_ci 1789b8021494Sopenharmony_ci __ movprfx(z6.VnD(), p1.Zeroing(), z16.VnD()); 1790b8021494Sopenharmony_ci __ fdivr(z6.VnD(), p1.Merging(), z6.VnD(), z5.VnD()); 1791b8021494Sopenharmony_ci 1792b8021494Sopenharmony_ci __ movprfx(z31, z23); 1793b8021494Sopenharmony_ci __ fmad(z31.VnS(), p5.Merging(), z23.VnS(), z11.VnS()); 1794b8021494Sopenharmony_ci 1795b8021494Sopenharmony_ci __ movprfx(z14.VnH(), p7.Merging(), z21.VnH()); 1796b8021494Sopenharmony_ci __ fmax(z14.VnH(), p7.Merging(), z14.VnH(), 0.0); 1797b8021494Sopenharmony_ci 1798b8021494Sopenharmony_ci __ movprfx(z17.VnS(), p4.Merging(), z9.VnS()); 1799b8021494Sopenharmony_ci __ fmax(z17.VnS(), p4.Merging(), z17.VnS(), z9.VnS()); 1800b8021494Sopenharmony_ci 1801b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p3.Zeroing(), z30.VnS()); 1802b8021494Sopenharmony_ci __ fmaxnm(z1.VnS(), p3.Merging(), z1.VnS(), 0.0); 1803b8021494Sopenharmony_ci 1804b8021494Sopenharmony_ci __ movprfx(z10.VnD(), p1.Zeroing(), z17.VnD()); 1805b8021494Sopenharmony_ci __ fmaxnm(z10.VnD(), p1.Merging(), z10.VnD(), z17.VnD()); 1806b8021494Sopenharmony_ci 1807b8021494Sopenharmony_ci __ movprfx(z3, z13); 1808b8021494Sopenharmony_ci __ fmin(z3.VnS(), p0.Merging(), z3.VnS(), 0.0); 1809b8021494Sopenharmony_ci 1810b8021494Sopenharmony_ci __ movprfx(z15, z21); 1811b8021494Sopenharmony_ci __ fmin(z15.VnS(), p4.Merging(), z15.VnS(), z21.VnS()); 1812b8021494Sopenharmony_ci 1813b8021494Sopenharmony_ci __ movprfx(z30.VnH(), p7.Zeroing(), z25.VnH()); 1814b8021494Sopenharmony_ci __ fminnm(z30.VnH(), p7.Merging(), z30.VnH(), 0.0); 1815b8021494Sopenharmony_ci 1816b8021494Sopenharmony_ci __ movprfx(z31, z15); 1817b8021494Sopenharmony_ci __ fminnm(z31.VnD(), p5.Merging(), z31.VnD(), z25.VnD()); 1818b8021494Sopenharmony_ci 1819b8021494Sopenharmony_ci __ movprfx(z27, z28); 1820b8021494Sopenharmony_ci __ fmla(z27.VnD(), z28.VnD(), z12.VnD(), 1); 1821b8021494Sopenharmony_ci 1822b8021494Sopenharmony_ci __ movprfx(z26.VnH(), p6.Zeroing(), z13.VnH()); 1823b8021494Sopenharmony_ci __ fmla(z26.VnH(), p6.Merging(), z13.VnH(), z7.VnH()); 1824b8021494Sopenharmony_ci 1825b8021494Sopenharmony_ci __ movprfx(z26, z10); 1826b8021494Sopenharmony_ci __ fmla(z26.VnH(), z10.VnH(), z1.VnH(), 7); 1827b8021494Sopenharmony_ci 1828b8021494Sopenharmony_ci __ movprfx(z0, z1); 1829b8021494Sopenharmony_ci __ fmla(z0.VnS(), z25.VnS(), z1.VnS(), 3); 1830b8021494Sopenharmony_ci 1831b8021494Sopenharmony_ci __ movprfx(z7, z3); 1832b8021494Sopenharmony_ci __ fmls(z7.VnD(), z30.VnD(), z3.VnD(), 1); 1833b8021494Sopenharmony_ci 1834b8021494Sopenharmony_ci __ movprfx(z1, z24); 1835b8021494Sopenharmony_ci __ fmls(z1.VnD(), p5.Merging(), z20.VnD(), z24.VnD()); 1836b8021494Sopenharmony_ci 1837b8021494Sopenharmony_ci __ movprfx(z19, z18); 1838b8021494Sopenharmony_ci __ fmls(z19.VnH(), z18.VnH(), z7.VnH(), 4); 1839b8021494Sopenharmony_ci 1840b8021494Sopenharmony_ci __ movprfx(z0, z26); 1841b8021494Sopenharmony_ci __ fmls(z0.VnS(), z17.VnS(), z4.VnS(), 3); 1842b8021494Sopenharmony_ci 1843b8021494Sopenharmony_ci __ movprfx(z19.VnS(), p7.Zeroing(), z6.VnS()); 1844b8021494Sopenharmony_ci __ fmov(z19.VnS(), p7.Merging(), 0.0); 1845b8021494Sopenharmony_ci 1846b8021494Sopenharmony_ci __ movprfx(z21, z15); 1847b8021494Sopenharmony_ci __ fmov(z21.VnH(), p7.Merging(), 2.5); 1848b8021494Sopenharmony_ci 1849b8021494Sopenharmony_ci __ movprfx(z23, z18); 1850b8021494Sopenharmony_ci __ fmsb(z23.VnS(), p4.Merging(), z1.VnS(), z7.VnS()); 1851b8021494Sopenharmony_ci 1852b8021494Sopenharmony_ci __ movprfx(z8, z28); 1853b8021494Sopenharmony_ci __ fmul(z8.VnS(), p4.Merging(), z8.VnS(), 2.0); 1854b8021494Sopenharmony_ci 1855b8021494Sopenharmony_ci __ movprfx(z6.VnD(), p6.Merging(), z27.VnD()); 1856b8021494Sopenharmony_ci __ fmul(z6.VnD(), p6.Merging(), z6.VnD(), z27.VnD()); 1857b8021494Sopenharmony_ci 1858b8021494Sopenharmony_ci __ movprfx(z6.VnH(), p0.Merging(), z19.VnH()); 1859b8021494Sopenharmony_ci __ fmulx(z6.VnH(), p0.Merging(), z6.VnH(), z19.VnH()); 1860b8021494Sopenharmony_ci 1861b8021494Sopenharmony_ci __ movprfx(z5.VnH(), p0.Merging(), z1.VnH()); 1862b8021494Sopenharmony_ci __ fneg(z5.VnH(), p0.Merging(), z1.VnH()); 1863b8021494Sopenharmony_ci 1864b8021494Sopenharmony_ci __ movprfx(z22.VnD(), p4.Zeroing(), z24.VnD()); 1865b8021494Sopenharmony_ci __ fnmad(z22.VnD(), p4.Merging(), z24.VnD(), z12.VnD()); 1866b8021494Sopenharmony_ci 1867b8021494Sopenharmony_ci __ movprfx(z5.VnS(), p0.Merging(), z29.VnS()); 1868b8021494Sopenharmony_ci __ fnmla(z5.VnS(), p0.Merging(), z17.VnS(), z29.VnS()); 1869b8021494Sopenharmony_ci 1870b8021494Sopenharmony_ci __ movprfx(z5, z3); 1871b8021494Sopenharmony_ci __ fnmls(z5.VnD(), p5.Merging(), z3.VnD(), z2.VnD()); 1872b8021494Sopenharmony_ci 1873b8021494Sopenharmony_ci __ movprfx(z9.VnD(), p2.Zeroing(), z7.VnD()); 1874b8021494Sopenharmony_ci __ fnmsb(z9.VnD(), p2.Merging(), z7.VnD(), z23.VnD()); 1875b8021494Sopenharmony_ci 1876b8021494Sopenharmony_ci // Note that frecpe and frecps _cannot_ take movprfx. 1877b8021494Sopenharmony_ci __ movprfx(z12.VnH(), p1.Zeroing(), z17.VnH()); 1878b8021494Sopenharmony_ci __ frecpx(z12.VnH(), p1.Merging(), z4.VnH()); 1879b8021494Sopenharmony_ci 1880b8021494Sopenharmony_ci __ movprfx(z28.VnS(), p4.Zeroing(), z27.VnS()); 1881b8021494Sopenharmony_ci __ frinta(z28.VnS(), p4.Merging(), z24.VnS()); 1882b8021494Sopenharmony_ci 1883b8021494Sopenharmony_ci __ movprfx(z7.VnD(), p7.Merging(), z25.VnD()); 1884b8021494Sopenharmony_ci __ frinti(z7.VnD(), p7.Merging(), z25.VnD()); 1885b8021494Sopenharmony_ci 1886b8021494Sopenharmony_ci __ movprfx(z10, z21); 1887b8021494Sopenharmony_ci __ frintm(z10.VnD(), p5.Merging(), z26.VnD()); 1888b8021494Sopenharmony_ci 1889b8021494Sopenharmony_ci __ movprfx(z25, z21); 1890b8021494Sopenharmony_ci __ frintn(z25.VnH(), p4.Merging(), z1.VnH()); 1891b8021494Sopenharmony_ci 1892b8021494Sopenharmony_ci __ movprfx(z25, z9); 1893b8021494Sopenharmony_ci __ frintp(z25.VnH(), p1.Merging(), z9.VnH()); 1894b8021494Sopenharmony_ci 1895b8021494Sopenharmony_ci __ movprfx(z30, z16); 1896b8021494Sopenharmony_ci __ frintx(z30.VnS(), p1.Merging(), z16.VnS()); 1897b8021494Sopenharmony_ci 1898b8021494Sopenharmony_ci __ movprfx(z0.VnD(), p5.Merging(), z9.VnD()); 1899b8021494Sopenharmony_ci __ frintz(z0.VnD(), p5.Merging(), z23.VnD()); 1900b8021494Sopenharmony_ci 1901b8021494Sopenharmony_ci __ movprfx(z11.VnD(), p7.Merging(), z2.VnD()); 1902b8021494Sopenharmony_ci __ fscale(z11.VnD(), p7.Merging(), z11.VnD(), z2.VnD()); 1903b8021494Sopenharmony_ci 1904b8021494Sopenharmony_ci __ movprfx(z23.VnS(), p4.Merging(), z17.VnS()); 1905b8021494Sopenharmony_ci __ fsqrt(z23.VnS(), p4.Merging(), z10.VnS()); 1906b8021494Sopenharmony_ci 1907b8021494Sopenharmony_ci __ movprfx(z0.VnD(), p2.Merging(), z26.VnD()); 1908b8021494Sopenharmony_ci __ fsub(z0.VnD(), p2.Merging(), z0.VnD(), 1.0); 1909b8021494Sopenharmony_ci 1910b8021494Sopenharmony_ci __ movprfx(z28.VnD(), p1.Zeroing(), z16.VnD()); 1911b8021494Sopenharmony_ci __ fsub(z28.VnD(), p1.Merging(), z28.VnD(), z16.VnD()); 1912b8021494Sopenharmony_ci 1913b8021494Sopenharmony_ci __ movprfx(z22, z27); 1914b8021494Sopenharmony_ci __ fsubr(z22.VnD(), p4.Merging(), z22.VnD(), 1.0); 1915b8021494Sopenharmony_ci 1916b8021494Sopenharmony_ci __ movprfx(z4.VnS(), p2.Merging(), z26.VnS()); 1917b8021494Sopenharmony_ci __ fsubr(z4.VnS(), p2.Merging(), z4.VnS(), z26.VnS()); 1918b8021494Sopenharmony_ci 1919b8021494Sopenharmony_ci // Note that ftsmul and ftssel _cannot_ take movprfx. 1920b8021494Sopenharmony_ci __ movprfx(z10, z4); 1921b8021494Sopenharmony_ci __ ftmad(z10.VnS(), z10.VnS(), z4.VnS(), 2); 1922b8021494Sopenharmony_ci 1923b8021494Sopenharmony_ci __ movprfx(z2, z16); 1924b8021494Sopenharmony_ci __ scvtf(z2.VnD(), p1.Merging(), z16.VnS()); 1925b8021494Sopenharmony_ci 1926b8021494Sopenharmony_ci __ movprfx(z10, z20); 1927b8021494Sopenharmony_ci __ scvtf(z10.VnD(), p5.Merging(), z20.VnD()); 1928b8021494Sopenharmony_ci 1929b8021494Sopenharmony_ci __ movprfx(z29, z28); 1930b8021494Sopenharmony_ci __ scvtf(z29.VnS(), p0.Merging(), z31.VnD()); 1931b8021494Sopenharmony_ci 1932b8021494Sopenharmony_ci __ movprfx(z26.VnD(), p3.Merging(), z13.VnD()); 1933b8021494Sopenharmony_ci __ scvtf(z26.VnH(), p3.Merging(), z5.VnD()); 1934b8021494Sopenharmony_ci 1935b8021494Sopenharmony_ci __ movprfx(z7.VnD(), p3.Zeroing(), z26.VnD()); 1936b8021494Sopenharmony_ci __ ucvtf(z7.VnD(), p3.Merging(), z26.VnS()); 1937b8021494Sopenharmony_ci 1938b8021494Sopenharmony_ci __ movprfx(z13, z17); 1939b8021494Sopenharmony_ci __ ucvtf(z13.VnD(), p7.Merging(), z17.VnD()); 1940b8021494Sopenharmony_ci 1941b8021494Sopenharmony_ci __ movprfx(z24.VnD(), p1.Merging(), z31.VnD()); 1942b8021494Sopenharmony_ci __ ucvtf(z24.VnS(), p1.Merging(), z18.VnD()); 1943b8021494Sopenharmony_ci 1944b8021494Sopenharmony_ci __ movprfx(z17.VnD(), p4.Merging(), z22.VnD()); 1945b8021494Sopenharmony_ci __ ucvtf(z17.VnH(), p4.Merging(), z4.VnD()); 1946b8021494Sopenharmony_ci 1947b8021494Sopenharmony_ci __ movprfx(z30, z5); 1948b8021494Sopenharmony_ci __ fmmla(z30.VnS(), z29.VnS(), z18.VnS()); 1949b8021494Sopenharmony_ci 1950b8021494Sopenharmony_ci __ movprfx(z31, z5); 1951b8021494Sopenharmony_ci __ fmmla(z31.VnD(), z30.VnD(), z18.VnD()); 1952b8021494Sopenharmony_ci } 1953b8021494Sopenharmony_ci assm.FinalizeCode(); 1954b8021494Sopenharmony_ci 1955b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); 1956b8021494Sopenharmony_ci} 1957b8021494Sopenharmony_ci 1958b8021494Sopenharmony_ciTEST(movprfx_positive_sve2) { 1959b8021494Sopenharmony_ci Assembler assm; 1960b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); 1961b8021494Sopenharmony_ci { 1962b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 1963b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 1964b8021494Sopenharmony_ci static const size_t kPairCount = 145; 1965b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1966b8021494Sopenharmony_ci 1967b8021494Sopenharmony_ci __ movprfx(z25, z26); 1968b8021494Sopenharmony_ci __ adclb(z25.VnS(), z17.VnS(), z24.VnS()); 1969b8021494Sopenharmony_ci 1970b8021494Sopenharmony_ci __ movprfx(z0, z1); 1971b8021494Sopenharmony_ci __ adclt(z0.VnS(), z2.VnS(), z15.VnS()); 1972b8021494Sopenharmony_ci 1973b8021494Sopenharmony_ci __ movprfx(z3, z4); 1974b8021494Sopenharmony_ci __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB()); 1975b8021494Sopenharmony_ci 1976b8021494Sopenharmony_ci __ movprfx(z6, z7); 1977b8021494Sopenharmony_ci __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()); 1978b8021494Sopenharmony_ci 1979b8021494Sopenharmony_ci __ movprfx(z18, z19); 1980b8021494Sopenharmony_ci __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()); 1981b8021494Sopenharmony_ci 1982b8021494Sopenharmony_ci __ movprfx(z7, z8); 1983b8021494Sopenharmony_ci __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()); 1984b8021494Sopenharmony_ci 1985b8021494Sopenharmony_ci __ movprfx(z21, z22); 1986b8021494Sopenharmony_ci __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()); 1987b8021494Sopenharmony_ci 1988b8021494Sopenharmony_ci __ movprfx(z5, z6); 1989b8021494Sopenharmony_ci __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90); 1990b8021494Sopenharmony_ci 1991b8021494Sopenharmony_ci __ movprfx(z7, z8); 1992b8021494Sopenharmony_ci __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0); 1993b8021494Sopenharmony_ci 1994b8021494Sopenharmony_ci __ movprfx(z7, z8); 1995b8021494Sopenharmony_ci __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0); 1996b8021494Sopenharmony_ci 1997b8021494Sopenharmony_ci __ movprfx(z7, z8); 1998b8021494Sopenharmony_ci __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0); 1999b8021494Sopenharmony_ci 2000b8021494Sopenharmony_ci __ movprfx(z19, z20); 2001b8021494Sopenharmony_ci __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0); 2002b8021494Sopenharmony_ci 2003b8021494Sopenharmony_ci __ movprfx(z19, z20); 2004b8021494Sopenharmony_ci __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0); 2005b8021494Sopenharmony_ci 2006b8021494Sopenharmony_ci __ movprfx(z19, z20); 2007b8021494Sopenharmony_ci __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0); 2008b8021494Sopenharmony_ci 2009b8021494Sopenharmony_ci __ movprfx(z10, z11); 2010b8021494Sopenharmony_ci __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()); 2011b8021494Sopenharmony_ci 2012b8021494Sopenharmony_ci __ movprfx(z3, z4); 2013b8021494Sopenharmony_ci __ eorbt(z3.VnB(), z10.VnB(), z8.VnB()); 2014b8021494Sopenharmony_ci 2015b8021494Sopenharmony_ci __ movprfx(z20, z22); 2016b8021494Sopenharmony_ci __ eortb(z20.VnB(), z21.VnB(), z15.VnB()); 2017b8021494Sopenharmony_ci 2018b8021494Sopenharmony_ci __ movprfx(z14, z15); 2019b8021494Sopenharmony_ci __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()); 2020b8021494Sopenharmony_ci 2021b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p4.Merging(), z15.VnD()); 2022b8021494Sopenharmony_ci __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD()); 2023b8021494Sopenharmony_ci 2024b8021494Sopenharmony_ci __ movprfx(z15.VnH(), p0.Merging(), z16.VnH()); 2025b8021494Sopenharmony_ci __ flogb(z15.VnH(), p0.Merging(), z3.VnH()); 2026b8021494Sopenharmony_ci 2027b8021494Sopenharmony_ci __ movprfx(z2, z3); 2028b8021494Sopenharmony_ci __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()); 2029b8021494Sopenharmony_ci 2030b8021494Sopenharmony_ci __ movprfx(z22, z23); 2031b8021494Sopenharmony_ci __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()); 2032b8021494Sopenharmony_ci 2033b8021494Sopenharmony_ci __ movprfx(z1, z2); 2034b8021494Sopenharmony_ci __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()); 2035b8021494Sopenharmony_ci 2036b8021494Sopenharmony_ci __ movprfx(z16, z17); 2037b8021494Sopenharmony_ci __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()); 2038b8021494Sopenharmony_ci 2039b8021494Sopenharmony_ci __ movprfx(z16, z17); 2040b8021494Sopenharmony_ci __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH()); 2041b8021494Sopenharmony_ci 2042b8021494Sopenharmony_ci __ movprfx(z16, z17); 2043b8021494Sopenharmony_ci __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0); 2044b8021494Sopenharmony_ci 2045b8021494Sopenharmony_ci __ movprfx(z18, z19); 2046b8021494Sopenharmony_ci __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH()); 2047b8021494Sopenharmony_ci 2048b8021494Sopenharmony_ci __ movprfx(z18, z19); 2049b8021494Sopenharmony_ci __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0); 2050b8021494Sopenharmony_ci 2051b8021494Sopenharmony_ci __ movprfx(z16, z17); 2052b8021494Sopenharmony_ci __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH()); 2053b8021494Sopenharmony_ci 2054b8021494Sopenharmony_ci __ movprfx(z16, z17); 2055b8021494Sopenharmony_ci __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0); 2056b8021494Sopenharmony_ci 2057b8021494Sopenharmony_ci __ movprfx(z3, z4); 2058b8021494Sopenharmony_ci __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH()); 2059b8021494Sopenharmony_ci 2060b8021494Sopenharmony_ci __ movprfx(z3, z4); 2061b8021494Sopenharmony_ci __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0); 2062b8021494Sopenharmony_ci 2063b8021494Sopenharmony_ci __ movprfx(z2, z3); 2064b8021494Sopenharmony_ci __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0); 2065b8021494Sopenharmony_ci 2066b8021494Sopenharmony_ci __ movprfx(z2, z3); 2067b8021494Sopenharmony_ci __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0); 2068b8021494Sopenharmony_ci 2069b8021494Sopenharmony_ci __ movprfx(z2, z3); 2070b8021494Sopenharmony_ci __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0); 2071b8021494Sopenharmony_ci 2072b8021494Sopenharmony_ci __ movprfx(z2, z3); 2073b8021494Sopenharmony_ci __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0); 2074b8021494Sopenharmony_ci 2075b8021494Sopenharmony_ci __ movprfx(z2, z3); 2076b8021494Sopenharmony_ci __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0); 2077b8021494Sopenharmony_ci 2078b8021494Sopenharmony_ci __ movprfx(z2, z3); 2079b8021494Sopenharmony_ci __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0); 2080b8021494Sopenharmony_ci 2081b8021494Sopenharmony_ci __ movprfx(z17, z18); 2082b8021494Sopenharmony_ci __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()); 2083b8021494Sopenharmony_ci 2084b8021494Sopenharmony_ci __ movprfx(z13, z14); 2085b8021494Sopenharmony_ci __ saba(z13.VnB(), z2.VnB(), z31.VnB()); 2086b8021494Sopenharmony_ci 2087b8021494Sopenharmony_ci __ movprfx(z13, z14); 2088b8021494Sopenharmony_ci __ sabalb(z13.VnD(), z20.VnS(), z26.VnS()); 2089b8021494Sopenharmony_ci 2090b8021494Sopenharmony_ci __ movprfx(z14, z15); 2091b8021494Sopenharmony_ci __ sabalt(z14.VnD(), z19.VnS(), z10.VnS()); 2092b8021494Sopenharmony_ci 2093b8021494Sopenharmony_ci __ movprfx(z19.VnD(), p5.Merging(), z20.VnD()); 2094b8021494Sopenharmony_ci __ sadalp(z19.VnD(), p5.Merging(), z9.VnS()); 2095b8021494Sopenharmony_ci 2096b8021494Sopenharmony_ci __ movprfx(z17, z18); 2097b8021494Sopenharmony_ci __ sbclb(z17.VnS(), z10.VnS(), z8.VnS()); 2098b8021494Sopenharmony_ci 2099b8021494Sopenharmony_ci __ movprfx(z20, z21); 2100b8021494Sopenharmony_ci __ sbclt(z20.VnS(), z0.VnS(), z13.VnS()); 2101b8021494Sopenharmony_ci 2102b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p3.Merging(), z21.VnB()); 2103b8021494Sopenharmony_ci __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()); 2104b8021494Sopenharmony_ci 2105b8021494Sopenharmony_ci __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); 2106b8021494Sopenharmony_ci __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()); 2107b8021494Sopenharmony_ci 2108b8021494Sopenharmony_ci __ movprfx(z1.VnB(), p0.Merging(), z2.VnB()); 2109b8021494Sopenharmony_ci __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()); 2110b8021494Sopenharmony_ci 2111b8021494Sopenharmony_ci __ movprfx(z5, z6); 2112b8021494Sopenharmony_ci __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()); 2113b8021494Sopenharmony_ci 2114b8021494Sopenharmony_ci __ movprfx(z27, z28); 2115b8021494Sopenharmony_ci __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()); 2116b8021494Sopenharmony_ci 2117b8021494Sopenharmony_ci __ movprfx(z1, z2); 2118b8021494Sopenharmony_ci __ smlalb(z1.VnD(), z3.VnS(), z23.VnS()); 2119b8021494Sopenharmony_ci 2120b8021494Sopenharmony_ci __ movprfx(z1, z2); 2121b8021494Sopenharmony_ci __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2122b8021494Sopenharmony_ci 2123b8021494Sopenharmony_ci __ movprfx(z1, z2); 2124b8021494Sopenharmony_ci __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2125b8021494Sopenharmony_ci 2126b8021494Sopenharmony_ci __ movprfx(z1, z2); 2127b8021494Sopenharmony_ci __ smlalt(z1.VnD(), z3.VnS(), z23.VnS()); 2128b8021494Sopenharmony_ci 2129b8021494Sopenharmony_ci __ movprfx(z1, z2); 2130b8021494Sopenharmony_ci __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2131b8021494Sopenharmony_ci 2132b8021494Sopenharmony_ci __ movprfx(z1, z2); 2133b8021494Sopenharmony_ci __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2134b8021494Sopenharmony_ci 2135b8021494Sopenharmony_ci __ movprfx(z1, z2); 2136b8021494Sopenharmony_ci __ smlslb(z1.VnD(), z3.VnS(), z23.VnS()); 2137b8021494Sopenharmony_ci 2138b8021494Sopenharmony_ci __ movprfx(z1, z2); 2139b8021494Sopenharmony_ci __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2140b8021494Sopenharmony_ci 2141b8021494Sopenharmony_ci __ movprfx(z1, z2); 2142b8021494Sopenharmony_ci __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2143b8021494Sopenharmony_ci 2144b8021494Sopenharmony_ci __ movprfx(z1, z2); 2145b8021494Sopenharmony_ci __ smlslt(z1.VnD(), z3.VnS(), z23.VnS()); 2146b8021494Sopenharmony_ci 2147b8021494Sopenharmony_ci __ movprfx(z1, z2); 2148b8021494Sopenharmony_ci __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2149b8021494Sopenharmony_ci 2150b8021494Sopenharmony_ci __ movprfx(z1, z2); 2151b8021494Sopenharmony_ci __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2152b8021494Sopenharmony_ci 2153b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p1.Merging(), z30.VnB()); 2154b8021494Sopenharmony_ci __ sqabs(z29.VnB(), p1.Merging(), z18.VnB()); 2155b8021494Sopenharmony_ci 2156b8021494Sopenharmony_ci __ movprfx(z28.VnB(), p0.Merging(), z29.VnB()); 2157b8021494Sopenharmony_ci __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()); 2158b8021494Sopenharmony_ci 2159b8021494Sopenharmony_ci __ movprfx(z20, z21); 2160b8021494Sopenharmony_ci __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90); 2161b8021494Sopenharmony_ci 2162b8021494Sopenharmony_ci __ movprfx(z6, z7); 2163b8021494Sopenharmony_ci __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()); 2164b8021494Sopenharmony_ci 2165b8021494Sopenharmony_ci __ movprfx(z6, z7); 2166b8021494Sopenharmony_ci __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0); 2167b8021494Sopenharmony_ci 2168b8021494Sopenharmony_ci __ movprfx(z6, z7); 2169b8021494Sopenharmony_ci __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0); 2170b8021494Sopenharmony_ci 2171b8021494Sopenharmony_ci __ movprfx(z23, z24); 2172b8021494Sopenharmony_ci __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()); 2173b8021494Sopenharmony_ci 2174b8021494Sopenharmony_ci __ movprfx(z11, z12); 2175b8021494Sopenharmony_ci __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS()); 2176b8021494Sopenharmony_ci 2177b8021494Sopenharmony_ci __ movprfx(z11, z12); 2178b8021494Sopenharmony_ci __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0); 2179b8021494Sopenharmony_ci 2180b8021494Sopenharmony_ci __ movprfx(z11, z12); 2181b8021494Sopenharmony_ci __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0); 2182b8021494Sopenharmony_ci 2183b8021494Sopenharmony_ci __ movprfx(z16, z17); 2184b8021494Sopenharmony_ci __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()); 2185b8021494Sopenharmony_ci 2186b8021494Sopenharmony_ci __ movprfx(z16, z17); 2187b8021494Sopenharmony_ci __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0); 2188b8021494Sopenharmony_ci 2189b8021494Sopenharmony_ci __ movprfx(z16, z17); 2190b8021494Sopenharmony_ci __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0); 2191b8021494Sopenharmony_ci 2192b8021494Sopenharmony_ci __ movprfx(z26, z27); 2193b8021494Sopenharmony_ci __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()); 2194b8021494Sopenharmony_ci 2195b8021494Sopenharmony_ci __ movprfx(z21, z22); 2196b8021494Sopenharmony_ci __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()); 2197b8021494Sopenharmony_ci 2198b8021494Sopenharmony_ci __ movprfx(z21, z22); 2199b8021494Sopenharmony_ci __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0); 2200b8021494Sopenharmony_ci 2201b8021494Sopenharmony_ci __ movprfx(z21, z22); 2202b8021494Sopenharmony_ci __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0); 2203b8021494Sopenharmony_ci 2204b8021494Sopenharmony_ci __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); 2205b8021494Sopenharmony_ci __ sqneg(z21.VnB(), p0.Merging(), z17.VnB()); 2206b8021494Sopenharmony_ci 2207b8021494Sopenharmony_ci __ movprfx(z31, z0); 2208b8021494Sopenharmony_ci __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0); 2209b8021494Sopenharmony_ci 2210b8021494Sopenharmony_ci __ movprfx(z31, z0); 2211b8021494Sopenharmony_ci __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0); 2212b8021494Sopenharmony_ci 2213b8021494Sopenharmony_ci __ movprfx(z31, z0); 2214b8021494Sopenharmony_ci __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0); 2215b8021494Sopenharmony_ci 2216b8021494Sopenharmony_ci __ movprfx(z27, z28); 2217b8021494Sopenharmony_ci __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()); 2218b8021494Sopenharmony_ci 2219b8021494Sopenharmony_ci __ movprfx(z27, z28); 2220b8021494Sopenharmony_ci __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0); 2221b8021494Sopenharmony_ci 2222b8021494Sopenharmony_ci __ movprfx(z27, z28); 2223b8021494Sopenharmony_ci __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0); 2224b8021494Sopenharmony_ci 2225b8021494Sopenharmony_ci __ movprfx(z27, z28); 2226b8021494Sopenharmony_ci __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0); 2227b8021494Sopenharmony_ci 2228b8021494Sopenharmony_ci __ movprfx(z11, z12); 2229b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()); 2230b8021494Sopenharmony_ci 2231b8021494Sopenharmony_ci __ movprfx(z11, z12); 2232b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0); 2233b8021494Sopenharmony_ci 2234b8021494Sopenharmony_ci __ movprfx(z11, z12); 2235b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0); 2236b8021494Sopenharmony_ci 2237b8021494Sopenharmony_ci __ movprfx(z11, z12); 2238b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0); 2239b8021494Sopenharmony_ci 2240b8021494Sopenharmony_ci __ movprfx(z31.VnB(), p5.Merging(), z0.VnB()); 2241b8021494Sopenharmony_ci __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()); 2242b8021494Sopenharmony_ci 2243b8021494Sopenharmony_ci __ movprfx(z25.VnB(), p6.Merging(), z26.VnB()); 2244b8021494Sopenharmony_ci __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()); 2245b8021494Sopenharmony_ci 2246b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); 2247b8021494Sopenharmony_ci __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0); 2248b8021494Sopenharmony_ci 2249b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); 2250b8021494Sopenharmony_ci __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB()); 2251b8021494Sopenharmony_ci 2252b8021494Sopenharmony_ci __ movprfx(z7.VnB(), p3.Merging(), z8.VnB()); 2253b8021494Sopenharmony_ci __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()); 2254b8021494Sopenharmony_ci 2255b8021494Sopenharmony_ci __ movprfx(z10.VnB(), p1.Merging(), z11.VnB()); 2256b8021494Sopenharmony_ci __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0); 2257b8021494Sopenharmony_ci 2258b8021494Sopenharmony_ci __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 2259b8021494Sopenharmony_ci __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); 2260b8021494Sopenharmony_ci 2261b8021494Sopenharmony_ci __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 2262b8021494Sopenharmony_ci __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); 2263b8021494Sopenharmony_ci 2264b8021494Sopenharmony_ci __ movprfx(z23.VnB(), p4.Merging(), z24.VnB()); 2265b8021494Sopenharmony_ci __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()); 2266b8021494Sopenharmony_ci 2267b8021494Sopenharmony_ci __ movprfx(z31.VnB(), p7.Merging(), z0.VnB()); 2268b8021494Sopenharmony_ci __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB()); 2269b8021494Sopenharmony_ci 2270b8021494Sopenharmony_ci __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 2271b8021494Sopenharmony_ci __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()); 2272b8021494Sopenharmony_ci 2273b8021494Sopenharmony_ci __ movprfx(z12.VnB(), p0.Merging(), z13.VnB()); 2274b8021494Sopenharmony_ci __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1); 2275b8021494Sopenharmony_ci 2276b8021494Sopenharmony_ci __ movprfx(z0, z1); 2277b8021494Sopenharmony_ci __ srsra(z0.VnB(), z8.VnB(), 1); 2278b8021494Sopenharmony_ci 2279b8021494Sopenharmony_ci __ movprfx(z0, z1); 2280b8021494Sopenharmony_ci __ ssra(z0.VnB(), z8.VnB(), 1); 2281b8021494Sopenharmony_ci 2282b8021494Sopenharmony_ci __ movprfx(z26.VnB(), p2.Merging(), z27.VnB()); 2283b8021494Sopenharmony_ci __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()); 2284b8021494Sopenharmony_ci 2285b8021494Sopenharmony_ci __ movprfx(z23, z24); 2286b8021494Sopenharmony_ci __ uaba(z23.VnB(), z22.VnB(), z20.VnB()); 2287b8021494Sopenharmony_ci 2288b8021494Sopenharmony_ci __ movprfx(z11, z12); 2289b8021494Sopenharmony_ci __ uabalb(z11.VnD(), z25.VnS(), z12.VnS()); 2290b8021494Sopenharmony_ci 2291b8021494Sopenharmony_ci __ movprfx(z4, z5); 2292b8021494Sopenharmony_ci __ uabalt(z4.VnD(), z2.VnS(), z31.VnS()); 2293b8021494Sopenharmony_ci 2294b8021494Sopenharmony_ci __ movprfx(z20.VnD(), p4.Merging(), z21.VnD()); 2295b8021494Sopenharmony_ci __ uadalp(z20.VnD(), p4.Merging(), z5.VnS()); 2296b8021494Sopenharmony_ci 2297b8021494Sopenharmony_ci __ movprfx(z21.VnB(), p2.Merging(), z22.VnB()); 2298b8021494Sopenharmony_ci __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()); 2299b8021494Sopenharmony_ci 2300b8021494Sopenharmony_ci __ movprfx(z1.VnB(), p4.Merging(), z2.VnB()); 2301b8021494Sopenharmony_ci __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()); 2302b8021494Sopenharmony_ci 2303b8021494Sopenharmony_ci __ movprfx(z18.VnB(), p0.Merging(), z19.VnB()); 2304b8021494Sopenharmony_ci __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()); 2305b8021494Sopenharmony_ci 2306b8021494Sopenharmony_ci __ movprfx(z7, z8); 2307b8021494Sopenharmony_ci __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()); 2308b8021494Sopenharmony_ci 2309b8021494Sopenharmony_ci __ movprfx(z10, z11); 2310b8021494Sopenharmony_ci __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()); 2311b8021494Sopenharmony_ci 2312b8021494Sopenharmony_ci __ movprfx(z31, z0); 2313b8021494Sopenharmony_ci __ umlalb(z31.VnD(), z9.VnS(), z21.VnS()); 2314b8021494Sopenharmony_ci 2315b8021494Sopenharmony_ci __ movprfx(z31, z0); 2316b8021494Sopenharmony_ci __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0); 2317b8021494Sopenharmony_ci 2318b8021494Sopenharmony_ci __ movprfx(z31, z0); 2319b8021494Sopenharmony_ci __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0); 2320b8021494Sopenharmony_ci 2321b8021494Sopenharmony_ci __ movprfx(z11, z12); 2322b8021494Sopenharmony_ci __ umlalt(z11.VnD(), z5.VnS(), z22.VnS()); 2323b8021494Sopenharmony_ci 2324b8021494Sopenharmony_ci __ movprfx(z11, z12); 2325b8021494Sopenharmony_ci __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0); 2326b8021494Sopenharmony_ci 2327b8021494Sopenharmony_ci __ movprfx(z11, z12); 2328b8021494Sopenharmony_ci __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0); 2329b8021494Sopenharmony_ci 2330b8021494Sopenharmony_ci __ movprfx(z28, z29); 2331b8021494Sopenharmony_ci __ umlslb(z28.VnD(), z13.VnS(), z9.VnS()); 2332b8021494Sopenharmony_ci 2333b8021494Sopenharmony_ci __ movprfx(z28, z29); 2334b8021494Sopenharmony_ci __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0); 2335b8021494Sopenharmony_ci 2336b8021494Sopenharmony_ci __ movprfx(z28, z29); 2337b8021494Sopenharmony_ci __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0); 2338b8021494Sopenharmony_ci 2339b8021494Sopenharmony_ci __ movprfx(z9, z10); 2340b8021494Sopenharmony_ci __ umlslt(z9.VnD(), z12.VnS(), z30.VnS()); 2341b8021494Sopenharmony_ci 2342b8021494Sopenharmony_ci __ movprfx(z9, z10); 2343b8021494Sopenharmony_ci __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0); 2344b8021494Sopenharmony_ci 2345b8021494Sopenharmony_ci __ movprfx(z9, z10); 2346b8021494Sopenharmony_ci __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0); 2347b8021494Sopenharmony_ci 2348b8021494Sopenharmony_ci __ movprfx(z24.VnB(), p7.Merging(), z25.VnB()); 2349b8021494Sopenharmony_ci __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), 2350b8021494Sopenharmony_ci 2351b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p1.Merging(), z21.VnB()); 2352b8021494Sopenharmony_ci __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()); 2353b8021494Sopenharmony_ci 2354b8021494Sopenharmony_ci __ movprfx(z8.VnB(), p5.Merging(), z9.VnB()); 2355b8021494Sopenharmony_ci __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()); 2356b8021494Sopenharmony_ci 2357b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); 2358b8021494Sopenharmony_ci __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0); 2359b8021494Sopenharmony_ci 2360b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); 2361b8021494Sopenharmony_ci __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB()); 2362b8021494Sopenharmony_ci 2363b8021494Sopenharmony_ci __ movprfx(z12.VnB(), p1.Merging(), z13.VnB()); 2364b8021494Sopenharmony_ci __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB()); 2365b8021494Sopenharmony_ci 2366b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); 2367b8021494Sopenharmony_ci __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); 2368b8021494Sopenharmony_ci 2369b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); 2370b8021494Sopenharmony_ci __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); 2371b8021494Sopenharmony_ci 2372b8021494Sopenharmony_ci __ movprfx(z25.VnS(), p7.Merging(), z26.VnS()); 2373b8021494Sopenharmony_ci __ urecpe(z25.VnS(), p7.Merging(), z2.VnS()); 2374b8021494Sopenharmony_ci 2375b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p4.Merging(), z30.VnB()); 2376b8021494Sopenharmony_ci __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()); 2377b8021494Sopenharmony_ci 2378b8021494Sopenharmony_ci __ movprfx(z15.VnB(), p2.Merging(), z16.VnB()); 2379b8021494Sopenharmony_ci __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()); 2380b8021494Sopenharmony_ci 2381b8021494Sopenharmony_ci __ movprfx(z27.VnB(), p1.Merging(), z28.VnB()); 2382b8021494Sopenharmony_ci __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()); 2383b8021494Sopenharmony_ci 2384b8021494Sopenharmony_ci __ movprfx(z31.VnB(), p2.Merging(), z0.VnB()); 2385b8021494Sopenharmony_ci __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1); 2386b8021494Sopenharmony_ci 2387b8021494Sopenharmony_ci __ movprfx(z4.VnS(), p3.Merging(), z5.VnS()); 2388b8021494Sopenharmony_ci __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS()); 2389b8021494Sopenharmony_ci 2390b8021494Sopenharmony_ci __ movprfx(z0, z1); 2391b8021494Sopenharmony_ci __ ursra(z0.VnB(), z8.VnB(), 1); 2392b8021494Sopenharmony_ci 2393b8021494Sopenharmony_ci __ movprfx(z25.VnB(), p4.Merging(), z26.VnB()); 2394b8021494Sopenharmony_ci __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()); 2395b8021494Sopenharmony_ci 2396b8021494Sopenharmony_ci __ movprfx(z0, z1); 2397b8021494Sopenharmony_ci __ usra(z0.VnB(), z8.VnB(), 1); 2398b8021494Sopenharmony_ci 2399b8021494Sopenharmony_ci __ movprfx(z16, z17); 2400b8021494Sopenharmony_ci __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1); 2401b8021494Sopenharmony_ci } 2402b8021494Sopenharmony_ci assm.FinalizeCode(); 2403b8021494Sopenharmony_ci 2404b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); 2405b8021494Sopenharmony_ci} 2406b8021494Sopenharmony_ci 2407b8021494Sopenharmony_ciTEST(movprfx_negative_instructions_sve2) { 2408b8021494Sopenharmony_ci Assembler assm; 2409b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, 2410b8021494Sopenharmony_ci CPUFeatures::kSVE2, 2411b8021494Sopenharmony_ci CPUFeatures::kSVEBitPerm); 2412b8021494Sopenharmony_ci { 2413b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 2414b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 2415b8021494Sopenharmony_ci static const size_t kPairCount = 134; 2416b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 2417b8021494Sopenharmony_ci 2418b8021494Sopenharmony_ci __ movprfx(z29, z30); 2419b8021494Sopenharmony_ci __ addhnb(z29.VnS(), z19.VnD(), z2.VnD()); 2420b8021494Sopenharmony_ci 2421b8021494Sopenharmony_ci __ movprfx(z8, z9); 2422b8021494Sopenharmony_ci __ addhnt(z8.VnS(), z12.VnD(), z6.VnD()); 2423b8021494Sopenharmony_ci 2424b8021494Sopenharmony_ci __ movprfx(z18, z19); 2425b8021494Sopenharmony_ci __ bdep(z18.VnB(), z10.VnB(), z0.VnB()); 2426b8021494Sopenharmony_ci 2427b8021494Sopenharmony_ci __ movprfx(z6, z7); 2428b8021494Sopenharmony_ci __ bext(z6.VnB(), z2.VnB(), z5.VnB()); 2429b8021494Sopenharmony_ci 2430b8021494Sopenharmony_ci __ movprfx(z24, z25); 2431b8021494Sopenharmony_ci __ bgrp(z24.VnB(), z9.VnB(), z5.VnB()); 2432b8021494Sopenharmony_ci 2433b8021494Sopenharmony_ci __ movprfx(z1, z2); 2434b8021494Sopenharmony_ci __ fcvtlt(z1.VnD(), p1.Merging(), z28.VnS()); 2435b8021494Sopenharmony_ci 2436b8021494Sopenharmony_ci __ movprfx(z1, z2); 2437b8021494Sopenharmony_ci __ fcvtlt(z1.VnS(), p1.Merging(), z28.VnH()); 2438b8021494Sopenharmony_ci 2439b8021494Sopenharmony_ci __ movprfx(z4, z5); 2440b8021494Sopenharmony_ci __ fcvtnt(z4.VnH(), p7.Merging(), z0.VnS()); 2441b8021494Sopenharmony_ci 2442b8021494Sopenharmony_ci __ movprfx(z4, z5); 2443b8021494Sopenharmony_ci __ fcvtnt(z4.VnS(), p7.Merging(), z0.VnD()); 2444b8021494Sopenharmony_ci 2445b8021494Sopenharmony_ci __ movprfx(z27, z28); 2446b8021494Sopenharmony_ci __ fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD()); 2447b8021494Sopenharmony_ci 2448b8021494Sopenharmony_ci __ movprfx(z24, z25); 2449b8021494Sopenharmony_ci __ histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS()); 2450b8021494Sopenharmony_ci 2451b8021494Sopenharmony_ci __ movprfx(z22, z23); 2452b8021494Sopenharmony_ci __ histseg(z22.VnB(), z14.VnB(), z8.VnB()); 2453b8021494Sopenharmony_ci 2454b8021494Sopenharmony_ci __ movprfx(z21, z22); 2455b8021494Sopenharmony_ci __ ldnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), x23)); 2456b8021494Sopenharmony_ci 2457b8021494Sopenharmony_ci __ movprfx(z21, z22); 2458b8021494Sopenharmony_ci __ ldnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23)); 2459b8021494Sopenharmony_ci 2460b8021494Sopenharmony_ci __ movprfx(z10, z11); 2461b8021494Sopenharmony_ci __ ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z23.VnD(), x6)); 2462b8021494Sopenharmony_ci 2463b8021494Sopenharmony_ci __ movprfx(z30, z31); 2464b8021494Sopenharmony_ci __ ldnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x11)); 2465b8021494Sopenharmony_ci 2466b8021494Sopenharmony_ci __ movprfx(z30, z31); 2467b8021494Sopenharmony_ci __ ldnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x11)); 2468b8021494Sopenharmony_ci 2469b8021494Sopenharmony_ci __ movprfx(z7, z8); 2470b8021494Sopenharmony_ci __ ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11)); 2471b8021494Sopenharmony_ci 2472b8021494Sopenharmony_ci __ movprfx(z7, z8); 2473b8021494Sopenharmony_ci __ ldnt1sb(z7.VnD(), p3.Zeroing(), SVEMemOperand(z18.VnD(), x11)); 2474b8021494Sopenharmony_ci 2475b8021494Sopenharmony_ci __ movprfx(z17, z18); 2476b8021494Sopenharmony_ci __ ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19)); 2477b8021494Sopenharmony_ci 2478b8021494Sopenharmony_ci __ movprfx(z17, z18); 2479b8021494Sopenharmony_ci __ ldnt1sh(z17.VnD(), p5.Zeroing(), SVEMemOperand(z31.VnD(), x19)); 2480b8021494Sopenharmony_ci 2481b8021494Sopenharmony_ci __ movprfx(z3, z4); 2482b8021494Sopenharmony_ci __ ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10)); 2483b8021494Sopenharmony_ci 2484b8021494Sopenharmony_ci __ movprfx(z0, z1); 2485b8021494Sopenharmony_ci __ ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1)); 2486b8021494Sopenharmony_ci 2487b8021494Sopenharmony_ci __ movprfx(z0, z1); 2488b8021494Sopenharmony_ci __ ldnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1)); 2489b8021494Sopenharmony_ci 2490b8021494Sopenharmony_ci __ movprfx(z18, z19); 2491b8021494Sopenharmony_ci __ match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB()); 2492b8021494Sopenharmony_ci 2493b8021494Sopenharmony_ci __ movprfx(z15, z16); 2494b8021494Sopenharmony_ci __ mul(z15.VnB(), z15.VnB(), z15.VnB()); 2495b8021494Sopenharmony_ci 2496b8021494Sopenharmony_ci __ movprfx(z15, z16); 2497b8021494Sopenharmony_ci __ mul(z15.VnH(), z15.VnH(), z1.VnH(), 0); 2498b8021494Sopenharmony_ci 2499b8021494Sopenharmony_ci __ movprfx(z15, z16); 2500b8021494Sopenharmony_ci __ mul(z15.VnS(), z15.VnS(), z1.VnS(), 0); 2501b8021494Sopenharmony_ci 2502b8021494Sopenharmony_ci __ movprfx(z15, z16); 2503b8021494Sopenharmony_ci __ mul(z15.VnD(), z15.VnD(), z1.VnD(), 0); 2504b8021494Sopenharmony_ci 2505b8021494Sopenharmony_ci __ movprfx(z20, z21); 2506b8021494Sopenharmony_ci __ nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB()); 2507b8021494Sopenharmony_ci 2508b8021494Sopenharmony_ci __ movprfx(z0, z1); 2509b8021494Sopenharmony_ci __ pmul(z0.VnB(), z5.VnB(), z5.VnB()); 2510b8021494Sopenharmony_ci 2511b8021494Sopenharmony_ci __ movprfx(z12, z13); 2512b8021494Sopenharmony_ci __ pmullb(z12.VnD(), z21.VnS(), z12.VnS()); 2513b8021494Sopenharmony_ci 2514b8021494Sopenharmony_ci __ movprfx(z31, z0); 2515b8021494Sopenharmony_ci __ pmullt(z31.VnD(), z30.VnS(), z26.VnS()); 2516b8021494Sopenharmony_ci 2517b8021494Sopenharmony_ci __ movprfx(z0, z1); 2518b8021494Sopenharmony_ci __ raddhnb(z0.VnS(), z11.VnD(), z10.VnD()); 2519b8021494Sopenharmony_ci 2520b8021494Sopenharmony_ci __ movprfx(z23, z24); 2521b8021494Sopenharmony_ci __ raddhnt(z23.VnS(), z27.VnD(), z9.VnD()); 2522b8021494Sopenharmony_ci 2523b8021494Sopenharmony_ci __ movprfx(z5, z6); 2524b8021494Sopenharmony_ci __ rshrnb(z5.VnB(), z1.VnH(), 1); 2525b8021494Sopenharmony_ci 2526b8021494Sopenharmony_ci __ movprfx(z5, z6); 2527b8021494Sopenharmony_ci __ rshrnt(z5.VnB(), z1.VnH(), 8); 2528b8021494Sopenharmony_ci 2529b8021494Sopenharmony_ci __ movprfx(z30, z31); 2530b8021494Sopenharmony_ci __ rsubhnb(z30.VnS(), z29.VnD(), z11.VnD()); 2531b8021494Sopenharmony_ci 2532b8021494Sopenharmony_ci __ movprfx(z25, z26); 2533b8021494Sopenharmony_ci __ rsubhnt(z25.VnS(), z7.VnD(), z18.VnD()); 2534b8021494Sopenharmony_ci 2535b8021494Sopenharmony_ci __ movprfx(z2, z3); 2536b8021494Sopenharmony_ci __ sabdlb(z2.VnD(), z21.VnS(), z3.VnS()); 2537b8021494Sopenharmony_ci 2538b8021494Sopenharmony_ci __ movprfx(z25, z26); 2539b8021494Sopenharmony_ci __ sabdlt(z25.VnD(), z23.VnS(), z17.VnS()); 2540b8021494Sopenharmony_ci 2541b8021494Sopenharmony_ci __ movprfx(z24, z25); 2542b8021494Sopenharmony_ci __ saddlb(z24.VnD(), z30.VnS(), z16.VnS()); 2543b8021494Sopenharmony_ci 2544b8021494Sopenharmony_ci __ movprfx(z15, z16); 2545b8021494Sopenharmony_ci __ saddlbt(z15.VnD(), z6.VnS(), z18.VnS()); 2546b8021494Sopenharmony_ci 2547b8021494Sopenharmony_ci __ movprfx(z21, z22); 2548b8021494Sopenharmony_ci __ saddlt(z21.VnD(), z29.VnS(), z31.VnS()); 2549b8021494Sopenharmony_ci 2550b8021494Sopenharmony_ci __ movprfx(z12, z13); 2551b8021494Sopenharmony_ci __ saddwb(z12.VnD(), z8.VnD(), z8.VnS()); 2552b8021494Sopenharmony_ci 2553b8021494Sopenharmony_ci __ movprfx(z24, z25); 2554b8021494Sopenharmony_ci __ saddwt(z24.VnD(), z0.VnD(), z3.VnS()); 2555b8021494Sopenharmony_ci 2556b8021494Sopenharmony_ci __ movprfx(z7, z8); 2557b8021494Sopenharmony_ci __ shrnb(z7.VnB(), z4.VnH(), 1); 2558b8021494Sopenharmony_ci 2559b8021494Sopenharmony_ci __ movprfx(z21, z22); 2560b8021494Sopenharmony_ci __ shrnt(z21.VnB(), z29.VnH(), 1); 2561b8021494Sopenharmony_ci 2562b8021494Sopenharmony_ci __ movprfx(z29, z30); 2563b8021494Sopenharmony_ci __ sli(z29.VnB(), z7.VnB(), 0); 2564b8021494Sopenharmony_ci 2565b8021494Sopenharmony_ci __ movprfx(z23, z24); 2566b8021494Sopenharmony_ci __ smulh(z23.VnB(), z23.VnB(), z3.VnB()); 2567b8021494Sopenharmony_ci 2568b8021494Sopenharmony_ci __ movprfx(z10, z11); 2569b8021494Sopenharmony_ci __ smullb(z10.VnD(), z4.VnS(), z4.VnS()); 2570b8021494Sopenharmony_ci 2571b8021494Sopenharmony_ci __ movprfx(z10, z11); 2572b8021494Sopenharmony_ci __ smullb(z10.VnS(), z4.VnH(), z4.VnH(), 0); 2573b8021494Sopenharmony_ci 2574b8021494Sopenharmony_ci __ movprfx(z10, z11); 2575b8021494Sopenharmony_ci __ smullb(z10.VnD(), z4.VnS(), z4.VnS(), 0); 2576b8021494Sopenharmony_ci 2577b8021494Sopenharmony_ci __ movprfx(z31, z0); 2578b8021494Sopenharmony_ci __ smullt(z31.VnD(), z26.VnS(), z5.VnS()); 2579b8021494Sopenharmony_ci 2580b8021494Sopenharmony_ci __ movprfx(z31, z0); 2581b8021494Sopenharmony_ci __ smullt(z31.VnS(), z26.VnH(), z5.VnH(), 0); 2582b8021494Sopenharmony_ci 2583b8021494Sopenharmony_ci __ movprfx(z31, z0); 2584b8021494Sopenharmony_ci __ smullt(z31.VnD(), z26.VnS(), z5.VnS(), 0); 2585b8021494Sopenharmony_ci 2586b8021494Sopenharmony_ci __ movprfx(z4, z5); 2587b8021494Sopenharmony_ci __ splice_con(z4.VnB(), p7.Merging(), z0.VnB(), z1.VnB()); 2588b8021494Sopenharmony_ci 2589b8021494Sopenharmony_ci __ movprfx(z18, z19); 2590b8021494Sopenharmony_ci __ sqdmulh(z18.VnB(), z25.VnB(), z1.VnB()); 2591b8021494Sopenharmony_ci 2592b8021494Sopenharmony_ci __ movprfx(z18, z19); 2593b8021494Sopenharmony_ci __ sqdmulh(z18.VnH(), z25.VnH(), z1.VnH(), 0); 2594b8021494Sopenharmony_ci 2595b8021494Sopenharmony_ci __ movprfx(z18, z19); 2596b8021494Sopenharmony_ci __ sqdmulh(z18.VnS(), z25.VnS(), z1.VnS(), 0); 2597b8021494Sopenharmony_ci 2598b8021494Sopenharmony_ci __ movprfx(z18, z19); 2599b8021494Sopenharmony_ci __ sqdmulh(z18.VnD(), z25.VnD(), z1.VnD(), 0); 2600b8021494Sopenharmony_ci 2601b8021494Sopenharmony_ci __ movprfx(z1, z2); 2602b8021494Sopenharmony_ci __ sqdmullb(z1.VnD(), z31.VnS(), z21.VnS()); 2603b8021494Sopenharmony_ci 2604b8021494Sopenharmony_ci __ movprfx(z1, z2); 2605b8021494Sopenharmony_ci __ sqdmullb(z1.VnS(), z31.VnH(), z1.VnH(), 0); 2606b8021494Sopenharmony_ci 2607b8021494Sopenharmony_ci __ movprfx(z1, z2); 2608b8021494Sopenharmony_ci __ sqdmullb(z1.VnD(), z31.VnS(), z1.VnS(), 0); 2609b8021494Sopenharmony_ci 2610b8021494Sopenharmony_ci __ movprfx(z2, z3); 2611b8021494Sopenharmony_ci __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS()); 2612b8021494Sopenharmony_ci 2613b8021494Sopenharmony_ci __ movprfx(z2, z3); 2614b8021494Sopenharmony_ci __ sqdmullt(z2.VnS(), z1.VnH(), z5.VnH(), 0); 2615b8021494Sopenharmony_ci 2616b8021494Sopenharmony_ci __ movprfx(z2, z3); 2617b8021494Sopenharmony_ci __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS(), 0); 2618b8021494Sopenharmony_ci 2619b8021494Sopenharmony_ci __ movprfx(z21, z22); 2620b8021494Sopenharmony_ci __ sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB()); 2621b8021494Sopenharmony_ci 2622b8021494Sopenharmony_ci __ movprfx(z21, z22); 2623b8021494Sopenharmony_ci __ sqrdmulh(z21.VnH(), z21.VnH(), z2.VnH(), 0); 2624b8021494Sopenharmony_ci 2625b8021494Sopenharmony_ci __ movprfx(z21, z22); 2626b8021494Sopenharmony_ci __ sqrdmulh(z21.VnS(), z21.VnS(), z2.VnS(), 0); 2627b8021494Sopenharmony_ci 2628b8021494Sopenharmony_ci __ movprfx(z21, z22); 2629b8021494Sopenharmony_ci __ sqrdmulh(z21.VnD(), z21.VnD(), z2.VnD(), 0); 2630b8021494Sopenharmony_ci 2631b8021494Sopenharmony_ci __ movprfx(z1, z2); 2632b8021494Sopenharmony_ci __ sqrshrnb(z1.VnB(), z1.VnH(), 1); 2633b8021494Sopenharmony_ci 2634b8021494Sopenharmony_ci __ movprfx(z24, z25); 2635b8021494Sopenharmony_ci __ sqrshrnt(z24.VnB(), z19.VnH(), 8); 2636b8021494Sopenharmony_ci 2637b8021494Sopenharmony_ci __ movprfx(z23, z24); 2638b8021494Sopenharmony_ci __ sqrshrunb(z23.VnB(), z28.VnH(), 1); 2639b8021494Sopenharmony_ci 2640b8021494Sopenharmony_ci __ movprfx(z9, z10); 2641b8021494Sopenharmony_ci __ sqrshrunt(z9.VnB(), z15.VnH(), 8); 2642b8021494Sopenharmony_ci 2643b8021494Sopenharmony_ci __ movprfx(z25, z26); 2644b8021494Sopenharmony_ci __ sqshrnb(z25.VnB(), z1.VnH(), 1); 2645b8021494Sopenharmony_ci 2646b8021494Sopenharmony_ci __ movprfx(z0, z1); 2647b8021494Sopenharmony_ci __ sqshrnt(z0.VnB(), z25.VnH(), 8); 2648b8021494Sopenharmony_ci 2649b8021494Sopenharmony_ci __ movprfx(z25, z26); 2650b8021494Sopenharmony_ci __ sqshrunb(z25.VnB(), z10.VnH(), 1); 2651b8021494Sopenharmony_ci 2652b8021494Sopenharmony_ci __ movprfx(z20, z21); 2653b8021494Sopenharmony_ci __ sqshrunt(z20.VnB(), z3.VnH(), 8); 2654b8021494Sopenharmony_ci 2655b8021494Sopenharmony_ci __ movprfx(z2, z3); 2656b8021494Sopenharmony_ci __ sqxtnb(z2.VnB(), z0.VnH()); 2657b8021494Sopenharmony_ci 2658b8021494Sopenharmony_ci __ movprfx(z31, z0); 2659b8021494Sopenharmony_ci __ sqxtnt(z31.VnB(), z18.VnH()); 2660b8021494Sopenharmony_ci 2661b8021494Sopenharmony_ci __ movprfx(z28, z29); 2662b8021494Sopenharmony_ci __ sqxtunb(z28.VnB(), z6.VnH()); 2663b8021494Sopenharmony_ci 2664b8021494Sopenharmony_ci __ movprfx(z14, z15); 2665b8021494Sopenharmony_ci __ sqxtunt(z14.VnB(), z31.VnH()); 2666b8021494Sopenharmony_ci 2667b8021494Sopenharmony_ci __ movprfx(z6, z7); 2668b8021494Sopenharmony_ci __ sri(z6.VnB(), z9.VnB(), 1); 2669b8021494Sopenharmony_ci 2670b8021494Sopenharmony_ci __ movprfx(z2, z3); 2671b8021494Sopenharmony_ci __ sshllb(z2.VnH(), z20.VnB(), 0); 2672b8021494Sopenharmony_ci 2673b8021494Sopenharmony_ci __ movprfx(z27, z28); 2674b8021494Sopenharmony_ci __ sshllt(z27.VnH(), z8.VnB(), 0); 2675b8021494Sopenharmony_ci 2676b8021494Sopenharmony_ci __ movprfx(z4, z5); 2677b8021494Sopenharmony_ci __ ssublb(z4.VnD(), z23.VnS(), z7.VnS()); 2678b8021494Sopenharmony_ci 2679b8021494Sopenharmony_ci __ movprfx(z6, z7); 2680b8021494Sopenharmony_ci __ ssublbt(z6.VnD(), z28.VnS(), z12.VnS()); 2681b8021494Sopenharmony_ci 2682b8021494Sopenharmony_ci __ movprfx(z12, z13); 2683b8021494Sopenharmony_ci __ ssublt(z12.VnD(), z13.VnS(), z6.VnS()); 2684b8021494Sopenharmony_ci 2685b8021494Sopenharmony_ci __ movprfx(z11, z12); 2686b8021494Sopenharmony_ci __ ssubltb(z11.VnD(), z18.VnS(), z19.VnS()); 2687b8021494Sopenharmony_ci 2688b8021494Sopenharmony_ci __ movprfx(z7, z8); 2689b8021494Sopenharmony_ci __ ssubwb(z7.VnD(), z28.VnD(), z11.VnS()); 2690b8021494Sopenharmony_ci 2691b8021494Sopenharmony_ci __ movprfx(z29, z30); 2692b8021494Sopenharmony_ci __ ssubwt(z29.VnD(), z25.VnD(), z20.VnS()); 2693b8021494Sopenharmony_ci 2694b8021494Sopenharmony_ci __ movprfx(z21, z22); 2695b8021494Sopenharmony_ci __ stnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z1.VnS(), x23)); 2696b8021494Sopenharmony_ci 2697b8021494Sopenharmony_ci __ movprfx(z21, z22); 2698b8021494Sopenharmony_ci __ stnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23)); 2699b8021494Sopenharmony_ci 2700b8021494Sopenharmony_ci __ movprfx(z10, z11); 2701b8021494Sopenharmony_ci __ stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), x23)); 2702b8021494Sopenharmony_ci 2703b8021494Sopenharmony_ci __ movprfx(z30, z31); 2704b8021494Sopenharmony_ci __ stnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x6)); 2705b8021494Sopenharmony_ci 2706b8021494Sopenharmony_ci __ movprfx(z30, z31); 2707b8021494Sopenharmony_ci __ stnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x6)); 2708b8021494Sopenharmony_ci 2709b8021494Sopenharmony_ci __ movprfx(z0, z1); 2710b8021494Sopenharmony_ci __ stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1)); 2711b8021494Sopenharmony_ci 2712b8021494Sopenharmony_ci __ movprfx(z0, z1); 2713b8021494Sopenharmony_ci __ stnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1)); 2714b8021494Sopenharmony_ci 2715b8021494Sopenharmony_ci __ movprfx(z31, z0); 2716b8021494Sopenharmony_ci __ subhnb(z31.VnS(), z31.VnD(), z7.VnD()); 2717b8021494Sopenharmony_ci 2718b8021494Sopenharmony_ci __ movprfx(z31, z0); 2719b8021494Sopenharmony_ci __ subhnt(z31.VnS(), z22.VnD(), z27.VnD()); 2720b8021494Sopenharmony_ci 2721b8021494Sopenharmony_ci __ movprfx(z24, z25); 2722b8021494Sopenharmony_ci __ tbl(z24.VnB(), z29.VnB(), z30.VnB(), z0.VnB()); 2723b8021494Sopenharmony_ci 2724b8021494Sopenharmony_ci __ movprfx(z22, z23); 2725b8021494Sopenharmony_ci __ tbx(z22.VnB(), z15.VnB(), z19.VnB()); 2726b8021494Sopenharmony_ci 2727b8021494Sopenharmony_ci __ movprfx(z1, z2); 2728b8021494Sopenharmony_ci __ uabdlb(z1.VnD(), z26.VnS(), z12.VnS()); 2729b8021494Sopenharmony_ci 2730b8021494Sopenharmony_ci __ movprfx(z25, z26); 2731b8021494Sopenharmony_ci __ uabdlt(z25.VnD(), z29.VnS(), z14.VnS()); 2732b8021494Sopenharmony_ci 2733b8021494Sopenharmony_ci __ movprfx(z3, z4); 2734b8021494Sopenharmony_ci __ uaddlb(z3.VnD(), z5.VnS(), z2.VnS()); 2735b8021494Sopenharmony_ci 2736b8021494Sopenharmony_ci __ movprfx(z15, z16); 2737b8021494Sopenharmony_ci __ uaddlt(z15.VnD(), z28.VnS(), z20.VnS()); 2738b8021494Sopenharmony_ci 2739b8021494Sopenharmony_ci __ movprfx(z31, z0); 2740b8021494Sopenharmony_ci __ uaddwb(z31.VnD(), z8.VnD(), z25.VnS()); 2741b8021494Sopenharmony_ci 2742b8021494Sopenharmony_ci __ movprfx(z17, z18); 2743b8021494Sopenharmony_ci __ uaddwt(z17.VnD(), z15.VnD(), z2.VnS()); 2744b8021494Sopenharmony_ci 2745b8021494Sopenharmony_ci __ movprfx(z12, z13); 2746b8021494Sopenharmony_ci __ umulh(z12.VnB(), z12.VnB(), z17.VnB()); 2747b8021494Sopenharmony_ci 2748b8021494Sopenharmony_ci __ movprfx(z12, z13); 2749b8021494Sopenharmony_ci __ umullb(z12.VnD(), z5.VnS(), z2.VnS()); 2750b8021494Sopenharmony_ci 2751b8021494Sopenharmony_ci __ movprfx(z12, z13); 2752b8021494Sopenharmony_ci __ umullb(z12.VnS(), z5.VnH(), z2.VnH(), 0); 2753b8021494Sopenharmony_ci 2754b8021494Sopenharmony_ci __ movprfx(z12, z13); 2755b8021494Sopenharmony_ci __ umullb(z12.VnD(), z5.VnS(), z2.VnS(), 0); 2756b8021494Sopenharmony_ci 2757b8021494Sopenharmony_ci __ movprfx(z24, z25); 2758b8021494Sopenharmony_ci __ umullt(z24.VnD(), z6.VnS(), z6.VnS()); 2759b8021494Sopenharmony_ci 2760b8021494Sopenharmony_ci __ movprfx(z24, z25); 2761b8021494Sopenharmony_ci __ umullt(z24.VnS(), z6.VnH(), z1.VnH(), 0); 2762b8021494Sopenharmony_ci 2763b8021494Sopenharmony_ci __ movprfx(z24, z25); 2764b8021494Sopenharmony_ci __ umullt(z24.VnD(), z6.VnS(), z1.VnS(), 0); 2765b8021494Sopenharmony_ci 2766b8021494Sopenharmony_ci __ movprfx(z30, z31); 2767b8021494Sopenharmony_ci __ uqrshrnb(z30.VnB(), z25.VnH(), 1); 2768b8021494Sopenharmony_ci 2769b8021494Sopenharmony_ci __ movprfx(z3, z4); 2770b8021494Sopenharmony_ci __ uqrshrnt(z3.VnB(), z25.VnH(), 8); 2771b8021494Sopenharmony_ci 2772b8021494Sopenharmony_ci __ movprfx(z17, z18); 2773b8021494Sopenharmony_ci __ uqshrnb(z17.VnB(), z4.VnH(), 1); 2774b8021494Sopenharmony_ci 2775b8021494Sopenharmony_ci __ movprfx(z28, z29); 2776b8021494Sopenharmony_ci __ uqshrnt(z28.VnB(), z18.VnH(), 8); 2777b8021494Sopenharmony_ci 2778b8021494Sopenharmony_ci __ movprfx(z28, z29); 2779b8021494Sopenharmony_ci __ uqxtnb(z28.VnB(), z4.VnH()); 2780b8021494Sopenharmony_ci 2781b8021494Sopenharmony_ci __ movprfx(z19, z20); 2782b8021494Sopenharmony_ci __ uqxtnt(z19.VnB(), z7.VnH()); 2783b8021494Sopenharmony_ci 2784b8021494Sopenharmony_ci __ movprfx(z8, z9); 2785b8021494Sopenharmony_ci __ ushllb(z8.VnH(), z31.VnB(), 0); 2786b8021494Sopenharmony_ci 2787b8021494Sopenharmony_ci __ movprfx(z3, z4); 2788b8021494Sopenharmony_ci __ ushllt(z3.VnH(), z21.VnB(), 0); 2789b8021494Sopenharmony_ci 2790b8021494Sopenharmony_ci __ movprfx(z25, z26); 2791b8021494Sopenharmony_ci __ usublb(z25.VnD(), z9.VnS(), z17.VnS()); 2792b8021494Sopenharmony_ci 2793b8021494Sopenharmony_ci __ movprfx(z5, z6); 2794b8021494Sopenharmony_ci __ usublt(z5.VnD(), z11.VnS(), z15.VnS()); 2795b8021494Sopenharmony_ci 2796b8021494Sopenharmony_ci __ movprfx(z10, z11); 2797b8021494Sopenharmony_ci __ usubwb(z10.VnD(), z13.VnD(), z20.VnS()); 2798b8021494Sopenharmony_ci 2799b8021494Sopenharmony_ci __ movprfx(z15, z16); 2800b8021494Sopenharmony_ci __ usubwt(z15.VnD(), z8.VnD(), z23.VnS()); 2801b8021494Sopenharmony_ci 2802b8021494Sopenharmony_ci __ movprfx(z20, z21); 2803b8021494Sopenharmony_ci __ whilege(p0.VnB(), w20, w29); 2804b8021494Sopenharmony_ci 2805b8021494Sopenharmony_ci __ movprfx(z24, z25); 2806b8021494Sopenharmony_ci __ whilegt(p11.VnB(), w24, w3); 2807b8021494Sopenharmony_ci 2808b8021494Sopenharmony_ci __ movprfx(z20, z21); 2809b8021494Sopenharmony_ci __ whilehi(p2.VnB(), x20, x8); 2810b8021494Sopenharmony_ci 2811b8021494Sopenharmony_ci __ movprfx(z22, z23); 2812b8021494Sopenharmony_ci __ whilehs(p4.VnB(), w22, w9); 2813b8021494Sopenharmony_ci 2814b8021494Sopenharmony_ci __ movprfx(z25, z26); 2815b8021494Sopenharmony_ci __ whilerw(p7.VnB(), x25, x27); 2816b8021494Sopenharmony_ci 2817b8021494Sopenharmony_ci __ movprfx(z14, z15); 2818b8021494Sopenharmony_ci __ whilewr(p8.VnB(), x14, x14); 2819b8021494Sopenharmony_ci } 2820b8021494Sopenharmony_ci assm.FinalizeCode(); 2821b8021494Sopenharmony_ci 2822b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 2823b8021494Sopenharmony_ci} 2824b8021494Sopenharmony_ci 2825b8021494Sopenharmony_ciTEST(movprfx_negative_predication_sve2) { 2826b8021494Sopenharmony_ci Assembler assm; 2827b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); 2828b8021494Sopenharmony_ci { 2829b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 2830b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 2831b8021494Sopenharmony_ci static const size_t kPairCount = 140; 2832b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 2833b8021494Sopenharmony_ci 2834b8021494Sopenharmony_ci __ movprfx(z25.VnS(), p0.Zeroing(), z26.VnS()); 2835b8021494Sopenharmony_ci __ adclb(z25.VnS(), z17.VnS(), z24.VnS()); 2836b8021494Sopenharmony_ci 2837b8021494Sopenharmony_ci __ movprfx(z0.VnS(), p0.Zeroing(), z1.VnS()); 2838b8021494Sopenharmony_ci __ adclt(z0.VnS(), z2.VnS(), z15.VnS()); 2839b8021494Sopenharmony_ci 2840b8021494Sopenharmony_ci __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD()); 2841b8021494Sopenharmony_ci __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()); 2842b8021494Sopenharmony_ci 2843b8021494Sopenharmony_ci __ movprfx(z18.VnD(), p0.Zeroing(), z19.VnD()); 2844b8021494Sopenharmony_ci __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()); 2845b8021494Sopenharmony_ci 2846b8021494Sopenharmony_ci __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD()); 2847b8021494Sopenharmony_ci __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()); 2848b8021494Sopenharmony_ci 2849b8021494Sopenharmony_ci __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD()); 2850b8021494Sopenharmony_ci __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()); 2851b8021494Sopenharmony_ci 2852b8021494Sopenharmony_ci __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB()); 2853b8021494Sopenharmony_ci __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90); 2854b8021494Sopenharmony_ci 2855b8021494Sopenharmony_ci __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS()); 2856b8021494Sopenharmony_ci __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0); 2857b8021494Sopenharmony_ci 2858b8021494Sopenharmony_ci __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS()); 2859b8021494Sopenharmony_ci __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0); 2860b8021494Sopenharmony_ci 2861b8021494Sopenharmony_ci __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD()); 2862b8021494Sopenharmony_ci __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0); 2863b8021494Sopenharmony_ci 2864b8021494Sopenharmony_ci __ movprfx(z19.VnB(), p0.Zeroing(), z20.VnB()); 2865b8021494Sopenharmony_ci __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0); 2866b8021494Sopenharmony_ci 2867b8021494Sopenharmony_ci __ movprfx(z19.VnS(), p0.Zeroing(), z20.VnS()); 2868b8021494Sopenharmony_ci __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0); 2869b8021494Sopenharmony_ci 2870b8021494Sopenharmony_ci __ movprfx(z19.VnH(), p0.Zeroing(), z20.VnH()); 2871b8021494Sopenharmony_ci __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0); 2872b8021494Sopenharmony_ci 2873b8021494Sopenharmony_ci __ movprfx(z10.VnD(), p0.Zeroing(), z11.VnD()); 2874b8021494Sopenharmony_ci __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()); 2875b8021494Sopenharmony_ci 2876b8021494Sopenharmony_ci __ movprfx(z3.VnB(), p0.Zeroing(), z4.VnB()); 2877b8021494Sopenharmony_ci __ eorbt(z3.VnB(), z10.VnB(), z8.VnB()); 2878b8021494Sopenharmony_ci 2879b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p0.Zeroing(), z22.VnB()); 2880b8021494Sopenharmony_ci __ eortb(z20.VnB(), z21.VnB(), z15.VnB()); 2881b8021494Sopenharmony_ci 2882b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD()); 2883b8021494Sopenharmony_ci __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()); 2884b8021494Sopenharmony_ci 2885b8021494Sopenharmony_ci __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD()); 2886b8021494Sopenharmony_ci __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()); 2887b8021494Sopenharmony_ci 2888b8021494Sopenharmony_ci __ movprfx(z22.VnD(), p0.Zeroing(), z23.VnD()); 2889b8021494Sopenharmony_ci __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()); 2890b8021494Sopenharmony_ci 2891b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2892b8021494Sopenharmony_ci __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()); 2893b8021494Sopenharmony_ci 2894b8021494Sopenharmony_ci __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD()); 2895b8021494Sopenharmony_ci __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()); 2896b8021494Sopenharmony_ci 2897b8021494Sopenharmony_ci __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 2898b8021494Sopenharmony_ci __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH()); 2899b8021494Sopenharmony_ci 2900b8021494Sopenharmony_ci __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 2901b8021494Sopenharmony_ci __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0); 2902b8021494Sopenharmony_ci 2903b8021494Sopenharmony_ci __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS()); 2904b8021494Sopenharmony_ci __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH()); 2905b8021494Sopenharmony_ci 2906b8021494Sopenharmony_ci __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS()); 2907b8021494Sopenharmony_ci __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0); 2908b8021494Sopenharmony_ci 2909b8021494Sopenharmony_ci __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 2910b8021494Sopenharmony_ci __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH()); 2911b8021494Sopenharmony_ci 2912b8021494Sopenharmony_ci __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 2913b8021494Sopenharmony_ci __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0); 2914b8021494Sopenharmony_ci 2915b8021494Sopenharmony_ci __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS()); 2916b8021494Sopenharmony_ci __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH()); 2917b8021494Sopenharmony_ci 2918b8021494Sopenharmony_ci __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS()); 2919b8021494Sopenharmony_ci __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0); 2920b8021494Sopenharmony_ci 2921b8021494Sopenharmony_ci __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH()); 2922b8021494Sopenharmony_ci __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0); 2923b8021494Sopenharmony_ci 2924b8021494Sopenharmony_ci __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS()); 2925b8021494Sopenharmony_ci __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0); 2926b8021494Sopenharmony_ci 2927b8021494Sopenharmony_ci __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD()); 2928b8021494Sopenharmony_ci __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0); 2929b8021494Sopenharmony_ci 2930b8021494Sopenharmony_ci __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH()); 2931b8021494Sopenharmony_ci __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0); 2932b8021494Sopenharmony_ci 2933b8021494Sopenharmony_ci __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS()); 2934b8021494Sopenharmony_ci __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0); 2935b8021494Sopenharmony_ci 2936b8021494Sopenharmony_ci __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD()); 2937b8021494Sopenharmony_ci __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0); 2938b8021494Sopenharmony_ci 2939b8021494Sopenharmony_ci __ movprfx(z17.VnD(), p0.Zeroing(), z18.VnD()); 2940b8021494Sopenharmony_ci __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()); 2941b8021494Sopenharmony_ci 2942b8021494Sopenharmony_ci __ movprfx(z13.VnB(), p0.Zeroing(), z14.VnB()); 2943b8021494Sopenharmony_ci __ saba(z13.VnB(), z2.VnB(), z31.VnB()); 2944b8021494Sopenharmony_ci 2945b8021494Sopenharmony_ci __ movprfx(z13.VnD(), p0.Zeroing(), z14.VnD()); 2946b8021494Sopenharmony_ci __ sabalb(z13.VnD(), z20.VnS(), z26.VnS()); 2947b8021494Sopenharmony_ci 2948b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD()); 2949b8021494Sopenharmony_ci __ sabalt(z14.VnD(), z19.VnS(), z10.VnS()); 2950b8021494Sopenharmony_ci 2951b8021494Sopenharmony_ci __ movprfx(z17.VnS(), p0.Zeroing(), z18.VnS()); 2952b8021494Sopenharmony_ci __ sbclb(z17.VnS(), z10.VnS(), z8.VnS()); 2953b8021494Sopenharmony_ci 2954b8021494Sopenharmony_ci __ movprfx(z20.VnS(), p0.Zeroing(), z21.VnS()); 2955b8021494Sopenharmony_ci __ sbclt(z20.VnS(), z0.VnS(), z13.VnS()); 2956b8021494Sopenharmony_ci 2957b8021494Sopenharmony_ci __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB()); 2958b8021494Sopenharmony_ci __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()); 2959b8021494Sopenharmony_ci 2960b8021494Sopenharmony_ci __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB()); 2961b8021494Sopenharmony_ci __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()); 2962b8021494Sopenharmony_ci 2963b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2964b8021494Sopenharmony_ci __ smlalb(z1.VnD(), z3.VnS(), z23.VnS()); 2965b8021494Sopenharmony_ci 2966b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2967b8021494Sopenharmony_ci __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2968b8021494Sopenharmony_ci 2969b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); 2970b8021494Sopenharmony_ci __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2971b8021494Sopenharmony_ci 2972b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2973b8021494Sopenharmony_ci __ smlalt(z1.VnD(), z3.VnS(), z23.VnS()); 2974b8021494Sopenharmony_ci 2975b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2976b8021494Sopenharmony_ci __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2977b8021494Sopenharmony_ci 2978b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); 2979b8021494Sopenharmony_ci __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2980b8021494Sopenharmony_ci 2981b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2982b8021494Sopenharmony_ci __ smlslb(z1.VnD(), z3.VnS(), z23.VnS()); 2983b8021494Sopenharmony_ci 2984b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2985b8021494Sopenharmony_ci __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2986b8021494Sopenharmony_ci 2987b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); 2988b8021494Sopenharmony_ci __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2989b8021494Sopenharmony_ci 2990b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2991b8021494Sopenharmony_ci __ smlslt(z1.VnD(), z3.VnS(), z23.VnS()); 2992b8021494Sopenharmony_ci 2993b8021494Sopenharmony_ci __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2994b8021494Sopenharmony_ci __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2995b8021494Sopenharmony_ci 2996b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); 2997b8021494Sopenharmony_ci __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2998b8021494Sopenharmony_ci 2999b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p0.Zeroing(), z21.VnB()); 3000b8021494Sopenharmony_ci __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90); 3001b8021494Sopenharmony_ci 3002b8021494Sopenharmony_ci __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD()); 3003b8021494Sopenharmony_ci __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()); 3004b8021494Sopenharmony_ci 3005b8021494Sopenharmony_ci __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD()); 3006b8021494Sopenharmony_ci __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0); 3007b8021494Sopenharmony_ci 3008b8021494Sopenharmony_ci __ movprfx(z6.VnS(), p0.Zeroing(), z7.VnS()); 3009b8021494Sopenharmony_ci __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0); 3010b8021494Sopenharmony_ci 3011b8021494Sopenharmony_ci __ movprfx(z23.VnD(), p0.Zeroing(), z24.VnD()); 3012b8021494Sopenharmony_ci __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()); 3013b8021494Sopenharmony_ci 3014b8021494Sopenharmony_ci __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3015b8021494Sopenharmony_ci __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS()); 3016b8021494Sopenharmony_ci 3017b8021494Sopenharmony_ci __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3018b8021494Sopenharmony_ci __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0); 3019b8021494Sopenharmony_ci 3020b8021494Sopenharmony_ci __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS()); 3021b8021494Sopenharmony_ci __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0); 3022b8021494Sopenharmony_ci 3023b8021494Sopenharmony_ci __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD()); 3024b8021494Sopenharmony_ci __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()); 3025b8021494Sopenharmony_ci 3026b8021494Sopenharmony_ci __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD()); 3027b8021494Sopenharmony_ci __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0); 3028b8021494Sopenharmony_ci 3029b8021494Sopenharmony_ci __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 3030b8021494Sopenharmony_ci __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0); 3031b8021494Sopenharmony_ci 3032b8021494Sopenharmony_ci __ movprfx(z26.VnD(), p0.Zeroing(), z27.VnD()); 3033b8021494Sopenharmony_ci __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()); 3034b8021494Sopenharmony_ci 3035b8021494Sopenharmony_ci __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD()); 3036b8021494Sopenharmony_ci __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()); 3037b8021494Sopenharmony_ci 3038b8021494Sopenharmony_ci __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD()); 3039b8021494Sopenharmony_ci __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0); 3040b8021494Sopenharmony_ci 3041b8021494Sopenharmony_ci __ movprfx(z21.VnS(), p0.Zeroing(), z22.VnS()); 3042b8021494Sopenharmony_ci __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0); 3043b8021494Sopenharmony_ci 3044b8021494Sopenharmony_ci __ movprfx(z31.VnB(), p0.Zeroing(), z0.VnB()); 3045b8021494Sopenharmony_ci __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0); 3046b8021494Sopenharmony_ci 3047b8021494Sopenharmony_ci __ movprfx(z31.VnH(), p0.Zeroing(), z0.VnH()); 3048b8021494Sopenharmony_ci __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0); 3049b8021494Sopenharmony_ci 3050b8021494Sopenharmony_ci __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS()); 3051b8021494Sopenharmony_ci __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0); 3052b8021494Sopenharmony_ci 3053b8021494Sopenharmony_ci __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB()); 3054b8021494Sopenharmony_ci __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()); 3055b8021494Sopenharmony_ci 3056b8021494Sopenharmony_ci __ movprfx(z27.VnH(), p0.Zeroing(), z28.VnH()); 3057b8021494Sopenharmony_ci __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0); 3058b8021494Sopenharmony_ci 3059b8021494Sopenharmony_ci __ movprfx(z27.VnS(), p0.Zeroing(), z28.VnS()); 3060b8021494Sopenharmony_ci __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0); 3061b8021494Sopenharmony_ci 3062b8021494Sopenharmony_ci __ movprfx(z27.VnD(), p0.Zeroing(), z28.VnD()); 3063b8021494Sopenharmony_ci __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0); 3064b8021494Sopenharmony_ci 3065b8021494Sopenharmony_ci __ movprfx(z11.VnB(), p0.Zeroing(), z12.VnB()); 3066b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()); 3067b8021494Sopenharmony_ci 3068b8021494Sopenharmony_ci __ movprfx(z11.VnH(), p0.Zeroing(), z12.VnH()); 3069b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0); 3070b8021494Sopenharmony_ci 3071b8021494Sopenharmony_ci __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS()); 3072b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0); 3073b8021494Sopenharmony_ci 3074b8021494Sopenharmony_ci __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3075b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0); 3076b8021494Sopenharmony_ci 3077b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); 3078b8021494Sopenharmony_ci __ srsra(z0.VnB(), z8.VnB(), 1); 3079b8021494Sopenharmony_ci 3080b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); 3081b8021494Sopenharmony_ci __ ssra(z0.VnB(), z8.VnB(), 1); 3082b8021494Sopenharmony_ci 3083b8021494Sopenharmony_ci __ movprfx(z23.VnB(), p0.Zeroing(), z24.VnB()); 3084b8021494Sopenharmony_ci __ uaba(z23.VnB(), z22.VnB(), z20.VnB()); 3085b8021494Sopenharmony_ci 3086b8021494Sopenharmony_ci __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3087b8021494Sopenharmony_ci __ uabalb(z11.VnD(), z25.VnS(), z12.VnS()); 3088b8021494Sopenharmony_ci 3089b8021494Sopenharmony_ci __ movprfx(z4.VnD(), p0.Zeroing(), z5.VnD()); 3090b8021494Sopenharmony_ci __ uabalt(z4.VnD(), z2.VnS(), z31.VnS()); 3091b8021494Sopenharmony_ci 3092b8021494Sopenharmony_ci __ movprfx(z7.VnB(), p0.Zeroing(), z8.VnB()); 3093b8021494Sopenharmony_ci __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()); 3094b8021494Sopenharmony_ci 3095b8021494Sopenharmony_ci __ movprfx(z10.VnB(), p0.Zeroing(), z11.VnB()); 3096b8021494Sopenharmony_ci __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()); 3097b8021494Sopenharmony_ci 3098b8021494Sopenharmony_ci __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD()); 3099b8021494Sopenharmony_ci __ umlalb(z31.VnD(), z9.VnS(), z21.VnS()); 3100b8021494Sopenharmony_ci 3101b8021494Sopenharmony_ci __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD()); 3102b8021494Sopenharmony_ci __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0); 3103b8021494Sopenharmony_ci 3104b8021494Sopenharmony_ci __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS()); 3105b8021494Sopenharmony_ci __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0); 3106b8021494Sopenharmony_ci 3107b8021494Sopenharmony_ci __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3108b8021494Sopenharmony_ci __ umlalt(z11.VnD(), z5.VnS(), z22.VnS()); 3109b8021494Sopenharmony_ci 3110b8021494Sopenharmony_ci __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3111b8021494Sopenharmony_ci __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0); 3112b8021494Sopenharmony_ci 3113b8021494Sopenharmony_ci __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS()); 3114b8021494Sopenharmony_ci __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0); 3115b8021494Sopenharmony_ci 3116b8021494Sopenharmony_ci __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD()); 3117b8021494Sopenharmony_ci __ umlslb(z28.VnD(), z13.VnS(), z9.VnS()); 3118b8021494Sopenharmony_ci 3119b8021494Sopenharmony_ci __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD()); 3120b8021494Sopenharmony_ci __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0); 3121b8021494Sopenharmony_ci 3122b8021494Sopenharmony_ci __ movprfx(z28.VnS(), p0.Zeroing(), z29.VnS()); 3123b8021494Sopenharmony_ci __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0); 3124b8021494Sopenharmony_ci 3125b8021494Sopenharmony_ci __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD()); 3126b8021494Sopenharmony_ci __ umlslt(z9.VnD(), z12.VnS(), z30.VnS()); 3127b8021494Sopenharmony_ci 3128b8021494Sopenharmony_ci __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD()); 3129b8021494Sopenharmony_ci __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0); 3130b8021494Sopenharmony_ci 3131b8021494Sopenharmony_ci __ movprfx(z9.VnS(), p0.Zeroing(), z10.VnS()); 3132b8021494Sopenharmony_ci __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0); 3133b8021494Sopenharmony_ci 3134b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); 3135b8021494Sopenharmony_ci __ ursra(z0.VnB(), z8.VnB(), 1); 3136b8021494Sopenharmony_ci 3137b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); 3138b8021494Sopenharmony_ci __ usra(z0.VnB(), z8.VnB(), 1); 3139b8021494Sopenharmony_ci 3140b8021494Sopenharmony_ci __ movprfx(z16.VnB(), p0.Zeroing(), z17.VnB()); 3141b8021494Sopenharmony_ci __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1); 3142b8021494Sopenharmony_ci } 3143b8021494Sopenharmony_ci assm.FinalizeCode(); 3144b8021494Sopenharmony_ci 3145b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 3146b8021494Sopenharmony_ci} 3147b8021494Sopenharmony_ci 3148b8021494Sopenharmony_ciTEST(movprfx_negative_aliasing_sve2) { 3149b8021494Sopenharmony_ci Assembler assm; 3150b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); 3151b8021494Sopenharmony_ci { 3152b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 3153b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 3154b8021494Sopenharmony_ci static const size_t kPairCount = 140; 3155b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 3156b8021494Sopenharmony_ci 3157b8021494Sopenharmony_ci __ movprfx(z25, z26); 3158b8021494Sopenharmony_ci __ adclb(z25.VnS(), z17.VnS(), z25.VnS()); 3159b8021494Sopenharmony_ci 3160b8021494Sopenharmony_ci __ movprfx(z0, z1); 3161b8021494Sopenharmony_ci __ adclt(z0.VnS(), z2.VnS(), z0.VnS()); 3162b8021494Sopenharmony_ci 3163b8021494Sopenharmony_ci __ movprfx(z3, z4); 3164b8021494Sopenharmony_ci __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB()); 3165b8021494Sopenharmony_ci 3166b8021494Sopenharmony_ci __ movprfx(z6, z7); 3167b8021494Sopenharmony_ci __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z6.VnD()); 3168b8021494Sopenharmony_ci 3169b8021494Sopenharmony_ci __ movprfx(z18, z19); 3170b8021494Sopenharmony_ci __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z18.VnD()); 3171b8021494Sopenharmony_ci 3172b8021494Sopenharmony_ci __ movprfx(z7, z8); 3173b8021494Sopenharmony_ci __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z7.VnD()); 3174b8021494Sopenharmony_ci 3175b8021494Sopenharmony_ci __ movprfx(z21, z22); 3176b8021494Sopenharmony_ci __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z21.VnD()); 3177b8021494Sopenharmony_ci 3178b8021494Sopenharmony_ci __ movprfx(z5, z6); 3179b8021494Sopenharmony_ci __ cadd(z5.VnB(), z5.VnB(), z5.VnB(), 90); 3180b8021494Sopenharmony_ci 3181b8021494Sopenharmony_ci __ movprfx(z7, z8); 3182b8021494Sopenharmony_ci __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0); 3183b8021494Sopenharmony_ci 3184b8021494Sopenharmony_ci __ movprfx(z7, z8); 3185b8021494Sopenharmony_ci __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0, 0); 3186b8021494Sopenharmony_ci 3187b8021494Sopenharmony_ci __ movprfx(z7, z8); 3188b8021494Sopenharmony_ci __ cdot(z7.VnD(), z7.VnH(), z0.VnH(), 0, 0); 3189b8021494Sopenharmony_ci 3190b8021494Sopenharmony_ci __ movprfx(z19, z20); 3191b8021494Sopenharmony_ci __ cmla(z19.VnB(), z19.VnB(), z2.VnB(), 0); 3192b8021494Sopenharmony_ci 3193b8021494Sopenharmony_ci __ movprfx(z19, z20); 3194b8021494Sopenharmony_ci __ cmla(z19.VnS(), z19.VnS(), z2.VnS(), 0, 0); 3195b8021494Sopenharmony_ci 3196b8021494Sopenharmony_ci __ movprfx(z1, z20); 3197b8021494Sopenharmony_ci __ cmla(z1.VnH(), z7.VnH(), z1.VnH(), 0, 0); 3198b8021494Sopenharmony_ci 3199b8021494Sopenharmony_ci __ movprfx(z10, z11); 3200b8021494Sopenharmony_ci __ eor3(z10.VnD(), z10.VnD(), z10.VnD(), z23.VnD()); 3201b8021494Sopenharmony_ci 3202b8021494Sopenharmony_ci __ movprfx(z3, z4); 3203b8021494Sopenharmony_ci __ eorbt(z3.VnB(), z10.VnB(), z3.VnB()); 3204b8021494Sopenharmony_ci 3205b8021494Sopenharmony_ci __ movprfx(z20, z22); 3206b8021494Sopenharmony_ci __ eortb(z20.VnB(), z21.VnB(), z20.VnB()); 3207b8021494Sopenharmony_ci 3208b8021494Sopenharmony_ci __ movprfx(z14, z15); 3209b8021494Sopenharmony_ci __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z14.VnD()); 3210b8021494Sopenharmony_ci 3211b8021494Sopenharmony_ci __ movprfx(z14.VnD(), p4.Merging(), z15.VnD()); 3212b8021494Sopenharmony_ci __ fcvtx(z14.VnS(), p4.Merging(), z14.VnD()); 3213b8021494Sopenharmony_ci 3214b8021494Sopenharmony_ci __ movprfx(z15.VnH(), p0.Merging(), z16.VnH()); 3215b8021494Sopenharmony_ci __ flogb(z15.VnH(), p0.Merging(), z15.VnH()); 3216b8021494Sopenharmony_ci 3217b8021494Sopenharmony_ci __ movprfx(z2, z3); 3218b8021494Sopenharmony_ci __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z2.VnD()); 3219b8021494Sopenharmony_ci 3220b8021494Sopenharmony_ci __ movprfx(z22, z23); 3221b8021494Sopenharmony_ci __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z22.VnD()); 3222b8021494Sopenharmony_ci 3223b8021494Sopenharmony_ci __ movprfx(z1, z2); 3224b8021494Sopenharmony_ci __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z1.VnD()); 3225b8021494Sopenharmony_ci 3226b8021494Sopenharmony_ci __ movprfx(z16, z17); 3227b8021494Sopenharmony_ci __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z16.VnD()); 3228b8021494Sopenharmony_ci 3229b8021494Sopenharmony_ci __ movprfx(z16, z17); 3230b8021494Sopenharmony_ci __ fmlalb(z16.VnS(), z18.VnH(), z16.VnH()); 3231b8021494Sopenharmony_ci 3232b8021494Sopenharmony_ci __ movprfx(z16, z17); 3233b8021494Sopenharmony_ci __ fmlalb(z16.VnS(), z16.VnH(), z2.VnH(), 0); 3234b8021494Sopenharmony_ci 3235b8021494Sopenharmony_ci __ movprfx(z18, z19); 3236b8021494Sopenharmony_ci __ fmlalt(z18.VnS(), z13.VnH(), z18.VnH()); 3237b8021494Sopenharmony_ci 3238b8021494Sopenharmony_ci __ movprfx(z18, z19); 3239b8021494Sopenharmony_ci __ fmlalt(z18.VnS(), z18.VnH(), z5.VnH(), 0); 3240b8021494Sopenharmony_ci 3241b8021494Sopenharmony_ci __ movprfx(z16, z17); 3242b8021494Sopenharmony_ci __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH()); 3243b8021494Sopenharmony_ci 3244b8021494Sopenharmony_ci __ movprfx(z16, z17); 3245b8021494Sopenharmony_ci __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH(), 0); 3246b8021494Sopenharmony_ci 3247b8021494Sopenharmony_ci __ movprfx(z3, z4); 3248b8021494Sopenharmony_ci __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH()); 3249b8021494Sopenharmony_ci 3250b8021494Sopenharmony_ci __ movprfx(z3, z4); 3251b8021494Sopenharmony_ci __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH(), 0); 3252b8021494Sopenharmony_ci 3253b8021494Sopenharmony_ci __ movprfx(z2, z3); 3254b8021494Sopenharmony_ci __ mla(z2.VnH(), z0.VnH(), z2.VnH(), 0); 3255b8021494Sopenharmony_ci 3256b8021494Sopenharmony_ci __ movprfx(z2, z3); 3257b8021494Sopenharmony_ci __ mla(z2.VnS(), z0.VnS(), z2.VnS(), 0); 3258b8021494Sopenharmony_ci 3259b8021494Sopenharmony_ci __ movprfx(z2, z3); 3260b8021494Sopenharmony_ci __ mla(z2.VnD(), z0.VnD(), z2.VnD(), 0); 3261b8021494Sopenharmony_ci 3262b8021494Sopenharmony_ci __ movprfx(z2, z3); 3263b8021494Sopenharmony_ci __ mls(z2.VnH(), z0.VnH(), z2.VnH(), 0); 3264b8021494Sopenharmony_ci 3265b8021494Sopenharmony_ci __ movprfx(z2, z3); 3266b8021494Sopenharmony_ci __ mls(z2.VnS(), z0.VnS(), z2.VnS(), 0); 3267b8021494Sopenharmony_ci 3268b8021494Sopenharmony_ci __ movprfx(z2, z3); 3269b8021494Sopenharmony_ci __ mls(z2.VnD(), z0.VnD(), z2.VnD(), 0); 3270b8021494Sopenharmony_ci 3271b8021494Sopenharmony_ci __ movprfx(z17, z18); 3272b8021494Sopenharmony_ci __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z17.VnD()); 3273b8021494Sopenharmony_ci 3274b8021494Sopenharmony_ci __ movprfx(z13, z14); 3275b8021494Sopenharmony_ci __ saba(z13.VnB(), z2.VnB(), z13.VnB()); 3276b8021494Sopenharmony_ci 3277b8021494Sopenharmony_ci __ movprfx(z13, z14); 3278b8021494Sopenharmony_ci __ sabalb(z13.VnD(), z13.VnS(), z26.VnS()); 3279b8021494Sopenharmony_ci 3280b8021494Sopenharmony_ci __ movprfx(z14, z15); 3281b8021494Sopenharmony_ci __ sabalt(z14.VnD(), z14.VnS(), z10.VnS()); 3282b8021494Sopenharmony_ci 3283b8021494Sopenharmony_ci __ movprfx(z19.VnD(), p5.Merging(), z20.VnD()); 3284b8021494Sopenharmony_ci __ sadalp(z19.VnD(), p5.Merging(), z19.VnS()); 3285b8021494Sopenharmony_ci 3286b8021494Sopenharmony_ci __ movprfx(z17, z18); 3287b8021494Sopenharmony_ci __ sbclb(z17.VnS(), z17.VnS(), z8.VnS()); 3288b8021494Sopenharmony_ci 3289b8021494Sopenharmony_ci __ movprfx(z20, z21); 3290b8021494Sopenharmony_ci __ sbclt(z20.VnS(), z20.VnS(), z13.VnS()); 3291b8021494Sopenharmony_ci 3292b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p3.Merging(), z21.VnB()); 3293b8021494Sopenharmony_ci __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z20.VnB()); 3294b8021494Sopenharmony_ci 3295b8021494Sopenharmony_ci __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); 3296b8021494Sopenharmony_ci __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z21.VnB()); 3297b8021494Sopenharmony_ci 3298b8021494Sopenharmony_ci __ movprfx(z1.VnB(), p0.Merging(), z2.VnB()); 3299b8021494Sopenharmony_ci __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z1.VnB()); 3300b8021494Sopenharmony_ci 3301b8021494Sopenharmony_ci __ movprfx(z5, z6); 3302b8021494Sopenharmony_ci __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z5.VnB()); 3303b8021494Sopenharmony_ci 3304b8021494Sopenharmony_ci __ movprfx(z27, z28); 3305b8021494Sopenharmony_ci __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z27.VnB()); 3306b8021494Sopenharmony_ci 3307b8021494Sopenharmony_ci __ movprfx(z1, z2); 3308b8021494Sopenharmony_ci __ smlalb(z1.VnD(), z3.VnS(), z1.VnS()); 3309b8021494Sopenharmony_ci 3310b8021494Sopenharmony_ci __ movprfx(z1, z2); 3311b8021494Sopenharmony_ci __ smlalb(z1.VnD(), z3.VnS(), z1.VnS(), 0); 3312b8021494Sopenharmony_ci 3313b8021494Sopenharmony_ci __ movprfx(z1, z2); 3314b8021494Sopenharmony_ci __ smlalb(z1.VnS(), z1.VnH(), z2.VnH(), 0); 3315b8021494Sopenharmony_ci 3316b8021494Sopenharmony_ci __ movprfx(z1, z2); 3317b8021494Sopenharmony_ci __ smlalt(z1.VnD(), z1.VnS(), z23.VnS()); 3318b8021494Sopenharmony_ci 3319b8021494Sopenharmony_ci __ movprfx(z1, z2); 3320b8021494Sopenharmony_ci __ smlalt(z1.VnD(), z3.VnS(), z1.VnS(), 0); 3321b8021494Sopenharmony_ci 3322b8021494Sopenharmony_ci __ movprfx(z1, z2); 3323b8021494Sopenharmony_ci __ smlalt(z1.VnS(), z1.VnH(), z2.VnH(), 0); 3324b8021494Sopenharmony_ci 3325b8021494Sopenharmony_ci __ movprfx(z1, z2); 3326b8021494Sopenharmony_ci __ smlslb(z1.VnD(), z1.VnS(), z23.VnS()); 3327b8021494Sopenharmony_ci 3328b8021494Sopenharmony_ci __ movprfx(z1, z2); 3329b8021494Sopenharmony_ci __ smlslb(z1.VnD(), z3.VnS(), z1.VnS(), 0); 3330b8021494Sopenharmony_ci 3331b8021494Sopenharmony_ci __ movprfx(z1, z2); 3332b8021494Sopenharmony_ci __ smlslb(z1.VnS(), z3.VnH(), z1.VnH(), 0); 3333b8021494Sopenharmony_ci 3334b8021494Sopenharmony_ci __ movprfx(z1, z2); 3335b8021494Sopenharmony_ci __ smlslt(z1.VnD(), z1.VnS(), z23.VnS()); 3336b8021494Sopenharmony_ci 3337b8021494Sopenharmony_ci __ movprfx(z1, z2); 3338b8021494Sopenharmony_ci __ smlslt(z1.VnD(), z3.VnS(), z1.VnS(), 0); 3339b8021494Sopenharmony_ci 3340b8021494Sopenharmony_ci __ movprfx(z1, z2); 3341b8021494Sopenharmony_ci __ smlslt(z1.VnS(), z1.VnH(), z2.VnH(), 0); 3342b8021494Sopenharmony_ci 3343b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p1.Merging(), z30.VnB()); 3344b8021494Sopenharmony_ci __ sqabs(z29.VnB(), p1.Merging(), z29.VnB()); 3345b8021494Sopenharmony_ci 3346b8021494Sopenharmony_ci __ movprfx(z28.VnB(), p0.Merging(), z29.VnB()); 3347b8021494Sopenharmony_ci __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()); 3348b8021494Sopenharmony_ci 3349b8021494Sopenharmony_ci __ movprfx(z20, z21); 3350b8021494Sopenharmony_ci __ sqcadd(z20.VnB(), z20.VnB(), z20.VnB(), 90); 3351b8021494Sopenharmony_ci 3352b8021494Sopenharmony_ci __ movprfx(z6, z7); 3353b8021494Sopenharmony_ci __ sqdmlalb(z6.VnD(), z6.VnS(), z25.VnS()); 3354b8021494Sopenharmony_ci 3355b8021494Sopenharmony_ci __ movprfx(z6, z7); 3356b8021494Sopenharmony_ci __ sqdmlalb(z6.VnD(), z6.VnS(), z2.VnS(), 0); 3357b8021494Sopenharmony_ci 3358b8021494Sopenharmony_ci __ movprfx(z6, z7); 3359b8021494Sopenharmony_ci __ sqdmlalb(z6.VnS(), z6.VnH(), z2.VnH(), 0); 3360b8021494Sopenharmony_ci 3361b8021494Sopenharmony_ci __ movprfx(z23, z24); 3362b8021494Sopenharmony_ci __ sqdmlalbt(z23.VnD(), z23.VnS(), z26.VnS()); 3363b8021494Sopenharmony_ci 3364b8021494Sopenharmony_ci __ movprfx(z11, z12); 3365b8021494Sopenharmony_ci __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS()); 3366b8021494Sopenharmony_ci 3367b8021494Sopenharmony_ci __ movprfx(z11, z12); 3368b8021494Sopenharmony_ci __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS(), 0); 3369b8021494Sopenharmony_ci 3370b8021494Sopenharmony_ci __ movprfx(z1, z12); 3371b8021494Sopenharmony_ci __ sqdmlalt(z1.VnS(), z0.VnH(), z1.VnH(), 0); 3372b8021494Sopenharmony_ci 3373b8021494Sopenharmony_ci __ movprfx(z16, z17); 3374b8021494Sopenharmony_ci __ sqdmlslb(z16.VnD(), z26.VnS(), z16.VnS()); 3375b8021494Sopenharmony_ci 3376b8021494Sopenharmony_ci __ movprfx(z16, z17); 3377b8021494Sopenharmony_ci __ sqdmlslb(z16.VnD(), z16.VnS(), z2.VnS(), 0); 3378b8021494Sopenharmony_ci 3379b8021494Sopenharmony_ci __ movprfx(z16, z17); 3380b8021494Sopenharmony_ci __ sqdmlslb(z16.VnS(), z16.VnH(), z2.VnH(), 0); 3381b8021494Sopenharmony_ci 3382b8021494Sopenharmony_ci __ movprfx(z26, z27); 3383b8021494Sopenharmony_ci __ sqdmlslbt(z26.VnD(), z26.VnS(), z4.VnS()); 3384b8021494Sopenharmony_ci 3385b8021494Sopenharmony_ci __ movprfx(z21, z22); 3386b8021494Sopenharmony_ci __ sqdmlslt(z21.VnD(), z23.VnS(), z21.VnS()); 3387b8021494Sopenharmony_ci 3388b8021494Sopenharmony_ci __ movprfx(z21, z22); 3389b8021494Sopenharmony_ci __ sqdmlslt(z21.VnD(), z21.VnS(), z0.VnS(), 0); 3390b8021494Sopenharmony_ci 3391b8021494Sopenharmony_ci __ movprfx(z1, z22); 3392b8021494Sopenharmony_ci __ sqdmlslt(z21.VnS(), z23.VnH(), z1.VnH(), 0); 3393b8021494Sopenharmony_ci 3394b8021494Sopenharmony_ci __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); 3395b8021494Sopenharmony_ci __ sqneg(z21.VnB(), p0.Merging(), z21.VnB()); 3396b8021494Sopenharmony_ci 3397b8021494Sopenharmony_ci __ movprfx(z31, z0); 3398b8021494Sopenharmony_ci __ sqrdcmlah(z31.VnB(), z15.VnB(), z31.VnB(), 0); 3399b8021494Sopenharmony_ci 3400b8021494Sopenharmony_ci __ movprfx(z31, z0); 3401b8021494Sopenharmony_ci __ sqrdcmlah(z31.VnH(), z31.VnH(), z2.VnH(), 0, 0); 3402b8021494Sopenharmony_ci 3403b8021494Sopenharmony_ci __ movprfx(z31, z0); 3404b8021494Sopenharmony_ci __ sqrdcmlah(z31.VnS(), z31.VnS(), z2.VnS(), 0, 0); 3405b8021494Sopenharmony_ci 3406b8021494Sopenharmony_ci __ movprfx(z27, z28); 3407b8021494Sopenharmony_ci __ sqrdmlah(z27.VnB(), z27.VnB(), z19.VnB()); 3408b8021494Sopenharmony_ci 3409b8021494Sopenharmony_ci __ movprfx(z27, z28); 3410b8021494Sopenharmony_ci __ sqrdmlah(z27.VnH(), z27.VnH(), z1.VnH(), 0); 3411b8021494Sopenharmony_ci 3412b8021494Sopenharmony_ci __ movprfx(z27, z28); 3413b8021494Sopenharmony_ci __ sqrdmlah(z27.VnS(), z27.VnS(), z1.VnS(), 0); 3414b8021494Sopenharmony_ci 3415b8021494Sopenharmony_ci __ movprfx(z27, z28); 3416b8021494Sopenharmony_ci __ sqrdmlah(z27.VnD(), z27.VnD(), z1.VnD(), 0); 3417b8021494Sopenharmony_ci 3418b8021494Sopenharmony_ci __ movprfx(z11, z12); 3419b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnB(), z16.VnB(), z11.VnB()); 3420b8021494Sopenharmony_ci 3421b8021494Sopenharmony_ci __ movprfx(z11, z12); 3422b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnH(), z11.VnH(), z1.VnH(), 0); 3423b8021494Sopenharmony_ci 3424b8021494Sopenharmony_ci __ movprfx(z11, z12); 3425b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnS(), z11.VnS(), z1.VnS(), 0); 3426b8021494Sopenharmony_ci 3427b8021494Sopenharmony_ci __ movprfx(z11, z12); 3428b8021494Sopenharmony_ci __ sqrdmlsh(z11.VnD(), z11.VnD(), z1.VnD(), 0); 3429b8021494Sopenharmony_ci 3430b8021494Sopenharmony_ci __ movprfx(z31.VnB(), p5.Merging(), z0.VnB()); 3431b8021494Sopenharmony_ci __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z31.VnB()); 3432b8021494Sopenharmony_ci 3433b8021494Sopenharmony_ci __ movprfx(z25.VnB(), p6.Merging(), z26.VnB()); 3434b8021494Sopenharmony_ci __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z25.VnB()); 3435b8021494Sopenharmony_ci 3436b8021494Sopenharmony_ci __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); 3437b8021494Sopenharmony_ci __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z0.VnB()); 3438b8021494Sopenharmony_ci 3439b8021494Sopenharmony_ci __ movprfx(z7.VnB(), p3.Merging(), z8.VnB()); 3440b8021494Sopenharmony_ci __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z7.VnB()); 3441b8021494Sopenharmony_ci 3442b8021494Sopenharmony_ci __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 3443b8021494Sopenharmony_ci __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); 3444b8021494Sopenharmony_ci 3445b8021494Sopenharmony_ci __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 3446b8021494Sopenharmony_ci __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); 3447b8021494Sopenharmony_ci 3448b8021494Sopenharmony_ci __ movprfx(z23.VnB(), p4.Merging(), z24.VnB()); 3449b8021494Sopenharmony_ci __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z23.VnB()); 3450b8021494Sopenharmony_ci 3451b8021494Sopenharmony_ci __ movprfx(z31.VnB(), p7.Merging(), z0.VnB()); 3452b8021494Sopenharmony_ci __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z31.VnB()); 3453b8021494Sopenharmony_ci 3454b8021494Sopenharmony_ci __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 3455b8021494Sopenharmony_ci __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); 3456b8021494Sopenharmony_ci 3457b8021494Sopenharmony_ci __ movprfx(z0, z1); 3458b8021494Sopenharmony_ci __ srsra(z0.VnB(), z0.VnB(), 1); 3459b8021494Sopenharmony_ci 3460b8021494Sopenharmony_ci __ movprfx(z0, z1); 3461b8021494Sopenharmony_ci __ ssra(z0.VnB(), z0.VnB(), 1); 3462b8021494Sopenharmony_ci 3463b8021494Sopenharmony_ci __ movprfx(z26.VnB(), p2.Merging(), z27.VnB()); 3464b8021494Sopenharmony_ci __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z26.VnB()); 3465b8021494Sopenharmony_ci 3466b8021494Sopenharmony_ci __ movprfx(z23, z24); 3467b8021494Sopenharmony_ci __ uaba(z23.VnB(), z22.VnB(), z23.VnB()); 3468b8021494Sopenharmony_ci 3469b8021494Sopenharmony_ci __ movprfx(z11, z12); 3470b8021494Sopenharmony_ci __ uabalb(z11.VnD(), z25.VnS(), z11.VnS()); 3471b8021494Sopenharmony_ci 3472b8021494Sopenharmony_ci __ movprfx(z4, z5); 3473b8021494Sopenharmony_ci __ uabalt(z4.VnD(), z4.VnS(), z31.VnS()); 3474b8021494Sopenharmony_ci 3475b8021494Sopenharmony_ci __ movprfx(z20.VnD(), p4.Merging(), z21.VnD()); 3476b8021494Sopenharmony_ci __ uadalp(z20.VnD(), p4.Merging(), z20.VnS()); 3477b8021494Sopenharmony_ci 3478b8021494Sopenharmony_ci __ movprfx(z21.VnB(), p2.Merging(), z22.VnB()); 3479b8021494Sopenharmony_ci __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z21.VnB()); 3480b8021494Sopenharmony_ci 3481b8021494Sopenharmony_ci __ movprfx(z1.VnB(), p4.Merging(), z2.VnB()); 3482b8021494Sopenharmony_ci __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z1.VnB()); 3483b8021494Sopenharmony_ci 3484b8021494Sopenharmony_ci __ movprfx(z18.VnB(), p0.Merging(), z19.VnB()); 3485b8021494Sopenharmony_ci __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z18.VnB()); 3486b8021494Sopenharmony_ci 3487b8021494Sopenharmony_ci __ movprfx(z7, z8); 3488b8021494Sopenharmony_ci __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z7.VnB()); 3489b8021494Sopenharmony_ci 3490b8021494Sopenharmony_ci __ movprfx(z10, z11); 3491b8021494Sopenharmony_ci __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z10.VnB()); 3492b8021494Sopenharmony_ci 3493b8021494Sopenharmony_ci __ movprfx(z31, z0); 3494b8021494Sopenharmony_ci __ umlalb(z31.VnD(), z9.VnS(), z31.VnS()); 3495b8021494Sopenharmony_ci 3496b8021494Sopenharmony_ci __ movprfx(z31, z0); 3497b8021494Sopenharmony_ci __ umlalb(z31.VnD(), z31.VnS(), z1.VnS(), 0); 3498b8021494Sopenharmony_ci 3499b8021494Sopenharmony_ci __ movprfx(z31, z0); 3500b8021494Sopenharmony_ci __ umlalb(z31.VnS(), z31.VnH(), z1.VnH(), 0); 3501b8021494Sopenharmony_ci 3502b8021494Sopenharmony_ci __ movprfx(z11, z12); 3503b8021494Sopenharmony_ci __ umlalt(z11.VnD(), z11.VnS(), z22.VnS()); 3504b8021494Sopenharmony_ci 3505b8021494Sopenharmony_ci __ movprfx(z11, z12); 3506b8021494Sopenharmony_ci __ umlalt(z11.VnD(), z11.VnS(), z2.VnS(), 0); 3507b8021494Sopenharmony_ci 3508b8021494Sopenharmony_ci __ movprfx(z1, z12); 3509b8021494Sopenharmony_ci __ umlalt(z1.VnS(), z5.VnH(), z1.VnH(), 0); 3510b8021494Sopenharmony_ci 3511b8021494Sopenharmony_ci __ movprfx(z28, z29); 3512b8021494Sopenharmony_ci __ umlslb(z28.VnD(), z28.VnS(), z9.VnS()); 3513b8021494Sopenharmony_ci 3514b8021494Sopenharmony_ci __ movprfx(z28, z29); 3515b8021494Sopenharmony_ci __ umlslb(z28.VnD(), z28.VnS(), z1.VnS(), 0); 3516b8021494Sopenharmony_ci 3517b8021494Sopenharmony_ci __ movprfx(z28, z29); 3518b8021494Sopenharmony_ci __ umlslb(z28.VnS(), z28.VnH(), z1.VnH(), 0); 3519b8021494Sopenharmony_ci 3520b8021494Sopenharmony_ci __ movprfx(z9, z10); 3521b8021494Sopenharmony_ci __ umlslt(z9.VnD(), z9.VnS(), z30.VnS()); 3522b8021494Sopenharmony_ci 3523b8021494Sopenharmony_ci __ movprfx(z9, z10); 3524b8021494Sopenharmony_ci __ umlslt(z9.VnD(), z9.VnS(), z0.VnS(), 0); 3525b8021494Sopenharmony_ci 3526b8021494Sopenharmony_ci __ movprfx(z9, z10); 3527b8021494Sopenharmony_ci __ umlslt(z9.VnS(), z9.VnH(), z0.VnH(), 0); 3528b8021494Sopenharmony_ci 3529b8021494Sopenharmony_ci __ movprfx(z24.VnB(), p7.Merging(), z25.VnB()); 3530b8021494Sopenharmony_ci __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z24.VnB()), 3531b8021494Sopenharmony_ci 3532b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p1.Merging(), z21.VnB()); 3533b8021494Sopenharmony_ci __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z20.VnB()); 3534b8021494Sopenharmony_ci 3535b8021494Sopenharmony_ci __ movprfx(z8.VnB(), p5.Merging(), z9.VnB()); 3536b8021494Sopenharmony_ci __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z8.VnB()); 3537b8021494Sopenharmony_ci 3538b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); 3539b8021494Sopenharmony_ci __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z29.VnB()); 3540b8021494Sopenharmony_ci 3541b8021494Sopenharmony_ci __ movprfx(z12.VnB(), p1.Merging(), z13.VnB()); 3542b8021494Sopenharmony_ci __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB()); 3543b8021494Sopenharmony_ci 3544b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); 3545b8021494Sopenharmony_ci __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB()); 3546b8021494Sopenharmony_ci 3547b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); 3548b8021494Sopenharmony_ci __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB()); 3549b8021494Sopenharmony_ci 3550b8021494Sopenharmony_ci __ movprfx(z25.VnS(), p7.Merging(), z26.VnS()); 3551b8021494Sopenharmony_ci __ urecpe(z25.VnS(), p7.Merging(), z25.VnS()); 3552b8021494Sopenharmony_ci 3553b8021494Sopenharmony_ci __ movprfx(z29.VnB(), p4.Merging(), z30.VnB()); 3554b8021494Sopenharmony_ci __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z29.VnB()); 3555b8021494Sopenharmony_ci 3556b8021494Sopenharmony_ci __ movprfx(z15.VnB(), p2.Merging(), z16.VnB()); 3557b8021494Sopenharmony_ci __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z15.VnB()); 3558b8021494Sopenharmony_ci 3559b8021494Sopenharmony_ci __ movprfx(z27.VnB(), p1.Merging(), z28.VnB()); 3560b8021494Sopenharmony_ci __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z27.VnB()); 3561b8021494Sopenharmony_ci 3562b8021494Sopenharmony_ci __ movprfx(z4.VnS(), p3.Merging(), z5.VnS()); 3563b8021494Sopenharmony_ci __ ursqrte(z4.VnS(), p3.Merging(), z4.VnS()); 3564b8021494Sopenharmony_ci 3565b8021494Sopenharmony_ci __ movprfx(z0, z1); 3566b8021494Sopenharmony_ci __ ursra(z0.VnB(), z0.VnB(), 1); 3567b8021494Sopenharmony_ci 3568b8021494Sopenharmony_ci __ movprfx(z25.VnB(), p4.Merging(), z26.VnB()); 3569b8021494Sopenharmony_ci __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z25.VnB()); 3570b8021494Sopenharmony_ci 3571b8021494Sopenharmony_ci __ movprfx(z0, z1); 3572b8021494Sopenharmony_ci __ usra(z0.VnB(), z0.VnB(), 1); 3573b8021494Sopenharmony_ci 3574b8021494Sopenharmony_ci __ movprfx(z16, z17); 3575b8021494Sopenharmony_ci __ xar(z16.VnB(), z16.VnB(), z16.VnB(), 1); 3576b8021494Sopenharmony_ci } 3577b8021494Sopenharmony_ci assm.FinalizeCode(); 3578b8021494Sopenharmony_ci 3579b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 3580b8021494Sopenharmony_ci} 3581b8021494Sopenharmony_ci 3582b8021494Sopenharmony_ciTEST(movprfx_negative_lane_size_sve2) { 3583b8021494Sopenharmony_ci Assembler assm; 3584b8021494Sopenharmony_ci assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); 3585b8021494Sopenharmony_ci { 3586b8021494Sopenharmony_ci // We have to use the Assembler directly to generate movprfx, so we need 3587b8021494Sopenharmony_ci // to manually reserve space for the code we're about to emit. 3588b8021494Sopenharmony_ci static const size_t kPairCount = 140; 3589b8021494Sopenharmony_ci CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 3590b8021494Sopenharmony_ci 3591b8021494Sopenharmony_ci __ movprfx(z14.VnS(), p4.Merging(), z15.VnS()); 3592b8021494Sopenharmony_ci __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD()); 3593b8021494Sopenharmony_ci 3594b8021494Sopenharmony_ci __ movprfx(z15.VnS(), p0.Merging(), z16.VnS()); 3595b8021494Sopenharmony_ci __ flogb(z15.VnH(), p0.Merging(), z3.VnH()); 3596b8021494Sopenharmony_ci 3597b8021494Sopenharmony_ci __ movprfx(z19.VnB(), p5.Merging(), z20.VnB()); 3598b8021494Sopenharmony_ci __ sadalp(z19.VnD(), p5.Merging(), z9.VnS()); 3599b8021494Sopenharmony_ci 3600b8021494Sopenharmony_ci __ movprfx(z20.VnH(), p3.Merging(), z21.VnH()); 3601b8021494Sopenharmony_ci __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()); 3602b8021494Sopenharmony_ci 3603b8021494Sopenharmony_ci __ movprfx(z21.VnH(), p0.Merging(), z22.VnH()); 3604b8021494Sopenharmony_ci __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()); 3605b8021494Sopenharmony_ci 3606b8021494Sopenharmony_ci __ movprfx(z1.VnS(), p0.Merging(), z2.VnS()); 3607b8021494Sopenharmony_ci __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()); 3608b8021494Sopenharmony_ci 3609b8021494Sopenharmony_ci __ movprfx(z29.VnD(), p1.Merging(), z30.VnD()); 3610b8021494Sopenharmony_ci __ sqabs(z29.VnB(), p1.Merging(), z18.VnB()); 3611b8021494Sopenharmony_ci 3612b8021494Sopenharmony_ci __ movprfx(z28.VnH(), p0.Merging(), z29.VnH()); 3613b8021494Sopenharmony_ci __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()); 3614b8021494Sopenharmony_ci 3615b8021494Sopenharmony_ci __ movprfx(z21.VnH(), p0.Merging(), z22.VnH()); 3616b8021494Sopenharmony_ci __ sqneg(z21.VnB(), p0.Merging(), z17.VnB()); 3617b8021494Sopenharmony_ci 3618b8021494Sopenharmony_ci __ movprfx(z31.VnS(), p5.Merging(), z0.VnS()); 3619b8021494Sopenharmony_ci __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()); 3620b8021494Sopenharmony_ci 3621b8021494Sopenharmony_ci __ movprfx(z25.VnD(), p6.Merging(), z26.VnD()); 3622b8021494Sopenharmony_ci __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()); 3623b8021494Sopenharmony_ci 3624b8021494Sopenharmony_ci __ movprfx(z0.VnH(), p5.Merging(), z1.VnH()); 3625b8021494Sopenharmony_ci __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0); 3626b8021494Sopenharmony_ci 3627b8021494Sopenharmony_ci __ movprfx(z0.VnS(), p5.Merging(), z1.VnS()); 3628b8021494Sopenharmony_ci __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB()); 3629b8021494Sopenharmony_ci 3630b8021494Sopenharmony_ci __ movprfx(z7.VnD(), p3.Merging(), z8.VnD()); 3631b8021494Sopenharmony_ci __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()); 3632b8021494Sopenharmony_ci 3633b8021494Sopenharmony_ci __ movprfx(z10.VnH(), p1.Merging(), z11.VnH()); 3634b8021494Sopenharmony_ci __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0); 3635b8021494Sopenharmony_ci 3636b8021494Sopenharmony_ci __ movprfx(z16.VnH(), p7.Merging(), z17.VnH()); 3637b8021494Sopenharmony_ci __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); 3638b8021494Sopenharmony_ci 3639b8021494Sopenharmony_ci __ movprfx(z16.VnS(), p7.Merging(), z17.VnS()); 3640b8021494Sopenharmony_ci __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); 3641b8021494Sopenharmony_ci 3642b8021494Sopenharmony_ci __ movprfx(z23.VnD(), p4.Merging(), z24.VnD()); 3643b8021494Sopenharmony_ci __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()); 3644b8021494Sopenharmony_ci 3645b8021494Sopenharmony_ci __ movprfx(z31.VnH(), p7.Merging(), z0.VnH()); 3646b8021494Sopenharmony_ci __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB()); 3647b8021494Sopenharmony_ci 3648b8021494Sopenharmony_ci __ movprfx(z16.VnH(), p7.Merging(), z17.VnH()); 3649b8021494Sopenharmony_ci __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()); 3650b8021494Sopenharmony_ci 3651b8021494Sopenharmony_ci __ movprfx(z12.VnH(), p0.Merging(), z13.VnH()); 3652b8021494Sopenharmony_ci __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1); 3653b8021494Sopenharmony_ci 3654b8021494Sopenharmony_ci __ movprfx(z26.VnH(), p2.Merging(), z27.VnH()); 3655b8021494Sopenharmony_ci __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()); 3656b8021494Sopenharmony_ci 3657b8021494Sopenharmony_ci __ movprfx(z20.VnB(), p4.Merging(), z21.VnB()); 3658b8021494Sopenharmony_ci __ uadalp(z20.VnD(), p4.Merging(), z5.VnS()); 3659b8021494Sopenharmony_ci 3660b8021494Sopenharmony_ci __ movprfx(z21.VnH(), p2.Merging(), z22.VnH()); 3661b8021494Sopenharmony_ci __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()); 3662b8021494Sopenharmony_ci 3663b8021494Sopenharmony_ci __ movprfx(z1.VnH(), p4.Merging(), z2.VnH()); 3664b8021494Sopenharmony_ci __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()); 3665b8021494Sopenharmony_ci 3666b8021494Sopenharmony_ci __ movprfx(z18.VnH(), p0.Merging(), z19.VnH()); 3667b8021494Sopenharmony_ci __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()); 3668b8021494Sopenharmony_ci 3669b8021494Sopenharmony_ci __ movprfx(z24.VnH(), p7.Merging(), z25.VnH()); 3670b8021494Sopenharmony_ci __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), 3671b8021494Sopenharmony_ci 3672b8021494Sopenharmony_ci __ movprfx(z20.VnS(), p1.Merging(), z21.VnS()); 3673b8021494Sopenharmony_ci __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()); 3674b8021494Sopenharmony_ci 3675b8021494Sopenharmony_ci __ movprfx(z8.VnS(), p5.Merging(), z9.VnS()); 3676b8021494Sopenharmony_ci __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()); 3677b8021494Sopenharmony_ci 3678b8021494Sopenharmony_ci __ movprfx(z29.VnS(), p7.Merging(), z30.VnS()); 3679b8021494Sopenharmony_ci __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0); 3680b8021494Sopenharmony_ci 3681b8021494Sopenharmony_ci __ movprfx(z29.VnS(), p7.Merging(), z30.VnS()); 3682b8021494Sopenharmony_ci __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB()); 3683b8021494Sopenharmony_ci 3684b8021494Sopenharmony_ci __ movprfx(z12.VnS(), p1.Merging(), z13.VnS()); 3685b8021494Sopenharmony_ci __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB()); 3686b8021494Sopenharmony_ci 3687b8021494Sopenharmony_ci __ movprfx(z20.VnS(), p0.Merging(), z21.VnS()); 3688b8021494Sopenharmony_ci __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); 3689b8021494Sopenharmony_ci 3690b8021494Sopenharmony_ci __ movprfx(z20.VnS(), p0.Merging(), z21.VnS()); 3691b8021494Sopenharmony_ci __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); 3692b8021494Sopenharmony_ci 3693b8021494Sopenharmony_ci __ movprfx(z25.VnB(), p7.Merging(), z26.VnB()); 3694b8021494Sopenharmony_ci __ urecpe(z25.VnS(), p7.Merging(), z2.VnS()); 3695b8021494Sopenharmony_ci 3696b8021494Sopenharmony_ci __ movprfx(z29.VnD(), p4.Merging(), z30.VnD()); 3697b8021494Sopenharmony_ci __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()); 3698b8021494Sopenharmony_ci 3699b8021494Sopenharmony_ci __ movprfx(z15.VnD(), p2.Merging(), z16.VnD()); 3700b8021494Sopenharmony_ci __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()); 3701b8021494Sopenharmony_ci 3702b8021494Sopenharmony_ci __ movprfx(z27.VnD(), p1.Merging(), z28.VnD()); 3703b8021494Sopenharmony_ci __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()); 3704b8021494Sopenharmony_ci 3705b8021494Sopenharmony_ci __ movprfx(z31.VnD(), p2.Merging(), z0.VnD()); 3706b8021494Sopenharmony_ci __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1); 3707b8021494Sopenharmony_ci 3708b8021494Sopenharmony_ci __ movprfx(z4.VnH(), p3.Merging(), z5.VnH()); 3709b8021494Sopenharmony_ci __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS()); 3710b8021494Sopenharmony_ci 3711b8021494Sopenharmony_ci __ movprfx(z25.VnD(), p4.Merging(), z26.VnD()); 3712b8021494Sopenharmony_ci __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()); 3713b8021494Sopenharmony_ci } 3714b8021494Sopenharmony_ci assm.FinalizeCode(); 3715b8021494Sopenharmony_ci 3716b8021494Sopenharmony_ci CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 3717b8021494Sopenharmony_ci} 3718b8021494Sopenharmony_ci 3719b8021494Sopenharmony_ci} // namespace aarch64 3720b8021494Sopenharmony_ci} // namespace vixl 3721