// Copyright 2021, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 27#include "test-runner.h" 28#include "test-utils.h" 29 30#include "aarch64/cpu-aarch64.h" 31#include "aarch64/disasm-aarch64.h" 32#include "aarch64/macro-assembler-aarch64.h" 33#include "aarch64/simulator-aarch64.h" 34#include "aarch64/test-utils-aarch64.h" 35#include "test-assembler-aarch64.h" 36 37#define TEST_SVE(name) TEST_SVE_INNER("SIM", name) 38 39namespace vixl { 40namespace aarch64 { 41 42TEST_SVE(sve_matmul) { 43 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 44 CPUFeatures::kSVEI8MM, 45 CPUFeatures::kNEON, 46 CPUFeatures::kCRC32); 47 START(); 48 49 SetInitialMachineState(&masm); 50 // state = 0xe2bd2480 51 52 { 53 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 54 __ dci(0x45179979); // smmla z25.s, z11.b, z23.b 55 // vl128 state = 0xf1ca8a4d 56 __ dci(0x45179b51); // smmla z17.s, z26.b, z23.b 57 // vl128 state = 0x4458ad10 58 __ dci(0x45d79b53); // ummla z19.s, z26.b, z23.b 59 // vl128 state = 0x43d4d064 60 __ dci(0x45d69b17); // ummla z23.s, z24.b, z22.b 61 // vl128 state = 0x601e77c8 62 __ dci(0x45c69b33); // ummla z19.s, z25.b, z6.b 63 // vl128 state = 0x561b4e22 64 __ dci(0x45c49b1b); // ummla z27.s, z24.b, z4.b 65 // vl128 state = 0x89b65d78 66 __ dci(0x45dc9b1a); // ummla z26.s, z24.b, z28.b 67 // vl128 state = 0x85c9e62d 68 __ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b 69 // vl128 state = 0x3fc74134 70 __ dci(0x45d99b19); // ummla z25.s, z24.b, z25.b 71 // vl128 state = 0xa2fa347b 72 __ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b 73 // vl128 state = 0xb9854782 74 __ dci(0x45899b1a); // usmmla z26.s, z24.b, z9.b 75 // vl128 state = 0x7fd376d8 76 __ dci(0x45099b8a); // smmla z10.s, z28.b, z9.b 77 // vl128 state = 0xb41d8433 78 __ dci(0x45019bcb); // smmla z11.s, z30.b, z1.b 79 // vl128 state = 0xc9c0e80d 80 __ dci(0x45019bdb); // smmla z27.s, z30.b, z1.b 81 // vl128 state = 0xf1130e02 82 __ dci(0x45019b6b); // smmla z11.s, z27.b, z1.b 83 // vl128 state = 0x282d3dc7 84 __ dci(0x45019b6f); // smmla z15.s, z27.b, z1.b 85 // vl128 state = 
0x34570238 86 __ dci(0x45859b6b); // usmmla z11.s, z27.b, z5.b 87 // vl128 state = 0xc451206a 88 __ dci(0x45919b6a); // usmmla z10.s, z27.b, z17.b 89 // vl128 state = 0xa58e2ea8 90 __ dci(0x45909a62); // usmmla z2.s, z19.b, z16.b 91 // vl128 state = 0x7b5f948d 92 __ dci(0x45809a52); // usmmla z18.s, z18.b, z0.b 93 // vl128 state = 0xf746260d 94 __ dci(0x45889b53); // usmmla z19.s, z26.b, z8.b 95 // vl128 state = 0xc31cc539 96 __ dci(0x45809a57); // usmmla z23.s, z18.b, z0.b 97 // vl128 state = 0x736bb3ee 98 __ dci(0x45809a96); // usmmla z22.s, z20.b, z0.b 99 // vl128 state = 0xbb05fef6 100 __ dci(0x45809a92); // usmmla z18.s, z20.b, z0.b 101 // vl128 state = 0xbc594372 102 __ dci(0x45809a82); // usmmla z2.s, z20.b, z0.b 103 // vl128 state = 0x87c5a584 104 __ dci(0x45829ad2); // usmmla z18.s, z22.b, z2.b 105 // vl128 state = 0xa413f733 106 __ dci(0x45889ad6); // usmmla z22.s, z22.b, z8.b 107 // vl128 state = 0x87ec445d 108 __ dci(0x45c898d2); // ummla z18.s, z6.b, z8.b 109 // vl128 state = 0x3ca8a6e5 110 __ dci(0x450898d0); // smmla z16.s, z6.b, z8.b 111 // vl128 state = 0x4300d87b 112 __ dci(0x45189ad8); // smmla z24.s, z22.b, z24.b 113 // vl128 state = 0x38be2e8a 114 __ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b 115 // vl128 state = 0x8a3e6103 116 __ dci(0x45989bc9); // usmmla z9.s, z30.b, z24.b 117 // vl128 state = 0xc728e586 118 __ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b 119 // vl128 state = 0x4cb44c0e 120 __ dci(0x459c99d1); // usmmla z17.s, z14.b, z28.b 121 // vl128 state = 0x84ebcb36 122 __ dci(0x459c99d5); // usmmla z21.s, z14.b, z28.b 123 // vl128 state = 0x8813d2e2 124 __ dci(0x451c999d); // smmla z29.s, z12.b, z28.b 125 // vl128 state = 0x8f26ee51 126 __ dci(0x451c999f); // smmla z31.s, z12.b, z28.b 127 // vl128 state = 0x5d626fd0 128 __ dci(0x459e998f); // usmmla z15.s, z12.b, z30.b 129 // vl128 state = 0x6b64cc8f 130 __ dci(0x459f991f); // usmmla z31.s, z8.b, z31.b 131 // vl128 state = 0x41648186 132 __ dci(0x4587991e); // usmmla z30.s, z8.b, 
z7.b 133 // vl128 state = 0x701525ec 134 __ dci(0x45079816); // smmla z22.s, z0.b, z7.b 135 // vl128 state = 0x61a2d024 136 __ dci(0x450f9897); // smmla z23.s, z4.b, z15.b 137 // vl128 state = 0x82ba6bd5 138 __ dci(0x450b98d3); // smmla z19.s, z6.b, z11.b 139 // vl128 state = 0xa842bbde 140 __ dci(0x450b98db); // smmla z27.s, z6.b, z11.b 141 // vl128 state = 0x9977677a 142 __ dci(0x451f98d3); // smmla z19.s, z6.b, z31.b 143 // vl128 state = 0xe6d6c2ef 144 __ dci(0x451b9adb); // smmla z27.s, z22.b, z27.b 145 // vl128 state = 0xa535453f 146 __ dci(0x450b98d9); // smmla z25.s, z6.b, z11.b 147 // vl128 state = 0xeda3f381 148 __ dci(0x458b9adb); // usmmla z27.s, z22.b, z11.b 149 // vl128 state = 0xd72dbdef 150 __ dci(0x45cb98da); // ummla z26.s, z6.b, z11.b 151 // vl128 state = 0xfae4975b 152 __ dci(0x45c999d2); // ummla z18.s, z14.b, z9.b 153 // vl128 state = 0x0aa6e1f6 154 } 155 156 uint32_t state; 157 ComputeMachineStateHash(&masm, &state); 158 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 159 __ Ldr(w0, MemOperand(x0)); 160 161 END(); 162 if (CAN_RUN()) { 163 RUN(); 164 uint32_t expected_hashes[] = { 165 0x0aa6e1f6, 166 0xba2d4547, 167 0x0e72a647, 168 0x15b8fc1b, 169 0x92eddc98, 170 0xe0c72bcf, 171 0x36b4e3ba, 172 0x1041114e, 173 0x4d44ebd4, 174 0xfe0e3cbf, 175 0x81c43455, 176 0x678617c5, 177 0xf72fac1f, 178 0xabdcd4e4, 179 0x108864bd, 180 0x035f6eca, 181 }; 182 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 183 } 184} 185 186TEST_SVE(sve_fmatmul_s) { 187 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 188 CPUFeatures::kSVEF32MM, 189 CPUFeatures::kNEON, 190 CPUFeatures::kCRC32); 191 START(); 192 193 SetInitialMachineState(&masm); 194 // state = 0xe2bd2480 195 196 { 197 ExactAssemblyScope scope(&masm, 20 * kInstructionSize); 198 __ dci(0x64a1e6ee); // fmmla z14.s, z23.s, z1.s 199 // vl128 state = 0x9db41bef 200 __ dci(0x64b1e7fe); // fmmla z30.s, z31.s, z17.s 201 // vl128 state = 0xc1535e55 202 __ dci(0x64b9e7d6); // fmmla z22.s, z30.s, z25.s 
203 // vl128 state = 0xc65aad35 204 __ dci(0x64bde6c6); // fmmla z6.s, z22.s, z29.s 205 // vl128 state = 0x68387c22 206 __ dci(0x64b9e4c2); // fmmla z2.s, z6.s, z25.s 207 // vl128 state = 0xcf08b3a4 208 __ dci(0x64b9e543); // fmmla z3.s, z10.s, z25.s 209 // vl128 state = 0x969bbe77 210 __ dci(0x64b9e553); // fmmla z19.s, z10.s, z25.s 211 // vl128 state = 0xc3f514e1 212 __ dci(0x64b9e557); // fmmla z23.s, z10.s, z25.s 213 // vl128 state = 0x4b351c29 214 __ dci(0x64b9e773); // fmmla z19.s, z27.s, z25.s 215 // vl128 state = 0x5e026315 216 __ dci(0x64bbe757); // fmmla z23.s, z26.s, z27.s 217 // vl128 state = 0x61684fe6 218 __ dci(0x64bbe755); // fmmla z21.s, z26.s, z27.s 219 // vl128 state = 0x719b4ce0 220 __ dci(0x64bfe554); // fmmla z20.s, z10.s, z31.s 221 // vl128 state = 0xdf3d2a1c 222 __ dci(0x64bfe550); // fmmla z16.s, z10.s, z31.s 223 // vl128 state = 0x3279aab8 224 __ dci(0x64bfe714); // fmmla z20.s, z24.s, z31.s 225 // vl128 state = 0x0b985869 226 __ dci(0x64b7e756); // fmmla z22.s, z26.s, z23.s 227 // vl128 state = 0x14230587 228 __ dci(0x64b7e737); // fmmla z23.s, z25.s, z23.s 229 // vl128 state = 0x2cb88e7f 230 __ dci(0x64bfe767); // fmmla z7.s, z27.s, z31.s 231 // vl128 state = 0xb5ec0c65 232 __ dci(0x64bfe777); // fmmla z23.s, z27.s, z31.s 233 // vl128 state = 0xb5e5eab0 234 __ dci(0x64bfe715); // fmmla z21.s, z24.s, z31.s 235 // vl128 state = 0xd0491fb5 236 __ dci(0x64b7e797); // fmmla z23.s, z28.s, z23.s 237 // vl128 state = 0x98a55a30 238 } 239 240 uint32_t state; 241 ComputeMachineStateHash(&masm, &state); 242 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 243 __ Ldr(w0, MemOperand(x0)); 244 245 END(); 246 if (CAN_RUN()) { 247 RUN(); 248 uint32_t expected_hashes[] = { 249 0x98a55a30, 250 0x590b7715, 251 0x4562ccf3, 252 0x1f8653a6, 253 0x5fe174d5, 254 0xb300dcb8, 255 0x3cefa79e, 256 0xa22484c7, 257 0x380697ec, 258 0xde9e699b, 259 0x99d21870, 260 0x456cb46b, 261 0x207d2615, 262 0xecaf9678, 263 0x0949e2d2, 264 0xa764c43f, 265 }; 266 
ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 267 } 268} 269 270} // namespace aarch64 271} // namespace vixl 272