1// Copyright 2016 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "src/execution/arm64/simulator-arm64.h" 6 7#if defined(USE_SIMULATOR) 8 9#include <cmath> 10 11namespace v8 { 12namespace internal { 13 14namespace { 15 16// See FPRound for a description of this function. 17inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa, 18 FPRounding round_mode) { 19 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>( 20 sign, exponent, mantissa, round_mode); 21 return bit_cast<double>(bits); 22} 23 24// See FPRound for a description of this function. 25inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa, 26 FPRounding round_mode) { 27 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>( 28 sign, exponent, mantissa, round_mode); 29 return bit_cast<float>(bits); 30} 31 32// See FPRound for a description of this function. 33inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent, 34 uint64_t mantissa, FPRounding round_mode) { 35 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>( 36 sign, exponent, mantissa, round_mode); 37} 38 39} // namespace 40 41double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { 42 if (src >= 0) { 43 return UFixedToDouble(src, fbits, round); 44 } else if (src == INT64_MIN) { 45 return -UFixedToDouble(src, fbits, round); 46 } else { 47 return -UFixedToDouble(-src, fbits, round); 48 } 49} 50 51double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { 52 // An input of 0 is a special case because the result is effectively 53 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 54 if (src == 0) { 55 return 0.0; 56 } 57 58 // Calculate the exponent. The highest significant bit will have the value 59 // 2^exponent. 
60 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); 61 const int64_t exponent = highest_significant_bit - fbits; 62 63 return FPRoundToDouble(0, exponent, src, round); 64} 65 66float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { 67 if (src >= 0) { 68 return UFixedToFloat(src, fbits, round); 69 } else if (src == INT64_MIN) { 70 return -UFixedToFloat(src, fbits, round); 71 } else { 72 return -UFixedToFloat(-src, fbits, round); 73 } 74} 75 76float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { 77 // An input of 0 is a special case because the result is effectively 78 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 79 if (src == 0) { 80 return 0.0f; 81 } 82 83 // Calculate the exponent. The highest significant bit will have the value 84 // 2^exponent. 85 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); 86 const int32_t exponent = highest_significant_bit - fbits; 87 88 return FPRoundToFloat(0, exponent, src, round); 89} 90 91double Simulator::FPToDouble(float value) { 92 switch (std::fpclassify(value)) { 93 case FP_NAN: { 94 if (IsSignallingNaN(value)) { 95 FPProcessException(); 96 } 97 if (DN()) return kFP64DefaultNaN; 98 99 // Convert NaNs as the processor would: 100 // - The sign is propagated. 101 // - The mantissa is transferred entirely, except that the top bit is 102 // forced to '1', making the result a quiet NaN. The unused (low-order) 103 // mantissa bits are set to 0. 104 uint32_t raw = bit_cast<uint32_t>(value); 105 106 uint64_t sign = raw >> 31; 107 uint64_t exponent = (1 << kDoubleExponentBits) - 1; 108 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw); 109 110 // Unused low-order bits remain zero. 111 mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits); 112 113 // Force a quiet NaN. 
114 mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1)); 115 116 return double_pack(sign, exponent, mantissa); 117 } 118 119 case FP_ZERO: 120 case FP_NORMAL: 121 case FP_SUBNORMAL: 122 case FP_INFINITE: { 123 // All other inputs are preserved in a standard cast, because every value 124 // representable using an IEEE-754 float is also representable using an 125 // IEEE-754 double. 126 return static_cast<double>(value); 127 } 128 } 129 130 UNREACHABLE(); 131} 132 133float Simulator::FPToFloat(float16 value) { 134 uint32_t sign = value >> 15; 135 uint32_t exponent = 136 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1, 137 kFloat16MantissaBits, value); 138 uint32_t mantissa = 139 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value); 140 141 switch (float16classify(value)) { 142 case FP_ZERO: 143 return (sign == 0) ? 0.0f : -0.0f; 144 145 case FP_INFINITE: 146 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; 147 148 case FP_SUBNORMAL: { 149 // Calculate shift required to put mantissa into the most-significant bits 150 // of the destination mantissa. 151 int shift = CountLeadingZeros(mantissa << (32 - 10), 32); 152 153 // Shift mantissa and discard implicit '1'. 154 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; 155 mantissa &= (1 << kFloatMantissaBits) - 1; 156 157 // Adjust the exponent for the shift applied, and rebias. 158 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias); 159 break; 160 } 161 162 case FP_NAN: { 163 if (IsSignallingNaN(value)) { 164 FPProcessException(); 165 } 166 if (DN()) return kFP32DefaultNaN; 167 168 // Convert NaNs as the processor would: 169 // - The sign is propagated. 170 // - The mantissa is transferred entirely, except that the top bit is 171 // forced to '1', making the result a quiet NaN. The unused (low-order) 172 // mantissa bits are set to 0. 
173 exponent = (1 << kFloatExponentBits) - 1; 174 175 // Increase bits in mantissa, making low-order bits 0. 176 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 177 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN. 178 break; 179 } 180 181 case FP_NORMAL: { 182 // Increase bits in mantissa, making low-order bits 0. 183 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 184 185 // Change exponent bias. 186 exponent += (kFloatExponentBias - kFloat16ExponentBias); 187 break; 188 } 189 190 default: 191 UNREACHABLE(); 192 } 193 return float_pack(sign, exponent, mantissa); 194} 195 196float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { 197 // Only the FPTieEven rounding mode is implemented. 198 DCHECK_EQ(round_mode, FPTieEven); 199 USE(round_mode); 200 201 int64_t sign = float_sign(value); 202 int64_t exponent = 203 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias; 204 uint32_t mantissa = float_mantissa(value); 205 206 switch (std::fpclassify(value)) { 207 case FP_NAN: { 208 if (IsSignallingNaN(value)) { 209 FPProcessException(); 210 } 211 if (DN()) return kFP16DefaultNaN; 212 213 // Convert NaNs as the processor would: 214 // - The sign is propagated. 215 // - The mantissa is transferred as much as possible, except that the top 216 // bit is forced to '1', making the result a quiet NaN. 217 float16 result = 218 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 219 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); 220 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN; 221 return result; 222 } 223 224 case FP_ZERO: 225 return (sign == 0) ? 0 : 0x8000; 226 227 case FP_INFINITE: 228 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 229 230 case FP_NORMAL: 231 case FP_SUBNORMAL: { 232 // Convert float-to-half as the processor would, assuming that FPCR.FZ 233 // (flush-to-zero) is not set. 234 235 // Add the implicit '1' bit to the mantissa. 
236 mantissa += (1 << kFloatMantissaBits); 237 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 238 } 239 } 240 241 UNREACHABLE(); 242} 243 244float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { 245 // Only the FPTieEven rounding mode is implemented. 246 DCHECK_EQ(round_mode, FPTieEven); 247 USE(round_mode); 248 249 int64_t sign = double_sign(value); 250 int64_t exponent = 251 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias; 252 uint64_t mantissa = double_mantissa(value); 253 254 switch (std::fpclassify(value)) { 255 case FP_NAN: { 256 if (IsSignallingNaN(value)) { 257 FPProcessException(); 258 } 259 if (DN()) return kFP16DefaultNaN; 260 261 // Convert NaNs as the processor would: 262 // - The sign is propagated. 263 // - The mantissa is transferred as much as possible, except that the top 264 // bit is forced to '1', making the result a quiet NaN. 265 float16 result = 266 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 267 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); 268 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN; 269 return result; 270 } 271 272 case FP_ZERO: 273 return (sign == 0) ? 0 : 0x8000; 274 275 case FP_INFINITE: 276 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 277 278 case FP_NORMAL: 279 case FP_SUBNORMAL: { 280 // Convert double-to-half as the processor would, assuming that FPCR.FZ 281 // (flush-to-zero) is not set. 282 283 // Add the implicit '1' bit to the mantissa. 284 mantissa += (UINT64_C(1) << kDoubleMantissaBits); 285 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 286 } 287 } 288 289 UNREACHABLE(); 290} 291 292float Simulator::FPToFloat(double value, FPRounding round_mode) { 293 // Only the FPTieEven rounding mode is implemented. 
294 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); 295 USE(round_mode); 296 297 switch (std::fpclassify(value)) { 298 case FP_NAN: { 299 if (IsSignallingNaN(value)) { 300 FPProcessException(); 301 } 302 if (DN()) return kFP32DefaultNaN; 303 304 // Convert NaNs as the processor would: 305 // - The sign is propagated. 306 // - The mantissa is transferred as much as possible, except that the 307 // top bit is forced to '1', making the result a quiet NaN. 308 309 uint64_t raw = bit_cast<uint64_t>(value); 310 311 uint32_t sign = raw >> 63; 312 uint32_t exponent = (1 << 8) - 1; 313 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64( 314 50, kDoubleMantissaBits - kFloatMantissaBits, raw)); 315 mantissa |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN. 316 317 return float_pack(sign, exponent, mantissa); 318 } 319 320 case FP_ZERO: 321 case FP_INFINITE: { 322 // In a C++ cast, any value representable in the target type will be 323 // unchanged. This is always the case for +/-0.0 and infinities. 324 return static_cast<float>(value); 325 } 326 327 case FP_NORMAL: 328 case FP_SUBNORMAL: { 329 // Convert double-to-float as the processor would, assuming that FPCR.FZ 330 // (flush-to-zero) is not set. 331 uint32_t sign = double_sign(value); 332 int64_t exponent = 333 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias; 334 uint64_t mantissa = double_mantissa(value); 335 if (std::fpclassify(value) == FP_NORMAL) { 336 // For normal FP values, add the hidden bit. 
337 mantissa |= (UINT64_C(1) << kDoubleMantissaBits); 338 } 339 return FPRoundToFloat(sign, exponent, mantissa, round_mode); 340 } 341 } 342 343 UNREACHABLE(); 344} 345 346void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 347 dst.ClearForWrite(vform); 348 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 349 dst.ReadUintFromMem(vform, i, addr); 350 addr += LaneSizeInBytesFromFormat(vform); 351 } 352} 353 354void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index, 355 uint64_t addr) { 356 dst.ReadUintFromMem(vform, index, addr); 357} 358 359void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 360 dst.ClearForWrite(vform); 361 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 362 dst.ReadUintFromMem(vform, i, addr); 363 } 364} 365 366void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, 367 LogicVRegister dst2, uint64_t addr1) { 368 dst1.ClearForWrite(vform); 369 dst2.ClearForWrite(vform); 370 int esize = LaneSizeInBytesFromFormat(vform); 371 uint64_t addr2 = addr1 + esize; 372 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 373 dst1.ReadUintFromMem(vform, i, addr1); 374 dst2.ReadUintFromMem(vform, i, addr2); 375 addr1 += 2 * esize; 376 addr2 += 2 * esize; 377 } 378} 379 380void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, 381 LogicVRegister dst2, int index, uint64_t addr1) { 382 dst1.ClearForWrite(vform); 383 dst2.ClearForWrite(vform); 384 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 385 dst1.ReadUintFromMem(vform, index, addr1); 386 dst2.ReadUintFromMem(vform, index, addr2); 387} 388 389void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1, 390 LogicVRegister dst2, uint64_t addr) { 391 dst1.ClearForWrite(vform); 392 dst2.ClearForWrite(vform); 393 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 394 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 395 dst1.ReadUintFromMem(vform, i, addr); 396 dst2.ReadUintFromMem(vform, i, addr2); 
397 } 398} 399 400void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, 401 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) { 402 dst1.ClearForWrite(vform); 403 dst2.ClearForWrite(vform); 404 dst3.ClearForWrite(vform); 405 int esize = LaneSizeInBytesFromFormat(vform); 406 uint64_t addr2 = addr1 + esize; 407 uint64_t addr3 = addr2 + esize; 408 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 409 dst1.ReadUintFromMem(vform, i, addr1); 410 dst2.ReadUintFromMem(vform, i, addr2); 411 dst3.ReadUintFromMem(vform, i, addr3); 412 addr1 += 3 * esize; 413 addr2 += 3 * esize; 414 addr3 += 3 * esize; 415 } 416} 417 418void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, 419 LogicVRegister dst2, LogicVRegister dst3, int index, 420 uint64_t addr1) { 421 dst1.ClearForWrite(vform); 422 dst2.ClearForWrite(vform); 423 dst3.ClearForWrite(vform); 424 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 425 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 426 dst1.ReadUintFromMem(vform, index, addr1); 427 dst2.ReadUintFromMem(vform, index, addr2); 428 dst3.ReadUintFromMem(vform, index, addr3); 429} 430 431void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1, 432 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) { 433 dst1.ClearForWrite(vform); 434 dst2.ClearForWrite(vform); 435 dst3.ClearForWrite(vform); 436 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 437 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 438 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 439 dst1.ReadUintFromMem(vform, i, addr); 440 dst2.ReadUintFromMem(vform, i, addr2); 441 dst3.ReadUintFromMem(vform, i, addr3); 442 } 443} 444 445void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, 446 LogicVRegister dst2, LogicVRegister dst3, 447 LogicVRegister dst4, uint64_t addr1) { 448 dst1.ClearForWrite(vform); 449 dst2.ClearForWrite(vform); 450 dst3.ClearForWrite(vform); 451 dst4.ClearForWrite(vform); 452 int esize = 
LaneSizeInBytesFromFormat(vform); 453 uint64_t addr2 = addr1 + esize; 454 uint64_t addr3 = addr2 + esize; 455 uint64_t addr4 = addr3 + esize; 456 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 457 dst1.ReadUintFromMem(vform, i, addr1); 458 dst2.ReadUintFromMem(vform, i, addr2); 459 dst3.ReadUintFromMem(vform, i, addr3); 460 dst4.ReadUintFromMem(vform, i, addr4); 461 addr1 += 4 * esize; 462 addr2 += 4 * esize; 463 addr3 += 4 * esize; 464 addr4 += 4 * esize; 465 } 466} 467 468void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, 469 LogicVRegister dst2, LogicVRegister dst3, 470 LogicVRegister dst4, int index, uint64_t addr1) { 471 dst1.ClearForWrite(vform); 472 dst2.ClearForWrite(vform); 473 dst3.ClearForWrite(vform); 474 dst4.ClearForWrite(vform); 475 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 476 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 477 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 478 dst1.ReadUintFromMem(vform, index, addr1); 479 dst2.ReadUintFromMem(vform, index, addr2); 480 dst3.ReadUintFromMem(vform, index, addr3); 481 dst4.ReadUintFromMem(vform, index, addr4); 482} 483 484void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1, 485 LogicVRegister dst2, LogicVRegister dst3, 486 LogicVRegister dst4, uint64_t addr) { 487 dst1.ClearForWrite(vform); 488 dst2.ClearForWrite(vform); 489 dst3.ClearForWrite(vform); 490 dst4.ClearForWrite(vform); 491 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 492 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 493 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 494 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 495 dst1.ReadUintFromMem(vform, i, addr); 496 dst2.ReadUintFromMem(vform, i, addr2); 497 dst3.ReadUintFromMem(vform, i, addr3); 498 dst4.ReadUintFromMem(vform, i, addr4); 499 } 500} 501 502void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { 503 for (int i = 0; i < LaneCountFromFormat(vform); 
i++) { 504 src.WriteUintToMem(vform, i, addr); 505 addr += LaneSizeInBytesFromFormat(vform); 506 } 507} 508 509void Simulator::st1(VectorFormat vform, LogicVRegister src, int index, 510 uint64_t addr) { 511 src.WriteUintToMem(vform, index, addr); 512} 513 514void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, 515 uint64_t addr) { 516 int esize = LaneSizeInBytesFromFormat(vform); 517 uint64_t addr2 = addr + esize; 518 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 519 dst.WriteUintToMem(vform, i, addr); 520 dst2.WriteUintToMem(vform, i, addr2); 521 addr += 2 * esize; 522 addr2 += 2 * esize; 523 } 524} 525 526void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, 527 int index, uint64_t addr) { 528 int esize = LaneSizeInBytesFromFormat(vform); 529 dst.WriteUintToMem(vform, index, addr); 530 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 531} 532 533void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, 534 LogicVRegister dst3, uint64_t addr) { 535 int esize = LaneSizeInBytesFromFormat(vform); 536 uint64_t addr2 = addr + esize; 537 uint64_t addr3 = addr2 + esize; 538 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 539 dst.WriteUintToMem(vform, i, addr); 540 dst2.WriteUintToMem(vform, i, addr2); 541 dst3.WriteUintToMem(vform, i, addr3); 542 addr += 3 * esize; 543 addr2 += 3 * esize; 544 addr3 += 3 * esize; 545 } 546} 547 548void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, 549 LogicVRegister dst3, int index, uint64_t addr) { 550 int esize = LaneSizeInBytesFromFormat(vform); 551 dst.WriteUintToMem(vform, index, addr); 552 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 553 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 554} 555 556void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, 557 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) { 558 int esize = LaneSizeInBytesFromFormat(vform); 559 
uint64_t addr2 = addr + esize; 560 uint64_t addr3 = addr2 + esize; 561 uint64_t addr4 = addr3 + esize; 562 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 563 dst.WriteUintToMem(vform, i, addr); 564 dst2.WriteUintToMem(vform, i, addr2); 565 dst3.WriteUintToMem(vform, i, addr3); 566 dst4.WriteUintToMem(vform, i, addr4); 567 addr += 4 * esize; 568 addr2 += 4 * esize; 569 addr3 += 4 * esize; 570 addr4 += 4 * esize; 571 } 572} 573 574void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, 575 LogicVRegister dst3, LogicVRegister dst4, int index, 576 uint64_t addr) { 577 int esize = LaneSizeInBytesFromFormat(vform); 578 dst.WriteUintToMem(vform, index, addr); 579 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 580 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 581 dst4.WriteUintToMem(vform, index, addr + 3 * esize); 582} 583 584LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, 585 const LogicVRegister& src1, 586 const LogicVRegister& src2, Condition cond) { 587 dst.ClearForWrite(vform); 588 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 589 int64_t sa = src1.Int(vform, i); 590 int64_t sb = src2.Int(vform, i); 591 uint64_t ua = src1.Uint(vform, i); 592 uint64_t ub = src2.Uint(vform, i); 593 bool result = false; 594 switch (cond) { 595 case eq: 596 result = (ua == ub); 597 break; 598 case ge: 599 result = (sa >= sb); 600 break; 601 case gt: 602 result = (sa > sb); 603 break; 604 case hi: 605 result = (ua > ub); 606 break; 607 case hs: 608 result = (ua >= ub); 609 break; 610 case lt: 611 result = (sa < sb); 612 break; 613 case le: 614 result = (sa <= sb); 615 break; 616 default: 617 UNREACHABLE(); 618 } 619 dst.SetUint(vform, i, result ? 
MaxUintFromFormat(vform) : 0); 620 } 621 return dst; 622} 623 624LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, 625 const LogicVRegister& src1, int imm, 626 Condition cond) { 627 SimVRegister temp; 628 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); 629 return cmp(vform, dst, src1, imm_reg, cond); 630} 631 632LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst, 633 const LogicVRegister& src1, 634 const LogicVRegister& src2) { 635 dst.ClearForWrite(vform); 636 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 637 uint64_t ua = src1.Uint(vform, i); 638 uint64_t ub = src2.Uint(vform, i); 639 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); 640 } 641 return dst; 642} 643 644LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst, 645 const LogicVRegister& src1, 646 const LogicVRegister& src2) { 647 int lane_size = LaneSizeInBitsFromFormat(vform); 648 dst.ClearForWrite(vform); 649 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 650 // Test for unsigned saturation. 651 uint64_t ua = src1.UintLeftJustified(vform, i); 652 uint64_t ub = src2.UintLeftJustified(vform, i); 653 uint64_t ur = ua + ub; 654 if (ur < ua) { 655 dst.SetUnsignedSat(i, true); 656 } 657 658 // Test for signed saturation. 659 bool pos_a = (ua >> 63) == 0; 660 bool pos_b = (ub >> 63) == 0; 661 bool pos_r = (ur >> 63) == 0; 662 // If the signs of the operands are the same, but different from the result, 663 // there was an overflow. 
664 if ((pos_a == pos_b) && (pos_a != pos_r)) { 665 dst.SetSignedSat(i, pos_a); 666 } 667 668 dst.SetInt(vform, i, ur >> (64 - lane_size)); 669 } 670 return dst; 671} 672 673LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, 674 const LogicVRegister& src1, 675 const LogicVRegister& src2) { 676 SimVRegister temp1, temp2; 677 uzp1(vform, temp1, src1, src2); 678 uzp2(vform, temp2, src1, src2); 679 add(vform, dst, temp1, temp2); 680 return dst; 681} 682 683LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, 684 const LogicVRegister& src1, 685 const LogicVRegister& src2) { 686 SimVRegister temp; 687 mul(vform, temp, src1, src2); 688 add(vform, dst, dst, temp); 689 return dst; 690} 691 692LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, 693 const LogicVRegister& src1, 694 const LogicVRegister& src2) { 695 SimVRegister temp; 696 mul(vform, temp, src1, src2); 697 sub(vform, dst, dst, temp); 698 return dst; 699} 700 701LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, 702 const LogicVRegister& src1, 703 const LogicVRegister& src2) { 704 dst.ClearForWrite(vform); 705 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 706 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); 707 } 708 return dst; 709} 710 711LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, 712 const LogicVRegister& src1, 713 const LogicVRegister& src2, int index) { 714 SimVRegister temp; 715 VectorFormat indexform = VectorFormatFillQ(vform); 716 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); 717} 718 719LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, 720 const LogicVRegister& src1, 721 const LogicVRegister& src2, int index) { 722 SimVRegister temp; 723 VectorFormat indexform = VectorFormatFillQ(vform); 724 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); 725} 726 727LogicVRegister Simulator::mls(VectorFormat vform, 
LogicVRegister dst, 728 const LogicVRegister& src1, 729 const LogicVRegister& src2, int index) { 730 SimVRegister temp; 731 VectorFormat indexform = VectorFormatFillQ(vform); 732 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); 733} 734 735LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, 736 const LogicVRegister& src1, 737 const LogicVRegister& src2, int index) { 738 SimVRegister temp; 739 VectorFormat indexform = 740 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 741 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 742} 743 744LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, 745 const LogicVRegister& src1, 746 const LogicVRegister& src2, int index) { 747 SimVRegister temp; 748 VectorFormat indexform = 749 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 750 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 751} 752 753LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, 754 const LogicVRegister& src1, 755 const LogicVRegister& src2, int index) { 756 SimVRegister temp; 757 VectorFormat indexform = 758 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 759 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 760} 761 762LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, 763 const LogicVRegister& src1, 764 const LogicVRegister& src2, int index) { 765 SimVRegister temp; 766 VectorFormat indexform = 767 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 768 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 769} 770 771LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, 772 const LogicVRegister& src1, 773 const LogicVRegister& src2, int index) { 774 SimVRegister temp; 775 VectorFormat indexform = 776 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 777 return smlal(vform, dst, src1, 
dup_element(indexform, temp, src2, index)); 778} 779 780LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, 781 const LogicVRegister& src1, 782 const LogicVRegister& src2, int index) { 783 SimVRegister temp; 784 VectorFormat indexform = 785 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 786 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 787} 788 789LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, 790 const LogicVRegister& src1, 791 const LogicVRegister& src2, int index) { 792 SimVRegister temp; 793 VectorFormat indexform = 794 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 795 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 796} 797 798LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, 799 const LogicVRegister& src1, 800 const LogicVRegister& src2, int index) { 801 SimVRegister temp; 802 VectorFormat indexform = 803 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 804 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 805} 806 807LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, 808 const LogicVRegister& src1, 809 const LogicVRegister& src2, int index) { 810 SimVRegister temp; 811 VectorFormat indexform = 812 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 813 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 814} 815 816LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, 817 const LogicVRegister& src1, 818 const LogicVRegister& src2, int index) { 819 SimVRegister temp; 820 VectorFormat indexform = 821 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 822 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 823} 824 825LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, 826 const LogicVRegister& src1, 827 const LogicVRegister& src2, int index) { 828 
SimVRegister temp; 829 VectorFormat indexform = 830 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 831 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 832} 833 834LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, 835 const LogicVRegister& src1, 836 const LogicVRegister& src2, int index) { 837 SimVRegister temp; 838 VectorFormat indexform = 839 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 840 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 841} 842 843LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, 844 const LogicVRegister& src1, 845 const LogicVRegister& src2, int index) { 846 SimVRegister temp; 847 VectorFormat indexform = 848 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 849 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 850} 851 852LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, 853 const LogicVRegister& src1, 854 const LogicVRegister& src2, int index) { 855 SimVRegister temp; 856 VectorFormat indexform = 857 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 858 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 859} 860 861LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, 862 const LogicVRegister& src1, 863 const LogicVRegister& src2, int index) { 864 SimVRegister temp; 865 VectorFormat indexform = 866 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 867 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 868} 869 870LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, 871 const LogicVRegister& src1, 872 const LogicVRegister& src2, int index) { 873 SimVRegister temp; 874 VectorFormat indexform = 875 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 876 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 877} 
878 879LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, 880 const LogicVRegister& src1, 881 const LogicVRegister& src2, int index) { 882 SimVRegister temp; 883 VectorFormat indexform = 884 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 885 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 886} 887 888LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, 889 const LogicVRegister& src1, 890 const LogicVRegister& src2, int index) { 891 SimVRegister temp; 892 VectorFormat indexform = 893 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 894 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 895} 896 897LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, 898 const LogicVRegister& src1, 899 const LogicVRegister& src2, int index) { 900 SimVRegister temp; 901 VectorFormat indexform = VectorFormatFillQ(vform); 902 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 903} 904 905LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, 906 const LogicVRegister& src1, 907 const LogicVRegister& src2, int index) { 908 SimVRegister temp; 909 VectorFormat indexform = VectorFormatFillQ(vform); 910 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 911} 912 913uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) { 914 uint16_t result = 0; 915 uint16_t extended_op2 = op2; 916 for (int i = 0; i < 8; ++i) { 917 if ((op1 >> i) & 1) { 918 result = result ^ (extended_op2 << i); 919 } 920 } 921 return result; 922} 923 924LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst, 925 const LogicVRegister& src1, 926 const LogicVRegister& src2) { 927 dst.ClearForWrite(vform); 928 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 929 dst.SetUint(vform, i, 930 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); 931 } 932 return dst; 933} 934 
935LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst, 936 const LogicVRegister& src1, 937 const LogicVRegister& src2) { 938 VectorFormat vform_src = VectorFormatHalfWidth(vform); 939 dst.ClearForWrite(vform); 940 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 941 dst.SetUint( 942 vform, i, 943 PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i))); 944 } 945 return dst; 946} 947 948LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst, 949 const LogicVRegister& src1, 950 const LogicVRegister& src2) { 951 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); 952 dst.ClearForWrite(vform); 953 int lane_count = LaneCountFromFormat(vform); 954 for (int i = 0; i < lane_count; i++) { 955 dst.SetUint(vform, i, 956 PolynomialMult(src1.Uint(vform_src, lane_count + i), 957 src2.Uint(vform_src, lane_count + i))); 958 } 959 return dst; 960} 961 962LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst, 963 const LogicVRegister& src1, 964 const LogicVRegister& src2) { 965 int lane_size = LaneSizeInBitsFromFormat(vform); 966 dst.ClearForWrite(vform); 967 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 968 // Test for unsigned saturation. 969 uint64_t ua = src1.UintLeftJustified(vform, i); 970 uint64_t ub = src2.UintLeftJustified(vform, i); 971 uint64_t ur = ua - ub; 972 if (ub > ua) { 973 dst.SetUnsignedSat(i, false); 974 } 975 976 // Test for signed saturation. 977 bool pos_a = (ua >> 63) == 0; 978 bool pos_b = (ub >> 63) == 0; 979 bool pos_r = (ur >> 63) == 0; 980 // If the signs of the operands are different, and the sign of the first 981 // operand doesn't match the result, there was an overflow. 
982 if ((pos_a != pos_b) && (pos_a != pos_r)) { 983 dst.SetSignedSat(i, pos_a); 984 } 985 986 dst.SetInt(vform, i, ur >> (64 - lane_size)); 987 } 988 return dst; 989} 990 991LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst, 992 const LogicVRegister& src1, 993 const LogicVRegister& src2) { 994 dst.ClearForWrite(vform); 995 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 996 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); 997 } 998 return dst; 999} 1000 1001LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, 1002 const LogicVRegister& src1, 1003 const LogicVRegister& src2) { 1004 dst.ClearForWrite(vform); 1005 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1006 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); 1007 } 1008 return dst; 1009} 1010 1011LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst, 1012 const LogicVRegister& src1, 1013 const LogicVRegister& src2) { 1014 dst.ClearForWrite(vform); 1015 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1016 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); 1017 } 1018 return dst; 1019} 1020 1021LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst, 1022 const LogicVRegister& src1, 1023 const LogicVRegister& src2) { 1024 dst.ClearForWrite(vform); 1025 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1026 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); 1027 } 1028 return dst; 1029} 1030 1031LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, 1032 const LogicVRegister& src1, 1033 const LogicVRegister& src2) { 1034 dst.ClearForWrite(vform); 1035 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1036 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); 1037 } 1038 return dst; 1039} 1040 1041LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, 1042 const LogicVRegister& src, uint64_t imm) { 1043 uint64_t 
result[16]; 1044 int laneCount = LaneCountFromFormat(vform); 1045 for (int i = 0; i < laneCount; ++i) { 1046 result[i] = src.Uint(vform, i) & ~imm; 1047 } 1048 dst.SetUintArray(vform, result); 1049 return dst; 1050} 1051 1052LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst, 1053 const LogicVRegister& src1, 1054 const LogicVRegister& src2) { 1055 dst.ClearForWrite(vform); 1056 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1057 uint64_t operand1 = dst.Uint(vform, i); 1058 uint64_t operand2 = ~src2.Uint(vform, i); 1059 uint64_t operand3 = src1.Uint(vform, i); 1060 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1061 dst.SetUint(vform, i, result); 1062 } 1063 return dst; 1064} 1065 1066LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst, 1067 const LogicVRegister& src1, 1068 const LogicVRegister& src2) { 1069 dst.ClearForWrite(vform); 1070 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1071 uint64_t operand1 = dst.Uint(vform, i); 1072 uint64_t operand2 = src2.Uint(vform, i); 1073 uint64_t operand3 = src1.Uint(vform, i); 1074 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1075 dst.SetUint(vform, i, result); 1076 } 1077 return dst; 1078} 1079 1080LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst, 1081 const LogicVRegister& src1, 1082 const LogicVRegister& src2) { 1083 dst.ClearForWrite(vform); 1084 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1085 uint64_t operand1 = src2.Uint(vform, i); 1086 uint64_t operand2 = dst.Uint(vform, i); 1087 uint64_t operand3 = src1.Uint(vform, i); 1088 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1089 dst.SetUint(vform, i, result); 1090 } 1091 return dst; 1092} 1093 1094LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst, 1095 const LogicVRegister& src1, 1096 const LogicVRegister& src2, bool max) { 1097 dst.ClearForWrite(vform); 1098 for (int i = 0; i < LaneCountFromFormat(vform); i++) 
{ 1099 int64_t src1_val = src1.Int(vform, i); 1100 int64_t src2_val = src2.Int(vform, i); 1101 int64_t dst_val; 1102 if (max) { 1103 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1104 } else { 1105 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1106 } 1107 dst.SetInt(vform, i, dst_val); 1108 } 1109 return dst; 1110} 1111 1112LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst, 1113 const LogicVRegister& src1, 1114 const LogicVRegister& src2) { 1115 return SMinMax(vform, dst, src1, src2, true); 1116} 1117 1118LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst, 1119 const LogicVRegister& src1, 1120 const LogicVRegister& src2) { 1121 return SMinMax(vform, dst, src1, src2, false); 1122} 1123 1124LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst, 1125 const LogicVRegister& src1, 1126 const LogicVRegister& src2, bool max) { 1127 int lanes = LaneCountFromFormat(vform); 1128 int64_t result[kMaxLanesPerVector]; 1129 const LogicVRegister* src = &src1; 1130 for (int j = 0; j < 2; j++) { 1131 for (int i = 0; i < lanes; i += 2) { 1132 int64_t first_val = src->Int(vform, i); 1133 int64_t second_val = src->Int(vform, i + 1); 1134 int64_t dst_val; 1135 if (max) { 1136 dst_val = (first_val > second_val) ? first_val : second_val; 1137 } else { 1138 dst_val = (first_val < second_val) ? 
first_val : second_val; 1139 } 1140 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); 1141 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1142 } 1143 src = &src2; 1144 } 1145 dst.SetIntArray(vform, result); 1146 return dst; 1147} 1148 1149LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst, 1150 const LogicVRegister& src1, 1151 const LogicVRegister& src2) { 1152 return SMinMaxP(vform, dst, src1, src2, true); 1153} 1154 1155LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst, 1156 const LogicVRegister& src1, 1157 const LogicVRegister& src2) { 1158 return SMinMaxP(vform, dst, src1, src2, false); 1159} 1160 1161LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, 1162 const LogicVRegister& src) { 1163 DCHECK_EQ(vform, kFormatD); 1164 1165 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); 1166 dst.ClearForWrite(vform); 1167 dst.SetUint(vform, 0, dst_val); 1168 return dst; 1169} 1170 1171LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst, 1172 const LogicVRegister& src) { 1173 VectorFormat vform_dst = 1174 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1175 1176 int64_t dst_val = 0; 1177 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1178 dst_val += src.Int(vform, i); 1179 } 1180 1181 dst.ClearForWrite(vform_dst); 1182 dst.SetInt(vform_dst, 0, dst_val); 1183 return dst; 1184} 1185 1186LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst, 1187 const LogicVRegister& src) { 1188 VectorFormat vform_dst = 1189 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1190 1191 int64_t dst_val = 0; 1192 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1193 dst_val += src.Int(vform, i); 1194 } 1195 1196 dst.ClearForWrite(vform_dst); 1197 dst.SetInt(vform_dst, 0, dst_val); 1198 return dst; 1199} 1200 1201LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst, 1202 const LogicVRegister& src) { 1203 VectorFormat 
vform_dst = 1204 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1205 1206 uint64_t dst_val = 0; 1207 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1208 dst_val += src.Uint(vform, i); 1209 } 1210 1211 dst.ClearForWrite(vform_dst); 1212 dst.SetUint(vform_dst, 0, dst_val); 1213 return dst; 1214} 1215 1216LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst, 1217 const LogicVRegister& src, bool max) { 1218 int64_t dst_val = max ? INT64_MIN : INT64_MAX; 1219 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1220 int64_t src_val = src.Int(vform, i); 1221 if (max) { 1222 dst_val = (src_val > dst_val) ? src_val : dst_val; 1223 } else { 1224 dst_val = (src_val < dst_val) ? src_val : dst_val; 1225 } 1226 } 1227 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1228 dst.SetInt(vform, 0, dst_val); 1229 return dst; 1230} 1231 1232LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst, 1233 const LogicVRegister& src) { 1234 SMinMaxV(vform, dst, src, true); 1235 return dst; 1236} 1237 1238LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst, 1239 const LogicVRegister& src) { 1240 SMinMaxV(vform, dst, src, false); 1241 return dst; 1242} 1243 1244LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst, 1245 const LogicVRegister& src1, 1246 const LogicVRegister& src2, bool max) { 1247 dst.ClearForWrite(vform); 1248 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1249 uint64_t src1_val = src1.Uint(vform, i); 1250 uint64_t src2_val = src2.Uint(vform, i); 1251 uint64_t dst_val; 1252 if (max) { 1253 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1254 } else { 1255 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1256 } 1257 dst.SetUint(vform, i, dst_val); 1258 } 1259 return dst; 1260} 1261 1262LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst, 1263 const LogicVRegister& src1, 1264 const LogicVRegister& src2) { 1265 return UMinMax(vform, dst, src1, src2, true); 1266} 1267 1268LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst, 1269 const LogicVRegister& src1, 1270 const LogicVRegister& src2) { 1271 return UMinMax(vform, dst, src1, src2, false); 1272} 1273 1274LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst, 1275 const LogicVRegister& src1, 1276 const LogicVRegister& src2, bool max) { 1277 int lanes = LaneCountFromFormat(vform); 1278 uint64_t result[kMaxLanesPerVector]; 1279 const LogicVRegister* src = &src1; 1280 for (int j = 0; j < 2; j++) { 1281 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1282 uint64_t first_val = src->Uint(vform, i); 1283 uint64_t second_val = src->Uint(vform, i + 1); 1284 uint64_t dst_val; 1285 if (max) { 1286 dst_val = (first_val > second_val) ? first_val : second_val; 1287 } else { 1288 dst_val = (first_val < second_val) ? first_val : second_val; 1289 } 1290 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); 1291 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1292 } 1293 src = &src2; 1294 } 1295 dst.SetUintArray(vform, result); 1296 return dst; 1297} 1298 1299LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst, 1300 const LogicVRegister& src1, 1301 const LogicVRegister& src2) { 1302 return UMinMaxP(vform, dst, src1, src2, true); 1303} 1304 1305LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst, 1306 const LogicVRegister& src1, 1307 const LogicVRegister& src2) { 1308 return UMinMaxP(vform, dst, src1, src2, false); 1309} 1310 1311LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst, 1312 const LogicVRegister& src, bool max) { 1313 uint64_t dst_val = max ? 
0 : UINT64_MAX; 1314 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1315 uint64_t src_val = src.Uint(vform, i); 1316 if (max) { 1317 dst_val = (src_val > dst_val) ? src_val : dst_val; 1318 } else { 1319 dst_val = (src_val < dst_val) ? src_val : dst_val; 1320 } 1321 } 1322 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1323 dst.SetUint(vform, 0, dst_val); 1324 return dst; 1325} 1326 1327LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst, 1328 const LogicVRegister& src) { 1329 UMinMaxV(vform, dst, src, true); 1330 return dst; 1331} 1332 1333LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst, 1334 const LogicVRegister& src) { 1335 UMinMaxV(vform, dst, src, false); 1336 return dst; 1337} 1338 1339LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst, 1340 const LogicVRegister& src, int shift) { 1341 DCHECK_GE(shift, 0); 1342 SimVRegister temp; 1343 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1344 return ushl(vform, dst, src, shiftreg); 1345} 1346 1347LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst, 1348 const LogicVRegister& src, int shift) { 1349 DCHECK_GE(shift, 0); 1350 SimVRegister temp1, temp2; 1351 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1352 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1353 return sshl(vform, dst, extendedreg, shiftreg); 1354} 1355 1356LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst, 1357 const LogicVRegister& src, int shift) { 1358 DCHECK_GE(shift, 0); 1359 SimVRegister temp1, temp2; 1360 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1361 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1362 return sshl(vform, dst, extendedreg, shiftreg); 1363} 1364 1365LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst, 1366 const LogicVRegister& src) { 1367 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1368 return sshll(vform, dst, src, shift); 1369} 1370 
1371LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst, 1372 const LogicVRegister& src) { 1373 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1374 return sshll2(vform, dst, src, shift); 1375} 1376 1377LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst, 1378 const LogicVRegister& src, int shift) { 1379 DCHECK_GE(shift, 0); 1380 SimVRegister temp1, temp2; 1381 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1382 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1383 return ushl(vform, dst, extendedreg, shiftreg); 1384} 1385 1386LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst, 1387 const LogicVRegister& src, int shift) { 1388 DCHECK_GE(shift, 0); 1389 SimVRegister temp1, temp2; 1390 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1391 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1392 return ushl(vform, dst, extendedreg, shiftreg); 1393} 1394 1395LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst, 1396 const LogicVRegister& src, int shift) { 1397 dst.ClearForWrite(vform); 1398 int laneCount = LaneCountFromFormat(vform); 1399 for (int i = 0; i < laneCount; i++) { 1400 uint64_t src_lane = src.Uint(vform, i); 1401 uint64_t dst_lane = dst.Uint(vform, i); 1402 uint64_t shifted = src_lane << shift; 1403 uint64_t mask = MaxUintFromFormat(vform) << shift; 1404 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1405 } 1406 return dst; 1407} 1408 1409LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst, 1410 const LogicVRegister& src, int shift) { 1411 DCHECK_GE(shift, 0); 1412 SimVRegister temp; 1413 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1414 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1415} 1416 1417LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst, 1418 const LogicVRegister& src, int shift) { 1419 DCHECK_GE(shift, 0); 1420 SimVRegister temp; 1421 LogicVRegister shiftreg = 
dup_immediate(vform, temp, shift); 1422 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1423} 1424 1425LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst, 1426 const LogicVRegister& src, int shift) { 1427 DCHECK_GE(shift, 0); 1428 SimVRegister temp; 1429 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1430 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1431} 1432 1433LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst, 1434 const LogicVRegister& src, int shift) { 1435 dst.ClearForWrite(vform); 1436 int laneCount = LaneCountFromFormat(vform); 1437 DCHECK((shift > 0) && 1438 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1439 for (int i = 0; i < laneCount; i++) { 1440 uint64_t src_lane = src.Uint(vform, i); 1441 uint64_t dst_lane = dst.Uint(vform, i); 1442 uint64_t shifted; 1443 uint64_t mask; 1444 if (shift == 64) { 1445 shifted = 0; 1446 mask = 0; 1447 } else { 1448 shifted = src_lane >> shift; 1449 mask = MaxUintFromFormat(vform) >> shift; 1450 } 1451 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1452 } 1453 return dst; 1454} 1455 1456LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst, 1457 const LogicVRegister& src, int shift) { 1458 DCHECK_GE(shift, 0); 1459 SimVRegister temp; 1460 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1461 return ushl(vform, dst, src, shiftreg); 1462} 1463 1464LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst, 1465 const LogicVRegister& src, int shift) { 1466 DCHECK_GE(shift, 0); 1467 SimVRegister temp; 1468 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1469 return sshl(vform, dst, src, shiftreg); 1470} 1471 1472LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst, 1473 const LogicVRegister& src, int shift) { 1474 SimVRegister temp; 1475 LogicVRegister shifted_reg = sshr(vform, temp, src, shift); 1476 return add(vform, dst, dst, shifted_reg); 
1477} 1478 1479LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst, 1480 const LogicVRegister& src, int shift) { 1481 SimVRegister temp; 1482 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1483 return add(vform, dst, dst, shifted_reg); 1484} 1485 1486LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst, 1487 const LogicVRegister& src, int shift) { 1488 SimVRegister temp; 1489 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1490 return add(vform, dst, dst, shifted_reg); 1491} 1492 1493LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst, 1494 const LogicVRegister& src, int shift) { 1495 SimVRegister temp; 1496 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1497 return add(vform, dst, dst, shifted_reg); 1498} 1499 1500LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst, 1501 const LogicVRegister& src) { 1502 uint64_t result[16]; 1503 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1504 int laneCount = LaneCountFromFormat(vform); 1505 for (int i = 0; i < laneCount; i++) { 1506 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); 1507 } 1508 1509 dst.SetUintArray(vform, result); 1510 return dst; 1511} 1512 1513LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst, 1514 const LogicVRegister& src) { 1515 uint64_t result[16]; 1516 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1517 int laneCount = LaneCountFromFormat(vform); 1518 for (int i = 0; i < laneCount; i++) { 1519 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); 1520 } 1521 1522 dst.SetUintArray(vform, result); 1523 return dst; 1524} 1525 1526LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst, 1527 const LogicVRegister& src) { 1528 uint64_t result[16]; 1529 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1530 int laneCount = LaneCountFromFormat(vform); 1531 for (int i = 0; i < laneCount; i++) { 
1532 uint64_t value = src.Uint(vform, i); 1533 result[i] = 0; 1534 for (int j = 0; j < laneSizeInBits; j++) { 1535 result[i] += (value & 1); 1536 value >>= 1; 1537 } 1538 } 1539 1540 dst.SetUintArray(vform, result); 1541 return dst; 1542} 1543 1544LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst, 1545 const LogicVRegister& src1, 1546 const LogicVRegister& src2) { 1547 dst.ClearForWrite(vform); 1548 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1549 int8_t shift_val = src2.Int(vform, i); 1550 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1551 1552 // Set signed saturation state. 1553 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) && 1554 (lj_src_val != 0)) { 1555 dst.SetSignedSat(i, lj_src_val >= 0); 1556 } 1557 1558 // Set unsigned saturation state. 1559 if (lj_src_val < 0) { 1560 dst.SetUnsignedSat(i, false); 1561 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && 1562 (lj_src_val != 0)) { 1563 dst.SetUnsignedSat(i, true); 1564 } 1565 1566 int64_t src_val = src1.Int(vform, i); 1567 bool src_is_negative = src_val < 0; 1568 if (shift_val > 63) { 1569 dst.SetInt(vform, i, 0); 1570 } else if (shift_val < -63) { 1571 dst.SetRounding(i, src_is_negative); 1572 dst.SetInt(vform, i, src_is_negative ? -1 : 0); 1573 } else { 1574 // Use unsigned types for shifts, as behaviour is undefined for signed 1575 // lhs. 1576 uint64_t usrc_val = static_cast<uint64_t>(src_val); 1577 1578 if (shift_val < 0) { 1579 // Convert to right shift. 1580 shift_val = -shift_val; 1581 1582 // Set rounding state by testing most-significant bit shifted out. 1583 // Rounding only needed on right shifts. 1584 if (((usrc_val >> (shift_val - 1)) & 1) == 1) { 1585 dst.SetRounding(i, true); 1586 } 1587 1588 usrc_val >>= shift_val; 1589 1590 if (src_is_negative) { 1591 // Simulate sign-extension. 
1592 usrc_val |= (~UINT64_C(0) << (64 - shift_val)); 1593 } 1594 } else { 1595 usrc_val <<= shift_val; 1596 } 1597 dst.SetUint(vform, i, usrc_val); 1598 } 1599 } 1600 return dst; 1601} 1602 1603LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst, 1604 const LogicVRegister& src1, 1605 const LogicVRegister& src2) { 1606 dst.ClearForWrite(vform); 1607 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1608 int8_t shift_val = src2.Int(vform, i); 1609 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1610 1611 // Set saturation state. 1612 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) { 1613 dst.SetUnsignedSat(i, true); 1614 } 1615 1616 uint64_t src_val = src1.Uint(vform, i); 1617 if ((shift_val > 63) || (shift_val < -64)) { 1618 dst.SetUint(vform, i, 0); 1619 } else { 1620 if (shift_val < 0) { 1621 // Set rounding state. Rounding only needed on right shifts. 1622 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1623 dst.SetRounding(i, true); 1624 } 1625 1626 if (shift_val == -64) { 1627 src_val = 0; 1628 } else { 1629 src_val >>= -shift_val; 1630 } 1631 } else { 1632 src_val <<= shift_val; 1633 } 1634 dst.SetUint(vform, i, src_val); 1635 } 1636 } 1637 return dst; 1638} 1639 1640LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst, 1641 const LogicVRegister& src) { 1642 dst.ClearForWrite(vform); 1643 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1644 // Test for signed saturation. 1645 int64_t sa = src.Int(vform, i); 1646 if (sa == MinIntFromFormat(vform)) { 1647 dst.SetSignedSat(i, true); 1648 } 1649 dst.SetInt(vform, i, (sa == INT64_MIN) ? 
sa : -sa); 1650 } 1651 return dst; 1652} 1653 1654LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst, 1655 const LogicVRegister& src) { 1656 dst.ClearForWrite(vform); 1657 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1658 int64_t sa = dst.IntLeftJustified(vform, i); 1659 uint64_t ub = src.UintLeftJustified(vform, i); 1660 uint64_t ur = sa + ub; 1661 1662 int64_t sr = bit_cast<int64_t>(ur); 1663 if (sr < sa) { // Test for signed positive saturation. 1664 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 1665 } else { 1666 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i)); 1667 } 1668 } 1669 return dst; 1670} 1671 1672LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst, 1673 const LogicVRegister& src) { 1674 dst.ClearForWrite(vform); 1675 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1676 uint64_t ua = dst.UintLeftJustified(vform, i); 1677 int64_t sb = src.IntLeftJustified(vform, i); 1678 uint64_t ur = ua + sb; 1679 1680 if ((sb > 0) && (ur <= ua)) { 1681 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 1682 } else if ((sb < 0) && (ur >= ua)) { 1683 dst.SetUint(vform, i, 0); // Negative saturation. 1684 } else { 1685 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); 1686 } 1687 } 1688 return dst; 1689} 1690 1691LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst, 1692 const LogicVRegister& src) { 1693 dst.ClearForWrite(vform); 1694 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1695 // Test for signed saturation. 1696 int64_t sa = src.Int(vform, i); 1697 if (sa == MinIntFromFormat(vform)) { 1698 dst.SetSignedSat(i, true); 1699 } 1700 if (sa < 0) { 1701 dst.SetInt(vform, i, (sa == INT64_MIN) ? 
sa : -sa); 1702 } else { 1703 dst.SetInt(vform, i, sa); 1704 } 1705 } 1706 return dst; 1707} 1708 1709LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform, 1710 LogicVRegister dst, bool dstIsSigned, 1711 const LogicVRegister& src, 1712 bool srcIsSigned) { 1713 bool upperhalf = false; 1714 VectorFormat srcform = kFormatUndefined; 1715 int64_t ssrc[8]; 1716 uint64_t usrc[8]; 1717 1718 switch (dstform) { 1719 case kFormat8B: 1720 upperhalf = false; 1721 srcform = kFormat8H; 1722 break; 1723 case kFormat16B: 1724 upperhalf = true; 1725 srcform = kFormat8H; 1726 break; 1727 case kFormat4H: 1728 upperhalf = false; 1729 srcform = kFormat4S; 1730 break; 1731 case kFormat8H: 1732 upperhalf = true; 1733 srcform = kFormat4S; 1734 break; 1735 case kFormat2S: 1736 upperhalf = false; 1737 srcform = kFormat2D; 1738 break; 1739 case kFormat4S: 1740 upperhalf = true; 1741 srcform = kFormat2D; 1742 break; 1743 case kFormatB: 1744 upperhalf = false; 1745 srcform = kFormatH; 1746 break; 1747 case kFormatH: 1748 upperhalf = false; 1749 srcform = kFormatS; 1750 break; 1751 case kFormatS: 1752 upperhalf = false; 1753 srcform = kFormatD; 1754 break; 1755 default: 1756 UNIMPLEMENTED(); 1757 } 1758 1759 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 1760 ssrc[i] = src.Int(srcform, i); 1761 usrc[i] = src.Uint(srcform, i); 1762 } 1763 1764 int offset; 1765 if (upperhalf) { 1766 offset = LaneCountFromFormat(dstform) / 2; 1767 } else { 1768 offset = 0; 1769 dst.ClearForWrite(dstform); 1770 } 1771 1772 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 1773 // Test for signed saturation 1774 if (ssrc[i] > MaxIntFromFormat(dstform)) { 1775 dst.SetSignedSat(offset + i, true); 1776 } else if (ssrc[i] < MinIntFromFormat(dstform)) { 1777 dst.SetSignedSat(offset + i, false); 1778 } 1779 1780 // Test for unsigned saturation 1781 if (srcIsSigned) { 1782 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 1783 dst.SetUnsignedSat(offset + i, true); 1784 } else if 
(ssrc[i] < 0) { 1785 dst.SetUnsignedSat(offset + i, false); 1786 } 1787 } else { 1788 if (usrc[i] > MaxUintFromFormat(dstform)) { 1789 dst.SetUnsignedSat(offset + i, true); 1790 } 1791 } 1792 1793 int64_t result; 1794 if (srcIsSigned) { 1795 result = ssrc[i] & MaxUintFromFormat(dstform); 1796 } else { 1797 result = usrc[i] & MaxUintFromFormat(dstform); 1798 } 1799 1800 if (dstIsSigned) { 1801 dst.SetInt(dstform, offset + i, result); 1802 } else { 1803 dst.SetUint(dstform, offset + i, result); 1804 } 1805 } 1806 return dst; 1807} 1808 1809LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst, 1810 const LogicVRegister& src) { 1811 return ExtractNarrow(vform, dst, true, src, true); 1812} 1813 1814LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst, 1815 const LogicVRegister& src) { 1816 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform); 1817} 1818 1819LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst, 1820 const LogicVRegister& src) { 1821 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 1822} 1823 1824LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst, 1825 const LogicVRegister& src) { 1826 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 1827} 1828 1829LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst, 1830 const LogicVRegister& src1, 1831 const LogicVRegister& src2, bool issigned) { 1832 dst.ClearForWrite(vform); 1833 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1834 if (issigned) { 1835 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); 1836 sr = sr > 0 ? sr : -sr; 1837 dst.SetInt(vform, i, sr); 1838 } else { 1839 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); 1840 sr = sr > 0 ? 
sr : -sr; 1841 dst.SetUint(vform, i, sr); 1842 } 1843 } 1844 return dst; 1845} 1846 1847LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst, 1848 const LogicVRegister& src1, 1849 const LogicVRegister& src2) { 1850 SimVRegister temp; 1851 dst.ClearForWrite(vform); 1852 AbsDiff(vform, temp, src1, src2, true); 1853 add(vform, dst, dst, temp); 1854 return dst; 1855} 1856 1857LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst, 1858 const LogicVRegister& src1, 1859 const LogicVRegister& src2) { 1860 SimVRegister temp; 1861 dst.ClearForWrite(vform); 1862 AbsDiff(vform, temp, src1, src2, false); 1863 add(vform, dst, dst, temp); 1864 return dst; 1865} 1866 1867LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst, 1868 const LogicVRegister& src) { 1869 dst.ClearForWrite(vform); 1870 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1871 dst.SetUint(vform, i, ~src.Uint(vform, i)); 1872 } 1873 return dst; 1874} 1875 1876LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst, 1877 const LogicVRegister& src) { 1878 uint64_t result[16]; 1879 int laneCount = LaneCountFromFormat(vform); 1880 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1881 uint64_t reversed_value; 1882 uint64_t value; 1883 for (int i = 0; i < laneCount; i++) { 1884 value = src.Uint(vform, i); 1885 reversed_value = 0; 1886 for (int j = 0; j < laneSizeInBits; j++) { 1887 reversed_value = (reversed_value << 1) | (value & 1); 1888 value >>= 1; 1889 } 1890 result[i] = reversed_value; 1891 } 1892 1893 dst.SetUintArray(vform, result); 1894 return dst; 1895} 1896 1897LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst, 1898 const LogicVRegister& src, int revSize) { 1899 uint64_t result[16]; 1900 int laneCount = LaneCountFromFormat(vform); 1901 int laneSize = LaneSizeInBytesFromFormat(vform); 1902 int lanesPerLoop = revSize / laneSize; 1903 for (int i = 0; i < laneCount; i += lanesPerLoop) { 1904 for (int j = 0; j < 
lanesPerLoop; j++) { 1905 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); 1906 } 1907 } 1908 dst.SetUintArray(vform, result); 1909 return dst; 1910} 1911 1912LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst, 1913 const LogicVRegister& src) { 1914 return rev(vform, dst, src, 2); 1915} 1916 1917LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst, 1918 const LogicVRegister& src) { 1919 return rev(vform, dst, src, 4); 1920} 1921 1922LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst, 1923 const LogicVRegister& src) { 1924 return rev(vform, dst, src, 8); 1925} 1926 1927LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst, 1928 const LogicVRegister& src, bool is_signed, 1929 bool do_accumulate) { 1930 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 1931 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U); 1932 DCHECK_LE(LaneCountFromFormat(vform), 8); 1933 1934 uint64_t result[8]; 1935 int lane_count = LaneCountFromFormat(vform); 1936 for (int i = 0; i < lane_count; i++) { 1937 if (is_signed) { 1938 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) + 1939 src.Int(vformsrc, 2 * i + 1)); 1940 } else { 1941 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 1942 } 1943 } 1944 1945 dst.ClearForWrite(vform); 1946 for (int i = 0; i < lane_count; ++i) { 1947 if (do_accumulate) { 1948 result[i] += dst.Uint(vform, i); 1949 } 1950 dst.SetUint(vform, i, result[i]); 1951 } 1952 1953 return dst; 1954} 1955 1956LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst, 1957 const LogicVRegister& src) { 1958 return addlp(vform, dst, src, true, false); 1959} 1960 1961LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst, 1962 const LogicVRegister& src) { 1963 return addlp(vform, dst, src, false, false); 1964} 1965 1966LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst, 1967 const LogicVRegister& src) { 
1968 return addlp(vform, dst, src, true, true); 1969} 1970 1971LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst, 1972 const LogicVRegister& src) { 1973 return addlp(vform, dst, src, false, true); 1974} 1975 1976LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst, 1977 const LogicVRegister& src1, 1978 const LogicVRegister& src2, int index) { 1979 uint8_t result[16]; 1980 int laneCount = LaneCountFromFormat(vform); 1981 for (int i = 0; i < laneCount - index; ++i) { 1982 result[i] = src1.Uint(vform, i + index); 1983 } 1984 for (int i = 0; i < index; ++i) { 1985 result[laneCount - index + i] = src2.Uint(vform, i); 1986 } 1987 dst.ClearForWrite(vform); 1988 for (int i = 0; i < laneCount; ++i) { 1989 dst.SetUint(vform, i, result[i]); 1990 } 1991 return dst; 1992} 1993 1994LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst, 1995 const LogicVRegister& src, 1996 int src_index) { 1997 int laneCount = LaneCountFromFormat(vform); 1998 uint64_t value = src.Uint(vform, src_index); 1999 dst.ClearForWrite(vform); 2000 for (int i = 0; i < laneCount; ++i) { 2001 dst.SetUint(vform, i, value); 2002 } 2003 return dst; 2004} 2005 2006LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst, 2007 uint64_t imm) { 2008 int laneCount = LaneCountFromFormat(vform); 2009 uint64_t value = imm & MaxUintFromFormat(vform); 2010 dst.ClearForWrite(vform); 2011 for (int i = 0; i < laneCount; ++i) { 2012 dst.SetUint(vform, i, value); 2013 } 2014 return dst; 2015} 2016 2017LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst, 2018 int dst_index, const LogicVRegister& src, 2019 int src_index) { 2020 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2021 return dst; 2022} 2023 2024LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst, 2025 int dst_index, uint64_t imm) { 2026 uint64_t value = imm & MaxUintFromFormat(vform); 2027 dst.SetUint(vform, dst_index, 
value); 2028 return dst; 2029} 2030 2031LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, 2032 uint64_t imm) { 2033 int laneCount = LaneCountFromFormat(vform); 2034 dst.ClearForWrite(vform); 2035 for (int i = 0; i < laneCount; ++i) { 2036 dst.SetUint(vform, i, imm); 2037 } 2038 return dst; 2039} 2040 2041LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst, 2042 uint64_t imm) { 2043 int laneCount = LaneCountFromFormat(vform); 2044 dst.ClearForWrite(vform); 2045 for (int i = 0; i < laneCount; ++i) { 2046 dst.SetUint(vform, i, ~imm); 2047 } 2048 return dst; 2049} 2050 2051LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, 2052 const LogicVRegister& src, uint64_t imm) { 2053 uint64_t result[16]; 2054 int laneCount = LaneCountFromFormat(vform); 2055 for (int i = 0; i < laneCount; ++i) { 2056 result[i] = src.Uint(vform, i) | imm; 2057 } 2058 dst.SetUintArray(vform, result); 2059 return dst; 2060} 2061 2062LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst, 2063 const LogicVRegister& src) { 2064 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2065 2066 dst.ClearForWrite(vform); 2067 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2068 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2069 } 2070 return dst; 2071} 2072 2073LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst, 2074 const LogicVRegister& src) { 2075 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2076 2077 dst.ClearForWrite(vform); 2078 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2079 dst.SetInt(vform, i, src.Int(vform_half, i)); 2080 } 2081 return dst; 2082} 2083 2084LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst, 2085 const LogicVRegister& src) { 2086 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2087 int lane_count = LaneCountFromFormat(vform); 2088 2089 dst.ClearForWrite(vform); 2090 for (int i = 0; i < lane_count; i++) { 2091 dst.SetUint(vform, 
i, src.Uint(vform_half, lane_count + i)); 2092 } 2093 return dst; 2094} 2095 2096LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst, 2097 const LogicVRegister& src) { 2098 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2099 int lane_count = LaneCountFromFormat(vform); 2100 2101 dst.ClearForWrite(vform); 2102 for (int i = 0; i < lane_count; i++) { 2103 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2104 } 2105 return dst; 2106} 2107 2108LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst, 2109 const LogicVRegister& src, int shift) { 2110 SimVRegister temp; 2111 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2112 VectorFormat vform_dst = vform; 2113 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2114 return ExtractNarrow(vform_dst, dst, false, shifted_src, false); 2115} 2116 2117LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst, 2118 const LogicVRegister& src, int shift) { 2119 SimVRegister temp; 2120 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2121 VectorFormat vformdst = vform; 2122 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2123 return ExtractNarrow(vformdst, dst, false, shifted_src, false); 2124} 2125 2126LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst, 2127 const LogicVRegister& src, int shift) { 2128 SimVRegister temp; 2129 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2130 VectorFormat vformdst = vform; 2131 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2132 return ExtractNarrow(vformdst, dst, false, shifted_src, false); 2133} 2134 2135LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst, 2136 const LogicVRegister& src, int shift) { 2137 SimVRegister temp; 2138 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2139 VectorFormat vformdst = vform; 2140 LogicVRegister shifted_src = 
ushr(vformsrc, temp, src, shift).Round(vformsrc); 2141 return ExtractNarrow(vformdst, dst, false, shifted_src, false); 2142} 2143 2144LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, 2145 const LogicVRegister& ind, 2146 bool zero_out_of_bounds, 2147 const LogicVRegister* tab1, 2148 const LogicVRegister* tab2, 2149 const LogicVRegister* tab3, 2150 const LogicVRegister* tab4) { 2151 DCHECK_NOT_NULL(tab1); 2152 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; 2153 uint64_t result[kMaxLanesPerVector]; 2154 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2155 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i); 2156 } 2157 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2158 uint64_t j = ind.Uint(vform, i); 2159 int tab_idx = static_cast<int>(j >> 4); 2160 int j_idx = static_cast<int>(j & 15); 2161 if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) { 2162 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); 2163 } 2164 } 2165 dst.SetUintArray(vform, result); 2166 return dst; 2167} 2168 2169LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, 2170 const LogicVRegister& tab, 2171 const LogicVRegister& ind) { 2172 return Table(vform, dst, ind, true, &tab); 2173} 2174 2175LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, 2176 const LogicVRegister& tab, 2177 const LogicVRegister& tab2, 2178 const LogicVRegister& ind) { 2179 return Table(vform, dst, ind, true, &tab, &tab2); 2180} 2181 2182LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, 2183 const LogicVRegister& tab, 2184 const LogicVRegister& tab2, 2185 const LogicVRegister& tab3, 2186 const LogicVRegister& ind) { 2187 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); 2188} 2189 2190LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, 2191 const LogicVRegister& tab, 2192 const LogicVRegister& tab2, 2193 const LogicVRegister& tab3, 2194 const LogicVRegister& tab4, 2195 const LogicVRegister& ind) 
{ 2196 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); 2197} 2198 2199LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, 2200 const LogicVRegister& tab, 2201 const LogicVRegister& ind) { 2202 return Table(vform, dst, ind, false, &tab); 2203} 2204 2205LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, 2206 const LogicVRegister& tab, 2207 const LogicVRegister& tab2, 2208 const LogicVRegister& ind) { 2209 return Table(vform, dst, ind, false, &tab, &tab2); 2210} 2211 2212LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, 2213 const LogicVRegister& tab, 2214 const LogicVRegister& tab2, 2215 const LogicVRegister& tab3, 2216 const LogicVRegister& ind) { 2217 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); 2218} 2219 2220LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, 2221 const LogicVRegister& tab, 2222 const LogicVRegister& tab2, 2223 const LogicVRegister& tab3, 2224 const LogicVRegister& tab4, 2225 const LogicVRegister& ind) { 2226 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); 2227} 2228 2229LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst, 2230 const LogicVRegister& src, int shift) { 2231 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2232} 2233 2234LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst, 2235 const LogicVRegister& src, int shift) { 2236 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2237} 2238 2239LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst, 2240 const LogicVRegister& src, int shift) { 2241 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2242} 2243 2244LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst, 2245 const LogicVRegister& src, int shift) { 2246 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2247} 2248 2249LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister 
dst, 2250 const LogicVRegister& src, int shift) { 2251 SimVRegister temp; 2252 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2253 VectorFormat vformdst = vform; 2254 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2255 return sqxtn(vformdst, dst, shifted_src); 2256} 2257 2258LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst, 2259 const LogicVRegister& src, int shift) { 2260 SimVRegister temp; 2261 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2262 VectorFormat vformdst = vform; 2263 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2264 return sqxtn(vformdst, dst, shifted_src); 2265} 2266 2267LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst, 2268 const LogicVRegister& src, int shift) { 2269 SimVRegister temp; 2270 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2271 VectorFormat vformdst = vform; 2272 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2273 return sqxtn(vformdst, dst, shifted_src); 2274} 2275 2276LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst, 2277 const LogicVRegister& src, int shift) { 2278 SimVRegister temp; 2279 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2280 VectorFormat vformdst = vform; 2281 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2282 return sqxtn(vformdst, dst, shifted_src); 2283} 2284 2285LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst, 2286 const LogicVRegister& src, int shift) { 2287 SimVRegister temp; 2288 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2289 VectorFormat vformdst = vform; 2290 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2291 return sqxtun(vformdst, dst, shifted_src); 2292} 2293 2294LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst, 2295 const LogicVRegister& src, int shift) { 2296 SimVRegister 
temp; 2297 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2298 VectorFormat vformdst = vform; 2299 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2300 return sqxtun(vformdst, dst, shifted_src); 2301} 2302 2303LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst, 2304 const LogicVRegister& src, int shift) { 2305 SimVRegister temp; 2306 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2307 VectorFormat vformdst = vform; 2308 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2309 return sqxtun(vformdst, dst, shifted_src); 2310} 2311 2312LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst, 2313 const LogicVRegister& src, int shift) { 2314 SimVRegister temp; 2315 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2316 VectorFormat vformdst = vform; 2317 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2318 return sqxtun(vformdst, dst, shifted_src); 2319} 2320 2321LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst, 2322 const LogicVRegister& src1, 2323 const LogicVRegister& src2) { 2324 SimVRegister temp1, temp2; 2325 uxtl(vform, temp1, src1); 2326 uxtl(vform, temp2, src2); 2327 add(vform, dst, temp1, temp2); 2328 return dst; 2329} 2330 2331LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst, 2332 const LogicVRegister& src1, 2333 const LogicVRegister& src2) { 2334 SimVRegister temp1, temp2; 2335 uxtl2(vform, temp1, src1); 2336 uxtl2(vform, temp2, src2); 2337 add(vform, dst, temp1, temp2); 2338 return dst; 2339} 2340 2341LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst, 2342 const LogicVRegister& src1, 2343 const LogicVRegister& src2) { 2344 SimVRegister temp; 2345 uxtl(vform, temp, src2); 2346 add(vform, dst, src1, temp); 2347 return dst; 2348} 2349 2350LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst, 
2351 const LogicVRegister& src1, 2352 const LogicVRegister& src2) { 2353 SimVRegister temp; 2354 uxtl2(vform, temp, src2); 2355 add(vform, dst, src1, temp); 2356 return dst; 2357} 2358 2359LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst, 2360 const LogicVRegister& src1, 2361 const LogicVRegister& src2) { 2362 SimVRegister temp1, temp2; 2363 sxtl(vform, temp1, src1); 2364 sxtl(vform, temp2, src2); 2365 add(vform, dst, temp1, temp2); 2366 return dst; 2367} 2368 2369LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst, 2370 const LogicVRegister& src1, 2371 const LogicVRegister& src2) { 2372 SimVRegister temp1, temp2; 2373 sxtl2(vform, temp1, src1); 2374 sxtl2(vform, temp2, src2); 2375 add(vform, dst, temp1, temp2); 2376 return dst; 2377} 2378 2379LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst, 2380 const LogicVRegister& src1, 2381 const LogicVRegister& src2) { 2382 SimVRegister temp; 2383 sxtl(vform, temp, src2); 2384 add(vform, dst, src1, temp); 2385 return dst; 2386} 2387 2388LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst, 2389 const LogicVRegister& src1, 2390 const LogicVRegister& src2) { 2391 SimVRegister temp; 2392 sxtl2(vform, temp, src2); 2393 add(vform, dst, src1, temp); 2394 return dst; 2395} 2396 2397LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst, 2398 const LogicVRegister& src1, 2399 const LogicVRegister& src2) { 2400 SimVRegister temp1, temp2; 2401 uxtl(vform, temp1, src1); 2402 uxtl(vform, temp2, src2); 2403 sub(vform, dst, temp1, temp2); 2404 return dst; 2405} 2406 2407LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst, 2408 const LogicVRegister& src1, 2409 const LogicVRegister& src2) { 2410 SimVRegister temp1, temp2; 2411 uxtl2(vform, temp1, src1); 2412 uxtl2(vform, temp2, src2); 2413 sub(vform, dst, temp1, temp2); 2414 return dst; 2415} 2416 2417LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst, 
2418 const LogicVRegister& src1, 2419 const LogicVRegister& src2) { 2420 SimVRegister temp; 2421 uxtl(vform, temp, src2); 2422 sub(vform, dst, src1, temp); 2423 return dst; 2424} 2425 2426LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst, 2427 const LogicVRegister& src1, 2428 const LogicVRegister& src2) { 2429 SimVRegister temp; 2430 uxtl2(vform, temp, src2); 2431 sub(vform, dst, src1, temp); 2432 return dst; 2433} 2434 2435LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst, 2436 const LogicVRegister& src1, 2437 const LogicVRegister& src2) { 2438 SimVRegister temp1, temp2; 2439 sxtl(vform, temp1, src1); 2440 sxtl(vform, temp2, src2); 2441 sub(vform, dst, temp1, temp2); 2442 return dst; 2443} 2444 2445LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst, 2446 const LogicVRegister& src1, 2447 const LogicVRegister& src2) { 2448 SimVRegister temp1, temp2; 2449 sxtl2(vform, temp1, src1); 2450 sxtl2(vform, temp2, src2); 2451 sub(vform, dst, temp1, temp2); 2452 return dst; 2453} 2454 2455LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst, 2456 const LogicVRegister& src1, 2457 const LogicVRegister& src2) { 2458 SimVRegister temp; 2459 sxtl(vform, temp, src2); 2460 sub(vform, dst, src1, temp); 2461 return dst; 2462} 2463 2464LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst, 2465 const LogicVRegister& src1, 2466 const LogicVRegister& src2) { 2467 SimVRegister temp; 2468 sxtl2(vform, temp, src2); 2469 sub(vform, dst, src1, temp); 2470 return dst; 2471} 2472 2473LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst, 2474 const LogicVRegister& src1, 2475 const LogicVRegister& src2) { 2476 SimVRegister temp1, temp2; 2477 uxtl(vform, temp1, src1); 2478 uxtl(vform, temp2, src2); 2479 uaba(vform, dst, temp1, temp2); 2480 return dst; 2481} 2482 2483LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst, 2484 const LogicVRegister& src1, 2485 const 
LogicVRegister& src2) {
  SimVRegister temp1, temp2;
  uxtl2(vform, temp1, src1);
  uxtl2(vform, temp2, src2);
  uaba(vform, dst, temp1, temp2);
  return dst;
}

// Widens both sources with sxtl() and forwards them to saba().
LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);
  saba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}

// Widens both sources with sxtl2() and forwards them to saba().
LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);
  saba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}

// Widens both sources with uxtl() and forwards them to AbsDiff() with its
// final (bool) argument false.
LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);
  AbsDiff(vform, dst, wide_lhs, wide_rhs, false);
  return dst;
}

// Widens both sources with uxtl2() and forwards them to AbsDiff() with its
// final (bool) argument false.
LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);
  AbsDiff(vform, dst, wide_lhs, wide_rhs, false);
  return dst;
}

// Widens both sources with sxtl() and forwards them to AbsDiff() with its
// final (bool) argument true.
LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);
  AbsDiff(vform, dst, wide_lhs, wide_rhs, true);
  return dst;
}

LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister temp1, temp2;
  sxtl2(vform, temp1, src1);
  sxtl2(vform, temp2, src2);
  AbsDiff(vform,
dst, temp1, temp2, true); 2550 return dst; 2551} 2552 2553LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, 2554 const LogicVRegister& src1, 2555 const LogicVRegister& src2) { 2556 SimVRegister temp1, temp2; 2557 uxtl(vform, temp1, src1); 2558 uxtl(vform, temp2, src2); 2559 mul(vform, dst, temp1, temp2); 2560 return dst; 2561} 2562 2563LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, 2564 const LogicVRegister& src1, 2565 const LogicVRegister& src2) { 2566 SimVRegister temp1, temp2; 2567 uxtl2(vform, temp1, src1); 2568 uxtl2(vform, temp2, src2); 2569 mul(vform, dst, temp1, temp2); 2570 return dst; 2571} 2572 2573LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, 2574 const LogicVRegister& src1, 2575 const LogicVRegister& src2) { 2576 SimVRegister temp1, temp2; 2577 sxtl(vform, temp1, src1); 2578 sxtl(vform, temp2, src2); 2579 mul(vform, dst, temp1, temp2); 2580 return dst; 2581} 2582 2583LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, 2584 const LogicVRegister& src1, 2585 const LogicVRegister& src2) { 2586 SimVRegister temp1, temp2; 2587 sxtl2(vform, temp1, src1); 2588 sxtl2(vform, temp2, src2); 2589 mul(vform, dst, temp1, temp2); 2590 return dst; 2591} 2592 2593LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, 2594 const LogicVRegister& src1, 2595 const LogicVRegister& src2) { 2596 SimVRegister temp1, temp2; 2597 uxtl(vform, temp1, src1); 2598 uxtl(vform, temp2, src2); 2599 mls(vform, dst, temp1, temp2); 2600 return dst; 2601} 2602 2603LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, 2604 const LogicVRegister& src1, 2605 const LogicVRegister& src2) { 2606 SimVRegister temp1, temp2; 2607 uxtl2(vform, temp1, src1); 2608 uxtl2(vform, temp2, src2); 2609 mls(vform, dst, temp1, temp2); 2610 return dst; 2611} 2612 2613LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, 2614 const LogicVRegister& src1, 2615 const 
LogicVRegister& src2) {
  SimVRegister temp1, temp2;
  sxtl(vform, temp1, src1);
  sxtl(vform, temp2, src2);
  mls(vform, dst, temp1, temp2);
  return dst;
}

// Widens both sources with sxtl2() and forwards them to mls().
LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);
  mls(vform, dst, wide_lhs, wide_rhs);
  return dst;
}

// Widens both sources with uxtl() and forwards them to mla().
LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);
  mla(vform, dst, wide_lhs, wide_rhs);
  return dst;
}

// Widens both sources with uxtl2() and forwards them to mla().
LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);
  mla(vform, dst, wide_lhs, wide_rhs);
  return dst;
}

// Widens both sources with sxtl() and forwards them to mla().
LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);
  mla(vform, dst, wide_lhs, wide_rhs);
  return dst;
}

// Widens both sources with sxtl2() and forwards them to mla().
LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs, wide_rhs;
  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);
  mla(vform, dst, wide_lhs, wide_rhs);
  return dst;
}

LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister temp;
  LogicVRegister product = sqdmull(vform, temp, src1, src2);
  return add(vform, dst, dst, product).SignedSaturate(vform);
2679} 2680 2681LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, 2682 const LogicVRegister& src1, 2683 const LogicVRegister& src2) { 2684 SimVRegister temp; 2685 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 2686 return add(vform, dst, dst, product).SignedSaturate(vform); 2687} 2688 2689LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, 2690 const LogicVRegister& src1, 2691 const LogicVRegister& src2) { 2692 SimVRegister temp; 2693 LogicVRegister product = sqdmull(vform, temp, src1, src2); 2694 return sub(vform, dst, dst, product).SignedSaturate(vform); 2695} 2696 2697LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, 2698 const LogicVRegister& src1, 2699 const LogicVRegister& src2) { 2700 SimVRegister temp; 2701 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 2702 return sub(vform, dst, dst, product).SignedSaturate(vform); 2703} 2704 2705LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, 2706 const LogicVRegister& src1, 2707 const LogicVRegister& src2) { 2708 SimVRegister temp; 2709 LogicVRegister product = smull(vform, temp, src1, src2); 2710 return add(vform, dst, product, product).SignedSaturate(vform); 2711} 2712 2713LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, 2714 const LogicVRegister& src1, 2715 const LogicVRegister& src2) { 2716 SimVRegister temp; 2717 LogicVRegister product = smull2(vform, temp, src1, src2); 2718 return add(vform, dst, product, product).SignedSaturate(vform); 2719} 2720 2721LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, 2722 const LogicVRegister& src1, 2723 const LogicVRegister& src2, bool round) { 2724 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. 2725 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 2726 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. 
2727 2728 int esize = LaneSizeInBitsFromFormat(vform); 2729 int round_const = round ? (1 << (esize - 2)) : 0; 2730 int64_t product; 2731 2732 dst.ClearForWrite(vform); 2733 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2734 product = src1.Int(vform, i) * src2.Int(vform, i); 2735 product += round_const; 2736 product = product >> (esize - 1); 2737 2738 if (product > MaxIntFromFormat(vform)) { 2739 product = MaxIntFromFormat(vform); 2740 } else if (product < MinIntFromFormat(vform)) { 2741 product = MinIntFromFormat(vform); 2742 } 2743 dst.SetInt(vform, i, product); 2744 } 2745 return dst; 2746} 2747 2748LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, 2749 const LogicVRegister& src1, 2750 const LogicVRegister& src2) { 2751 return sqrdmulh(vform, dst, src1, src2, false); 2752} 2753 2754LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst, 2755 const LogicVRegister& src1, 2756 const LogicVRegister& src2) { 2757 SimVRegister temp; 2758 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 2759 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2760 return dst; 2761} 2762 2763LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst, 2764 const LogicVRegister& src1, 2765 const LogicVRegister& src2) { 2766 SimVRegister temp; 2767 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 2768 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2769 return dst; 2770} 2771 2772LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst, 2773 const LogicVRegister& src1, 2774 const LogicVRegister& src2) { 2775 SimVRegister temp; 2776 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 2777 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2778 return dst; 2779} 2780 2781LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst, 2782 const LogicVRegister& src1, 2783 const LogicVRegister& src2) { 2784 SimVRegister temp; 2785 
add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 2786 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2787 return dst; 2788} 2789 2790LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst, 2791 const LogicVRegister& src1, 2792 const LogicVRegister& src2) { 2793 SimVRegister temp; 2794 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 2795 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2796 return dst; 2797} 2798 2799LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst, 2800 const LogicVRegister& src1, 2801 const LogicVRegister& src2) { 2802 SimVRegister temp; 2803 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 2804 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2805 return dst; 2806} 2807 2808LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst, 2809 const LogicVRegister& src1, 2810 const LogicVRegister& src2) { 2811 SimVRegister temp; 2812 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 2813 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2814 return dst; 2815} 2816 2817LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst, 2818 const LogicVRegister& src1, 2819 const LogicVRegister& src2) { 2820 SimVRegister temp; 2821 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 2822 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2823 return dst; 2824} 2825 2826LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst, 2827 const LogicVRegister& src1, 2828 const LogicVRegister& src2) { 2829 uint64_t result[16]; 2830 int laneCount = LaneCountFromFormat(vform); 2831 int pairs = laneCount / 2; 2832 for (int i = 0; i < pairs; ++i) { 2833 result[2 * i] = src1.Uint(vform, 2 * i); 2834 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 2835 } 2836 2837 dst.SetUintArray(vform, result); 2838 return dst; 2839} 2840 2841LogicVRegister 
Simulator::trn2(VectorFormat vform, LogicVRegister dst,
                const LogicVRegister& src1,
                const LogicVRegister& src2) {
  uint64_t result[16];
  int laneCount = LaneCountFromFormat(vform);
  int pairs = laneCount / 2;
  for (int i = 0; i < pairs; ++i) {
    result[2 * i] = src1.Uint(vform, (2 * i) + 1);
    result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
  }

  dst.SetUintArray(vform, result);
  return dst;
}

// Interleaves the low-indexed lanes of the two sources: for each i in
// [0, laneCount / 2), result lane 2 * i is src1 lane i and result lane
// 2 * i + 1 is src2 lane i. The result is staged in a local array before being
// written to dst with SetUintArray.
LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t result[16];
  int laneCount = LaneCountFromFormat(vform);
  int pairs = laneCount / 2;
  for (int i = 0; i < pairs; ++i) {
    result[2 * i] = src1.Uint(vform, i);
    result[(2 * i) + 1] = src2.Uint(vform, i);
  }

  dst.SetUintArray(vform, result);
  return dst;
}

// Interleaves the high-indexed lanes of the two sources: for each i in
// [0, laneCount / 2), result lane 2 * i is src1 lane (laneCount / 2) + i and
// result lane 2 * i + 1 is src2 lane (laneCount / 2) + i.
LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t result[16];
  int laneCount = LaneCountFromFormat(vform);
  int pairs = laneCount / 2;
  for (int i = 0; i < pairs; ++i) {
    result[2 * i] = src1.Uint(vform, pairs + i);
    result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
  }

  dst.SetUintArray(vform, result);
  return dst;
}

// Copies the lanes of src1 followed by the lanes of src2 into a scratch array
// of 2 * laneCount entries, then writes the even-indexed scratch entries
// (0, 2, 4, ...) to consecutive lanes of dst.
LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t result[32];
  int laneCount = LaneCountFromFormat(vform);
  for (int i = 0; i < laneCount; ++i) {
    result[i] = src1.Uint(vform, i);
    result[laneCount + i] = src2.Uint(vform, i);
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < laneCount; ++i) {
    dst.SetUint(vform, i, result[2 * i]);
  }
  return dst;
}

// Same staging as uzp1, but writes the odd-indexed scratch entries
// (1, 3, 5, ...) to consecutive lanes of dst.
LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t result[32];
  int laneCount = LaneCountFromFormat(vform);
  for (int i = 0; i < laneCount; ++i) {
    result[i] = src1.Uint(vform, i);
    result[laneCount + i] = src2.Uint(vform, i);
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < laneCount; ++i) {
    dst.SetUint(vform, i, result[(2 * i) + 1]);
  }
  return dst;
}

// Returns op1 + op2. If FPProcessNaNs(op1, op2) yields a NaN, that NaN is
// returned. Adding two infinities that compare unequal calls
// FPProcessException() and returns FPDefaultNaN<T>().
template <typename T>
T Simulator::FPAdd(T op1, T op2) {
  T result = FPProcessNaNs(op1, op2);
  if (std::isnan(result)) return result;

  if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
    // inf + -inf returns the default NaN.
    FPProcessException();
    return FPDefaultNaN<T>();
  } else {
    // Other cases should be handled by standard arithmetic.
    return op1 + op2;
  }
}

// Returns op1 - op2. Unlike FPAdd, NaN operands are not processed here; they
// are only guarded by a DCHECK. Subtracting two infinities that compare equal
// calls FPProcessException() and returns FPDefaultNaN<T>().
template <typename T>
T Simulator::FPSub(T op1, T op2) {
  // NaNs should be handled elsewhere.
  DCHECK(!std::isnan(op1) && !std::isnan(op2));

  if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
    // inf - inf returns the default NaN.
    FPProcessException();
    return FPDefaultNaN<T>();
  } else {
    // Other cases should be handled by standard arithmetic.
    return op1 - op2;
  }
}

// Returns op1 * op2. NaN operands are only guarded by a DCHECK. An infinity
// multiplied by a zero (in either operand order) calls FPProcessException()
// and returns FPDefaultNaN<T>().
template <typename T>
T Simulator::FPMul(T op1, T op2) {
  // NaNs should be handled elsewhere.
  DCHECK(!std::isnan(op1) && !std::isnan(op2));

  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
    // inf * 0.0 returns the default NaN.
    FPProcessException();
    return FPDefaultNaN<T>();
  } else {
    // Other cases should be handled by standard arithmetic.
    return op1 * op2;
  }
}

// Like FPMul, except that an infinity multiplied by a zero returns 2.0 carrying
// the product of the operands' signs instead of a NaN. All other inputs are
// forwarded to FPMul.
template <typename T>
T Simulator::FPMulx(T op1, T op2) {
  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
    // inf * 0.0 returns +/-2.0.
    T two = 2.0;
    return std::copysign(1.0, op1) * std::copysign(1.0, op2) * two;
  }
  return FPMul(op1, op2);
}

// Multiply-accumulate of a with op1 * op2, evaluated through
// FusedMultiplyAdd(op1, op2, a) (presumably a single-rounding op1 * op2 + a;
// the helper is defined elsewhere). NaN inputs are resolved with
// FPProcessNaNs3; invalid operations (inf * 0, or inf - inf between a and the
// product) return the default NaN. The sign of zero results is fixed up
// explicitly rather than trusted to the underlying fma.
template <typename T>
T Simulator::FPMulAdd(T a, T op1, T op2) {
  T result = FPProcessNaNs3(a, op1, op2);

  // Signs are extracted as +/-1.0 so that zeros and infinities are covered.
  T sign_a = std::copysign(1.0, a);
  T sign_prod = std::copysign(1.0, op1) * std::copysign(1.0, op2);
  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
  bool operation_generates_nan =
      (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
      (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf

  if (std::isnan(result)) {
    // Generated NaNs override quiet NaNs propagated from a.
    if (operation_generates_nan && IsQuietNaN(a)) {
      FPProcessException();
      return FPDefaultNaN<T>();
    } else {
      return result;
    }
  }

  // If the operation would produce a NaN, return the default NaN.
  if (operation_generates_nan) {
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Work around broken fma implementations for exact zero results: The sign of
  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
  }

  result = FusedMultiplyAdd(op1, op2, a);
  DCHECK(!std::isnan(result));

  // Work around broken fma implementations for rounded zero results: If a is
  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
  if ((a == 0.0) && (result == 0.0)) {
    return std::copysign(0.0, sign_prod);
  }

  return result;
}

// Returns op1 / op2. NaN operands are only guarded by a DCHECK. inf / inf and
// 0 / 0 call FPProcessException() and return the default NaN. Any other
// division by zero also calls FPProcessException() and returns an infinity
// whose sign is the product of the operand signs.
template <typename T>
T Simulator::FPDiv(T op1, T op2) {
  // NaNs should be handled elsewhere.
  DCHECK(!std::isnan(op1) && !std::isnan(op2));

  if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
    // inf / inf and 0.0 / 0.0 return the default NaN.
    FPProcessException();
    return FPDefaultNaN<T>();
  } else {
    if (op2 == 0.0) {
      FPProcessException();
      if (!std::isnan(op1)) {
        double op1_sign = std::copysign(1.0, op1);
        double op2_sign = std::copysign(1.0, op2);
        return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
      }
    }

    // Other cases should be handled by standard arithmetic.
    return op1 / op2;
  }
}

// Returns the square root of op. NaN inputs go through FPProcessNaN; inputs
// that compare less than zero call FPProcessException() and return the default
// NaN. Note that -0.0 does not compare less than zero and so reaches
// std::sqrt.
template <typename T>
T Simulator::FPSqrt(T op) {
  if (std::isnan(op)) {
    return FPProcessNaN(op);
  } else if (op < 0.0) {
    FPProcessException();
    return FPDefaultNaN<T>();
  } else {
    return std::sqrt(op);
  }
}

// Returns the larger of a and b. If FPProcessNaNs(a, b) yields a NaN, that NaN
// is returned. Two zeros of differing sign return +0.0.
template <typename T>
T Simulator::FPMax(T a, T b) {
  T result = FPProcessNaNs(a, b);
  if (std::isnan(result)) return result;

  if ((a == 0.0) && (b == 0.0) &&
      (std::copysign(1.0, a) != std::copysign(1.0, b))) {
    // a and b are zero, and the sign differs: return +0.0.
    return 0.0;
  } else {
    return (a > b) ? a : b;
  }
}

// Variant of FPMax in which a lone quiet NaN operand is replaced by negative
// infinity before the comparison, so the other operand is the one selected.
// If both operands are quiet NaNs, or either is not a quiet NaN but still a
// NaN, the outcome is whatever FPProcessNaNs produces.
template <typename T>
T Simulator::FPMaxNM(T a, T b) {
  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
    a = kFP64NegativeInfinity;
  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
    b = kFP64NegativeInfinity;
  }

  T result = FPProcessNaNs(a, b);
  return std::isnan(result) ? result : FPMax(a, b);
}

// Returns the smaller of a and b. If FPProcessNaNs(a, b) yields a NaN, that
// NaN is returned. Two zeros of differing sign return -0.0.
template <typename T>
T Simulator::FPMin(T a, T b) {
  T result = FPProcessNaNs(a, b);
  if (std::isnan(result)) return result;

  if ((a == 0.0) && (b == 0.0) &&
      (std::copysign(1.0, a) != std::copysign(1.0, b))) {
    // a and b are zero, and the sign differs: return -0.0.
    return -0.0;
  } else {
    return (a < b) ? a : b;
  }
}

// Variant of FPMin in which a lone quiet NaN operand is replaced by positive
// infinity before the comparison, so the other operand is the one selected.
template <typename T>
T Simulator::FPMinNM(T a, T b) {
  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
    a = kFP64PositiveInfinity;
  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
    b = kFP64PositiveInfinity;
  }

  T result = FPProcessNaNs(a, b);
  return std::isnan(result) ? result : FPMin(a, b);
}

// Step function used by frecps: returns FusedMultiplyAdd(op1, op2, 2.0)
// (presumably 2.0 + op1 * op2). inf * 0 in either order returns exactly 2.0;
// any other infinite operand returns an infinity whose sign depends on whether
// the operands' (>= 0.0) tests agree. NaNs are not handled here.
template <typename T>
T Simulator::FPRecipStepFused(T op1, T op2) {
  const T two = 2.0;
  if ((std::isinf(op1) && (op2 == 0.0)) ||
      ((op1 == 0.0) && (std::isinf(op2)))) {
    return two;
  } else if (std::isinf(op1) || std::isinf(op2)) {
    // Return +inf if signs match, otherwise -inf.
    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
                                          : kFP64NegativeInfinity;
  } else {
    return FusedMultiplyAdd(op1, op2, two);
  }
}

// Step function used by frsqrts: evaluates a fused multiply-add of op1 * op2
// with 1.5 after halving one of the operands. inf * 0 in either order returns
// exactly 1.5; any other infinite operand returns a signed infinity as in
// FPRecipStepFused. NaNs are not handled here.
template <typename T>
T Simulator::FPRSqrtStepFused(T op1, T op2) {
  const T one_point_five = 1.5;
  const T two = 2.0;

  if ((std::isinf(op1) && (op2 == 0.0)) ||
      ((op1 == 0.0) && (std::isinf(op2)))) {
    return one_point_five;
  } else if (std::isinf(op1) || std::isinf(op2)) {
    // Return +inf if signs match, otherwise -inf.
    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
                                          : kFP64NegativeInfinity;
  } else {
    // The multiply-add-halve operation must be fully fused, so avoid interim
    // rounding by checking which operand can be losslessly divided by two
    // before doing the multiply-add.
    if (std::isnormal(op1 / two)) {
      return FusedMultiplyAdd(op1 / two, op2, one_point_five);
    } else if (std::isnormal(op2 / two)) {
      return FusedMultiplyAdd(op1, op2 / two, one_point_five);
    } else {
      // Neither operand is normal after halving: the result is dominated by
      // the addition term, so just return that.
      return one_point_five;
    }
  }
}

// Rounds value to an integral double according to round_mode. Zeros and
// infinities are returned unchanged and NaNs go through FPProcessNaN. All
// other inputs start from floor(value) and are adjusted per mode, taking care
// to produce -0.0 for small negative inputs that round to zero. Rounding modes
// not listed in the switch hit UNIMPLEMENTED().
double Simulator::FPRoundInt(double value, FPRounding round_mode) {
  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
      (value == kFP64NegativeInfinity)) {
    return value;
  } else if (std::isnan(value)) {
    return FPProcessNaN(value);
  }

  // error is the fractional distance of value above its floor, in [0, 1).
  double int_result = std::floor(value);
  double error = value - int_result;
  switch (round_mode) {
    case FPTieAway: {
      // Take care of correctly handling the range ]-0.5, -0.0], which must
      // yield -0.0.
      if ((-0.5 < value) && (value < 0.0)) {
        int_result = -0.0;

      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
        // If the error is greater than 0.5, or is equal to 0.5 and the integer
        // result is positive, round up.
        int_result++;
      }
      break;
    }
    case FPTieEven: {
      // Take care of correctly handling the range [-0.5, -0.0], which must
      // yield -0.0.
      if ((-0.5 <= value) && (value < 0.0)) {
        int_result = -0.0;

        // If the error is greater than 0.5, or is equal to 0.5 and the integer
        // result is odd, round up.
      } else if ((error > 0.5) ||
                 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
        int_result++;
      }
      break;
    }
    case FPZero: {
      // If value>0 then we take floor(value)
      // otherwise, ceil(value).
      if (value < 0) {
        int_result = ceil(value);
      }
      break;
    }
    case FPNegativeInfinity: {
      // We always use floor(value).
      break;
    }
    case FPPositiveInfinity: {
      // Take care of correctly handling the range ]-1.0, -0.0], which must
      // yield -0.0.
      if ((-1.0 < value) && (value < 0.0)) {
        int_result = -0.0;

        // If the error is non-zero, round up.
      } else if (error > 0.0) {
        int_result++;
      }
      break;
    }
    default:
      UNIMPLEMENTED();
  }
  return int_result;
}

// Rounds value with FPRoundInt(value, rmode) and converts the result with
// base::saturated_cast<int32_t>.
int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
  value = FPRoundInt(value, rmode);
  return base::saturated_cast<int32_t>(value);
}

// Rounds value with FPRoundInt(value, rmode) and converts the result with
// base::saturated_cast<int64_t>.
int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
  value = FPRoundInt(value, rmode);
  return base::saturated_cast<int64_t>(value);
}

// Rounds value with FPRoundInt(value, rmode) and converts the result with
// base::saturated_cast<uint32_t>.
uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
  value = FPRoundInt(value, rmode);
  return base::saturated_cast<uint32_t>(value);
}

// Rounds value with FPRoundInt(value, rmode) and converts the result with
// base::saturated_cast<uint64_t>.
uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
  value = FPRoundInt(value, rmode);
  return base::saturated_cast<uint64_t>(value);
}

// For every (FN, OP, PROCNAN) entry of NEON_FP3SAME_LIST this macro defines:
//  - a templated lane-wise Simulator::FN<T> that applies OP to each pair of
//    lanes of src1 and src2. When PROCNAN is true, FPProcessNaNs is applied
//    first and OP is only called if that does not produce a NaN.
//  - a non-template Simulator::FN overload that dispatches to FN<float> when
//    the lane size equals kSRegSize and to FN<double> otherwise (lane size is
//    DCHECKed to equal kDRegSize).
// No comments are placed inside the macro body because of the line
// continuations.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                            \
  template <typename T>                                                      \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst,       \
                               const LogicVRegister& src1,                   \
                               const LogicVRegister& src2) {                 \
    dst.ClearForWrite(vform);                                                \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {                   \
      T op1 = src1.Float<T>(i);                                              \
      T op2 = src2.Float<T>(i);                                              \
      T result;                                                              \
      if (PROCNAN) {                                                         \
        result = FPProcessNaNs(op1, op2);                                    \
        if (!std::isnan(result)) {                                           \
          result = OP(op1, op2);                                             \
        }                                                                    \
      } else {                                                               \
        result = OP(op1, op2);                                               \
      }                                                                      \
      dst.SetFloat(i, result);                                               \
    }                                                                        \
    return dst;                                                              \
  }                                                                          \
                                                                             \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst,       \
                               const LogicVRegister& src1,                   \
                               const LogicVRegister& src2) {                 \
    if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {                     \
      FN<float>(vform, dst, src1, src2);                                     \
    } else {                                                                 \
      DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);                \
      FN<double>(vform, dst, src1, src2);                                    \
    }                                                                        \
    return dst;                                                              \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP

// Computes the lane-wise product of src1 and src2 into a temporary register
// with fmul, then writes its negation (fneg) to dst.
LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister temp;
  LogicVRegister product = fmul(vform, temp, src1, src2);
  return fneg(vform, dst, product);
}

// Lane-wise reciprocal step for lane type T. The src1 lane is negated before
// use. If FPProcessNaNs on the (negated) operands yields a NaN, that NaN is
// stored; otherwise the lane receives FPRecipStepFused(-src1, src2).
template <typename T>
LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    T op1 = -src1.Float<T>(i);
    T op2 = src2.Float<T>(i);
    T result = FPProcessNaNs(op1, op2);
    dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
  }
  return dst;
}

// Dispatches to frecps<float> or frecps<double> based on the lane size of
// vform.
LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    frecps<float>(vform, dst, src1, src2);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    frecps<double>(vform, dst, src1, src2);
  }
  return dst;
}

// Lane-wise reciprocal square root step for lane type T. The src1 lane is
// negated before use. If FPProcessNaNs on the (negated) operands yields a NaN,
// that NaN is stored; otherwise the lane receives
// FPRSqrtStepFused(-src1, src2).
template <typename T>
LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    T op1 = -src1.Float<T>(i);
    T op2 = src2.Float<T>(i);
    T result = FPProcessNaNs(op1, op2);
    dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
  }
  return dst;
}

// Converts a double to a 32-bit integer by rounding toward zero and then
// wrapping the integer value modulo 2^32 into the int32_t range. As a side
// effect the N, C and V flags are cleared and Z is set as described below.
int32_t Simulator::FPToFixedJS(double value) {
  // The Z-flag is set when the conversion from double precision floating-point
  // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
  // outside the bounds of a 32-bit integer, or isn't an exact integer then the
  // Z-flag is unset.
  int Z = 1;
  int32_t result;
  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
      (value == kFP64NegativeInfinity)) {
    // +/- zero and infinity all return zero, however -0 and +/- Infinity also
    // unset the Z-flag.
    result = 0.0;
    if ((value != 0.0) || std::signbit(value)) {
      Z = 0;
    }
  } else if (std::isnan(value)) {
    // NaN values unset the Z-flag and set the result to 0.
    result = 0;
    Z = 0;
  } else {
    // All other values are converted to an integer representation, rounded
    // toward zero.
    double int_result = std::floor(value);
    double error = value - int_result;
    if ((error != 0.0) && (int_result < 0.0)) {
      int_result++;
    }
    // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
    // write a one-liner with std::round, but the behaviour on ties is incorrect
    // for our purposes.
    double mod_const = static_cast<double>(UINT64_C(1) << 32);
    double mod_error =
        (int_result / mod_const) - std::floor(int_result / mod_const);
    double constrained;
    if (mod_error == 0.5) {
      // int_result is an odd multiple of 2^31; map it to INT32_MIN explicitly.
      constrained = INT32_MIN;
    } else {
      constrained = int_result - mod_const * round(int_result / mod_const);
    }
    DCHECK(std::floor(constrained) == constrained);
    DCHECK(constrained >= INT32_MIN);
    DCHECK(constrained <= INT32_MAX);
    // Take the bottom 32 bits of the result as a 32-bit integer.
    result = static_cast<int32_t>(constrained);
    if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
        (error != 0.0)) {
      // If the integer result is out of range or the conversion isn't exact,
      // take exception and unset the Z-flag.
      FPProcessException();
      Z = 0;
    }
  }
  nzcv().SetN(0);
  nzcv().SetZ(Z);
  nzcv().SetC(0);
  nzcv().SetV(0);
  return result;
}

// Dispatches to frsqrts<float> or frsqrts<double> based on the lane size of
// vform.
LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    frsqrts<float>(vform, dst, src1, src2);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    frsqrts<double>(vform, dst, src1, src2);
  }
  return dst;
}

// Lane-wise floating-point comparison for lane type T. Each dst lane is set to
// MaxUintFromFormat(vform) when the comparison selected by cond holds and to 0
// otherwise. Lanes for which FPProcessNaNs returns a NaN always compare false.
// Only eq, ge, gt, le and lt are supported; any other condition is
// UNREACHABLE().
template <typename T>
LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2, Condition cond) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    bool result = false;
    T op1 = src1.Float<T>(i);
    T op2 = src2.Float<T>(i);
    T nan_result = FPProcessNaNs(op1, op2);
    if (!std::isnan(nan_result)) {
      switch (cond) {
        case eq:
          result = (op1 == op2);
          break;
        case ge:
          result = (op1 >= op2);
          break;
        case gt:
          result = (op1 > op2);
          break;
        case le:
          result = (op1 <= op2);
          break;
        case lt:
          result = (op1 < op2);
          break;
        default:
          UNREACHABLE();
      }
    }
    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
  }
  return dst;
}

// Dispatches to fcmp<float> or fcmp<double> based on the lane size of vform.
LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2, Condition cond) {
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    fcmp<float>(vform, dst, src1, src2, cond);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    fcmp<double>(vform, dst, src1, src2, cond);
  }
  return dst;
}

// Compares each lane of src against zero: a temporary register is filled with
// the bit pattern of 0.0f / 0.0 via dup_immediate and used as the second
// operand of fcmp.
LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
                                    const LogicVRegister& src, Condition cond) {
  SimVRegister temp;
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    LogicVRegister zero_reg =
        dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
    fcmp<float>(vform, dst, src, zero_reg, cond);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    LogicVRegister zero_reg =
        dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
    fcmp<double>(vform, dst, src, zero_reg, cond);
  }
  return dst;
}

// Compares the absolute values of the lanes of src1 and src2: both sources are
// passed through fabs_ into temporaries, which are then compared with fcmp.
LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2, Condition cond) {
  SimVRegister temp1, temp2;
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
    LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
    fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
    LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
    fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
  }
  return dst;
}

// Lane-wise multiply-accumulate for lane type T: each dst lane is replaced by
// FPMulAdd(dst lane, src1 lane, src2 lane). The accumulator is read from dst
// after dst.ClearForWrite(vform) has been called.
template <typename T>
LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    T op1 = src1.Float<T>(i);
    T op2 = src2.Float<T>(i);
    T acc = dst.Float<T>(i);
    T result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}

// Dispatches to fmla<float> or fmla<double> based on the lane size of vform.
LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    fmla<float>(vform, dst, src1, src2);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    fmla<double>(vform, dst, src1, src2);
  }
  return dst;
}

// Lane-wise multiply-subtract for lane type T: identical to fmla<T> except
// that the src1 lane is negated before being passed to FPMulAdd.
template <typename T>
LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    T op1 = -src1.Float<T>(i);
    T op2 = src2.Float<T>(i);
    T acc = dst.Float<T>(i);
    T result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}

// Dispatches to fmls<float> or fmls<double> based on the lane size of vform.
LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    fmls<float>(vform, dst, src1, src2);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    fmls<double>(vform, dst, src1, src2);
  }
  return dst;
}

// Lane-wise negation for lane type T, using the unary minus operator on each
// lane value.
template <typename T>
LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    T op = src.Float<T>(i);
    op = -op;
    dst.SetFloat(i, op);
  }
  return dst;
}

// Dispatches to fneg<float> or fneg<double> based on the lane size of vform.
LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src) {
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    fneg<float>(vform, dst, src);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    fneg<double>(vform, dst, src);
  }
  return dst;
}

// Lane-wise absolute value for lane type T. The sign is tested with
// std::copysign(1.0, op) rather than (op < 0.0), so negatively signed zeros
// are negated as well.
template <typename T>
LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    T op = src.Float<T>(i);
    if (std::copysign(1.0, op) < 0.0) {
      op = -op;
    }
    dst.SetFloat(i, op);
  }
  return dst;
}

// Dispatches to fabs_<float> or fabs_<double> based on the lane size of vform.
LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src) {
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    fabs_<float>(vform, dst, src);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    fabs_<double>(vform, dst, src);
  }
  return dst;
}

// Lane-wise absolute difference: computes src1 - src2 into a temporary with
// fsub and writes the absolute value of that temporary to dst.
LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister temp;
  fsub(vform, temp, src1, src2);
  fabs_(vform, dst, temp);
  return dst;
}

// Lane-wise square root: applies FPSqrt to each float or double lane of src,
// chosen by the lane size of vform.
LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      float result = FPSqrt(src.Float<float>(i));
      dst.SetFloat(i, result);
    }
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      double result = FPSqrt(src.Float<double>(i));
      dst.SetFloat(i, result);
    }
  }
  return dst;
}

// For every (FNP, FN, OP) entry of NEON_FPPAIRWISE_LIST this macro defines two
// overloads of Simulator::FNP:
//  - a two-source vector form that de-interleaves src1 and src2 with uzp1 and
//    uzp2 into temporaries and applies the lane-wise operation FN to them;
//  - a single-source scalar form (vform is kFormatS or kFormatD) that applies
//    OP to lanes 0 and 1 of src, stores the result in lane 0 of dst and then
//    calls dst.ClearForWrite(vform).
// No comments are placed inside the macro body because of the line
// continuations.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst,    \
                                const LogicVRegister& src1,                \
                                const LogicVRegister& src2) {              \
    SimVRegister temp1, temp2;                                             \
    uzp1(vform, temp1, src1, src2);                                        \
    uzp2(vform, temp2, src1, src2);                                        \
    FN(vform, dst, temp1, temp2);                                          \
    return dst;                                                            \
  }                                                                        \
                                                                           \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst,    \
                                const LogicVRegister& src) {               \
    if (vform == kFormatS) {                                               \
      float result = OP(src.Float<float>(0), src.Float<float>(1));         \
      dst.SetFloat(0, result);                                             \
    } else {                                                               \
      DCHECK_EQ(vform, kFormatD);                                          \
      double result = OP(src.Float<double>(0), src.Float<double>(1));      \
      dst.SetFloat(0, result);                                             \
    }                                                                      \
    dst.ClearForWrite(vform);                                              \
    return dst;                                                            \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP

// Reduces the four float lanes of src to a single float using the member
// function Op: lanes 0/1 and lanes 2/3 are combined first, then the two
// partial results. Only kFormat4S is supported (DCHECKed). The result is
// written to lane 0 of dst after clearing it for kFormatS.
LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src, FPMinMaxOp Op) {
  DCHECK_EQ(vform, kFormat4S);
  USE(vform);
  float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
  float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
  float result = (this->*Op)(result1, result2);
  dst.ClearForWrite(kFormatS);
  dst.SetFloat<float>(0, result);
  return dst;
}

// Across-lanes reduction using FPMax; see FMinMaxV.
LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src) {
  return FMinMaxV(vform, dst, src, &Simulator::FPMax);
}

// Across-lanes reduction using FPMin; see FMinMaxV.
LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src) {
  return FMinMaxV(vform, dst, src, &Simulator::FPMin);
}

// Across-lanes reduction using FPMaxNM; see FMinMaxV.
LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src) {
  return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
}

// Across-lanes reduction using FPMinNM; see FMinMaxV.
LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src) {
  return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
}

// By-element multiply: lane `index` of src2 is broadcast into a temporary
// register with dup_element (as kFormat4S or kFormat2D depending on lane size)
// and the lane-wise fmul<T> is then applied to src1 and that temporary.
LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2, int index) {
  dst.ClearForWrite(vform);
  SimVRegister temp;
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    fmul<float>(vform, dst, src1, index_reg);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    fmul<double>(vform, dst, src1, index_reg);
  }
  return dst;
}

// By-element multiply-accumulate: lane `index` of src2 is broadcast with
// dup_element and the lane-wise fmla<T> is applied to src1 and the broadcast
// register.
LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2, int index) {
  dst.ClearForWrite(vform);
  SimVRegister temp;
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    fmla<float>(vform, dst, src1, index_reg);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    fmla<double>(vform, dst, src1, index_reg);
  }
  return dst;
}

// By-element multiply-subtract: lane `index` of src2 is broadcast with
// dup_element and the lane-wise fmls<T> is applied to src1 and the broadcast
// register.
LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2, int index) {
  dst.ClearForWrite(vform);
  SimVRegister temp;
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    fmls<float>(vform, dst, src1, index_reg);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    fmls<double>(vform, dst, src1, index_reg);
  }
  return dst;
}

// By-element extended multiply: lane `index` of src2 is broadcast with
// dup_element and the lane-wise fmulx<T> is applied to src1 and the broadcast
// register.
LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2, int index) {
  dst.ClearForWrite(vform);
  SimVRegister temp;
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    fmulx<float>(vform, dst, src1, index_reg);

  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    fmulx<double>(vform, dst, src1, index_reg);
  }
  return dst;
}

// Lane-wise round-to-integral using FPRoundInt with rounding_mode. When
// inexact_exception is set, FPProcessException() is called for every non-NaN
// lane whose rounded value differs from its input.
LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding rounding_mode,
                                bool inexact_exception) {
  dst.ClearForWrite(vform);
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      float input = src.Float<float>(i);
      float rounded = FPRoundInt(input, rounding_mode);
      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
        FPProcessException();
      }
      dst.SetFloat<float>(i, rounded);
    }
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      double input = src.Float<double>(i);
      double rounded = FPRoundInt(input, rounding_mode);
      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
        FPProcessException();
      }
      dst.SetFloat<double>(i, rounded);
    }
  }
  return dst;
}

// Lane-wise conversion to signed fixed-point/integer: each lane is multiplied
// by 2^fbits and then converted with FPToInt32 (float lanes) or FPToInt64
// (double lanes) using rounding_mode.
LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding rounding_mode, int fbits) {
  dst.ClearForWrite(vform);
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
      dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
    }
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      double op = src.Float<double>(i) * std::pow(2.0, fbits);
      dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
    }
  }
  return dst;
}

// Lane-wise conversion to unsigned fixed-point/integer: each lane is
// multiplied by 2^fbits and then converted with FPToUInt32 (float lanes) or
// FPToUInt64 (double lanes) using rounding_mode.
LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding rounding_mode, int fbits) {
  dst.ClearForWrite(vform);
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
      dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
    }
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      double op = src.Float<double>(i) * std::pow(2.0, fbits);
      dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
    }
  }
  return dst;
}

// Widening conversion of the low source lanes: float16 -> float when vform has
// S-sized lanes, float -> double when it has D-sized lanes. dst lane i is
// converted from src lane i.
// NOTE(review): lanes are processed from the highest index down, presumably so
// that dst may alias src without overwriting unread narrow lanes - confirm.
LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src) {
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
      dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
    }
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
      dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
    }
  }
  return dst;
}

// Widening conversion of the high source lanes: dst lane i is converted from
// src lane i + lane_count, where lane_count is the lane count of vform.
LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src) {
  int lane_count = LaneCountFromFormat(vform);
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    for (int i = 0; i < lane_count; i++) {
      dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
    }
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    for (int i = 0; i < lane_count; i++) {
      dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
    }
  }
  return dst;
}

// Narrowing conversion into the low destination lanes using FPTieEven
// rounding: float -> float16 when vform has H-sized lanes, double -> float
// when it has S-sized lanes. dst.ClearForWrite(vform) is called after the
// lanes have been written.
LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src) {
  if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
    }
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
    }
  }
  dst.ClearForWrite(vform);
  return dst;
}

// Narrowing conversion into the high destination lanes using FPTieEven
// rounding: src lane i is written to dst lane i + lane_count, where lane_count
// is half the lane count of vform. The low dst lanes are not written here.
LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src) {
  int lane_count = LaneCountFromFormat(vform) / 2;
  if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
    for (int i = lane_count - 1; i >= 0; i--) {
      dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
    }
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
    for (int i = lane_count - 1; i >= 0; i--) {
      dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
    }
  }
  return dst;
}

// Narrowing double -> float conversion into the low destination lanes using
// the FPRoundOdd rounding mode. Only S-sized destination lanes are supported
// (DCHECKed).
LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
  }
  return dst;
}

// Narrowing double -> float conversion into the high destination lanes using
// the FPRoundOdd rounding mode: src lane i is written to dst lane
// i + lane_count, where lane_count is half the lane count of vform.
LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src) {
  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
  int lane_count = LaneCountFromFormat(vform) / 2;
  for (int i = lane_count - 1; i >= 0; i--) {
    dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
  }
  return dst;
}

// Based on reference C function recip_sqrt_estimate from ARM ARM.
// The input is quantised (to 1/512 steps below 0.5, 1/256 steps otherwise),
// the reciprocal square root of the bucket midpoint is computed, and the
// result is rounded to a multiple of 1/256.
double Simulator::recip_sqrt_estimate(double a) {
  int q0, q1, s;
  double r;
  if (a < 0.5) {
    q0 = static_cast<int>(a * 512.0);
    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
  } else {
    q1 = static_cast<int>(a * 256.0);
    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
  }
  s = static_cast<int>(256.0 * r + 0.5);
  return static_cast<double>(s) / 256.0;
}

namespace {

// Thin wrapper around unsigned_bitextract_64 with the value as first argument;
// start_bit and end_bit are forwarded in that order.
inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
  return unsigned_bitextract_64(start_bit, end_bit, val);
}

}  // anonymous namespace

// Returns a reciprocal square root estimate of op for T = float or double.
// Special cases: NaN goes through FPProcessNaN; +/-0.0 returns an infinity of
// the same sign; any other negatively signed input calls FPProcessException()
// and returns the default NaN; +infinity returns 0.0. Otherwise the operand is
// decomposed into exponent and fraction, scaled to a double that is fed to
// recip_sqrt_estimate, and the estimate's mantissa is repacked with a
// recomputed exponent.
template <typename T>
T Simulator::FPRecipSqrtEstimate(T op) {
  static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
                "T must be a float or double");

  if (std::isnan(op)) {
    return FPProcessNaN(op);
  } else if (op == 0.0) {
    if (std::copysign(1.0, op) < 0.0) {
      return kFP64NegativeInfinity;
    } else {
      return kFP64PositiveInfinity;
    }
  } else if (std::copysign(1.0, op) < 0.0) {
    FPProcessException();
    return FPDefaultNaN<T>();
  } else if (std::isinf(op)) {
    return 0.0;
  } else {
    uint64_t fraction;
    int32_t exp, result_exp;

    if (sizeof(T) == sizeof(float)) {
      exp = static_cast<int32_t>(float_exp(op));
      fraction = float_mantissa(op);
      // Left-align the float mantissa with a 52-bit fraction field.
      fraction <<= 29;
    } else {
      exp = static_cast<int32_t>(double_exp(op));
      fraction = double_mantissa(op);
    }

    if (exp == 0) {
      // Zero exponent field: shift the fraction left until bit 51 is set,
      // decrementing the exponent for each shift, then shift that bit out.
      while (Bits(fraction, 51, 51) == 0) {
        fraction = Bits(fraction, 50, 0) << 1;
        exp -= 1;
      }
      fraction = Bits(fraction, 50, 0) << 1;
    }

    // Only the top 8 fraction bits feed the estimate; the scaled exponent
    // depends on the parity of exp.
    double scaled;
    if (Bits(exp, 0, 0) == 0) {
      scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
    } else {
      scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
    }

    if (sizeof(T) == sizeof(float)) {
      result_exp = (380 - exp) / 2;
    } else {
      result_exp = (3068 - exp) / 2;
    }

    uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));

    if (sizeof(T) == sizeof(float)) {
      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
      return float_pack(0, exp_bits, est_bits);
    } else {
      return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
    }
  }
}

// Lane-wise reciprocal square root estimate: applies FPRecipSqrtEstimate<T> to
// each float or double lane of src, chosen by the lane size of vform.
LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      float input = src.Float<float>(i);
      dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
    }
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      double input = src.Float<double>(i);
      dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
    }
  }
  return dst;
}

template <typename T>
T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
  static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
                "T must be a float or double");
  uint32_t sign;

  if (sizeof(T) == sizeof(float)) {
    sign = float_sign(op);
  } else {
    sign = double_sign(op);
  }

  if (std::isnan(op)) {
    return FPProcessNaN(op);
  } else if (std::isinf(op)) {
    return (sign == 1) ? -0.0 : 0.0;
  } else if (op == 0.0) {
    FPProcessException();  // FPExc_DivideByZero exception.
    return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
  } else if (((sizeof(T) == sizeof(float)) &&
              (std::fabs(op) < std::pow(2.0, -128.0))) ||
             ((sizeof(T) == sizeof(double)) &&
              (std::fabs(op) < std::pow(2.0, -1024.0)))) {
    bool overflow_to_inf = false;
    switch (rounding) {
      case FPTieEven:
        overflow_to_inf = true;
        break;
      case FPPositiveInfinity:
        overflow_to_inf = (sign == 0);
        break;
      case FPNegativeInfinity:
        overflow_to_inf = (sign == 1);
        break;
      case FPZero:
        overflow_to_inf = false;
        break;
      default:
        break;
    }
    FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
    if (overflow_to_inf) {
      return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
    } else {
      // Return FPMaxNormal(sign).
      if (sizeof(T) == sizeof(float)) {
        return float_pack(sign, 0xFE, 0x07FFFFF);
      } else {
        return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl);
      }
    }
  } else {
    uint64_t fraction;
    int32_t exp, result_exp;
    uint32_t sign;

    if (sizeof(T) == sizeof(float)) {
      sign = float_sign(op);
      exp = static_cast<int32_t>(float_exp(op));
      fraction = float_mantissa(op);
      fraction <<= 29;
    } else {
      sign = double_sign(op);
      exp = static_cast<int32_t>(double_exp(op));
      fraction = double_mantissa(op);
    }

    if (exp == 0) {
      if (Bits(fraction, 51, 51) == 0) {
        exp -= 1;
        fraction = Bits(fraction, 49, 0) << 2;
      } else {
        fraction = Bits(fraction, 50, 0) << 1;
      }
    }

    double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);

    if (sizeof(T) == sizeof(float)) {
      result_exp = 253 - exp;
    } else {
      result_exp = 2045 - exp;
    }

    double estimate = recip_estimate(scaled);

    fraction = double_mantissa(estimate);
    if (result_exp == 0) {
      fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
    } else if
(result_exp == -1) { 4074 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 4075 result_exp = 0; 4076 } 4077 if (sizeof(T) == sizeof(float)) { 4078 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4079 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 4080 return float_pack(sign, exp_bits, frac_bits); 4081 } else { 4082 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 4083 } 4084 } 4085} 4086 4087LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst, 4088 const LogicVRegister& src, FPRounding round) { 4089 dst.ClearForWrite(vform); 4090 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 4091 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4092 float input = src.Float<float>(i); 4093 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 4094 } 4095 } else { 4096 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 4097 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4098 double input = src.Float<double>(i); 4099 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 4100 } 4101 } 4102 return dst; 4103} 4104 4105LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst, 4106 const LogicVRegister& src) { 4107 dst.ClearForWrite(vform); 4108 uint64_t operand; 4109 uint32_t result; 4110 double dp_operand, dp_result; 4111 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4112 operand = src.Uint(vform, i); 4113 if (operand <= 0x3FFFFFFF) { 4114 result = 0xFFFFFFFF; 4115 } else { 4116 dp_operand = operand * std::pow(2.0, -32); 4117 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 4118 result = static_cast<uint32_t>(dp_result); 4119 } 4120 dst.SetUint(vform, i, result); 4121 } 4122 return dst; 4123} 4124 4125// Based on reference C function recip_estimate from ARM ARM. 
4126double Simulator::recip_estimate(double a) { 4127 int q, s; 4128 double r; 4129 q = static_cast<int>(a * 512.0); 4130 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 4131 s = static_cast<int>(256.0 * r + 0.5); 4132 return static_cast<double>(s) / 256.0; 4133} 4134 4135LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst, 4136 const LogicVRegister& src) { 4137 dst.ClearForWrite(vform); 4138 uint64_t operand; 4139 uint32_t result; 4140 double dp_operand, dp_result; 4141 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4142 operand = src.Uint(vform, i); 4143 if (operand <= 0x7FFFFFFF) { 4144 result = 0xFFFFFFFF; 4145 } else { 4146 dp_operand = operand * std::pow(2.0, -32); 4147 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 4148 result = static_cast<uint32_t>(dp_result); 4149 } 4150 dst.SetUint(vform, i, result); 4151 } 4152 return dst; 4153} 4154 4155template <typename T> 4156LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, 4157 const LogicVRegister& src) { 4158 dst.ClearForWrite(vform); 4159 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4160 T op = src.Float<T>(i); 4161 T result; 4162 if (std::isnan(op)) { 4163 result = FPProcessNaN(op); 4164 } else { 4165 int exp; 4166 uint32_t sign; 4167 if (sizeof(T) == sizeof(float)) { 4168 sign = float_sign(op); 4169 exp = static_cast<int>(float_exp(op)); 4170 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 4171 result = float_pack(sign, exp, 0); 4172 } else { 4173 sign = double_sign(op); 4174 exp = static_cast<int>(double_exp(op)); 4175 exp = (exp == 0) ? 
(0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 4176 result = double_pack(sign, exp, 0); 4177 } 4178 } 4179 dst.SetFloat(i, result); 4180 } 4181 return dst; 4182} 4183 4184LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, 4185 const LogicVRegister& src) { 4186 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 4187 frecpx<float>(vform, dst, src); 4188 } else { 4189 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 4190 frecpx<double>(vform, dst, src); 4191 } 4192 return dst; 4193} 4194 4195LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst, 4196 const LogicVRegister& src, int fbits, 4197 FPRounding round) { 4198 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4199 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 4200 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 4201 dst.SetFloat<float>(i, result); 4202 } else { 4203 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 4204 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 4205 dst.SetFloat<double>(i, result); 4206 } 4207 } 4208 return dst; 4209} 4210 4211LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst, 4212 const LogicVRegister& src, int fbits, 4213 FPRounding round) { 4214 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4215 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 4216 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 4217 dst.SetFloat<float>(i, result); 4218 } else { 4219 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 4220 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 4221 dst.SetFloat<double>(i, result); 4222 } 4223 } 4224 return dst; 4225} 4226 4227} // namespace internal 4228} // namespace v8 4229 4230#endif // USE_SIMULATOR 4231