1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/execution/arm64/simulator-arm64.h"
6
7#if defined(USE_SIMULATOR)
8
9#include <cmath>
10
11namespace v8 {
12namespace internal {
13
14namespace {
15
16// See FPRound for a description of this function.
17inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
18                              FPRounding round_mode) {
19  uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
20      sign, exponent, mantissa, round_mode);
21  return bit_cast<double>(bits);
22}
23
24// See FPRound for a description of this function.
25inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
26                            FPRounding round_mode) {
27  uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
28      sign, exponent, mantissa, round_mode);
29  return bit_cast<float>(bits);
30}
31
32// See FPRound for a description of this function.
33inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
34                                uint64_t mantissa, FPRounding round_mode) {
35  return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
36      sign, exponent, mantissa, round_mode);
37}
38
39}  // namespace
40
41double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
42  if (src >= 0) {
43    return UFixedToDouble(src, fbits, round);
44  } else if (src == INT64_MIN) {
45    return -UFixedToDouble(src, fbits, round);
46  } else {
47    return -UFixedToDouble(-src, fbits, round);
48  }
49}
50
51double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
52  // An input of 0 is a special case because the result is effectively
53  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
54  if (src == 0) {
55    return 0.0;
56  }
57
58  // Calculate the exponent. The highest significant bit will have the value
59  // 2^exponent.
60  const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
61  const int64_t exponent = highest_significant_bit - fbits;
62
63  return FPRoundToDouble(0, exponent, src, round);
64}
65
66float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
67  if (src >= 0) {
68    return UFixedToFloat(src, fbits, round);
69  } else if (src == INT64_MIN) {
70    return -UFixedToFloat(src, fbits, round);
71  } else {
72    return -UFixedToFloat(-src, fbits, round);
73  }
74}
75
76float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
77  // An input of 0 is a special case because the result is effectively
78  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
79  if (src == 0) {
80    return 0.0f;
81  }
82
83  // Calculate the exponent. The highest significant bit will have the value
84  // 2^exponent.
85  const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
86  const int32_t exponent = highest_significant_bit - fbits;
87
88  return FPRoundToFloat(0, exponent, src, round);
89}
90
91double Simulator::FPToDouble(float value) {
92  switch (std::fpclassify(value)) {
93    case FP_NAN: {
94      if (IsSignallingNaN(value)) {
95        FPProcessException();
96      }
97      if (DN()) return kFP64DefaultNaN;
98
99      // Convert NaNs as the processor would:
100      //  - The sign is propagated.
101      //  - The mantissa is transferred entirely, except that the top bit is
102      //    forced to '1', making the result a quiet NaN. The unused (low-order)
103      //    mantissa bits are set to 0.
104      uint32_t raw = bit_cast<uint32_t>(value);
105
106      uint64_t sign = raw >> 31;
107      uint64_t exponent = (1 << kDoubleExponentBits) - 1;
108      uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
109
110      // Unused low-order bits remain zero.
111      mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
112
113      // Force a quiet NaN.
114      mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
115
116      return double_pack(sign, exponent, mantissa);
117    }
118
119    case FP_ZERO:
120    case FP_NORMAL:
121    case FP_SUBNORMAL:
122    case FP_INFINITE: {
123      // All other inputs are preserved in a standard cast, because every value
124      // representable using an IEEE-754 float is also representable using an
125      // IEEE-754 double.
126      return static_cast<double>(value);
127    }
128  }
129
130  UNREACHABLE();
131}
132
133float Simulator::FPToFloat(float16 value) {
134  uint32_t sign = value >> 15;
135  uint32_t exponent =
136      unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
137                             kFloat16MantissaBits, value);
138  uint32_t mantissa =
139      unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
140
141  switch (float16classify(value)) {
142    case FP_ZERO:
143      return (sign == 0) ? 0.0f : -0.0f;
144
145    case FP_INFINITE:
146      return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
147
148    case FP_SUBNORMAL: {
149      // Calculate shift required to put mantissa into the most-significant bits
150      // of the destination mantissa.
151      int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
152
153      // Shift mantissa and discard implicit '1'.
154      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
155      mantissa &= (1 << kFloatMantissaBits) - 1;
156
157      // Adjust the exponent for the shift applied, and rebias.
158      exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
159      break;
160    }
161
162    case FP_NAN: {
163      if (IsSignallingNaN(value)) {
164        FPProcessException();
165      }
166      if (DN()) return kFP32DefaultNaN;
167
168      // Convert NaNs as the processor would:
169      //  - The sign is propagated.
170      //  - The mantissa is transferred entirely, except that the top bit is
171      //    forced to '1', making the result a quiet NaN. The unused (low-order)
172      //    mantissa bits are set to 0.
173      exponent = (1 << kFloatExponentBits) - 1;
174
175      // Increase bits in mantissa, making low-order bits 0.
176      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
177      mantissa |= 1 << (kFloatMantissaBits - 1);  // Force a quiet NaN.
178      break;
179    }
180
181    case FP_NORMAL: {
182      // Increase bits in mantissa, making low-order bits 0.
183      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
184
185      // Change exponent bias.
186      exponent += (kFloatExponentBias - kFloat16ExponentBias);
187      break;
188    }
189
190    default:
191      UNREACHABLE();
192  }
193  return float_pack(sign, exponent, mantissa);
194}
195
196float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
197  // Only the FPTieEven rounding mode is implemented.
198  DCHECK_EQ(round_mode, FPTieEven);
199  USE(round_mode);
200
201  int64_t sign = float_sign(value);
202  int64_t exponent =
203      static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
204  uint32_t mantissa = float_mantissa(value);
205
206  switch (std::fpclassify(value)) {
207    case FP_NAN: {
208      if (IsSignallingNaN(value)) {
209        FPProcessException();
210      }
211      if (DN()) return kFP16DefaultNaN;
212
213      // Convert NaNs as the processor would:
214      //  - The sign is propagated.
215      //  - The mantissa is transferred as much as possible, except that the top
216      //    bit is forced to '1', making the result a quiet NaN.
217      float16 result =
218          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
219      result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
220      result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
221      return result;
222    }
223
224    case FP_ZERO:
225      return (sign == 0) ? 0 : 0x8000;
226
227    case FP_INFINITE:
228      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
229
230    case FP_NORMAL:
231    case FP_SUBNORMAL: {
232      // Convert float-to-half as the processor would, assuming that FPCR.FZ
233      // (flush-to-zero) is not set.
234
235      // Add the implicit '1' bit to the mantissa.
236      mantissa += (1 << kFloatMantissaBits);
237      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
238    }
239  }
240
241  UNREACHABLE();
242}
243
244float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
245  // Only the FPTieEven rounding mode is implemented.
246  DCHECK_EQ(round_mode, FPTieEven);
247  USE(round_mode);
248
249  int64_t sign = double_sign(value);
250  int64_t exponent =
251      static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
252  uint64_t mantissa = double_mantissa(value);
253
254  switch (std::fpclassify(value)) {
255    case FP_NAN: {
256      if (IsSignallingNaN(value)) {
257        FPProcessException();
258      }
259      if (DN()) return kFP16DefaultNaN;
260
261      // Convert NaNs as the processor would:
262      //  - The sign is propagated.
263      //  - The mantissa is transferred as much as possible, except that the top
264      //    bit is forced to '1', making the result a quiet NaN.
265      float16 result =
266          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
267      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
268      result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
269      return result;
270    }
271
272    case FP_ZERO:
273      return (sign == 0) ? 0 : 0x8000;
274
275    case FP_INFINITE:
276      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
277
278    case FP_NORMAL:
279    case FP_SUBNORMAL: {
280      // Convert double-to-half as the processor would, assuming that FPCR.FZ
281      // (flush-to-zero) is not set.
282
283      // Add the implicit '1' bit to the mantissa.
284      mantissa += (UINT64_C(1) << kDoubleMantissaBits);
285      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
286    }
287  }
288
289  UNREACHABLE();
290}
291
292float Simulator::FPToFloat(double value, FPRounding round_mode) {
293  // Only the FPTieEven rounding mode is implemented.
294  DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
295  USE(round_mode);
296
297  switch (std::fpclassify(value)) {
298    case FP_NAN: {
299      if (IsSignallingNaN(value)) {
300        FPProcessException();
301      }
302      if (DN()) return kFP32DefaultNaN;
303
304      // Convert NaNs as the processor would:
305      //  - The sign is propagated.
306      //  - The mantissa is transferred as much as possible, except that the
307      //    top bit is forced to '1', making the result a quiet NaN.
308
309      uint64_t raw = bit_cast<uint64_t>(value);
310
311      uint32_t sign = raw >> 63;
312      uint32_t exponent = (1 << 8) - 1;
313      uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
314          50, kDoubleMantissaBits - kFloatMantissaBits, raw));
315      mantissa |= (1 << (kFloatMantissaBits - 1));  // Force a quiet NaN.
316
317      return float_pack(sign, exponent, mantissa);
318    }
319
320    case FP_ZERO:
321    case FP_INFINITE: {
322      // In a C++ cast, any value representable in the target type will be
323      // unchanged. This is always the case for +/-0.0 and infinities.
324      return static_cast<float>(value);
325    }
326
327    case FP_NORMAL:
328    case FP_SUBNORMAL: {
329      // Convert double-to-float as the processor would, assuming that FPCR.FZ
330      // (flush-to-zero) is not set.
331      uint32_t sign = double_sign(value);
332      int64_t exponent =
333          static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
334      uint64_t mantissa = double_mantissa(value);
335      if (std::fpclassify(value) == FP_NORMAL) {
336        // For normal FP values, add the hidden bit.
337        mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
338      }
339      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
340    }
341  }
342
343  UNREACHABLE();
344}
345
346void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
347  dst.ClearForWrite(vform);
348  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
349    dst.ReadUintFromMem(vform, i, addr);
350    addr += LaneSizeInBytesFromFormat(vform);
351  }
352}
353
354void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
355                    uint64_t addr) {
356  dst.ReadUintFromMem(vform, index, addr);
357}
358
359void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
360  dst.ClearForWrite(vform);
361  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
362    dst.ReadUintFromMem(vform, i, addr);
363  }
364}
365
366void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
367                    LogicVRegister dst2, uint64_t addr1) {
368  dst1.ClearForWrite(vform);
369  dst2.ClearForWrite(vform);
370  int esize = LaneSizeInBytesFromFormat(vform);
371  uint64_t addr2 = addr1 + esize;
372  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
373    dst1.ReadUintFromMem(vform, i, addr1);
374    dst2.ReadUintFromMem(vform, i, addr2);
375    addr1 += 2 * esize;
376    addr2 += 2 * esize;
377  }
378}
379
380void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
381                    LogicVRegister dst2, int index, uint64_t addr1) {
382  dst1.ClearForWrite(vform);
383  dst2.ClearForWrite(vform);
384  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
385  dst1.ReadUintFromMem(vform, index, addr1);
386  dst2.ReadUintFromMem(vform, index, addr2);
387}
388
389void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
390                     LogicVRegister dst2, uint64_t addr) {
391  dst1.ClearForWrite(vform);
392  dst2.ClearForWrite(vform);
393  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
394  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
395    dst1.ReadUintFromMem(vform, i, addr);
396    dst2.ReadUintFromMem(vform, i, addr2);
397  }
398}
399
400void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
401                    LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
402  dst1.ClearForWrite(vform);
403  dst2.ClearForWrite(vform);
404  dst3.ClearForWrite(vform);
405  int esize = LaneSizeInBytesFromFormat(vform);
406  uint64_t addr2 = addr1 + esize;
407  uint64_t addr3 = addr2 + esize;
408  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
409    dst1.ReadUintFromMem(vform, i, addr1);
410    dst2.ReadUintFromMem(vform, i, addr2);
411    dst3.ReadUintFromMem(vform, i, addr3);
412    addr1 += 3 * esize;
413    addr2 += 3 * esize;
414    addr3 += 3 * esize;
415  }
416}
417
418void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
419                    LogicVRegister dst2, LogicVRegister dst3, int index,
420                    uint64_t addr1) {
421  dst1.ClearForWrite(vform);
422  dst2.ClearForWrite(vform);
423  dst3.ClearForWrite(vform);
424  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
425  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
426  dst1.ReadUintFromMem(vform, index, addr1);
427  dst2.ReadUintFromMem(vform, index, addr2);
428  dst3.ReadUintFromMem(vform, index, addr3);
429}
430
431void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
432                     LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
433  dst1.ClearForWrite(vform);
434  dst2.ClearForWrite(vform);
435  dst3.ClearForWrite(vform);
436  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
437  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
438  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
439    dst1.ReadUintFromMem(vform, i, addr);
440    dst2.ReadUintFromMem(vform, i, addr2);
441    dst3.ReadUintFromMem(vform, i, addr3);
442  }
443}
444
445void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
446                    LogicVRegister dst2, LogicVRegister dst3,
447                    LogicVRegister dst4, uint64_t addr1) {
448  dst1.ClearForWrite(vform);
449  dst2.ClearForWrite(vform);
450  dst3.ClearForWrite(vform);
451  dst4.ClearForWrite(vform);
452  int esize = LaneSizeInBytesFromFormat(vform);
453  uint64_t addr2 = addr1 + esize;
454  uint64_t addr3 = addr2 + esize;
455  uint64_t addr4 = addr3 + esize;
456  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
457    dst1.ReadUintFromMem(vform, i, addr1);
458    dst2.ReadUintFromMem(vform, i, addr2);
459    dst3.ReadUintFromMem(vform, i, addr3);
460    dst4.ReadUintFromMem(vform, i, addr4);
461    addr1 += 4 * esize;
462    addr2 += 4 * esize;
463    addr3 += 4 * esize;
464    addr4 += 4 * esize;
465  }
466}
467
468void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
469                    LogicVRegister dst2, LogicVRegister dst3,
470                    LogicVRegister dst4, int index, uint64_t addr1) {
471  dst1.ClearForWrite(vform);
472  dst2.ClearForWrite(vform);
473  dst3.ClearForWrite(vform);
474  dst4.ClearForWrite(vform);
475  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
476  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
477  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
478  dst1.ReadUintFromMem(vform, index, addr1);
479  dst2.ReadUintFromMem(vform, index, addr2);
480  dst3.ReadUintFromMem(vform, index, addr3);
481  dst4.ReadUintFromMem(vform, index, addr4);
482}
483
484void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
485                     LogicVRegister dst2, LogicVRegister dst3,
486                     LogicVRegister dst4, uint64_t addr) {
487  dst1.ClearForWrite(vform);
488  dst2.ClearForWrite(vform);
489  dst3.ClearForWrite(vform);
490  dst4.ClearForWrite(vform);
491  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
492  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
493  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
494  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
495    dst1.ReadUintFromMem(vform, i, addr);
496    dst2.ReadUintFromMem(vform, i, addr2);
497    dst3.ReadUintFromMem(vform, i, addr3);
498    dst4.ReadUintFromMem(vform, i, addr4);
499  }
500}
501
502void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
503  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
504    src.WriteUintToMem(vform, i, addr);
505    addr += LaneSizeInBytesFromFormat(vform);
506  }
507}
508
509void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
510                    uint64_t addr) {
511  src.WriteUintToMem(vform, index, addr);
512}
513
514void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
515                    uint64_t addr) {
516  int esize = LaneSizeInBytesFromFormat(vform);
517  uint64_t addr2 = addr + esize;
518  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
519    dst.WriteUintToMem(vform, i, addr);
520    dst2.WriteUintToMem(vform, i, addr2);
521    addr += 2 * esize;
522    addr2 += 2 * esize;
523  }
524}
525
526void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
527                    int index, uint64_t addr) {
528  int esize = LaneSizeInBytesFromFormat(vform);
529  dst.WriteUintToMem(vform, index, addr);
530  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
531}
532
533void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
534                    LogicVRegister dst3, uint64_t addr) {
535  int esize = LaneSizeInBytesFromFormat(vform);
536  uint64_t addr2 = addr + esize;
537  uint64_t addr3 = addr2 + esize;
538  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
539    dst.WriteUintToMem(vform, i, addr);
540    dst2.WriteUintToMem(vform, i, addr2);
541    dst3.WriteUintToMem(vform, i, addr3);
542    addr += 3 * esize;
543    addr2 += 3 * esize;
544    addr3 += 3 * esize;
545  }
546}
547
548void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
549                    LogicVRegister dst3, int index, uint64_t addr) {
550  int esize = LaneSizeInBytesFromFormat(vform);
551  dst.WriteUintToMem(vform, index, addr);
552  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
553  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
554}
555
556void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
557                    LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
558  int esize = LaneSizeInBytesFromFormat(vform);
559  uint64_t addr2 = addr + esize;
560  uint64_t addr3 = addr2 + esize;
561  uint64_t addr4 = addr3 + esize;
562  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
563    dst.WriteUintToMem(vform, i, addr);
564    dst2.WriteUintToMem(vform, i, addr2);
565    dst3.WriteUintToMem(vform, i, addr3);
566    dst4.WriteUintToMem(vform, i, addr4);
567    addr += 4 * esize;
568    addr2 += 4 * esize;
569    addr3 += 4 * esize;
570    addr4 += 4 * esize;
571  }
572}
573
574void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
575                    LogicVRegister dst3, LogicVRegister dst4, int index,
576                    uint64_t addr) {
577  int esize = LaneSizeInBytesFromFormat(vform);
578  dst.WriteUintToMem(vform, index, addr);
579  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
580  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
581  dst4.WriteUintToMem(vform, index, addr + 3 * esize);
582}
583
584LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
585                              const LogicVRegister& src1,
586                              const LogicVRegister& src2, Condition cond) {
587  dst.ClearForWrite(vform);
588  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
589    int64_t sa = src1.Int(vform, i);
590    int64_t sb = src2.Int(vform, i);
591    uint64_t ua = src1.Uint(vform, i);
592    uint64_t ub = src2.Uint(vform, i);
593    bool result = false;
594    switch (cond) {
595      case eq:
596        result = (ua == ub);
597        break;
598      case ge:
599        result = (sa >= sb);
600        break;
601      case gt:
602        result = (sa > sb);
603        break;
604      case hi:
605        result = (ua > ub);
606        break;
607      case hs:
608        result = (ua >= ub);
609        break;
610      case lt:
611        result = (sa < sb);
612        break;
613      case le:
614        result = (sa <= sb);
615        break;
616      default:
617        UNREACHABLE();
618    }
619    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
620  }
621  return dst;
622}
623
624LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
625                              const LogicVRegister& src1, int imm,
626                              Condition cond) {
627  SimVRegister temp;
628  LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
629  return cmp(vform, dst, src1, imm_reg, cond);
630}
631
632LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
633                                 const LogicVRegister& src1,
634                                 const LogicVRegister& src2) {
635  dst.ClearForWrite(vform);
636  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
637    uint64_t ua = src1.Uint(vform, i);
638    uint64_t ub = src2.Uint(vform, i);
639    dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
640  }
641  return dst;
642}
643
644LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
645                              const LogicVRegister& src1,
646                              const LogicVRegister& src2) {
647  int lane_size = LaneSizeInBitsFromFormat(vform);
648  dst.ClearForWrite(vform);
649  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
650    // Test for unsigned saturation.
651    uint64_t ua = src1.UintLeftJustified(vform, i);
652    uint64_t ub = src2.UintLeftJustified(vform, i);
653    uint64_t ur = ua + ub;
654    if (ur < ua) {
655      dst.SetUnsignedSat(i, true);
656    }
657
658    // Test for signed saturation.
659    bool pos_a = (ua >> 63) == 0;
660    bool pos_b = (ub >> 63) == 0;
661    bool pos_r = (ur >> 63) == 0;
662    // If the signs of the operands are the same, but different from the result,
663    // there was an overflow.
664    if ((pos_a == pos_b) && (pos_a != pos_r)) {
665      dst.SetSignedSat(i, pos_a);
666    }
667
668    dst.SetInt(vform, i, ur >> (64 - lane_size));
669  }
670  return dst;
671}
672
673LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
674                               const LogicVRegister& src1,
675                               const LogicVRegister& src2) {
676  SimVRegister temp1, temp2;
677  uzp1(vform, temp1, src1, src2);
678  uzp2(vform, temp2, src1, src2);
679  add(vform, dst, temp1, temp2);
680  return dst;
681}
682
683LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
684                              const LogicVRegister& src1,
685                              const LogicVRegister& src2) {
686  SimVRegister temp;
687  mul(vform, temp, src1, src2);
688  add(vform, dst, dst, temp);
689  return dst;
690}
691
692LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
693                              const LogicVRegister& src1,
694                              const LogicVRegister& src2) {
695  SimVRegister temp;
696  mul(vform, temp, src1, src2);
697  sub(vform, dst, dst, temp);
698  return dst;
699}
700
701LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
702                              const LogicVRegister& src1,
703                              const LogicVRegister& src2) {
704  dst.ClearForWrite(vform);
705  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
706    dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
707  }
708  return dst;
709}
710
711LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
712                              const LogicVRegister& src1,
713                              const LogicVRegister& src2, int index) {
714  SimVRegister temp;
715  VectorFormat indexform = VectorFormatFillQ(vform);
716  return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
717}
718
719LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
720                              const LogicVRegister& src1,
721                              const LogicVRegister& src2, int index) {
722  SimVRegister temp;
723  VectorFormat indexform = VectorFormatFillQ(vform);
724  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
725}
726
727LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
728                              const LogicVRegister& src1,
729                              const LogicVRegister& src2, int index) {
730  SimVRegister temp;
731  VectorFormat indexform = VectorFormatFillQ(vform);
732  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
733}
734
735LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
736                                const LogicVRegister& src1,
737                                const LogicVRegister& src2, int index) {
738  SimVRegister temp;
739  VectorFormat indexform =
740      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
741  return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
742}
743
744LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
745                                 const LogicVRegister& src1,
746                                 const LogicVRegister& src2, int index) {
747  SimVRegister temp;
748  VectorFormat indexform =
749      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
750  return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
751}
752
753LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
754                                const LogicVRegister& src1,
755                                const LogicVRegister& src2, int index) {
756  SimVRegister temp;
757  VectorFormat indexform =
758      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
759  return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
760}
761
762LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
763                                 const LogicVRegister& src1,
764                                 const LogicVRegister& src2, int index) {
765  SimVRegister temp;
766  VectorFormat indexform =
767      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
768  return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
769}
770
771LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
772                                const LogicVRegister& src1,
773                                const LogicVRegister& src2, int index) {
774  SimVRegister temp;
775  VectorFormat indexform =
776      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
777  return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
778}
779
780LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
781                                 const LogicVRegister& src1,
782                                 const LogicVRegister& src2, int index) {
783  SimVRegister temp;
784  VectorFormat indexform =
785      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
786  return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
787}
788
789LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
790                                const LogicVRegister& src1,
791                                const LogicVRegister& src2, int index) {
792  SimVRegister temp;
793  VectorFormat indexform =
794      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
795  return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
796}
797
798LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
799                                 const LogicVRegister& src1,
800                                 const LogicVRegister& src2, int index) {
801  SimVRegister temp;
802  VectorFormat indexform =
803      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
804  return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
805}
806
807LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
808                                const LogicVRegister& src1,
809                                const LogicVRegister& src2, int index) {
810  SimVRegister temp;
811  VectorFormat indexform =
812      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
813  return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
814}
815
816LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
817                                 const LogicVRegister& src1,
818                                 const LogicVRegister& src2, int index) {
819  SimVRegister temp;
820  VectorFormat indexform =
821      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
822  return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
823}
824
825LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
826                                const LogicVRegister& src1,
827                                const LogicVRegister& src2, int index) {
828  SimVRegister temp;
829  VectorFormat indexform =
830      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
831  return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
832}
833
834LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
835                                 const LogicVRegister& src1,
836                                 const LogicVRegister& src2, int index) {
837  SimVRegister temp;
838  VectorFormat indexform =
839      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
840  return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
841}
842
843LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
844                                  const LogicVRegister& src1,
845                                  const LogicVRegister& src2, int index) {
846  SimVRegister temp;
847  VectorFormat indexform =
848      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
849  return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
850}
851
852LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
853                                   const LogicVRegister& src1,
854                                   const LogicVRegister& src2, int index) {
855  SimVRegister temp;
856  VectorFormat indexform =
857      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
858  return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
859}
860
861LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
862                                  const LogicVRegister& src1,
863                                  const LogicVRegister& src2, int index) {
864  SimVRegister temp;
865  VectorFormat indexform =
866      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
867  return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
868}
869
870LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
871                                   const LogicVRegister& src1,
872                                   const LogicVRegister& src2, int index) {
873  SimVRegister temp;
874  VectorFormat indexform =
875      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
876  return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
877}
878
879LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
880                                  const LogicVRegister& src1,
881                                  const LogicVRegister& src2, int index) {
882  SimVRegister temp;
883  VectorFormat indexform =
884      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
885  return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
886}
887
888LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
889                                   const LogicVRegister& src1,
890                                   const LogicVRegister& src2, int index) {
891  SimVRegister temp;
892  VectorFormat indexform =
893      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
894  return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
895}
896
897LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
898                                  const LogicVRegister& src1,
899                                  const LogicVRegister& src2, int index) {
900  SimVRegister temp;
901  VectorFormat indexform = VectorFormatFillQ(vform);
902  return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
903}
904
905LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
906                                   const LogicVRegister& src1,
907                                   const LogicVRegister& src2, int index) {
908  SimVRegister temp;
909  VectorFormat indexform = VectorFormatFillQ(vform);
910  return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
911}
912
913uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
914  uint16_t result = 0;
915  uint16_t extended_op2 = op2;
916  for (int i = 0; i < 8; ++i) {
917    if ((op1 >> i) & 1) {
918      result = result ^ (extended_op2 << i);
919    }
920  }
921  return result;
922}
923
924LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
925                               const LogicVRegister& src1,
926                               const LogicVRegister& src2) {
927  dst.ClearForWrite(vform);
928  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
929    dst.SetUint(vform, i,
930                PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
931  }
932  return dst;
933}
934
935LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
936                                const LogicVRegister& src1,
937                                const LogicVRegister& src2) {
938  VectorFormat vform_src = VectorFormatHalfWidth(vform);
939  dst.ClearForWrite(vform);
940  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
941    dst.SetUint(
942        vform, i,
943        PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
944  }
945  return dst;
946}
947
948LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
949                                 const LogicVRegister& src1,
950                                 const LogicVRegister& src2) {
951  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
952  dst.ClearForWrite(vform);
953  int lane_count = LaneCountFromFormat(vform);
954  for (int i = 0; i < lane_count; i++) {
955    dst.SetUint(vform, i,
956                PolynomialMult(src1.Uint(vform_src, lane_count + i),
957                               src2.Uint(vform_src, lane_count + i)));
958  }
959  return dst;
960}
961
962LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
963                              const LogicVRegister& src1,
964                              const LogicVRegister& src2) {
965  int lane_size = LaneSizeInBitsFromFormat(vform);
966  dst.ClearForWrite(vform);
967  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
968    // Test for unsigned saturation.
969    uint64_t ua = src1.UintLeftJustified(vform, i);
970    uint64_t ub = src2.UintLeftJustified(vform, i);
971    uint64_t ur = ua - ub;
972    if (ub > ua) {
973      dst.SetUnsignedSat(i, false);
974    }
975
976    // Test for signed saturation.
977    bool pos_a = (ua >> 63) == 0;
978    bool pos_b = (ub >> 63) == 0;
979    bool pos_r = (ur >> 63) == 0;
980    // If the signs of the operands are different, and the sign of the first
981    // operand doesn't match the result, there was an overflow.
982    if ((pos_a != pos_b) && (pos_a != pos_r)) {
983      dst.SetSignedSat(i, pos_a);
984    }
985
986    dst.SetInt(vform, i, ur >> (64 - lane_size));
987  }
988  return dst;
989}
990
991LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
992                               const LogicVRegister& src1,
993                               const LogicVRegister& src2) {
994  dst.ClearForWrite(vform);
995  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
996    dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
997  }
998  return dst;
999}
1000
1001LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1002                              const LogicVRegister& src1,
1003                              const LogicVRegister& src2) {
1004  dst.ClearForWrite(vform);
1005  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1006    dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1007  }
1008  return dst;
1009}
1010
1011LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1012                              const LogicVRegister& src1,
1013                              const LogicVRegister& src2) {
1014  dst.ClearForWrite(vform);
1015  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1016    dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1017  }
1018  return dst;
1019}
1020
1021LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1022                              const LogicVRegister& src1,
1023                              const LogicVRegister& src2) {
1024  dst.ClearForWrite(vform);
1025  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1026    dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1027  }
1028  return dst;
1029}
1030
1031LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1032                              const LogicVRegister& src1,
1033                              const LogicVRegister& src2) {
1034  dst.ClearForWrite(vform);
1035  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1036    dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1037  }
1038  return dst;
1039}
1040
1041LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1042                              const LogicVRegister& src, uint64_t imm) {
1043  uint64_t result[16];
1044  int laneCount = LaneCountFromFormat(vform);
1045  for (int i = 0; i < laneCount; ++i) {
1046    result[i] = src.Uint(vform, i) & ~imm;
1047  }
1048  dst.SetUintArray(vform, result);
1049  return dst;
1050}
1051
1052LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1053                              const LogicVRegister& src1,
1054                              const LogicVRegister& src2) {
1055  dst.ClearForWrite(vform);
1056  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1057    uint64_t operand1 = dst.Uint(vform, i);
1058    uint64_t operand2 = ~src2.Uint(vform, i);
1059    uint64_t operand3 = src1.Uint(vform, i);
1060    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1061    dst.SetUint(vform, i, result);
1062  }
1063  return dst;
1064}
1065
1066LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1067                              const LogicVRegister& src1,
1068                              const LogicVRegister& src2) {
1069  dst.ClearForWrite(vform);
1070  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1071    uint64_t operand1 = dst.Uint(vform, i);
1072    uint64_t operand2 = src2.Uint(vform, i);
1073    uint64_t operand3 = src1.Uint(vform, i);
1074    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1075    dst.SetUint(vform, i, result);
1076  }
1077  return dst;
1078}
1079
1080LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1081                              const LogicVRegister& src1,
1082                              const LogicVRegister& src2) {
1083  dst.ClearForWrite(vform);
1084  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1085    uint64_t operand1 = src2.Uint(vform, i);
1086    uint64_t operand2 = dst.Uint(vform, i);
1087    uint64_t operand3 = src1.Uint(vform, i);
1088    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1089    dst.SetUint(vform, i, result);
1090  }
1091  return dst;
1092}
1093
1094LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1095                                  const LogicVRegister& src1,
1096                                  const LogicVRegister& src2, bool max) {
1097  dst.ClearForWrite(vform);
1098  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1099    int64_t src1_val = src1.Int(vform, i);
1100    int64_t src2_val = src2.Int(vform, i);
1101    int64_t dst_val;
1102    if (max) {
1103      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1104    } else {
1105      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1106    }
1107    dst.SetInt(vform, i, dst_val);
1108  }
1109  return dst;
1110}
1111
1112LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1113                               const LogicVRegister& src1,
1114                               const LogicVRegister& src2) {
1115  return SMinMax(vform, dst, src1, src2, true);
1116}
1117
1118LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1119                               const LogicVRegister& src1,
1120                               const LogicVRegister& src2) {
1121  return SMinMax(vform, dst, src1, src2, false);
1122}
1123
1124LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1125                                   const LogicVRegister& src1,
1126                                   const LogicVRegister& src2, bool max) {
1127  int lanes = LaneCountFromFormat(vform);
1128  int64_t result[kMaxLanesPerVector];
1129  const LogicVRegister* src = &src1;
1130  for (int j = 0; j < 2; j++) {
1131    for (int i = 0; i < lanes; i += 2) {
1132      int64_t first_val = src->Int(vform, i);
1133      int64_t second_val = src->Int(vform, i + 1);
1134      int64_t dst_val;
1135      if (max) {
1136        dst_val = (first_val > second_val) ? first_val : second_val;
1137      } else {
1138        dst_val = (first_val < second_val) ? first_val : second_val;
1139      }
1140      DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1141      result[(i >> 1) + (j * lanes / 2)] = dst_val;
1142    }
1143    src = &src2;
1144  }
1145  dst.SetIntArray(vform, result);
1146  return dst;
1147}
1148
1149LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1150                                const LogicVRegister& src1,
1151                                const LogicVRegister& src2) {
1152  return SMinMaxP(vform, dst, src1, src2, true);
1153}
1154
1155LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1156                                const LogicVRegister& src1,
1157                                const LogicVRegister& src2) {
1158  return SMinMaxP(vform, dst, src1, src2, false);
1159}
1160
1161LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1162                               const LogicVRegister& src) {
1163  DCHECK_EQ(vform, kFormatD);
1164
1165  uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1166  dst.ClearForWrite(vform);
1167  dst.SetUint(vform, 0, dst_val);
1168  return dst;
1169}
1170
1171LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1172                               const LogicVRegister& src) {
1173  VectorFormat vform_dst =
1174      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1175
1176  int64_t dst_val = 0;
1177  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1178    dst_val += src.Int(vform, i);
1179  }
1180
1181  dst.ClearForWrite(vform_dst);
1182  dst.SetInt(vform_dst, 0, dst_val);
1183  return dst;
1184}
1185
1186LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1187                                 const LogicVRegister& src) {
1188  VectorFormat vform_dst =
1189      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1190
1191  int64_t dst_val = 0;
1192  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1193    dst_val += src.Int(vform, i);
1194  }
1195
1196  dst.ClearForWrite(vform_dst);
1197  dst.SetInt(vform_dst, 0, dst_val);
1198  return dst;
1199}
1200
1201LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1202                                 const LogicVRegister& src) {
1203  VectorFormat vform_dst =
1204      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1205
1206  uint64_t dst_val = 0;
1207  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1208    dst_val += src.Uint(vform, i);
1209  }
1210
1211  dst.ClearForWrite(vform_dst);
1212  dst.SetUint(vform_dst, 0, dst_val);
1213  return dst;
1214}
1215
1216LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1217                                   const LogicVRegister& src, bool max) {
1218  int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1219  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1220    int64_t src_val = src.Int(vform, i);
1221    if (max) {
1222      dst_val = (src_val > dst_val) ? src_val : dst_val;
1223    } else {
1224      dst_val = (src_val < dst_val) ? src_val : dst_val;
1225    }
1226  }
1227  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1228  dst.SetInt(vform, 0, dst_val);
1229  return dst;
1230}
1231
1232LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1233                                const LogicVRegister& src) {
1234  SMinMaxV(vform, dst, src, true);
1235  return dst;
1236}
1237
1238LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1239                                const LogicVRegister& src) {
1240  SMinMaxV(vform, dst, src, false);
1241  return dst;
1242}
1243
1244LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1245                                  const LogicVRegister& src1,
1246                                  const LogicVRegister& src2, bool max) {
1247  dst.ClearForWrite(vform);
1248  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1249    uint64_t src1_val = src1.Uint(vform, i);
1250    uint64_t src2_val = src2.Uint(vform, i);
1251    uint64_t dst_val;
1252    if (max) {
1253      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1254    } else {
1255      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1256    }
1257    dst.SetUint(vform, i, dst_val);
1258  }
1259  return dst;
1260}
1261
1262LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1263                               const LogicVRegister& src1,
1264                               const LogicVRegister& src2) {
1265  return UMinMax(vform, dst, src1, src2, true);
1266}
1267
1268LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1269                               const LogicVRegister& src1,
1270                               const LogicVRegister& src2) {
1271  return UMinMax(vform, dst, src1, src2, false);
1272}
1273
1274LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1275                                   const LogicVRegister& src1,
1276                                   const LogicVRegister& src2, bool max) {
1277  int lanes = LaneCountFromFormat(vform);
1278  uint64_t result[kMaxLanesPerVector];
1279  const LogicVRegister* src = &src1;
1280  for (int j = 0; j < 2; j++) {
1281    for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1282      uint64_t first_val = src->Uint(vform, i);
1283      uint64_t second_val = src->Uint(vform, i + 1);
1284      uint64_t dst_val;
1285      if (max) {
1286        dst_val = (first_val > second_val) ? first_val : second_val;
1287      } else {
1288        dst_val = (first_val < second_val) ? first_val : second_val;
1289      }
1290      DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1291      result[(i >> 1) + (j * lanes / 2)] = dst_val;
1292    }
1293    src = &src2;
1294  }
1295  dst.SetUintArray(vform, result);
1296  return dst;
1297}
1298
1299LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1300                                const LogicVRegister& src1,
1301                                const LogicVRegister& src2) {
1302  return UMinMaxP(vform, dst, src1, src2, true);
1303}
1304
1305LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1306                                const LogicVRegister& src1,
1307                                const LogicVRegister& src2) {
1308  return UMinMaxP(vform, dst, src1, src2, false);
1309}
1310
1311LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1312                                   const LogicVRegister& src, bool max) {
1313  uint64_t dst_val = max ? 0 : UINT64_MAX;
1314  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1315    uint64_t src_val = src.Uint(vform, i);
1316    if (max) {
1317      dst_val = (src_val > dst_val) ? src_val : dst_val;
1318    } else {
1319      dst_val = (src_val < dst_val) ? src_val : dst_val;
1320    }
1321  }
1322  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1323  dst.SetUint(vform, 0, dst_val);
1324  return dst;
1325}
1326
1327LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1328                                const LogicVRegister& src) {
1329  UMinMaxV(vform, dst, src, true);
1330  return dst;
1331}
1332
1333LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1334                                const LogicVRegister& src) {
1335  UMinMaxV(vform, dst, src, false);
1336  return dst;
1337}
1338
1339LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1340                              const LogicVRegister& src, int shift) {
1341  DCHECK_GE(shift, 0);
1342  SimVRegister temp;
1343  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1344  return ushl(vform, dst, src, shiftreg);
1345}
1346
1347LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1348                                const LogicVRegister& src, int shift) {
1349  DCHECK_GE(shift, 0);
1350  SimVRegister temp1, temp2;
1351  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1352  LogicVRegister extendedreg = sxtl(vform, temp2, src);
1353  return sshl(vform, dst, extendedreg, shiftreg);
1354}
1355
1356LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1357                                 const LogicVRegister& src, int shift) {
1358  DCHECK_GE(shift, 0);
1359  SimVRegister temp1, temp2;
1360  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1361  LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1362  return sshl(vform, dst, extendedreg, shiftreg);
1363}
1364
1365LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1366                               const LogicVRegister& src) {
1367  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1368  return sshll(vform, dst, src, shift);
1369}
1370
1371LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1372                                const LogicVRegister& src) {
1373  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1374  return sshll2(vform, dst, src, shift);
1375}
1376
1377LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1378                                const LogicVRegister& src, int shift) {
1379  DCHECK_GE(shift, 0);
1380  SimVRegister temp1, temp2;
1381  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1382  LogicVRegister extendedreg = uxtl(vform, temp2, src);
1383  return ushl(vform, dst, extendedreg, shiftreg);
1384}
1385
1386LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1387                                 const LogicVRegister& src, int shift) {
1388  DCHECK_GE(shift, 0);
1389  SimVRegister temp1, temp2;
1390  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1391  LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1392  return ushl(vform, dst, extendedreg, shiftreg);
1393}
1394
1395LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1396                              const LogicVRegister& src, int shift) {
1397  dst.ClearForWrite(vform);
1398  int laneCount = LaneCountFromFormat(vform);
1399  for (int i = 0; i < laneCount; i++) {
1400    uint64_t src_lane = src.Uint(vform, i);
1401    uint64_t dst_lane = dst.Uint(vform, i);
1402    uint64_t shifted = src_lane << shift;
1403    uint64_t mask = MaxUintFromFormat(vform) << shift;
1404    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1405  }
1406  return dst;
1407}
1408
1409LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1410                                const LogicVRegister& src, int shift) {
1411  DCHECK_GE(shift, 0);
1412  SimVRegister temp;
1413  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1414  return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1415}
1416
1417LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1418                                const LogicVRegister& src, int shift) {
1419  DCHECK_GE(shift, 0);
1420  SimVRegister temp;
1421  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1422  return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1423}
1424
1425LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1426                                 const LogicVRegister& src, int shift) {
1427  DCHECK_GE(shift, 0);
1428  SimVRegister temp;
1429  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1430  return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1431}
1432
1433LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1434                              const LogicVRegister& src, int shift) {
1435  dst.ClearForWrite(vform);
1436  int laneCount = LaneCountFromFormat(vform);
1437  DCHECK((shift > 0) &&
1438         (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1439  for (int i = 0; i < laneCount; i++) {
1440    uint64_t src_lane = src.Uint(vform, i);
1441    uint64_t dst_lane = dst.Uint(vform, i);
1442    uint64_t shifted;
1443    uint64_t mask;
1444    if (shift == 64) {
1445      shifted = 0;
1446      mask = 0;
1447    } else {
1448      shifted = src_lane >> shift;
1449      mask = MaxUintFromFormat(vform) >> shift;
1450    }
1451    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1452  }
1453  return dst;
1454}
1455
1456LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1457                               const LogicVRegister& src, int shift) {
1458  DCHECK_GE(shift, 0);
1459  SimVRegister temp;
1460  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1461  return ushl(vform, dst, src, shiftreg);
1462}
1463
1464LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1465                               const LogicVRegister& src, int shift) {
1466  DCHECK_GE(shift, 0);
1467  SimVRegister temp;
1468  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1469  return sshl(vform, dst, src, shiftreg);
1470}
1471
1472LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1473                               const LogicVRegister& src, int shift) {
1474  SimVRegister temp;
1475  LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1476  return add(vform, dst, dst, shifted_reg);
1477}
1478
1479LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1480                               const LogicVRegister& src, int shift) {
1481  SimVRegister temp;
1482  LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1483  return add(vform, dst, dst, shifted_reg);
1484}
1485
1486LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1487                                const LogicVRegister& src, int shift) {
1488  SimVRegister temp;
1489  LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1490  return add(vform, dst, dst, shifted_reg);
1491}
1492
1493LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1494                                const LogicVRegister& src, int shift) {
1495  SimVRegister temp;
1496  LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1497  return add(vform, dst, dst, shifted_reg);
1498}
1499
1500LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1501                              const LogicVRegister& src) {
1502  uint64_t result[16];
1503  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1504  int laneCount = LaneCountFromFormat(vform);
1505  for (int i = 0; i < laneCount; i++) {
1506    result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1507  }
1508
1509  dst.SetUintArray(vform, result);
1510  return dst;
1511}
1512
1513LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1514                              const LogicVRegister& src) {
1515  uint64_t result[16];
1516  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1517  int laneCount = LaneCountFromFormat(vform);
1518  for (int i = 0; i < laneCount; i++) {
1519    result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1520  }
1521
1522  dst.SetUintArray(vform, result);
1523  return dst;
1524}
1525
1526LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1527                              const LogicVRegister& src) {
1528  uint64_t result[16];
1529  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1530  int laneCount = LaneCountFromFormat(vform);
1531  for (int i = 0; i < laneCount; i++) {
1532    uint64_t value = src.Uint(vform, i);
1533    result[i] = 0;
1534    for (int j = 0; j < laneSizeInBits; j++) {
1535      result[i] += (value & 1);
1536      value >>= 1;
1537    }
1538  }
1539
1540  dst.SetUintArray(vform, result);
1541  return dst;
1542}
1543
1544LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
1545                               const LogicVRegister& src1,
1546                               const LogicVRegister& src2) {
1547  dst.ClearForWrite(vform);
1548  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1549    int8_t shift_val = src2.Int(vform, i);
1550    int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1551
1552    // Set signed saturation state.
1553    if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
1554        (lj_src_val != 0)) {
1555      dst.SetSignedSat(i, lj_src_val >= 0);
1556    }
1557
1558    // Set unsigned saturation state.
1559    if (lj_src_val < 0) {
1560      dst.SetUnsignedSat(i, false);
1561    } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
1562               (lj_src_val != 0)) {
1563      dst.SetUnsignedSat(i, true);
1564    }
1565
1566    int64_t src_val = src1.Int(vform, i);
1567    bool src_is_negative = src_val < 0;
1568    if (shift_val > 63) {
1569      dst.SetInt(vform, i, 0);
1570    } else if (shift_val < -63) {
1571      dst.SetRounding(i, src_is_negative);
1572      dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1573    } else {
1574      // Use unsigned types for shifts, as behaviour is undefined for signed
1575      // lhs.
1576      uint64_t usrc_val = static_cast<uint64_t>(src_val);
1577
1578      if (shift_val < 0) {
1579        // Convert to right shift.
1580        shift_val = -shift_val;
1581
1582        // Set rounding state by testing most-significant bit shifted out.
1583        // Rounding only needed on right shifts.
1584        if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1585          dst.SetRounding(i, true);
1586        }
1587
1588        usrc_val >>= shift_val;
1589
1590        if (src_is_negative) {
1591          // Simulate sign-extension.
1592          usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1593        }
1594      } else {
1595        usrc_val <<= shift_val;
1596      }
1597      dst.SetUint(vform, i, usrc_val);
1598    }
1599  }
1600  return dst;
1601}
1602
1603LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
1604                               const LogicVRegister& src1,
1605                               const LogicVRegister& src2) {
1606  dst.ClearForWrite(vform);
1607  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1608    int8_t shift_val = src2.Int(vform, i);
1609    uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1610
1611    // Set saturation state.
1612    if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
1613      dst.SetUnsignedSat(i, true);
1614    }
1615
1616    uint64_t src_val = src1.Uint(vform, i);
1617    if ((shift_val > 63) || (shift_val < -64)) {
1618      dst.SetUint(vform, i, 0);
1619    } else {
1620      if (shift_val < 0) {
1621        // Set rounding state. Rounding only needed on right shifts.
1622        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1623          dst.SetRounding(i, true);
1624        }
1625
1626        if (shift_val == -64) {
1627          src_val = 0;
1628        } else {
1629          src_val >>= -shift_val;
1630        }
1631      } else {
1632        src_val <<= shift_val;
1633      }
1634      dst.SetUint(vform, i, src_val);
1635    }
1636  }
1637  return dst;
1638}
1639
1640LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1641                              const LogicVRegister& src) {
1642  dst.ClearForWrite(vform);
1643  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1644    // Test for signed saturation.
1645    int64_t sa = src.Int(vform, i);
1646    if (sa == MinIntFromFormat(vform)) {
1647      dst.SetSignedSat(i, true);
1648    }
1649    dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1650  }
1651  return dst;
1652}
1653
1654LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1655                                 const LogicVRegister& src) {
1656  dst.ClearForWrite(vform);
1657  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1658    int64_t sa = dst.IntLeftJustified(vform, i);
1659    uint64_t ub = src.UintLeftJustified(vform, i);
1660    uint64_t ur = sa + ub;
1661
1662    int64_t sr = bit_cast<int64_t>(ur);
1663    if (sr < sa) {  // Test for signed positive saturation.
1664      dst.SetInt(vform, i, MaxIntFromFormat(vform));
1665    } else {
1666      dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1667    }
1668  }
1669  return dst;
1670}
1671
1672LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1673                                 const LogicVRegister& src) {
1674  dst.ClearForWrite(vform);
1675  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1676    uint64_t ua = dst.UintLeftJustified(vform, i);
1677    int64_t sb = src.IntLeftJustified(vform, i);
1678    uint64_t ur = ua + sb;
1679
1680    if ((sb > 0) && (ur <= ua)) {
1681      dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
1682    } else if ((sb < 0) && (ur >= ua)) {
1683      dst.SetUint(vform, i, 0);  // Negative saturation.
1684    } else {
1685      dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1686    }
1687  }
1688  return dst;
1689}
1690
1691LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1692                              const LogicVRegister& src) {
1693  dst.ClearForWrite(vform);
1694  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1695    // Test for signed saturation.
1696    int64_t sa = src.Int(vform, i);
1697    if (sa == MinIntFromFormat(vform)) {
1698      dst.SetSignedSat(i, true);
1699    }
1700    if (sa < 0) {
1701      dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1702    } else {
1703      dst.SetInt(vform, i, sa);
1704    }
1705  }
1706  return dst;
1707}
1708
1709LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
1710                                        LogicVRegister dst, bool dstIsSigned,
1711                                        const LogicVRegister& src,
1712                                        bool srcIsSigned) {
1713  bool upperhalf = false;
1714  VectorFormat srcform = kFormatUndefined;
1715  int64_t ssrc[8];
1716  uint64_t usrc[8];
1717
1718  switch (dstform) {
1719    case kFormat8B:
1720      upperhalf = false;
1721      srcform = kFormat8H;
1722      break;
1723    case kFormat16B:
1724      upperhalf = true;
1725      srcform = kFormat8H;
1726      break;
1727    case kFormat4H:
1728      upperhalf = false;
1729      srcform = kFormat4S;
1730      break;
1731    case kFormat8H:
1732      upperhalf = true;
1733      srcform = kFormat4S;
1734      break;
1735    case kFormat2S:
1736      upperhalf = false;
1737      srcform = kFormat2D;
1738      break;
1739    case kFormat4S:
1740      upperhalf = true;
1741      srcform = kFormat2D;
1742      break;
1743    case kFormatB:
1744      upperhalf = false;
1745      srcform = kFormatH;
1746      break;
1747    case kFormatH:
1748      upperhalf = false;
1749      srcform = kFormatS;
1750      break;
1751    case kFormatS:
1752      upperhalf = false;
1753      srcform = kFormatD;
1754      break;
1755    default:
1756      UNIMPLEMENTED();
1757  }
1758
1759  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1760    ssrc[i] = src.Int(srcform, i);
1761    usrc[i] = src.Uint(srcform, i);
1762  }
1763
1764  int offset;
1765  if (upperhalf) {
1766    offset = LaneCountFromFormat(dstform) / 2;
1767  } else {
1768    offset = 0;
1769    dst.ClearForWrite(dstform);
1770  }
1771
1772  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1773    // Test for signed saturation
1774    if (ssrc[i] > MaxIntFromFormat(dstform)) {
1775      dst.SetSignedSat(offset + i, true);
1776    } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1777      dst.SetSignedSat(offset + i, false);
1778    }
1779
1780    // Test for unsigned saturation
1781    if (srcIsSigned) {
1782      if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1783        dst.SetUnsignedSat(offset + i, true);
1784      } else if (ssrc[i] < 0) {
1785        dst.SetUnsignedSat(offset + i, false);
1786      }
1787    } else {
1788      if (usrc[i] > MaxUintFromFormat(dstform)) {
1789        dst.SetUnsignedSat(offset + i, true);
1790      }
1791    }
1792
1793    int64_t result;
1794    if (srcIsSigned) {
1795      result = ssrc[i] & MaxUintFromFormat(dstform);
1796    } else {
1797      result = usrc[i] & MaxUintFromFormat(dstform);
1798    }
1799
1800    if (dstIsSigned) {
1801      dst.SetInt(dstform, offset + i, result);
1802    } else {
1803      dst.SetUint(dstform, offset + i, result);
1804    }
1805  }
1806  return dst;
1807}
1808
1809LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1810                              const LogicVRegister& src) {
1811  return ExtractNarrow(vform, dst, true, src, true);
1812}
1813
1814LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1815                                const LogicVRegister& src) {
1816  return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1817}
1818
1819LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1820                                 const LogicVRegister& src) {
1821  return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1822}
1823
1824LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1825                                const LogicVRegister& src) {
1826  return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1827}
1828
1829LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1830                                  const LogicVRegister& src1,
1831                                  const LogicVRegister& src2, bool issigned) {
1832  dst.ClearForWrite(vform);
1833  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1834    if (issigned) {
1835      int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1836      sr = sr > 0 ? sr : -sr;
1837      dst.SetInt(vform, i, sr);
1838    } else {
1839      int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1840      sr = sr > 0 ? sr : -sr;
1841      dst.SetUint(vform, i, sr);
1842    }
1843  }
1844  return dst;
1845}
1846
1847LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1848                               const LogicVRegister& src1,
1849                               const LogicVRegister& src2) {
1850  SimVRegister temp;
1851  dst.ClearForWrite(vform);
1852  AbsDiff(vform, temp, src1, src2, true);
1853  add(vform, dst, dst, temp);
1854  return dst;
1855}
1856
1857LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1858                               const LogicVRegister& src1,
1859                               const LogicVRegister& src2) {
1860  SimVRegister temp;
1861  dst.ClearForWrite(vform);
1862  AbsDiff(vform, temp, src1, src2, false);
1863  add(vform, dst, dst, temp);
1864  return dst;
1865}
1866
1867LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1868                               const LogicVRegister& src) {
1869  dst.ClearForWrite(vform);
1870  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1871    dst.SetUint(vform, i, ~src.Uint(vform, i));
1872  }
1873  return dst;
1874}
1875
1876LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1877                               const LogicVRegister& src) {
1878  uint64_t result[16];
1879  int laneCount = LaneCountFromFormat(vform);
1880  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1881  uint64_t reversed_value;
1882  uint64_t value;
1883  for (int i = 0; i < laneCount; i++) {
1884    value = src.Uint(vform, i);
1885    reversed_value = 0;
1886    for (int j = 0; j < laneSizeInBits; j++) {
1887      reversed_value = (reversed_value << 1) | (value & 1);
1888      value >>= 1;
1889    }
1890    result[i] = reversed_value;
1891  }
1892
1893  dst.SetUintArray(vform, result);
1894  return dst;
1895}
1896
1897LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1898                              const LogicVRegister& src, int revSize) {
1899  uint64_t result[16];
1900  int laneCount = LaneCountFromFormat(vform);
1901  int laneSize = LaneSizeInBytesFromFormat(vform);
1902  int lanesPerLoop = revSize / laneSize;
1903  for (int i = 0; i < laneCount; i += lanesPerLoop) {
1904    for (int j = 0; j < lanesPerLoop; j++) {
1905      result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1906    }
1907  }
1908  dst.SetUintArray(vform, result);
1909  return dst;
1910}
1911
1912LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1913                                const LogicVRegister& src) {
1914  return rev(vform, dst, src, 2);
1915}
1916
1917LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1918                                const LogicVRegister& src) {
1919  return rev(vform, dst, src, 4);
1920}
1921
1922LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1923                                const LogicVRegister& src) {
1924  return rev(vform, dst, src, 8);
1925}
1926
1927LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1928                                const LogicVRegister& src, bool is_signed,
1929                                bool do_accumulate) {
1930  VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1931  DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1932  DCHECK_LE(LaneCountFromFormat(vform), 8);
1933
1934  uint64_t result[8];
1935  int lane_count = LaneCountFromFormat(vform);
1936  for (int i = 0; i < lane_count; i++) {
1937    if (is_signed) {
1938      result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1939                                        src.Int(vformsrc, 2 * i + 1));
1940    } else {
1941      result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1942    }
1943  }
1944
1945  dst.ClearForWrite(vform);
1946  for (int i = 0; i < lane_count; ++i) {
1947    if (do_accumulate) {
1948      result[i] += dst.Uint(vform, i);
1949    }
1950    dst.SetUint(vform, i, result[i]);
1951  }
1952
1953  return dst;
1954}
1955
1956LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1957                                 const LogicVRegister& src) {
1958  return addlp(vform, dst, src, true, false);
1959}
1960
1961LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1962                                 const LogicVRegister& src) {
1963  return addlp(vform, dst, src, false, false);
1964}
1965
1966LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1967                                 const LogicVRegister& src) {
1968  return addlp(vform, dst, src, true, true);
1969}
1970
1971LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1972                                 const LogicVRegister& src) {
1973  return addlp(vform, dst, src, false, true);
1974}
1975
1976LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1977                              const LogicVRegister& src1,
1978                              const LogicVRegister& src2, int index) {
1979  uint8_t result[16];
1980  int laneCount = LaneCountFromFormat(vform);
1981  for (int i = 0; i < laneCount - index; ++i) {
1982    result[i] = src1.Uint(vform, i + index);
1983  }
1984  for (int i = 0; i < index; ++i) {
1985    result[laneCount - index + i] = src2.Uint(vform, i);
1986  }
1987  dst.ClearForWrite(vform);
1988  for (int i = 0; i < laneCount; ++i) {
1989    dst.SetUint(vform, i, result[i]);
1990  }
1991  return dst;
1992}
1993
1994LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
1995                                      const LogicVRegister& src,
1996                                      int src_index) {
1997  int laneCount = LaneCountFromFormat(vform);
1998  uint64_t value = src.Uint(vform, src_index);
1999  dst.ClearForWrite(vform);
2000  for (int i = 0; i < laneCount; ++i) {
2001    dst.SetUint(vform, i, value);
2002  }
2003  return dst;
2004}
2005
2006LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2007                                        uint64_t imm) {
2008  int laneCount = LaneCountFromFormat(vform);
2009  uint64_t value = imm & MaxUintFromFormat(vform);
2010  dst.ClearForWrite(vform);
2011  for (int i = 0; i < laneCount; ++i) {
2012    dst.SetUint(vform, i, value);
2013  }
2014  return dst;
2015}
2016
2017LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2018                                      int dst_index, const LogicVRegister& src,
2019                                      int src_index) {
2020  dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2021  return dst;
2022}
2023
2024LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2025                                        int dst_index, uint64_t imm) {
2026  uint64_t value = imm & MaxUintFromFormat(vform);
2027  dst.SetUint(vform, dst_index, value);
2028  return dst;
2029}
2030
2031LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2032                               uint64_t imm) {
2033  int laneCount = LaneCountFromFormat(vform);
2034  dst.ClearForWrite(vform);
2035  for (int i = 0; i < laneCount; ++i) {
2036    dst.SetUint(vform, i, imm);
2037  }
2038  return dst;
2039}
2040
2041LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2042                               uint64_t imm) {
2043  int laneCount = LaneCountFromFormat(vform);
2044  dst.ClearForWrite(vform);
2045  for (int i = 0; i < laneCount; ++i) {
2046    dst.SetUint(vform, i, ~imm);
2047  }
2048  return dst;
2049}
2050
2051LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2052                              const LogicVRegister& src, uint64_t imm) {
2053  uint64_t result[16];
2054  int laneCount = LaneCountFromFormat(vform);
2055  for (int i = 0; i < laneCount; ++i) {
2056    result[i] = src.Uint(vform, i) | imm;
2057  }
2058  dst.SetUintArray(vform, result);
2059  return dst;
2060}
2061
2062LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2063                               const LogicVRegister& src) {
2064  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2065
2066  dst.ClearForWrite(vform);
2067  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2068    dst.SetUint(vform, i, src.Uint(vform_half, i));
2069  }
2070  return dst;
2071}
2072
2073LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2074                               const LogicVRegister& src) {
2075  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2076
2077  dst.ClearForWrite(vform);
2078  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2079    dst.SetInt(vform, i, src.Int(vform_half, i));
2080  }
2081  return dst;
2082}
2083
2084LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2085                                const LogicVRegister& src) {
2086  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2087  int lane_count = LaneCountFromFormat(vform);
2088
2089  dst.ClearForWrite(vform);
2090  for (int i = 0; i < lane_count; i++) {
2091    dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2092  }
2093  return dst;
2094}
2095
2096LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2097                                const LogicVRegister& src) {
2098  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2099  int lane_count = LaneCountFromFormat(vform);
2100
2101  dst.ClearForWrite(vform);
2102  for (int i = 0; i < lane_count; i++) {
2103    dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2104  }
2105  return dst;
2106}
2107
2108LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2109                               const LogicVRegister& src, int shift) {
2110  SimVRegister temp;
2111  VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2112  VectorFormat vform_dst = vform;
2113  LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2114  return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2115}
2116
2117LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2118                                const LogicVRegister& src, int shift) {
2119  SimVRegister temp;
2120  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2121  VectorFormat vformdst = vform;
2122  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2123  return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2124}
2125
2126LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2127                                const LogicVRegister& src, int shift) {
2128  SimVRegister temp;
2129  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2130  VectorFormat vformdst = vform;
2131  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2132  return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2133}
2134
2135LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2136                                 const LogicVRegister& src, int shift) {
2137  SimVRegister temp;
2138  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2139  VectorFormat vformdst = vform;
2140  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2141  return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2142}
2143
2144LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2145                                const LogicVRegister& ind,
2146                                bool zero_out_of_bounds,
2147                                const LogicVRegister* tab1,
2148                                const LogicVRegister* tab2,
2149                                const LogicVRegister* tab3,
2150                                const LogicVRegister* tab4) {
2151  DCHECK_NOT_NULL(tab1);
2152  const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2153  uint64_t result[kMaxLanesPerVector];
2154  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2155    result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2156  }
2157  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2158    uint64_t j = ind.Uint(vform, i);
2159    int tab_idx = static_cast<int>(j >> 4);
2160    int j_idx = static_cast<int>(j & 15);
2161    if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) {
2162      result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2163    }
2164  }
2165  dst.SetUintArray(vform, result);
2166  return dst;
2167}
2168
2169LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2170                              const LogicVRegister& tab,
2171                              const LogicVRegister& ind) {
2172  return Table(vform, dst, ind, true, &tab);
2173}
2174
2175LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2176                              const LogicVRegister& tab,
2177                              const LogicVRegister& tab2,
2178                              const LogicVRegister& ind) {
2179  return Table(vform, dst, ind, true, &tab, &tab2);
2180}
2181
2182LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2183                              const LogicVRegister& tab,
2184                              const LogicVRegister& tab2,
2185                              const LogicVRegister& tab3,
2186                              const LogicVRegister& ind) {
2187  return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2188}
2189
2190LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2191                              const LogicVRegister& tab,
2192                              const LogicVRegister& tab2,
2193                              const LogicVRegister& tab3,
2194                              const LogicVRegister& tab4,
2195                              const LogicVRegister& ind) {
2196  return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2197}
2198
2199LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2200                              const LogicVRegister& tab,
2201                              const LogicVRegister& ind) {
2202  return Table(vform, dst, ind, false, &tab);
2203}
2204
2205LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2206                              const LogicVRegister& tab,
2207                              const LogicVRegister& tab2,
2208                              const LogicVRegister& ind) {
2209  return Table(vform, dst, ind, false, &tab, &tab2);
2210}
2211
2212LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2213                              const LogicVRegister& tab,
2214                              const LogicVRegister& tab2,
2215                              const LogicVRegister& tab3,
2216                              const LogicVRegister& ind) {
2217  return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2218}
2219
2220LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2221                              const LogicVRegister& tab,
2222                              const LogicVRegister& tab2,
2223                              const LogicVRegister& tab3,
2224                              const LogicVRegister& tab4,
2225                              const LogicVRegister& ind) {
2226  return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2227}
2228
2229LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2230                                 const LogicVRegister& src, int shift) {
2231  return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2232}
2233
2234LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2235                                  const LogicVRegister& src, int shift) {
2236  return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2237}
2238
2239LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2240                                  const LogicVRegister& src, int shift) {
2241  return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2242}
2243
2244LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2245                                   const LogicVRegister& src, int shift) {
2246  return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2247}
2248
2249LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2250                                 const LogicVRegister& src, int shift) {
2251  SimVRegister temp;
2252  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2253  VectorFormat vformdst = vform;
2254  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2255  return sqxtn(vformdst, dst, shifted_src);
2256}
2257
2258LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2259                                  const LogicVRegister& src, int shift) {
2260  SimVRegister temp;
2261  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2262  VectorFormat vformdst = vform;
2263  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2264  return sqxtn(vformdst, dst, shifted_src);
2265}
2266
2267LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2268                                  const LogicVRegister& src, int shift) {
2269  SimVRegister temp;
2270  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2271  VectorFormat vformdst = vform;
2272  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2273  return sqxtn(vformdst, dst, shifted_src);
2274}
2275
2276LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2277                                   const LogicVRegister& src, int shift) {
2278  SimVRegister temp;
2279  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2280  VectorFormat vformdst = vform;
2281  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2282  return sqxtn(vformdst, dst, shifted_src);
2283}
2284
2285LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2286                                  const LogicVRegister& src, int shift) {
2287  SimVRegister temp;
2288  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2289  VectorFormat vformdst = vform;
2290  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2291  return sqxtun(vformdst, dst, shifted_src);
2292}
2293
2294LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2295                                   const LogicVRegister& src, int shift) {
2296  SimVRegister temp;
2297  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2298  VectorFormat vformdst = vform;
2299  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2300  return sqxtun(vformdst, dst, shifted_src);
2301}
2302
2303LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2304                                   const LogicVRegister& src, int shift) {
2305  SimVRegister temp;
2306  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2307  VectorFormat vformdst = vform;
2308  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2309  return sqxtun(vformdst, dst, shifted_src);
2310}
2311
2312LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2313                                    const LogicVRegister& src, int shift) {
2314  SimVRegister temp;
2315  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2316  VectorFormat vformdst = vform;
2317  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2318  return sqxtun(vformdst, dst, shifted_src);
2319}
2320
2321LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2322                                const LogicVRegister& src1,
2323                                const LogicVRegister& src2) {
2324  SimVRegister temp1, temp2;
2325  uxtl(vform, temp1, src1);
2326  uxtl(vform, temp2, src2);
2327  add(vform, dst, temp1, temp2);
2328  return dst;
2329}
2330
2331LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2332                                 const LogicVRegister& src1,
2333                                 const LogicVRegister& src2) {
2334  SimVRegister temp1, temp2;
2335  uxtl2(vform, temp1, src1);
2336  uxtl2(vform, temp2, src2);
2337  add(vform, dst, temp1, temp2);
2338  return dst;
2339}
2340
2341LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2342                                const LogicVRegister& src1,
2343                                const LogicVRegister& src2) {
2344  SimVRegister temp;
2345  uxtl(vform, temp, src2);
2346  add(vform, dst, src1, temp);
2347  return dst;
2348}
2349
2350LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2351                                 const LogicVRegister& src1,
2352                                 const LogicVRegister& src2) {
2353  SimVRegister temp;
2354  uxtl2(vform, temp, src2);
2355  add(vform, dst, src1, temp);
2356  return dst;
2357}
2358
2359LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2360                                const LogicVRegister& src1,
2361                                const LogicVRegister& src2) {
2362  SimVRegister temp1, temp2;
2363  sxtl(vform, temp1, src1);
2364  sxtl(vform, temp2, src2);
2365  add(vform, dst, temp1, temp2);
2366  return dst;
2367}
2368
2369LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2370                                 const LogicVRegister& src1,
2371                                 const LogicVRegister& src2) {
2372  SimVRegister temp1, temp2;
2373  sxtl2(vform, temp1, src1);
2374  sxtl2(vform, temp2, src2);
2375  add(vform, dst, temp1, temp2);
2376  return dst;
2377}
2378
2379LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2380                                const LogicVRegister& src1,
2381                                const LogicVRegister& src2) {
2382  SimVRegister temp;
2383  sxtl(vform, temp, src2);
2384  add(vform, dst, src1, temp);
2385  return dst;
2386}
2387
2388LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2389                                 const LogicVRegister& src1,
2390                                 const LogicVRegister& src2) {
2391  SimVRegister temp;
2392  sxtl2(vform, temp, src2);
2393  add(vform, dst, src1, temp);
2394  return dst;
2395}
2396
2397LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2398                                const LogicVRegister& src1,
2399                                const LogicVRegister& src2) {
2400  SimVRegister temp1, temp2;
2401  uxtl(vform, temp1, src1);
2402  uxtl(vform, temp2, src2);
2403  sub(vform, dst, temp1, temp2);
2404  return dst;
2405}
2406
2407LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2408                                 const LogicVRegister& src1,
2409                                 const LogicVRegister& src2) {
2410  SimVRegister temp1, temp2;
2411  uxtl2(vform, temp1, src1);
2412  uxtl2(vform, temp2, src2);
2413  sub(vform, dst, temp1, temp2);
2414  return dst;
2415}
2416
2417LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2418                                const LogicVRegister& src1,
2419                                const LogicVRegister& src2) {
2420  SimVRegister temp;
2421  uxtl(vform, temp, src2);
2422  sub(vform, dst, src1, temp);
2423  return dst;
2424}
2425
2426LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2427                                 const LogicVRegister& src1,
2428                                 const LogicVRegister& src2) {
2429  SimVRegister temp;
2430  uxtl2(vform, temp, src2);
2431  sub(vform, dst, src1, temp);
2432  return dst;
2433}
2434
2435LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2436                                const LogicVRegister& src1,
2437                                const LogicVRegister& src2) {
2438  SimVRegister temp1, temp2;
2439  sxtl(vform, temp1, src1);
2440  sxtl(vform, temp2, src2);
2441  sub(vform, dst, temp1, temp2);
2442  return dst;
2443}
2444
2445LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2446                                 const LogicVRegister& src1,
2447                                 const LogicVRegister& src2) {
2448  SimVRegister temp1, temp2;
2449  sxtl2(vform, temp1, src1);
2450  sxtl2(vform, temp2, src2);
2451  sub(vform, dst, temp1, temp2);
2452  return dst;
2453}
2454
2455LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2456                                const LogicVRegister& src1,
2457                                const LogicVRegister& src2) {
2458  SimVRegister temp;
2459  sxtl(vform, temp, src2);
2460  sub(vform, dst, src1, temp);
2461  return dst;
2462}
2463
2464LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2465                                 const LogicVRegister& src1,
2466                                 const LogicVRegister& src2) {
2467  SimVRegister temp;
2468  sxtl2(vform, temp, src2);
2469  sub(vform, dst, src1, temp);
2470  return dst;
2471}
2472
2473LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2474                                const LogicVRegister& src1,
2475                                const LogicVRegister& src2) {
2476  SimVRegister temp1, temp2;
2477  uxtl(vform, temp1, src1);
2478  uxtl(vform, temp2, src2);
2479  uaba(vform, dst, temp1, temp2);
2480  return dst;
2481}
2482
2483LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2484                                 const LogicVRegister& src1,
2485                                 const LogicVRegister& src2) {
2486  SimVRegister temp1, temp2;
2487  uxtl2(vform, temp1, src1);
2488  uxtl2(vform, temp2, src2);
2489  uaba(vform, dst, temp1, temp2);
2490  return dst;
2491}
2492
2493LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2494                                const LogicVRegister& src1,
2495                                const LogicVRegister& src2) {
2496  SimVRegister temp1, temp2;
2497  sxtl(vform, temp1, src1);
2498  sxtl(vform, temp2, src2);
2499  saba(vform, dst, temp1, temp2);
2500  return dst;
2501}
2502
2503LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2504                                 const LogicVRegister& src1,
2505                                 const LogicVRegister& src2) {
2506  SimVRegister temp1, temp2;
2507  sxtl2(vform, temp1, src1);
2508  sxtl2(vform, temp2, src2);
2509  saba(vform, dst, temp1, temp2);
2510  return dst;
2511}
2512
2513LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2514                                const LogicVRegister& src1,
2515                                const LogicVRegister& src2) {
2516  SimVRegister temp1, temp2;
2517  uxtl(vform, temp1, src1);
2518  uxtl(vform, temp2, src2);
2519  AbsDiff(vform, dst, temp1, temp2, false);
2520  return dst;
2521}
2522
2523LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2524                                 const LogicVRegister& src1,
2525                                 const LogicVRegister& src2) {
2526  SimVRegister temp1, temp2;
2527  uxtl2(vform, temp1, src1);
2528  uxtl2(vform, temp2, src2);
2529  AbsDiff(vform, dst, temp1, temp2, false);
2530  return dst;
2531}
2532
2533LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2534                                const LogicVRegister& src1,
2535                                const LogicVRegister& src2) {
2536  SimVRegister temp1, temp2;
2537  sxtl(vform, temp1, src1);
2538  sxtl(vform, temp2, src2);
2539  AbsDiff(vform, dst, temp1, temp2, true);
2540  return dst;
2541}
2542
2543LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2544                                 const LogicVRegister& src1,
2545                                 const LogicVRegister& src2) {
2546  SimVRegister temp1, temp2;
2547  sxtl2(vform, temp1, src1);
2548  sxtl2(vform, temp2, src2);
2549  AbsDiff(vform, dst, temp1, temp2, true);
2550  return dst;
2551}
2552
2553LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2554                                const LogicVRegister& src1,
2555                                const LogicVRegister& src2) {
2556  SimVRegister temp1, temp2;
2557  uxtl(vform, temp1, src1);
2558  uxtl(vform, temp2, src2);
2559  mul(vform, dst, temp1, temp2);
2560  return dst;
2561}
2562
2563LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2564                                 const LogicVRegister& src1,
2565                                 const LogicVRegister& src2) {
2566  SimVRegister temp1, temp2;
2567  uxtl2(vform, temp1, src1);
2568  uxtl2(vform, temp2, src2);
2569  mul(vform, dst, temp1, temp2);
2570  return dst;
2571}
2572
2573LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2574                                const LogicVRegister& src1,
2575                                const LogicVRegister& src2) {
2576  SimVRegister temp1, temp2;
2577  sxtl(vform, temp1, src1);
2578  sxtl(vform, temp2, src2);
2579  mul(vform, dst, temp1, temp2);
2580  return dst;
2581}
2582
2583LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2584                                 const LogicVRegister& src1,
2585                                 const LogicVRegister& src2) {
2586  SimVRegister temp1, temp2;
2587  sxtl2(vform, temp1, src1);
2588  sxtl2(vform, temp2, src2);
2589  mul(vform, dst, temp1, temp2);
2590  return dst;
2591}
2592
2593LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2594                                const LogicVRegister& src1,
2595                                const LogicVRegister& src2) {
2596  SimVRegister temp1, temp2;
2597  uxtl(vform, temp1, src1);
2598  uxtl(vform, temp2, src2);
2599  mls(vform, dst, temp1, temp2);
2600  return dst;
2601}
2602
2603LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2604                                 const LogicVRegister& src1,
2605                                 const LogicVRegister& src2) {
2606  SimVRegister temp1, temp2;
2607  uxtl2(vform, temp1, src1);
2608  uxtl2(vform, temp2, src2);
2609  mls(vform, dst, temp1, temp2);
2610  return dst;
2611}
2612
2613LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2614                                const LogicVRegister& src1,
2615                                const LogicVRegister& src2) {
2616  SimVRegister temp1, temp2;
2617  sxtl(vform, temp1, src1);
2618  sxtl(vform, temp2, src2);
2619  mls(vform, dst, temp1, temp2);
2620  return dst;
2621}
2622
2623LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2624                                 const LogicVRegister& src1,
2625                                 const LogicVRegister& src2) {
2626  SimVRegister temp1, temp2;
2627  sxtl2(vform, temp1, src1);
2628  sxtl2(vform, temp2, src2);
2629  mls(vform, dst, temp1, temp2);
2630  return dst;
2631}
2632
2633LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2634                                const LogicVRegister& src1,
2635                                const LogicVRegister& src2) {
2636  SimVRegister temp1, temp2;
2637  uxtl(vform, temp1, src1);
2638  uxtl(vform, temp2, src2);
2639  mla(vform, dst, temp1, temp2);
2640  return dst;
2641}
2642
2643LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2644                                 const LogicVRegister& src1,
2645                                 const LogicVRegister& src2) {
2646  SimVRegister temp1, temp2;
2647  uxtl2(vform, temp1, src1);
2648  uxtl2(vform, temp2, src2);
2649  mla(vform, dst, temp1, temp2);
2650  return dst;
2651}
2652
2653LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2654                                const LogicVRegister& src1,
2655                                const LogicVRegister& src2) {
2656  SimVRegister temp1, temp2;
2657  sxtl(vform, temp1, src1);
2658  sxtl(vform, temp2, src2);
2659  mla(vform, dst, temp1, temp2);
2660  return dst;
2661}
2662
2663LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2664                                 const LogicVRegister& src1,
2665                                 const LogicVRegister& src2) {
2666  SimVRegister temp1, temp2;
2667  sxtl2(vform, temp1, src1);
2668  sxtl2(vform, temp2, src2);
2669  mla(vform, dst, temp1, temp2);
2670  return dst;
2671}
2672
2673LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2674                                  const LogicVRegister& src1,
2675                                  const LogicVRegister& src2) {
2676  SimVRegister temp;
2677  LogicVRegister product = sqdmull(vform, temp, src1, src2);
2678  return add(vform, dst, dst, product).SignedSaturate(vform);
2679}
2680
2681LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2682                                   const LogicVRegister& src1,
2683                                   const LogicVRegister& src2) {
2684  SimVRegister temp;
2685  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2686  return add(vform, dst, dst, product).SignedSaturate(vform);
2687}
2688
2689LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2690                                  const LogicVRegister& src1,
2691                                  const LogicVRegister& src2) {
2692  SimVRegister temp;
2693  LogicVRegister product = sqdmull(vform, temp, src1, src2);
2694  return sub(vform, dst, dst, product).SignedSaturate(vform);
2695}
2696
2697LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2698                                   const LogicVRegister& src1,
2699                                   const LogicVRegister& src2) {
2700  SimVRegister temp;
2701  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2702  return sub(vform, dst, dst, product).SignedSaturate(vform);
2703}
2704
2705LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2706                                  const LogicVRegister& src1,
2707                                  const LogicVRegister& src2) {
2708  SimVRegister temp;
2709  LogicVRegister product = smull(vform, temp, src1, src2);
2710  return add(vform, dst, product, product).SignedSaturate(vform);
2711}
2712
2713LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2714                                   const LogicVRegister& src1,
2715                                   const LogicVRegister& src2) {
2716  SimVRegister temp;
2717  LogicVRegister product = smull2(vform, temp, src1, src2);
2718  return add(vform, dst, product, product).SignedSaturate(vform);
2719}
2720
2721LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2722                                   const LogicVRegister& src1,
2723                                   const LogicVRegister& src2, bool round) {
2724  // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2725  // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2726  // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2727
2728  int esize = LaneSizeInBitsFromFormat(vform);
2729  int round_const = round ? (1 << (esize - 2)) : 0;
2730  int64_t product;
2731
2732  dst.ClearForWrite(vform);
2733  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2734    product = src1.Int(vform, i) * src2.Int(vform, i);
2735    product += round_const;
2736    product = product >> (esize - 1);
2737
2738    if (product > MaxIntFromFormat(vform)) {
2739      product = MaxIntFromFormat(vform);
2740    } else if (product < MinIntFromFormat(vform)) {
2741      product = MinIntFromFormat(vform);
2742    }
2743    dst.SetInt(vform, i, product);
2744  }
2745  return dst;
2746}
2747
2748LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2749                                  const LogicVRegister& src1,
2750                                  const LogicVRegister& src2) {
2751  return sqrdmulh(vform, dst, src1, src2, false);
2752}
2753
2754LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2755                                const LogicVRegister& src1,
2756                                const LogicVRegister& src2) {
2757  SimVRegister temp;
2758  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2759  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2760  return dst;
2761}
2762
2763LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2764                                 const LogicVRegister& src1,
2765                                 const LogicVRegister& src2) {
2766  SimVRegister temp;
2767  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2768  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2769  return dst;
2770}
2771
2772LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2773                                 const LogicVRegister& src1,
2774                                 const LogicVRegister& src2) {
2775  SimVRegister temp;
2776  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2777  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2778  return dst;
2779}
2780
2781LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2782                                  const LogicVRegister& src1,
2783                                  const LogicVRegister& src2) {
2784  SimVRegister temp;
2785  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2786  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2787  return dst;
2788}
2789
2790LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2791                                const LogicVRegister& src1,
2792                                const LogicVRegister& src2) {
2793  SimVRegister temp;
2794  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2795  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2796  return dst;
2797}
2798
2799LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2800                                 const LogicVRegister& src1,
2801                                 const LogicVRegister& src2) {
2802  SimVRegister temp;
2803  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2804  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2805  return dst;
2806}
2807
2808LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2809                                 const LogicVRegister& src1,
2810                                 const LogicVRegister& src2) {
2811  SimVRegister temp;
2812  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2813  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2814  return dst;
2815}
2816
2817LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2818                                  const LogicVRegister& src1,
2819                                  const LogicVRegister& src2) {
2820  SimVRegister temp;
2821  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2822  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2823  return dst;
2824}
2825
2826LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2827                               const LogicVRegister& src1,
2828                               const LogicVRegister& src2) {
2829  uint64_t result[16];
2830  int laneCount = LaneCountFromFormat(vform);
2831  int pairs = laneCount / 2;
2832  for (int i = 0; i < pairs; ++i) {
2833    result[2 * i] = src1.Uint(vform, 2 * i);
2834    result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2835  }
2836
2837  dst.SetUintArray(vform, result);
2838  return dst;
2839}
2840
2841LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2842                               const LogicVRegister& src1,
2843                               const LogicVRegister& src2) {
2844  uint64_t result[16];
2845  int laneCount = LaneCountFromFormat(vform);
2846  int pairs = laneCount / 2;
2847  for (int i = 0; i < pairs; ++i) {
2848    result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2849    result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2850  }
2851
2852  dst.SetUintArray(vform, result);
2853  return dst;
2854}
2855
2856LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2857                               const LogicVRegister& src1,
2858                               const LogicVRegister& src2) {
2859  uint64_t result[16];
2860  int laneCount = LaneCountFromFormat(vform);
2861  int pairs = laneCount / 2;
2862  for (int i = 0; i < pairs; ++i) {
2863    result[2 * i] = src1.Uint(vform, i);
2864    result[(2 * i) + 1] = src2.Uint(vform, i);
2865  }
2866
2867  dst.SetUintArray(vform, result);
2868  return dst;
2869}
2870
2871LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2872                               const LogicVRegister& src1,
2873                               const LogicVRegister& src2) {
2874  uint64_t result[16];
2875  int laneCount = LaneCountFromFormat(vform);
2876  int pairs = laneCount / 2;
2877  for (int i = 0; i < pairs; ++i) {
2878    result[2 * i] = src1.Uint(vform, pairs + i);
2879    result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2880  }
2881
2882  dst.SetUintArray(vform, result);
2883  return dst;
2884}
2885
2886LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2887                               const LogicVRegister& src1,
2888                               const LogicVRegister& src2) {
2889  uint64_t result[32];
2890  int laneCount = LaneCountFromFormat(vform);
2891  for (int i = 0; i < laneCount; ++i) {
2892    result[i] = src1.Uint(vform, i);
2893    result[laneCount + i] = src2.Uint(vform, i);
2894  }
2895
2896  dst.ClearForWrite(vform);
2897  for (int i = 0; i < laneCount; ++i) {
2898    dst.SetUint(vform, i, result[2 * i]);
2899  }
2900  return dst;
2901}
2902
2903LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2904                               const LogicVRegister& src1,
2905                               const LogicVRegister& src2) {
2906  uint64_t result[32];
2907  int laneCount = LaneCountFromFormat(vform);
2908  for (int i = 0; i < laneCount; ++i) {
2909    result[i] = src1.Uint(vform, i);
2910    result[laneCount + i] = src2.Uint(vform, i);
2911  }
2912
2913  dst.ClearForWrite(vform);
2914  for (int i = 0; i < laneCount; ++i) {
2915    dst.SetUint(vform, i, result[(2 * i) + 1]);
2916  }
2917  return dst;
2918}
2919
2920template <typename T>
2921T Simulator::FPAdd(T op1, T op2) {
2922  T result = FPProcessNaNs(op1, op2);
2923  if (std::isnan(result)) return result;
2924
2925  if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2926    // inf + -inf returns the default NaN.
2927    FPProcessException();
2928    return FPDefaultNaN<T>();
2929  } else {
2930    // Other cases should be handled by standard arithmetic.
2931    return op1 + op2;
2932  }
2933}
2934
2935template <typename T>
2936T Simulator::FPSub(T op1, T op2) {
2937  // NaNs should be handled elsewhere.
2938  DCHECK(!std::isnan(op1) && !std::isnan(op2));
2939
2940  if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2941    // inf - inf returns the default NaN.
2942    FPProcessException();
2943    return FPDefaultNaN<T>();
2944  } else {
2945    // Other cases should be handled by standard arithmetic.
2946    return op1 - op2;
2947  }
2948}
2949
2950template <typename T>
2951T Simulator::FPMul(T op1, T op2) {
2952  // NaNs should be handled elsewhere.
2953  DCHECK(!std::isnan(op1) && !std::isnan(op2));
2954
2955  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2956    // inf * 0.0 returns the default NaN.
2957    FPProcessException();
2958    return FPDefaultNaN<T>();
2959  } else {
2960    // Other cases should be handled by standard arithmetic.
2961    return op1 * op2;
2962  }
2963}
2964
2965template <typename T>
2966T Simulator::FPMulx(T op1, T op2) {
2967  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2968    // inf * 0.0 returns +/-2.0.
2969    T two = 2.0;
2970    return std::copysign(1.0, op1) * std::copysign(1.0, op2) * two;
2971  }
2972  return FPMul(op1, op2);
2973}
2974
2975template <typename T>
2976T Simulator::FPMulAdd(T a, T op1, T op2) {
2977  T result = FPProcessNaNs3(a, op1, op2);
2978
2979  T sign_a = std::copysign(1.0, a);
2980  T sign_prod = std::copysign(1.0, op1) * std::copysign(1.0, op2);
2981  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2982  bool operation_generates_nan =
2983      (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
2984      (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
2985      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
2986
2987  if (std::isnan(result)) {
2988    // Generated NaNs override quiet NaNs propagated from a.
2989    if (operation_generates_nan && IsQuietNaN(a)) {
2990      FPProcessException();
2991      return FPDefaultNaN<T>();
2992    } else {
2993      return result;
2994    }
2995  }
2996
2997  // If the operation would produce a NaN, return the default NaN.
2998  if (operation_generates_nan) {
2999    FPProcessException();
3000    return FPDefaultNaN<T>();
3001  }
3002
3003  // Work around broken fma implementations for exact zero results: The sign of
3004  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3005  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3006    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3007  }
3008
3009  result = FusedMultiplyAdd(op1, op2, a);
3010  DCHECK(!std::isnan(result));
3011
3012  // Work around broken fma implementations for rounded zero results: If a is
3013  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3014  if ((a == 0.0) && (result == 0.0)) {
3015    return std::copysign(0.0, sign_prod);
3016  }
3017
3018  return result;
3019}
3020
3021template <typename T>
3022T Simulator::FPDiv(T op1, T op2) {
3023  // NaNs should be handled elsewhere.
3024  DCHECK(!std::isnan(op1) && !std::isnan(op2));
3025
3026  if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3027    // inf / inf and 0.0 / 0.0 return the default NaN.
3028    FPProcessException();
3029    return FPDefaultNaN<T>();
3030  } else {
3031    if (op2 == 0.0) {
3032      FPProcessException();
3033      if (!std::isnan(op1)) {
3034        double op1_sign = std::copysign(1.0, op1);
3035        double op2_sign = std::copysign(1.0, op2);
3036        return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3037      }
3038    }
3039
3040    // Other cases should be handled by standard arithmetic.
3041    return op1 / op2;
3042  }
3043}
3044
3045template <typename T>
3046T Simulator::FPSqrt(T op) {
3047  if (std::isnan(op)) {
3048    return FPProcessNaN(op);
3049  } else if (op < 0.0) {
3050    FPProcessException();
3051    return FPDefaultNaN<T>();
3052  } else {
3053    return std::sqrt(op);
3054  }
3055}
3056
3057template <typename T>
3058T Simulator::FPMax(T a, T b) {
3059  T result = FPProcessNaNs(a, b);
3060  if (std::isnan(result)) return result;
3061
3062  if ((a == 0.0) && (b == 0.0) &&
3063      (std::copysign(1.0, a) != std::copysign(1.0, b))) {
3064    // a and b are zero, and the sign differs: return +0.0.
3065    return 0.0;
3066  } else {
3067    return (a > b) ? a : b;
3068  }
3069}
3070
3071template <typename T>
3072T Simulator::FPMaxNM(T a, T b) {
3073  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3074    a = kFP64NegativeInfinity;
3075  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3076    b = kFP64NegativeInfinity;
3077  }
3078
3079  T result = FPProcessNaNs(a, b);
3080  return std::isnan(result) ? result : FPMax(a, b);
3081}
3082
3083template <typename T>
3084T Simulator::FPMin(T a, T b) {
3085  T result = FPProcessNaNs(a, b);
3086  if (std::isnan(result)) return result;
3087
3088  if ((a == 0.0) && (b == 0.0) &&
3089      (std::copysign(1.0, a) != std::copysign(1.0, b))) {
3090    // a and b are zero, and the sign differs: return -0.0.
3091    return -0.0;
3092  } else {
3093    return (a < b) ? a : b;
3094  }
3095}
3096
3097template <typename T>
3098T Simulator::FPMinNM(T a, T b) {
3099  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3100    a = kFP64PositiveInfinity;
3101  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3102    b = kFP64PositiveInfinity;
3103  }
3104
3105  T result = FPProcessNaNs(a, b);
3106  return std::isnan(result) ? result : FPMin(a, b);
3107}
3108
3109template <typename T>
3110T Simulator::FPRecipStepFused(T op1, T op2) {
3111  const T two = 2.0;
3112  if ((std::isinf(op1) && (op2 == 0.0)) ||
3113      ((op1 == 0.0) && (std::isinf(op2)))) {
3114    return two;
3115  } else if (std::isinf(op1) || std::isinf(op2)) {
3116    // Return +inf if signs match, otherwise -inf.
3117    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3118                                          : kFP64NegativeInfinity;
3119  } else {
3120    return FusedMultiplyAdd(op1, op2, two);
3121  }
3122}
3123
3124template <typename T>
3125T Simulator::FPRSqrtStepFused(T op1, T op2) {
3126  const T one_point_five = 1.5;
3127  const T two = 2.0;
3128
3129  if ((std::isinf(op1) && (op2 == 0.0)) ||
3130      ((op1 == 0.0) && (std::isinf(op2)))) {
3131    return one_point_five;
3132  } else if (std::isinf(op1) || std::isinf(op2)) {
3133    // Return +inf if signs match, otherwise -inf.
3134    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3135                                          : kFP64NegativeInfinity;
3136  } else {
3137    // The multiply-add-halve operation must be fully fused, so avoid interim
3138    // rounding by checking which operand can be losslessly divided by two
3139    // before doing the multiply-add.
3140    if (std::isnormal(op1 / two)) {
3141      return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3142    } else if (std::isnormal(op2 / two)) {
3143      return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3144    } else {
3145      // Neither operand is normal after halving: the result is dominated by
3146      // the addition term, so just return that.
3147      return one_point_five;
3148    }
3149  }
3150}
3151
3152double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3153  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3154      (value == kFP64NegativeInfinity)) {
3155    return value;
3156  } else if (std::isnan(value)) {
3157    return FPProcessNaN(value);
3158  }
3159
3160  double int_result = std::floor(value);
3161  double error = value - int_result;
3162  switch (round_mode) {
3163    case FPTieAway: {
3164      // Take care of correctly handling the range ]-0.5, -0.0], which must
3165      // yield -0.0.
3166      if ((-0.5 < value) && (value < 0.0)) {
3167        int_result = -0.0;
3168
3169      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3170        // If the error is greater than 0.5, or is equal to 0.5 and the integer
3171        // result is positive, round up.
3172        int_result++;
3173      }
3174      break;
3175    }
3176    case FPTieEven: {
3177      // Take care of correctly handling the range [-0.5, -0.0], which must
3178      // yield -0.0.
3179      if ((-0.5 <= value) && (value < 0.0)) {
3180        int_result = -0.0;
3181
3182        // If the error is greater than 0.5, or is equal to 0.5 and the integer
3183        // result is odd, round up.
3184      } else if ((error > 0.5) ||
3185                 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3186        int_result++;
3187      }
3188      break;
3189    }
3190    case FPZero: {
3191      // If value>0 then we take floor(value)
3192      // otherwise, ceil(value).
3193      if (value < 0) {
3194        int_result = ceil(value);
3195      }
3196      break;
3197    }
3198    case FPNegativeInfinity: {
3199      // We always use floor(value).
3200      break;
3201    }
3202    case FPPositiveInfinity: {
3203      // Take care of correctly handling the range ]-1.0, -0.0], which must
3204      // yield -0.0.
3205      if ((-1.0 < value) && (value < 0.0)) {
3206        int_result = -0.0;
3207
3208        // If the error is non-zero, round up.
3209      } else if (error > 0.0) {
3210        int_result++;
3211      }
3212      break;
3213    }
3214    default:
3215      UNIMPLEMENTED();
3216  }
3217  return int_result;
3218}
3219
3220int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3221  value = FPRoundInt(value, rmode);
3222  return base::saturated_cast<int32_t>(value);
3223}
3224
3225int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3226  value = FPRoundInt(value, rmode);
3227  return base::saturated_cast<int64_t>(value);
3228}
3229
3230uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3231  value = FPRoundInt(value, rmode);
3232  return base::saturated_cast<uint32_t>(value);
3233}
3234
3235uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3236  value = FPRoundInt(value, rmode);
3237  return base::saturated_cast<uint64_t>(value);
3238}
3239
// For each (FN, OP, PROCNAN) entry expanded by NEON_FP3SAME_LIST, this macro
// defines two Simulator members named FN:
//  - A template FN<T> that clears dst for writing and then, for each lane of
//    vform, reads the lane of src1 and src2 as T, computes a result and stores
//    it in the same lane of dst. When PROCNAN is true the operands first go
//    through FPProcessNaNs and OP is only applied if that result is not a NaN;
//    otherwise OP is applied directly.
//  - A non-template FN that calls FN<float> when the lane size in bytes equals
//    kSRegSize, and FN<double> otherwise (after DCHECKing that the lane size
//    equals kDRegSize).
// The macro is undefined again right after the list has been expanded.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                      \
  template <typename T>                                                \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    dst.ClearForWrite(vform);                                          \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {             \
      T op1 = src1.Float<T>(i);                                        \
      T op2 = src2.Float<T>(i);                                        \
      T result;                                                        \
      if (PROCNAN) {                                                   \
        result = FPProcessNaNs(op1, op2);                              \
        if (!std::isnan(result)) {                                     \
          result = OP(op1, op2);                                       \
        }                                                              \
      } else {                                                         \
        result = OP(op1, op2);                                         \
      }                                                                \
      dst.SetFloat(i, result);                                         \
    }                                                                  \
    return dst;                                                        \
  }                                                                    \
                                                                       \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {               \
      FN<float>(vform, dst, src1, src2);                               \
    } else {                                                           \
      DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);          \
      FN<double>(vform, dst, src1, src2);                              \
    }                                                                  \
    return dst;                                                        \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3276
3277LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3278                                const LogicVRegister& src1,
3279                                const LogicVRegister& src2) {
3280  SimVRegister temp;
3281  LogicVRegister product = fmul(vform, temp, src1, src2);
3282  return fneg(vform, dst, product);
3283}
3284
3285template <typename T>
3286LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3287                                 const LogicVRegister& src1,
3288                                 const LogicVRegister& src2) {
3289  dst.ClearForWrite(vform);
3290  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3291    T op1 = -src1.Float<T>(i);
3292    T op2 = src2.Float<T>(i);
3293    T result = FPProcessNaNs(op1, op2);
3294    dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3295  }
3296  return dst;
3297}
3298
3299LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3300                                 const LogicVRegister& src1,
3301                                 const LogicVRegister& src2) {
3302  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3303    frecps<float>(vform, dst, src1, src2);
3304  } else {
3305    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3306    frecps<double>(vform, dst, src1, src2);
3307  }
3308  return dst;
3309}
3310
3311template <typename T>
3312LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3313                                  const LogicVRegister& src1,
3314                                  const LogicVRegister& src2) {
3315  dst.ClearForWrite(vform);
3316  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3317    T op1 = -src1.Float<T>(i);
3318    T op2 = src2.Float<T>(i);
3319    T result = FPProcessNaNs(op1, op2);
3320    dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3321  }
3322  return dst;
3323}
3324
3325int32_t Simulator::FPToFixedJS(double value) {
3326  // The Z-flag is set when the conversion from double precision floating-point
3327  // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
3328  // outside the bounds of a 32-bit integer, or isn't an exact integer then the
3329  // Z-flag is unset.
3330  int Z = 1;
3331  int32_t result;
3332  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3333      (value == kFP64NegativeInfinity)) {
3334    // +/- zero and infinity all return zero, however -0 and +/- Infinity also
3335    // unset the Z-flag.
3336    result = 0.0;
3337    if ((value != 0.0) || std::signbit(value)) {
3338      Z = 0;
3339    }
3340  } else if (std::isnan(value)) {
3341    // NaN values unset the Z-flag and set the result to 0.
3342    result = 0;
3343    Z = 0;
3344  } else {
3345    // All other values are converted to an integer representation, rounded
3346    // toward zero.
3347    double int_result = std::floor(value);
3348    double error = value - int_result;
3349    if ((error != 0.0) && (int_result < 0.0)) {
3350      int_result++;
3351    }
3352    // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
3353    // write a one-liner with std::round, but the behaviour on ties is incorrect
3354    // for our purposes.
3355    double mod_const = static_cast<double>(UINT64_C(1) << 32);
3356    double mod_error =
3357        (int_result / mod_const) - std::floor(int_result / mod_const);
3358    double constrained;
3359    if (mod_error == 0.5) {
3360      constrained = INT32_MIN;
3361    } else {
3362      constrained = int_result - mod_const * round(int_result / mod_const);
3363    }
3364    DCHECK(std::floor(constrained) == constrained);
3365    DCHECK(constrained >= INT32_MIN);
3366    DCHECK(constrained <= INT32_MAX);
3367    // Take the bottom 32 bits of the result as a 32-bit integer.
3368    result = static_cast<int32_t>(constrained);
3369    if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
3370        (error != 0.0)) {
3371      // If the integer result is out of range or the conversion isn't exact,
3372      // take exception and unset the Z-flag.
3373      FPProcessException();
3374      Z = 0;
3375    }
3376  }
3377  nzcv().SetN(0);
3378  nzcv().SetZ(Z);
3379  nzcv().SetC(0);
3380  nzcv().SetV(0);
3381  return result;
3382}
3383
3384LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3385                                  const LogicVRegister& src1,
3386                                  const LogicVRegister& src2) {
3387  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3388    frsqrts<float>(vform, dst, src1, src2);
3389  } else {
3390    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3391    frsqrts<double>(vform, dst, src1, src2);
3392  }
3393  return dst;
3394}
3395
3396template <typename T>
3397LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3398                               const LogicVRegister& src1,
3399                               const LogicVRegister& src2, Condition cond) {
3400  dst.ClearForWrite(vform);
3401  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3402    bool result = false;
3403    T op1 = src1.Float<T>(i);
3404    T op2 = src2.Float<T>(i);
3405    T nan_result = FPProcessNaNs(op1, op2);
3406    if (!std::isnan(nan_result)) {
3407      switch (cond) {
3408        case eq:
3409          result = (op1 == op2);
3410          break;
3411        case ge:
3412          result = (op1 >= op2);
3413          break;
3414        case gt:
3415          result = (op1 > op2);
3416          break;
3417        case le:
3418          result = (op1 <= op2);
3419          break;
3420        case lt:
3421          result = (op1 < op2);
3422          break;
3423        default:
3424          UNREACHABLE();
3425      }
3426    }
3427    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3428  }
3429  return dst;
3430}
3431
3432LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3433                               const LogicVRegister& src1,
3434                               const LogicVRegister& src2, Condition cond) {
3435  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3436    fcmp<float>(vform, dst, src1, src2, cond);
3437  } else {
3438    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3439    fcmp<double>(vform, dst, src1, src2, cond);
3440  }
3441  return dst;
3442}
3443
3444LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3445                                    const LogicVRegister& src, Condition cond) {
3446  SimVRegister temp;
3447  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3448    LogicVRegister zero_reg =
3449        dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3450    fcmp<float>(vform, dst, src, zero_reg, cond);
3451  } else {
3452    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3453    LogicVRegister zero_reg =
3454        dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3455    fcmp<double>(vform, dst, src, zero_reg, cond);
3456  }
3457  return dst;
3458}
3459
3460LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3461                                  const LogicVRegister& src1,
3462                                  const LogicVRegister& src2, Condition cond) {
3463  SimVRegister temp1, temp2;
3464  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3465    LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3466    LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3467    fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3468  } else {
3469    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3470    LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3471    LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3472    fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3473  }
3474  return dst;
3475}
3476
3477template <typename T>
3478LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3479                               const LogicVRegister& src1,
3480                               const LogicVRegister& src2) {
3481  dst.ClearForWrite(vform);
3482  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3483    T op1 = src1.Float<T>(i);
3484    T op2 = src2.Float<T>(i);
3485    T acc = dst.Float<T>(i);
3486    T result = FPMulAdd(acc, op1, op2);
3487    dst.SetFloat(i, result);
3488  }
3489  return dst;
3490}
3491
3492LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3493                               const LogicVRegister& src1,
3494                               const LogicVRegister& src2) {
3495  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3496    fmla<float>(vform, dst, src1, src2);
3497  } else {
3498    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3499    fmla<double>(vform, dst, src1, src2);
3500  }
3501  return dst;
3502}
3503
3504template <typename T>
3505LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3506                               const LogicVRegister& src1,
3507                               const LogicVRegister& src2) {
3508  dst.ClearForWrite(vform);
3509  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3510    T op1 = -src1.Float<T>(i);
3511    T op2 = src2.Float<T>(i);
3512    T acc = dst.Float<T>(i);
3513    T result = FPMulAdd(acc, op1, op2);
3514    dst.SetFloat(i, result);
3515  }
3516  return dst;
3517}
3518
3519LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3520                               const LogicVRegister& src1,
3521                               const LogicVRegister& src2) {
3522  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3523    fmls<float>(vform, dst, src1, src2);
3524  } else {
3525    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3526    fmls<double>(vform, dst, src1, src2);
3527  }
3528  return dst;
3529}
3530
3531template <typename T>
3532LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3533                               const LogicVRegister& src) {
3534  dst.ClearForWrite(vform);
3535  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3536    T op = src.Float<T>(i);
3537    op = -op;
3538    dst.SetFloat(i, op);
3539  }
3540  return dst;
3541}
3542
3543LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3544                               const LogicVRegister& src) {
3545  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3546    fneg<float>(vform, dst, src);
3547  } else {
3548    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3549    fneg<double>(vform, dst, src);
3550  }
3551  return dst;
3552}
3553
3554template <typename T>
3555LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3556                                const LogicVRegister& src) {
3557  dst.ClearForWrite(vform);
3558  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3559    T op = src.Float<T>(i);
3560    if (std::copysign(1.0, op) < 0.0) {
3561      op = -op;
3562    }
3563    dst.SetFloat(i, op);
3564  }
3565  return dst;
3566}
3567
3568LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3569                                const LogicVRegister& src) {
3570  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3571    fabs_<float>(vform, dst, src);
3572  } else {
3573    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3574    fabs_<double>(vform, dst, src);
3575  }
3576  return dst;
3577}
3578
3579LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3580                               const LogicVRegister& src1,
3581                               const LogicVRegister& src2) {
3582  SimVRegister temp;
3583  fsub(vform, temp, src1, src2);
3584  fabs_(vform, dst, temp);
3585  return dst;
3586}
3587
3588LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3589                                const LogicVRegister& src) {
3590  dst.ClearForWrite(vform);
3591  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3592    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3593      float result = FPSqrt(src.Float<float>(i));
3594      dst.SetFloat(i, result);
3595    }
3596  } else {
3597    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3598    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3599      double result = FPSqrt(src.Float<double>(i));
3600      dst.SetFloat(i, result);
3601    }
3602  }
3603  return dst;
3604}
3605
3606#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                             \
3607  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3608                                const LogicVRegister& src1,             \
3609                                const LogicVRegister& src2) {           \
3610    SimVRegister temp1, temp2;                                          \
3611    uzp1(vform, temp1, src1, src2);                                     \
3612    uzp2(vform, temp2, src1, src2);                                     \
3613    FN(vform, dst, temp1, temp2);                                       \
3614    return dst;                                                         \
3615  }                                                                     \
3616                                                                        \
3617  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3618                                const LogicVRegister& src) {            \
3619    if (vform == kFormatS) {                                            \
3620      float result = OP(src.Float<float>(0), src.Float<float>(1));      \
3621      dst.SetFloat(0, result);                                          \
3622    } else {                                                            \
3623      DCHECK_EQ(vform, kFormatD);                                       \
3624      double result = OP(src.Float<double>(0), src.Float<double>(1));   \
3625      dst.SetFloat(0, result);                                          \
3626    }                                                                   \
3627    dst.ClearForWrite(vform);                                           \
3628    return dst;                                                         \
3629  }
3630NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
3631#undef DEFINE_NEON_FP_PAIR_OP
3632
3633LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3634                                   const LogicVRegister& src, FPMinMaxOp Op) {
3635  DCHECK_EQ(vform, kFormat4S);
3636  USE(vform);
3637  float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3638  float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3639  float result = (this->*Op)(result1, result2);
3640  dst.ClearForWrite(kFormatS);
3641  dst.SetFloat<float>(0, result);
3642  return dst;
3643}
3644
3645LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3646                                const LogicVRegister& src) {
3647  return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3648}
3649
3650LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3651                                const LogicVRegister& src) {
3652  return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3653}
3654
3655LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3656                                  const LogicVRegister& src) {
3657  return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3658}
3659
3660LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3661                                  const LogicVRegister& src) {
3662  return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3663}
3664
3665LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3666                               const LogicVRegister& src1,
3667                               const LogicVRegister& src2, int index) {
3668  dst.ClearForWrite(vform);
3669  SimVRegister temp;
3670  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3671    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3672    fmul<float>(vform, dst, src1, index_reg);
3673  } else {
3674    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3675    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3676    fmul<double>(vform, dst, src1, index_reg);
3677  }
3678  return dst;
3679}
3680
3681LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3682                               const LogicVRegister& src1,
3683                               const LogicVRegister& src2, int index) {
3684  dst.ClearForWrite(vform);
3685  SimVRegister temp;
3686  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3687    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3688    fmla<float>(vform, dst, src1, index_reg);
3689  } else {
3690    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3691    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3692    fmla<double>(vform, dst, src1, index_reg);
3693  }
3694  return dst;
3695}
3696
3697LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3698                               const LogicVRegister& src1,
3699                               const LogicVRegister& src2, int index) {
3700  dst.ClearForWrite(vform);
3701  SimVRegister temp;
3702  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3703    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3704    fmls<float>(vform, dst, src1, index_reg);
3705  } else {
3706    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3707    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3708    fmls<double>(vform, dst, src1, index_reg);
3709  }
3710  return dst;
3711}
3712
3713LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3714                                const LogicVRegister& src1,
3715                                const LogicVRegister& src2, int index) {
3716  dst.ClearForWrite(vform);
3717  SimVRegister temp;
3718  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3719    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3720    fmulx<float>(vform, dst, src1, index_reg);
3721
3722  } else {
3723    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3724    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3725    fmulx<double>(vform, dst, src1, index_reg);
3726  }
3727  return dst;
3728}
3729
3730LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3731                                const LogicVRegister& src,
3732                                FPRounding rounding_mode,
3733                                bool inexact_exception) {
3734  dst.ClearForWrite(vform);
3735  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3736    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3737      float input = src.Float<float>(i);
3738      float rounded = FPRoundInt(input, rounding_mode);
3739      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3740        FPProcessException();
3741      }
3742      dst.SetFloat<float>(i, rounded);
3743    }
3744  } else {
3745    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3746    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3747      double input = src.Float<double>(i);
3748      double rounded = FPRoundInt(input, rounding_mode);
3749      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3750        FPProcessException();
3751      }
3752      dst.SetFloat<double>(i, rounded);
3753    }
3754  }
3755  return dst;
3756}
3757
3758LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3759                                const LogicVRegister& src,
3760                                FPRounding rounding_mode, int fbits) {
3761  dst.ClearForWrite(vform);
3762  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3763    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3764      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3765      dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3766    }
3767  } else {
3768    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3769    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3770      double op = src.Float<double>(i) * std::pow(2.0, fbits);
3771      dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3772    }
3773  }
3774  return dst;
3775}
3776
3777LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3778                                const LogicVRegister& src,
3779                                FPRounding rounding_mode, int fbits) {
3780  dst.ClearForWrite(vform);
3781  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3782    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3783      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3784      dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3785    }
3786  } else {
3787    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3788    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3789      double op = src.Float<double>(i) * std::pow(2.0, fbits);
3790      dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3791    }
3792  }
3793  return dst;
3794}
3795
3796LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3797                                const LogicVRegister& src) {
3798  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3799    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3800      dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3801    }
3802  } else {
3803    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3804    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3805      dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3806    }
3807  }
3808  return dst;
3809}
3810
3811LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3812                                 const LogicVRegister& src) {
3813  int lane_count = LaneCountFromFormat(vform);
3814  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3815    for (int i = 0; i < lane_count; i++) {
3816      dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3817    }
3818  } else {
3819    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3820    for (int i = 0; i < lane_count; i++) {
3821      dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3822    }
3823  }
3824  return dst;
3825}
3826
3827LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3828                                const LogicVRegister& src) {
3829  if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3830    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3831      dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3832    }
3833  } else {
3834    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3835    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3836      dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3837    }
3838  }
3839  dst.ClearForWrite(vform);
3840  return dst;
3841}
3842
3843LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3844                                 const LogicVRegister& src) {
3845  int lane_count = LaneCountFromFormat(vform) / 2;
3846  if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3847    for (int i = lane_count - 1; i >= 0; i--) {
3848      dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3849    }
3850  } else {
3851    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3852    for (int i = lane_count - 1; i >= 0; i--) {
3853      dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3854    }
3855  }
3856  return dst;
3857}
3858
3859LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3860                                 const LogicVRegister& src) {
3861  dst.ClearForWrite(vform);
3862  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3863  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3864    dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3865  }
3866  return dst;
3867}
3868
3869LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3870                                  const LogicVRegister& src) {
3871  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3872  int lane_count = LaneCountFromFormat(vform) / 2;
3873  for (int i = lane_count - 1; i >= 0; i--) {
3874    dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3875  }
3876  return dst;
3877}
3878
3879// Based on reference C function recip_sqrt_estimate from ARM ARM.
3880double Simulator::recip_sqrt_estimate(double a) {
3881  int q0, q1, s;
3882  double r;
3883  if (a < 0.5) {
3884    q0 = static_cast<int>(a * 512.0);
3885    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3886  } else {
3887    q1 = static_cast<int>(a * 256.0);
3888    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3889  }
3890  s = static_cast<int>(256.0 * r + 0.5);
3891  return static_cast<double>(s) / 256.0;
3892}
3893
3894namespace {
3895
3896inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3897  return unsigned_bitextract_64(start_bit, end_bit, val);
3898}
3899
3900}  // anonymous namespace
3901
3902template <typename T>
3903T Simulator::FPRecipSqrtEstimate(T op) {
3904  static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3905                "T must be a float or double");
3906
3907  if (std::isnan(op)) {
3908    return FPProcessNaN(op);
3909  } else if (op == 0.0) {
3910    if (std::copysign(1.0, op) < 0.0) {
3911      return kFP64NegativeInfinity;
3912    } else {
3913      return kFP64PositiveInfinity;
3914    }
3915  } else if (std::copysign(1.0, op) < 0.0) {
3916    FPProcessException();
3917    return FPDefaultNaN<T>();
3918  } else if (std::isinf(op)) {
3919    return 0.0;
3920  } else {
3921    uint64_t fraction;
3922    int32_t exp, result_exp;
3923
3924    if (sizeof(T) == sizeof(float)) {
3925      exp = static_cast<int32_t>(float_exp(op));
3926      fraction = float_mantissa(op);
3927      fraction <<= 29;
3928    } else {
3929      exp = static_cast<int32_t>(double_exp(op));
3930      fraction = double_mantissa(op);
3931    }
3932
3933    if (exp == 0) {
3934      while (Bits(fraction, 51, 51) == 0) {
3935        fraction = Bits(fraction, 50, 0) << 1;
3936        exp -= 1;
3937      }
3938      fraction = Bits(fraction, 50, 0) << 1;
3939    }
3940
3941    double scaled;
3942    if (Bits(exp, 0, 0) == 0) {
3943      scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3944    } else {
3945      scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3946    }
3947
3948    if (sizeof(T) == sizeof(float)) {
3949      result_exp = (380 - exp) / 2;
3950    } else {
3951      result_exp = (3068 - exp) / 2;
3952    }
3953
3954    uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3955
3956    if (sizeof(T) == sizeof(float)) {
3957      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3958      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3959      return float_pack(0, exp_bits, est_bits);
3960    } else {
3961      return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3962    }
3963  }
3964}
3965
3966LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3967                                  const LogicVRegister& src) {
3968  dst.ClearForWrite(vform);
3969  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3970    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3971      float input = src.Float<float>(i);
3972      dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3973    }
3974  } else {
3975    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3976    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3977      double input = src.Float<double>(i);
3978      dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3979    }
3980  }
3981  return dst;
3982}
3983
3984template <typename T>
3985T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
3986  static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3987                "T must be a float or double");
3988  uint32_t sign;
3989
3990  if (sizeof(T) == sizeof(float)) {
3991    sign = float_sign(op);
3992  } else {
3993    sign = double_sign(op);
3994  }
3995
3996  if (std::isnan(op)) {
3997    return FPProcessNaN(op);
3998  } else if (std::isinf(op)) {
3999    return (sign == 1) ? -0.0 : 0.0;
4000  } else if (op == 0.0) {
4001    FPProcessException();  // FPExc_DivideByZero exception.
4002    return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4003  } else if (((sizeof(T) == sizeof(float)) &&
4004              (std::fabs(op) < std::pow(2.0, -128.0))) ||
4005             ((sizeof(T) == sizeof(double)) &&
4006              (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4007    bool overflow_to_inf = false;
4008    switch (rounding) {
4009      case FPTieEven:
4010        overflow_to_inf = true;
4011        break;
4012      case FPPositiveInfinity:
4013        overflow_to_inf = (sign == 0);
4014        break;
4015      case FPNegativeInfinity:
4016        overflow_to_inf = (sign == 1);
4017        break;
4018      case FPZero:
4019        overflow_to_inf = false;
4020        break;
4021      default:
4022        break;
4023    }
4024    FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
4025    if (overflow_to_inf) {
4026      return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4027    } else {
4028      // Return FPMaxNormal(sign).
4029      if (sizeof(T) == sizeof(float)) {
4030        return float_pack(sign, 0xFE, 0x07FFFFF);
4031      } else {
4032        return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl);
4033      }
4034    }
4035  } else {
4036    uint64_t fraction;
4037    int32_t exp, result_exp;
4038    uint32_t sign;
4039
4040    if (sizeof(T) == sizeof(float)) {
4041      sign = float_sign(op);
4042      exp = static_cast<int32_t>(float_exp(op));
4043      fraction = float_mantissa(op);
4044      fraction <<= 29;
4045    } else {
4046      sign = double_sign(op);
4047      exp = static_cast<int32_t>(double_exp(op));
4048      fraction = double_mantissa(op);
4049    }
4050
4051    if (exp == 0) {
4052      if (Bits(fraction, 51, 51) == 0) {
4053        exp -= 1;
4054        fraction = Bits(fraction, 49, 0) << 2;
4055      } else {
4056        fraction = Bits(fraction, 50, 0) << 1;
4057      }
4058    }
4059
4060    double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4061
4062    if (sizeof(T) == sizeof(float)) {
4063      result_exp = 253 - exp;
4064    } else {
4065      result_exp = 2045 - exp;
4066    }
4067
4068    double estimate = recip_estimate(scaled);
4069
4070    fraction = double_mantissa(estimate);
4071    if (result_exp == 0) {
4072      fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4073    } else if (result_exp == -1) {
4074      fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4075      result_exp = 0;
4076    }
4077    if (sizeof(T) == sizeof(float)) {
4078      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4079      uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4080      return float_pack(sign, exp_bits, frac_bits);
4081    } else {
4082      return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4083    }
4084  }
4085}
4086
4087LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4088                                 const LogicVRegister& src, FPRounding round) {
4089  dst.ClearForWrite(vform);
4090  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4091    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4092      float input = src.Float<float>(i);
4093      dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4094    }
4095  } else {
4096    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4097    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4098      double input = src.Float<double>(i);
4099      dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4100    }
4101  }
4102  return dst;
4103}
4104
4105LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4106                                  const LogicVRegister& src) {
4107  dst.ClearForWrite(vform);
4108  uint64_t operand;
4109  uint32_t result;
4110  double dp_operand, dp_result;
4111  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4112    operand = src.Uint(vform, i);
4113    if (operand <= 0x3FFFFFFF) {
4114      result = 0xFFFFFFFF;
4115    } else {
4116      dp_operand = operand * std::pow(2.0, -32);
4117      dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4118      result = static_cast<uint32_t>(dp_result);
4119    }
4120    dst.SetUint(vform, i, result);
4121  }
4122  return dst;
4123}
4124
4125// Based on reference C function recip_estimate from ARM ARM.
4126double Simulator::recip_estimate(double a) {
4127  int q, s;
4128  double r;
4129  q = static_cast<int>(a * 512.0);
4130  r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4131  s = static_cast<int>(256.0 * r + 0.5);
4132  return static_cast<double>(s) / 256.0;
4133}
4134
4135LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4136                                 const LogicVRegister& src) {
4137  dst.ClearForWrite(vform);
4138  uint64_t operand;
4139  uint32_t result;
4140  double dp_operand, dp_result;
4141  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4142    operand = src.Uint(vform, i);
4143    if (operand <= 0x7FFFFFFF) {
4144      result = 0xFFFFFFFF;
4145    } else {
4146      dp_operand = operand * std::pow(2.0, -32);
4147      dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4148      result = static_cast<uint32_t>(dp_result);
4149    }
4150    dst.SetUint(vform, i, result);
4151  }
4152  return dst;
4153}
4154
4155template <typename T>
4156LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4157                                 const LogicVRegister& src) {
4158  dst.ClearForWrite(vform);
4159  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4160    T op = src.Float<T>(i);
4161    T result;
4162    if (std::isnan(op)) {
4163      result = FPProcessNaN(op);
4164    } else {
4165      int exp;
4166      uint32_t sign;
4167      if (sizeof(T) == sizeof(float)) {
4168        sign = float_sign(op);
4169        exp = static_cast<int>(float_exp(op));
4170        exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4171        result = float_pack(sign, exp, 0);
4172      } else {
4173        sign = double_sign(op);
4174        exp = static_cast<int>(double_exp(op));
4175        exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4176        result = double_pack(sign, exp, 0);
4177      }
4178    }
4179    dst.SetFloat(i, result);
4180  }
4181  return dst;
4182}
4183
4184LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4185                                 const LogicVRegister& src) {
4186  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4187    frecpx<float>(vform, dst, src);
4188  } else {
4189    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4190    frecpx<double>(vform, dst, src);
4191  }
4192  return dst;
4193}
4194
4195LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4196                                const LogicVRegister& src, int fbits,
4197                                FPRounding round) {
4198  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4199    if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4200      float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4201      dst.SetFloat<float>(i, result);
4202    } else {
4203      DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4204      double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4205      dst.SetFloat<double>(i, result);
4206    }
4207  }
4208  return dst;
4209}
4210
4211LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4212                                const LogicVRegister& src, int fbits,
4213                                FPRounding round) {
4214  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4215    if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4216      float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4217      dst.SetFloat<float>(i, result);
4218    } else {
4219      DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4220      double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4221      dst.SetFloat<double>(i, result);
4222    }
4223  }
4224  return dst;
4225}
4226
4227}  // namespace internal
4228}  // namespace v8
4229
4230#endif  // USE_SIMULATOR
4231