1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28 
29 #include <cmath>
30 
31 #include "simulator-aarch64.h"
32 
33 namespace vixl {
34 namespace aarch64 {
35 
36 using vixl::internal::SimFloat16;
37 
// Compile-time type query: IsFloat64<T>() is true only when T is `double`.
// The generic version below covers every other type.
template <class T>
bool IsFloat64() {
  return false;
}
// Specialisation selected for `double`.
template <>
bool IsFloat64<double>() {
  return true;
}
46 
// Compile-time type query: IsFloat32<T>() is true only when T is `float`.
// The generic version below covers every other type.
template <class T>
bool IsFloat32() {
  return false;
}
// Specialisation selected for `float`.
template <>
bool IsFloat32<float>() {
  return true;
}
55 
56 template <typename T>
IsFloat16()57 bool IsFloat16() {
58   return false;
59 }
60 template <>
IsFloat16()61 bool IsFloat16<Float16>() {
62   return true;
63 }
64 template <>
IsFloat16()65 bool IsFloat16<SimFloat16>() {
66   return true;
67 }
68 
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71   return kFP64DefaultNaN;
72 }
73 
74 
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77   return kFP32DefaultNaN;
78 }
79 
80 
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83   return SimFloat16(kFP16DefaultNaN);
84 }
85 
86 
FixedToDouble(int64_t src, int fbits, FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88   if (src >= 0) {
89     return UFixedToDouble(src, fbits, round);
90   } else if (src == INT64_MIN) {
91     return -UFixedToDouble(src, fbits, round);
92   } else {
93     return -UFixedToDouble(-src, fbits, round);
94   }
95 }
96 
97 
UFixedToDouble(uint64_t src, int fbits, FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99   // An input of 0 is a special case because the result is effectively
100   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101   if (src == 0) {
102     return 0.0;
103   }
104 
105   // Calculate the exponent. The highest significant bit will have the value
106   // 2^exponent.
107   const int highest_significant_bit = 63 - CountLeadingZeros(src);
108   const int64_t exponent = highest_significant_bit - fbits;
109 
110   return FPRoundToDouble(0, exponent, src, round);
111 }
112 
113 
FixedToFloat(int64_t src, int fbits, FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115   if (src >= 0) {
116     return UFixedToFloat(src, fbits, round);
117   } else if (src == INT64_MIN) {
118     return -UFixedToFloat(src, fbits, round);
119   } else {
120     return -UFixedToFloat(-src, fbits, round);
121   }
122 }
123 
124 
UFixedToFloat(uint64_t src, int fbits, FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126   // An input of 0 is a special case because the result is effectively
127   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128   if (src == 0) {
129     return 0.0f;
130   }
131 
132   // Calculate the exponent. The highest significant bit will have the value
133   // 2^exponent.
134   const int highest_significant_bit = 63 - CountLeadingZeros(src);
135   const int32_t exponent = highest_significant_bit - fbits;
136 
137   return FPRoundToFloat(0, exponent, src, round);
138 }
139 
140 
FixedToFloat16(int64_t src, int fbits, FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142   if (src >= 0) {
143     return UFixedToFloat16(src, fbits, round);
144   } else if (src == INT64_MIN) {
145     return -UFixedToFloat16(src, fbits, round);
146   } else {
147     return -UFixedToFloat16(-src, fbits, round);
148   }
149 }
150 
151 
UFixedToFloat16(uint64_t src, int fbits, FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153                                       int fbits,
154                                       FPRounding round) {
155   // An input of 0 is a special case because the result is effectively
156   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157   if (src == 0) {
158     return 0.0f;
159   }
160 
161   // Calculate the exponent. The highest significant bit will have the value
162   // 2^exponent.
163   const int highest_significant_bit = 63 - CountLeadingZeros(src);
164   const int16_t exponent = highest_significant_bit - fbits;
165 
166   return FPRoundToFloat16(0, exponent, src, round);
167 }
168 
169 
GenerateRandomTag(uint16_t exclude)170 uint64_t Simulator::GenerateRandomTag(uint16_t exclude) {
171   uint64_t rtag = nrand48(rand_state_) >> 28;
172   VIXL_ASSERT(IsUint4(rtag));
173 
174   if (exclude == 0) {
175     exclude = nrand48(rand_state_) >> 27;
176   }
177 
178   // TODO: implement this to better match the specification, which calls for a
179   // true random mode, and a pseudo-random mode with state (EL1.TAG) modified by
180   // PRNG.
181   return ChooseNonExcludedTag(rtag, 0, exclude);
182 }
183 
184 
ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr)185 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
186   dst.ClearForWrite(vform);
187   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
188     LoadLane(dst, vform, i, addr);
189     addr += LaneSizeInBytesFromFormat(vform);
190   }
191 }
192 
193 
ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr)194 void Simulator::ld1(VectorFormat vform,
195                     LogicVRegister dst,
196                     int index,
197                     uint64_t addr) {
198   LoadLane(dst, vform, index, addr);
199 }
200 
201 
ld1r(VectorFormat vform, VectorFormat unpack_vform, LogicVRegister dst, uint64_t addr, bool is_signed)202 void Simulator::ld1r(VectorFormat vform,
203                      VectorFormat unpack_vform,
204                      LogicVRegister dst,
205                      uint64_t addr,
206                      bool is_signed) {
207   unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
208   dst.ClearForWrite(vform);
209   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
210     if (is_signed) {
211       LoadIntToLane(dst, vform, unpack_size, i, addr);
212     } else {
213       LoadUintToLane(dst, vform, unpack_size, i, addr);
214     }
215   }
216 }
217 
218 
ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr)219 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
220   ld1r(vform, vform, dst, addr);
221 }
222 
223 
ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, uint64_t addr1)224 void Simulator::ld2(VectorFormat vform,
225                     LogicVRegister dst1,
226                     LogicVRegister dst2,
227                     uint64_t addr1) {
228   dst1.ClearForWrite(vform);
229   dst2.ClearForWrite(vform);
230   int esize = LaneSizeInBytesFromFormat(vform);
231   uint64_t addr2 = addr1 + esize;
232   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
233     LoadLane(dst1, vform, i, addr1);
234     LoadLane(dst2, vform, i, addr2);
235     addr1 += 2 * esize;
236     addr2 += 2 * esize;
237   }
238 }
239 
240 
ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, int index, uint64_t addr1)241 void Simulator::ld2(VectorFormat vform,
242                     LogicVRegister dst1,
243                     LogicVRegister dst2,
244                     int index,
245                     uint64_t addr1) {
246   dst1.ClearForWrite(vform);
247   dst2.ClearForWrite(vform);
248   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
249   LoadLane(dst1, vform, index, addr1);
250   LoadLane(dst2, vform, index, addr2);
251 }
252 
253 
ld2r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, uint64_t addr)254 void Simulator::ld2r(VectorFormat vform,
255                      LogicVRegister dst1,
256                      LogicVRegister dst2,
257                      uint64_t addr) {
258   dst1.ClearForWrite(vform);
259   dst2.ClearForWrite(vform);
260   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
261   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
262     LoadLane(dst1, vform, i, addr);
263     LoadLane(dst2, vform, i, addr2);
264   }
265 }
266 
267 
ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1)268 void Simulator::ld3(VectorFormat vform,
269                     LogicVRegister dst1,
270                     LogicVRegister dst2,
271                     LogicVRegister dst3,
272                     uint64_t addr1) {
273   dst1.ClearForWrite(vform);
274   dst2.ClearForWrite(vform);
275   dst3.ClearForWrite(vform);
276   int esize = LaneSizeInBytesFromFormat(vform);
277   uint64_t addr2 = addr1 + esize;
278   uint64_t addr3 = addr2 + esize;
279   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
280     LoadLane(dst1, vform, i, addr1);
281     LoadLane(dst2, vform, i, addr2);
282     LoadLane(dst3, vform, i, addr3);
283     addr1 += 3 * esize;
284     addr2 += 3 * esize;
285     addr3 += 3 * esize;
286   }
287 }
288 
289 
ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, int index, uint64_t addr1)290 void Simulator::ld3(VectorFormat vform,
291                     LogicVRegister dst1,
292                     LogicVRegister dst2,
293                     LogicVRegister dst3,
294                     int index,
295                     uint64_t addr1) {
296   dst1.ClearForWrite(vform);
297   dst2.ClearForWrite(vform);
298   dst3.ClearForWrite(vform);
299   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
300   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
301   LoadLane(dst1, vform, index, addr1);
302   LoadLane(dst2, vform, index, addr2);
303   LoadLane(dst3, vform, index, addr3);
304 }
305 
306 
ld3r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, uint64_t addr)307 void Simulator::ld3r(VectorFormat vform,
308                      LogicVRegister dst1,
309                      LogicVRegister dst2,
310                      LogicVRegister dst3,
311                      uint64_t addr) {
312   dst1.ClearForWrite(vform);
313   dst2.ClearForWrite(vform);
314   dst3.ClearForWrite(vform);
315   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
316   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
317   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
318     LoadLane(dst1, vform, i, addr);
319     LoadLane(dst2, vform, i, addr2);
320     LoadLane(dst3, vform, i, addr3);
321   }
322 }
323 
324 
ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, uint64_t addr1)325 void Simulator::ld4(VectorFormat vform,
326                     LogicVRegister dst1,
327                     LogicVRegister dst2,
328                     LogicVRegister dst3,
329                     LogicVRegister dst4,
330                     uint64_t addr1) {
331   dst1.ClearForWrite(vform);
332   dst2.ClearForWrite(vform);
333   dst3.ClearForWrite(vform);
334   dst4.ClearForWrite(vform);
335   int esize = LaneSizeInBytesFromFormat(vform);
336   uint64_t addr2 = addr1 + esize;
337   uint64_t addr3 = addr2 + esize;
338   uint64_t addr4 = addr3 + esize;
339   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
340     LoadLane(dst1, vform, i, addr1);
341     LoadLane(dst2, vform, i, addr2);
342     LoadLane(dst3, vform, i, addr3);
343     LoadLane(dst4, vform, i, addr4);
344     addr1 += 4 * esize;
345     addr2 += 4 * esize;
346     addr3 += 4 * esize;
347     addr4 += 4 * esize;
348   }
349 }
350 
351 
ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, int index, uint64_t addr1)352 void Simulator::ld4(VectorFormat vform,
353                     LogicVRegister dst1,
354                     LogicVRegister dst2,
355                     LogicVRegister dst3,
356                     LogicVRegister dst4,
357                     int index,
358                     uint64_t addr1) {
359   dst1.ClearForWrite(vform);
360   dst2.ClearForWrite(vform);
361   dst3.ClearForWrite(vform);
362   dst4.ClearForWrite(vform);
363   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
364   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
365   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
366   LoadLane(dst1, vform, index, addr1);
367   LoadLane(dst2, vform, index, addr2);
368   LoadLane(dst3, vform, index, addr3);
369   LoadLane(dst4, vform, index, addr4);
370 }
371 
372 
ld4r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, uint64_t addr)373 void Simulator::ld4r(VectorFormat vform,
374                      LogicVRegister dst1,
375                      LogicVRegister dst2,
376                      LogicVRegister dst3,
377                      LogicVRegister dst4,
378                      uint64_t addr) {
379   dst1.ClearForWrite(vform);
380   dst2.ClearForWrite(vform);
381   dst3.ClearForWrite(vform);
382   dst4.ClearForWrite(vform);
383   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
384   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
385   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
386   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
387     LoadLane(dst1, vform, i, addr);
388     LoadLane(dst2, vform, i, addr2);
389     LoadLane(dst3, vform, i, addr3);
390     LoadLane(dst4, vform, i, addr4);
391   }
392 }
393 
394 
st1(VectorFormat vform, LogicVRegister src, uint64_t addr)395 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
396   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
397     StoreLane(src, vform, i, addr);
398     addr += LaneSizeInBytesFromFormat(vform);
399   }
400 }
401 
402 
st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr)403 void Simulator::st1(VectorFormat vform,
404                     LogicVRegister src,
405                     int index,
406                     uint64_t addr) {
407   StoreLane(src, vform, index, addr);
408 }
409 
410 
st2(VectorFormat vform, LogicVRegister src, LogicVRegister src2, uint64_t addr)411 void Simulator::st2(VectorFormat vform,
412                     LogicVRegister src,
413                     LogicVRegister src2,
414                     uint64_t addr) {
415   int esize = LaneSizeInBytesFromFormat(vform);
416   uint64_t addr2 = addr + esize;
417   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
418     StoreLane(src, vform, i, addr);
419     StoreLane(src2, vform, i, addr2);
420     addr += 2 * esize;
421     addr2 += 2 * esize;
422   }
423 }
424 
425 
st2(VectorFormat vform, LogicVRegister src, LogicVRegister src2, int index, uint64_t addr)426 void Simulator::st2(VectorFormat vform,
427                     LogicVRegister src,
428                     LogicVRegister src2,
429                     int index,
430                     uint64_t addr) {
431   int esize = LaneSizeInBytesFromFormat(vform);
432   StoreLane(src, vform, index, addr);
433   StoreLane(src2, vform, index, addr + 1 * esize);
434 }
435 
436 
st3(VectorFormat vform, LogicVRegister src, LogicVRegister src2, LogicVRegister src3, uint64_t addr)437 void Simulator::st3(VectorFormat vform,
438                     LogicVRegister src,
439                     LogicVRegister src2,
440                     LogicVRegister src3,
441                     uint64_t addr) {
442   int esize = LaneSizeInBytesFromFormat(vform);
443   uint64_t addr2 = addr + esize;
444   uint64_t addr3 = addr2 + esize;
445   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
446     StoreLane(src, vform, i, addr);
447     StoreLane(src2, vform, i, addr2);
448     StoreLane(src3, vform, i, addr3);
449     addr += 3 * esize;
450     addr2 += 3 * esize;
451     addr3 += 3 * esize;
452   }
453 }
454 
455 
st3(VectorFormat vform, LogicVRegister src, LogicVRegister src2, LogicVRegister src3, int index, uint64_t addr)456 void Simulator::st3(VectorFormat vform,
457                     LogicVRegister src,
458                     LogicVRegister src2,
459                     LogicVRegister src3,
460                     int index,
461                     uint64_t addr) {
462   int esize = LaneSizeInBytesFromFormat(vform);
463   StoreLane(src, vform, index, addr);
464   StoreLane(src2, vform, index, addr + 1 * esize);
465   StoreLane(src3, vform, index, addr + 2 * esize);
466 }
467 
468 
st4(VectorFormat vform, LogicVRegister src, LogicVRegister src2, LogicVRegister src3, LogicVRegister src4, uint64_t addr)469 void Simulator::st4(VectorFormat vform,
470                     LogicVRegister src,
471                     LogicVRegister src2,
472                     LogicVRegister src3,
473                     LogicVRegister src4,
474                     uint64_t addr) {
475   int esize = LaneSizeInBytesFromFormat(vform);
476   uint64_t addr2 = addr + esize;
477   uint64_t addr3 = addr2 + esize;
478   uint64_t addr4 = addr3 + esize;
479   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
480     StoreLane(src, vform, i, addr);
481     StoreLane(src2, vform, i, addr2);
482     StoreLane(src3, vform, i, addr3);
483     StoreLane(src4, vform, i, addr4);
484     addr += 4 * esize;
485     addr2 += 4 * esize;
486     addr3 += 4 * esize;
487     addr4 += 4 * esize;
488   }
489 }
490 
491 
st4(VectorFormat vform, LogicVRegister src, LogicVRegister src2, LogicVRegister src3, LogicVRegister src4, int index, uint64_t addr)492 void Simulator::st4(VectorFormat vform,
493                     LogicVRegister src,
494                     LogicVRegister src2,
495                     LogicVRegister src3,
496                     LogicVRegister src4,
497                     int index,
498                     uint64_t addr) {
499   int esize = LaneSizeInBytesFromFormat(vform);
500   StoreLane(src, vform, index, addr);
501   StoreLane(src2, vform, index, addr + 1 * esize);
502   StoreLane(src3, vform, index, addr + 2 * esize);
503   StoreLane(src4, vform, index, addr + 3 * esize);
504 }
505 
506 
cmp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, Condition cond)507 LogicVRegister Simulator::cmp(VectorFormat vform,
508                               LogicVRegister dst,
509                               const LogicVRegister& src1,
510                               const LogicVRegister& src2,
511                               Condition cond) {
512   dst.ClearForWrite(vform);
513   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
514     int64_t sa = src1.Int(vform, i);
515     int64_t sb = src2.Int(vform, i);
516     uint64_t ua = src1.Uint(vform, i);
517     uint64_t ub = src2.Uint(vform, i);
518     bool result = false;
519     switch (cond) {
520       case eq:
521         result = (ua == ub);
522         break;
523       case ge:
524         result = (sa >= sb);
525         break;
526       case gt:
527         result = (sa > sb);
528         break;
529       case hi:
530         result = (ua > ub);
531         break;
532       case hs:
533         result = (ua >= ub);
534         break;
535       case lt:
536         result = (sa < sb);
537         break;
538       case le:
539         result = (sa <= sb);
540         break;
541       default:
542         VIXL_UNREACHABLE();
543         break;
544     }
545     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
546   }
547   return dst;
548 }
549 
550 
cmp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, int imm, Condition cond)551 LogicVRegister Simulator::cmp(VectorFormat vform,
552                               LogicVRegister dst,
553                               const LogicVRegister& src1,
554                               int imm,
555                               Condition cond) {
556   SimVRegister temp;
557   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
558   return cmp(vform, dst, src1, imm_reg, cond);
559 }
560 
561 
cmptst(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)562 LogicVRegister Simulator::cmptst(VectorFormat vform,
563                                  LogicVRegister dst,
564                                  const LogicVRegister& src1,
565                                  const LogicVRegister& src2) {
566   dst.ClearForWrite(vform);
567   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
568     uint64_t ua = src1.Uint(vform, i);
569     uint64_t ub = src2.Uint(vform, i);
570     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
571   }
572   return dst;
573 }
574 
575 
add(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)576 LogicVRegister Simulator::add(VectorFormat vform,
577                               LogicVRegister dst,
578                               const LogicVRegister& src1,
579                               const LogicVRegister& src2) {
580   int lane_size = LaneSizeInBitsFromFormat(vform);
581   dst.ClearForWrite(vform);
582 
583   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
584     // Test for unsigned saturation.
585     uint64_t ua = src1.UintLeftJustified(vform, i);
586     uint64_t ub = src2.UintLeftJustified(vform, i);
587     uint64_t ur = ua + ub;
588     if (ur < ua) {
589       dst.SetUnsignedSat(i, true);
590     }
591 
592     // Test for signed saturation.
593     bool pos_a = (ua >> 63) == 0;
594     bool pos_b = (ub >> 63) == 0;
595     bool pos_r = (ur >> 63) == 0;
596     // If the signs of the operands are the same, but different from the result,
597     // there was an overflow.
598     if ((pos_a == pos_b) && (pos_a != pos_r)) {
599       dst.SetSignedSat(i, pos_a);
600     }
601     dst.SetInt(vform, i, ur >> (64 - lane_size));
602   }
603   return dst;
604 }
605 
add_uint(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, uint64_t value)606 LogicVRegister Simulator::add_uint(VectorFormat vform,
607                                    LogicVRegister dst,
608                                    const LogicVRegister& src1,
609                                    uint64_t value) {
610   int lane_size = LaneSizeInBitsFromFormat(vform);
611   VIXL_ASSERT(IsUintN(lane_size, value));
612   dst.ClearForWrite(vform);
613   // Left-justify `value`.
614   uint64_t ub = value << (64 - lane_size);
615   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
616     // Test for unsigned saturation.
617     uint64_t ua = src1.UintLeftJustified(vform, i);
618     uint64_t ur = ua + ub;
619     if (ur < ua) {
620       dst.SetUnsignedSat(i, true);
621     }
622 
623     // Test for signed saturation.
624     // `value` is always positive, so we have an overflow if the (signed) result
625     // is smaller than the first operand.
626     if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
627       dst.SetSignedSat(i, true);
628     }
629 
630     dst.SetInt(vform, i, ur >> (64 - lane_size));
631   }
632   return dst;
633 }
634 
addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)635 LogicVRegister Simulator::addp(VectorFormat vform,
636                                LogicVRegister dst,
637                                const LogicVRegister& src1,
638                                const LogicVRegister& src2) {
639   SimVRegister temp1, temp2;
640   uzp1(vform, temp1, src1, src2);
641   uzp2(vform, temp2, src1, src2);
642   add(vform, dst, temp1, temp2);
643   if (IsSVEFormat(vform)) {
644     interleave_top_bottom(vform, dst, dst);
645   }
646   return dst;
647 }
648 
sdiv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)649 LogicVRegister Simulator::sdiv(VectorFormat vform,
650                                LogicVRegister dst,
651                                const LogicVRegister& src1,
652                                const LogicVRegister& src2) {
653   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
654 
655   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
656     int64_t val1 = src1.Int(vform, i);
657     int64_t val2 = src2.Int(vform, i);
658     int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
659     int64_t quotient = 0;
660     if ((val1 == min_int) && (val2 == -1)) {
661       quotient = min_int;
662     } else if (val2 != 0) {
663       quotient = val1 / val2;
664     }
665     dst.SetInt(vform, i, quotient);
666   }
667 
668   return dst;
669 }
670 
udiv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)671 LogicVRegister Simulator::udiv(VectorFormat vform,
672                                LogicVRegister dst,
673                                const LogicVRegister& src1,
674                                const LogicVRegister& src2) {
675   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
676 
677   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
678     uint64_t val1 = src1.Uint(vform, i);
679     uint64_t val2 = src2.Uint(vform, i);
680     uint64_t quotient = 0;
681     if (val2 != 0) {
682       quotient = val1 / val2;
683     }
684     dst.SetUint(vform, i, quotient);
685   }
686 
687   return dst;
688 }
689 
690 
mla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2)691 LogicVRegister Simulator::mla(VectorFormat vform,
692                               LogicVRegister dst,
693                               const LogicVRegister& srca,
694                               const LogicVRegister& src1,
695                               const LogicVRegister& src2) {
696   SimVRegister temp;
697   mul(vform, temp, src1, src2);
698   add(vform, dst, srca, temp);
699   return dst;
700 }
701 
702 
mls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2)703 LogicVRegister Simulator::mls(VectorFormat vform,
704                               LogicVRegister dst,
705                               const LogicVRegister& srca,
706                               const LogicVRegister& src1,
707                               const LogicVRegister& src2) {
708   SimVRegister temp;
709   mul(vform, temp, src1, src2);
710   sub(vform, dst, srca, temp);
711   return dst;
712 }
713 
714 
mul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)715 LogicVRegister Simulator::mul(VectorFormat vform,
716                               LogicVRegister dst,
717                               const LogicVRegister& src1,
718                               const LogicVRegister& src2) {
719   dst.ClearForWrite(vform);
720 
721   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
722     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
723   }
724   return dst;
725 }
726 
727 
mul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)728 LogicVRegister Simulator::mul(VectorFormat vform,
729                               LogicVRegister dst,
730                               const LogicVRegister& src1,
731                               const LogicVRegister& src2,
732                               int index) {
733   SimVRegister temp;
734   VectorFormat indexform = VectorFormatFillQ(vform);
735   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
736 }
737 
738 
smulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)739 LogicVRegister Simulator::smulh(VectorFormat vform,
740                                 LogicVRegister dst,
741                                 const LogicVRegister& src1,
742                                 const LogicVRegister& src2) {
743   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
744     int64_t dst_val = 0xbadbeef;
745     int64_t val1 = src1.Int(vform, i);
746     int64_t val2 = src2.Int(vform, i);
747     switch (LaneSizeInBitsFromFormat(vform)) {
748       case 8:
749         dst_val = internal::MultiplyHigh<8>(val1, val2);
750         break;
751       case 16:
752         dst_val = internal::MultiplyHigh<16>(val1, val2);
753         break;
754       case 32:
755         dst_val = internal::MultiplyHigh<32>(val1, val2);
756         break;
757       case 64:
758         dst_val = internal::MultiplyHigh<64>(val1, val2);
759         break;
760       default:
761         VIXL_UNREACHABLE();
762         break;
763     }
764     dst.SetInt(vform, i, dst_val);
765   }
766   return dst;
767 }
768 
769 
umulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)770 LogicVRegister Simulator::umulh(VectorFormat vform,
771                                 LogicVRegister dst,
772                                 const LogicVRegister& src1,
773                                 const LogicVRegister& src2) {
774   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
775     uint64_t dst_val = 0xbadbeef;
776     uint64_t val1 = src1.Uint(vform, i);
777     uint64_t val2 = src2.Uint(vform, i);
778     switch (LaneSizeInBitsFromFormat(vform)) {
779       case 8:
780         dst_val = internal::MultiplyHigh<8>(val1, val2);
781         break;
782       case 16:
783         dst_val = internal::MultiplyHigh<16>(val1, val2);
784         break;
785       case 32:
786         dst_val = internal::MultiplyHigh<32>(val1, val2);
787         break;
788       case 64:
789         dst_val = internal::MultiplyHigh<64>(val1, val2);
790         break;
791       default:
792         VIXL_UNREACHABLE();
793         break;
794     }
795     dst.SetUint(vform, i, dst_val);
796   }
797   return dst;
798 }
799 
800 
mla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)801 LogicVRegister Simulator::mla(VectorFormat vform,
802                               LogicVRegister dst,
803                               const LogicVRegister& src1,
804                               const LogicVRegister& src2,
805                               int index) {
806   SimVRegister temp;
807   VectorFormat indexform = VectorFormatFillQ(vform);
808   return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
809 }
810 
811 
mls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)812 LogicVRegister Simulator::mls(VectorFormat vform,
813                               LogicVRegister dst,
814                               const LogicVRegister& src1,
815                               const LogicVRegister& src2,
816                               int index) {
817   SimVRegister temp;
818   VectorFormat indexform = VectorFormatFillQ(vform);
819   return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
820 }
821 
sqdmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)822 LogicVRegister Simulator::sqdmull(VectorFormat vform,
823                                   LogicVRegister dst,
824                                   const LogicVRegister& src1,
825                                   const LogicVRegister& src2,
826                                   int index) {
827   SimVRegister temp;
828   VectorFormat indexform =
829       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
830   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
831 }
832 
sqdmlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)833 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
834                                   LogicVRegister dst,
835                                   const LogicVRegister& src1,
836                                   const LogicVRegister& src2,
837                                   int index) {
838   SimVRegister temp;
839   VectorFormat indexform =
840       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
841   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
842 }
843 
sqdmlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)844 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
845                                   LogicVRegister dst,
846                                   const LogicVRegister& src1,
847                                   const LogicVRegister& src2,
848                                   int index) {
849   SimVRegister temp;
850   VectorFormat indexform =
851       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
852   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
853 }
854 
sqdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)855 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
856                                   LogicVRegister dst,
857                                   const LogicVRegister& src1,
858                                   const LogicVRegister& src2,
859                                   int index) {
860   SimVRegister temp;
861   VectorFormat indexform = VectorFormatFillQ(vform);
862   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
863 }
864 
865 
sqrdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)866 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
867                                    LogicVRegister dst,
868                                    const LogicVRegister& src1,
869                                    const LogicVRegister& src2,
870                                    int index) {
871   SimVRegister temp;
872   VectorFormat indexform = VectorFormatFillQ(vform);
873   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
874 }
875 
876 
sqrdmlah(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)877 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
878                                    LogicVRegister dst,
879                                    const LogicVRegister& src1,
880                                    const LogicVRegister& src2,
881                                    int index) {
882   SimVRegister temp;
883   VectorFormat indexform = VectorFormatFillQ(vform);
884   return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
885 }
886 
887 
sqrdmlsh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)888 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
889                                    LogicVRegister dst,
890                                    const LogicVRegister& src1,
891                                    const LogicVRegister& src2,
892                                    int index) {
893   SimVRegister temp;
894   VectorFormat indexform = VectorFormatFillQ(vform);
895   return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
896 }
897 
898 
PolynomialMult(uint64_t op1, uint64_t op2, int lane_size_in_bits) const899 uint64_t Simulator::PolynomialMult(uint64_t op1,
900                                    uint64_t op2,
901                                    int lane_size_in_bits) const {
902   VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize);
903   VIXL_ASSERT(IsUintN(lane_size_in_bits, op1));
904   VIXL_ASSERT(IsUintN(lane_size_in_bits, op2));
905   uint64_t result = 0;
906   for (int i = 0; i < lane_size_in_bits; ++i) {
907     if ((op1 >> i) & 1) {
908       result = result ^ (op2 << i);
909     }
910   }
911   return result;
912 }
913 
914 
pmul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)915 LogicVRegister Simulator::pmul(VectorFormat vform,
916                                LogicVRegister dst,
917                                const LogicVRegister& src1,
918                                const LogicVRegister& src2) {
919   dst.ClearForWrite(vform);
920   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
921     dst.SetUint(vform,
922                 i,
923                 PolynomialMult(src1.Uint(vform, i),
924                                src2.Uint(vform, i),
925                                LaneSizeInBitsFromFormat(vform)));
926   }
927   return dst;
928 }
929 
930 
pmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)931 LogicVRegister Simulator::pmull(VectorFormat vform,
932                                 LogicVRegister dst,
933                                 const LogicVRegister& src1,
934                                 const LogicVRegister& src2) {
935   dst.ClearForWrite(vform);
936 
937   VectorFormat vform_src = VectorFormatHalfWidth(vform);
938   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
939     dst.SetUint(vform,
940                 i,
941                 PolynomialMult(src1.Uint(vform_src, i),
942                                src2.Uint(vform_src, i),
943                                LaneSizeInBitsFromFormat(vform_src)));
944   }
945 
946   return dst;
947 }
948 
949 
pmull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)950 LogicVRegister Simulator::pmull2(VectorFormat vform,
951                                  LogicVRegister dst,
952                                  const LogicVRegister& src1,
953                                  const LogicVRegister& src2) {
954   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
955   dst.ClearForWrite(vform);
956   int lane_count = LaneCountFromFormat(vform);
957   for (int i = 0; i < lane_count; i++) {
958     dst.SetUint(vform,
959                 i,
960                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
961                                src2.Uint(vform_src, lane_count + i),
962                                LaneSizeInBitsFromFormat(vform_src)));
963   }
964   return dst;
965 }
966 
967 
sub(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)968 LogicVRegister Simulator::sub(VectorFormat vform,
969                               LogicVRegister dst,
970                               const LogicVRegister& src1,
971                               const LogicVRegister& src2) {
972   int lane_size = LaneSizeInBitsFromFormat(vform);
973   dst.ClearForWrite(vform);
974   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
975     // Test for unsigned saturation.
976     uint64_t ua = src1.UintLeftJustified(vform, i);
977     uint64_t ub = src2.UintLeftJustified(vform, i);
978     uint64_t ur = ua - ub;
979     if (ub > ua) {
980       dst.SetUnsignedSat(i, false);
981     }
982 
983     // Test for signed saturation.
984     bool pos_a = (ua >> 63) == 0;
985     bool pos_b = (ub >> 63) == 0;
986     bool pos_r = (ur >> 63) == 0;
987     // If the signs of the operands are different, and the sign of the first
988     // operand doesn't match the result, there was an overflow.
989     if ((pos_a != pos_b) && (pos_a != pos_r)) {
990       dst.SetSignedSat(i, pos_a);
991     }
992 
993     dst.SetInt(vform, i, ur >> (64 - lane_size));
994   }
995   return dst;
996 }
997 
sub_uint(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, uint64_t value)998 LogicVRegister Simulator::sub_uint(VectorFormat vform,
999                                    LogicVRegister dst,
1000                                    const LogicVRegister& src1,
1001                                    uint64_t value) {
1002   int lane_size = LaneSizeInBitsFromFormat(vform);
1003   VIXL_ASSERT(IsUintN(lane_size, value));
1004   dst.ClearForWrite(vform);
1005   // Left-justify `value`.
1006   uint64_t ub = value << (64 - lane_size);
1007   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1008     // Test for unsigned saturation.
1009     uint64_t ua = src1.UintLeftJustified(vform, i);
1010     uint64_t ur = ua - ub;
1011     if (ub > ua) {
1012       dst.SetUnsignedSat(i, false);
1013     }
1014 
1015     // Test for signed saturation.
1016     // `value` is always positive, so we have an overflow if the (signed) result
1017     // is greater than the first operand.
1018     if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1019       dst.SetSignedSat(i, false);
1020     }
1021 
1022     dst.SetInt(vform, i, ur >> (64 - lane_size));
1023   }
1024   return dst;
1025 }
1026 
and_(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1027 LogicVRegister Simulator::and_(VectorFormat vform,
1028                                LogicVRegister dst,
1029                                const LogicVRegister& src1,
1030                                const LogicVRegister& src2) {
1031   dst.ClearForWrite(vform);
1032   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1033     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1034   }
1035   return dst;
1036 }
1037 
1038 
orr(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1039 LogicVRegister Simulator::orr(VectorFormat vform,
1040                               LogicVRegister dst,
1041                               const LogicVRegister& src1,
1042                               const LogicVRegister& src2) {
1043   dst.ClearForWrite(vform);
1044   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1045     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1046   }
1047   return dst;
1048 }
1049 
1050 
orn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1051 LogicVRegister Simulator::orn(VectorFormat vform,
1052                               LogicVRegister dst,
1053                               const LogicVRegister& src1,
1054                               const LogicVRegister& src2) {
1055   dst.ClearForWrite(vform);
1056   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1057     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1058   }
1059   return dst;
1060 }
1061 
1062 
eor(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1063 LogicVRegister Simulator::eor(VectorFormat vform,
1064                               LogicVRegister dst,
1065                               const LogicVRegister& src1,
1066                               const LogicVRegister& src2) {
1067   dst.ClearForWrite(vform);
1068   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1069     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1070   }
1071   return dst;
1072 }
1073 
1074 
bic(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1075 LogicVRegister Simulator::bic(VectorFormat vform,
1076                               LogicVRegister dst,
1077                               const LogicVRegister& src1,
1078                               const LogicVRegister& src2) {
1079   dst.ClearForWrite(vform);
1080   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1081     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1082   }
1083   return dst;
1084 }
1085 
1086 
bic(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, uint64_t imm)1087 LogicVRegister Simulator::bic(VectorFormat vform,
1088                               LogicVRegister dst,
1089                               const LogicVRegister& src,
1090                               uint64_t imm) {
1091   uint64_t result[16];
1092   int lane_count = LaneCountFromFormat(vform);
1093   for (int i = 0; i < lane_count; ++i) {
1094     result[i] = src.Uint(vform, i) & ~imm;
1095   }
1096   dst.ClearForWrite(vform);
1097   for (int i = 0; i < lane_count; ++i) {
1098     dst.SetUint(vform, i, result[i]);
1099   }
1100   return dst;
1101 }
1102 
1103 
bif(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1104 LogicVRegister Simulator::bif(VectorFormat vform,
1105                               LogicVRegister dst,
1106                               const LogicVRegister& src1,
1107                               const LogicVRegister& src2) {
1108   dst.ClearForWrite(vform);
1109   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1110     uint64_t operand1 = dst.Uint(vform, i);
1111     uint64_t operand2 = ~src2.Uint(vform, i);
1112     uint64_t operand3 = src1.Uint(vform, i);
1113     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1114     dst.SetUint(vform, i, result);
1115   }
1116   return dst;
1117 }
1118 
1119 
bit(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1120 LogicVRegister Simulator::bit(VectorFormat vform,
1121                               LogicVRegister dst,
1122                               const LogicVRegister& src1,
1123                               const LogicVRegister& src2) {
1124   dst.ClearForWrite(vform);
1125   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1126     uint64_t operand1 = dst.Uint(vform, i);
1127     uint64_t operand2 = src2.Uint(vform, i);
1128     uint64_t operand3 = src1.Uint(vform, i);
1129     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1130     dst.SetUint(vform, i, result);
1131   }
1132   return dst;
1133 }
1134 
1135 
bsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src_mask, const LogicVRegister& src1, const LogicVRegister& src2)1136 LogicVRegister Simulator::bsl(VectorFormat vform,
1137                               LogicVRegister dst,
1138                               const LogicVRegister& src_mask,
1139                               const LogicVRegister& src1,
1140                               const LogicVRegister& src2) {
1141   dst.ClearForWrite(vform);
1142   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1143     uint64_t operand1 = src2.Uint(vform, i);
1144     uint64_t operand2 = src_mask.Uint(vform, i);
1145     uint64_t operand3 = src1.Uint(vform, i);
1146     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1147     dst.SetUint(vform, i, result);
1148   }
1149   return dst;
1150 }
1151 
1152 
sminmax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max)1153 LogicVRegister Simulator::sminmax(VectorFormat vform,
1154                                   LogicVRegister dst,
1155                                   const LogicVRegister& src1,
1156                                   const LogicVRegister& src2,
1157                                   bool max) {
1158   dst.ClearForWrite(vform);
1159   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1160     int64_t src1_val = src1.Int(vform, i);
1161     int64_t src2_val = src2.Int(vform, i);
1162     int64_t dst_val;
1163     if (max) {
1164       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1165     } else {
1166       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1167     }
1168     dst.SetInt(vform, i, dst_val);
1169   }
1170   return dst;
1171 }
1172 
1173 
smax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1174 LogicVRegister Simulator::smax(VectorFormat vform,
1175                                LogicVRegister dst,
1176                                const LogicVRegister& src1,
1177                                const LogicVRegister& src2) {
1178   return sminmax(vform, dst, src1, src2, true);
1179 }
1180 
1181 
smin(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1182 LogicVRegister Simulator::smin(VectorFormat vform,
1183                                LogicVRegister dst,
1184                                const LogicVRegister& src1,
1185                                const LogicVRegister& src2) {
1186   return sminmax(vform, dst, src1, src2, false);
1187 }
1188 
1189 
sminmaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max)1190 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1191                                    LogicVRegister dst,
1192                                    const LogicVRegister& src1,
1193                                    const LogicVRegister& src2,
1194                                    bool max) {
1195   unsigned lanes = LaneCountFromFormat(vform);
1196   int64_t result[kZRegMaxSizeInBytes];
1197   const LogicVRegister* src = &src1;
1198   for (unsigned j = 0; j < 2; j++) {
1199     for (unsigned i = 0; i < lanes; i += 2) {
1200       int64_t first_val = src->Int(vform, i);
1201       int64_t second_val = src->Int(vform, i + 1);
1202       int64_t dst_val;
1203       if (max) {
1204         dst_val = (first_val > second_val) ? first_val : second_val;
1205       } else {
1206         dst_val = (first_val < second_val) ? first_val : second_val;
1207       }
1208       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1209       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1210     }
1211     src = &src2;
1212   }
1213   dst.SetIntArray(vform, result);
1214   if (IsSVEFormat(vform)) {
1215     interleave_top_bottom(vform, dst, dst);
1216   }
1217   return dst;
1218 }
1219 
1220 
smaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1221 LogicVRegister Simulator::smaxp(VectorFormat vform,
1222                                 LogicVRegister dst,
1223                                 const LogicVRegister& src1,
1224                                 const LogicVRegister& src2) {
1225   return sminmaxp(vform, dst, src1, src2, true);
1226 }
1227 
1228 
sminp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1229 LogicVRegister Simulator::sminp(VectorFormat vform,
1230                                 LogicVRegister dst,
1231                                 const LogicVRegister& src1,
1232                                 const LogicVRegister& src2) {
1233   return sminmaxp(vform, dst, src1, src2, false);
1234 }
1235 
1236 
addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1237 LogicVRegister Simulator::addp(VectorFormat vform,
1238                                LogicVRegister dst,
1239                                const LogicVRegister& src) {
1240   VIXL_ASSERT(vform == kFormatD);
1241 
1242   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1243   dst.ClearForWrite(vform);
1244   dst.SetUint(vform, 0, dst_val);
1245   return dst;
1246 }
1247 
1248 
addv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1249 LogicVRegister Simulator::addv(VectorFormat vform,
1250                                LogicVRegister dst,
1251                                const LogicVRegister& src) {
1252   VectorFormat vform_dst =
1253       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1254 
1255 
1256   int64_t dst_val = 0;
1257   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1258     dst_val += src.Int(vform, i);
1259   }
1260 
1261   dst.ClearForWrite(vform_dst);
1262   dst.SetInt(vform_dst, 0, dst_val);
1263   return dst;
1264 }
1265 
1266 
saddlv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1267 LogicVRegister Simulator::saddlv(VectorFormat vform,
1268                                  LogicVRegister dst,
1269                                  const LogicVRegister& src) {
1270   VectorFormat vform_dst =
1271       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1272 
1273   int64_t dst_val = 0;
1274   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1275     dst_val += src.Int(vform, i);
1276   }
1277 
1278   dst.ClearForWrite(vform_dst);
1279   dst.SetInt(vform_dst, 0, dst_val);
1280   return dst;
1281 }
1282 
1283 
uaddlv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1284 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1285                                  LogicVRegister dst,
1286                                  const LogicVRegister& src) {
1287   VectorFormat vform_dst =
1288       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1289 
1290   uint64_t dst_val = 0;
1291   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1292     dst_val += src.Uint(vform, i);
1293   }
1294 
1295   dst.ClearForWrite(vform_dst);
1296   dst.SetUint(vform_dst, 0, dst_val);
1297   return dst;
1298 }
1299 
1300 
sminmaxv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src, bool max)1301 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1302                                    LogicVRegister dst,
1303                                    const LogicPRegister& pg,
1304                                    const LogicVRegister& src,
1305                                    bool max) {
1306   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1307   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1308     if (!pg.IsActive(vform, i)) continue;
1309 
1310     int64_t src_val = src.Int(vform, i);
1311     if (max) {
1312       dst_val = (src_val > dst_val) ? src_val : dst_val;
1313     } else {
1314       dst_val = (src_val < dst_val) ? src_val : dst_val;
1315     }
1316   }
1317   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1318   dst.SetInt(vform, 0, dst_val);
1319   return dst;
1320 }
1321 
1322 
smaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1323 LogicVRegister Simulator::smaxv(VectorFormat vform,
1324                                 LogicVRegister dst,
1325                                 const LogicVRegister& src) {
1326   sminmaxv(vform, dst, GetPTrue(), src, true);
1327   return dst;
1328 }
1329 
1330 
sminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1331 LogicVRegister Simulator::sminv(VectorFormat vform,
1332                                 LogicVRegister dst,
1333                                 const LogicVRegister& src) {
1334   sminmaxv(vform, dst, GetPTrue(), src, false);
1335   return dst;
1336 }
1337 
1338 
smaxv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1339 LogicVRegister Simulator::smaxv(VectorFormat vform,
1340                                 LogicVRegister dst,
1341                                 const LogicPRegister& pg,
1342                                 const LogicVRegister& src) {
1343   VIXL_ASSERT(IsSVEFormat(vform));
1344   sminmaxv(vform, dst, pg, src, true);
1345   return dst;
1346 }
1347 
1348 
sminv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1349 LogicVRegister Simulator::sminv(VectorFormat vform,
1350                                 LogicVRegister dst,
1351                                 const LogicPRegister& pg,
1352                                 const LogicVRegister& src) {
1353   VIXL_ASSERT(IsSVEFormat(vform));
1354   sminmaxv(vform, dst, pg, src, false);
1355   return dst;
1356 }
1357 
1358 
uminmax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max)1359 LogicVRegister Simulator::uminmax(VectorFormat vform,
1360                                   LogicVRegister dst,
1361                                   const LogicVRegister& src1,
1362                                   const LogicVRegister& src2,
1363                                   bool max) {
1364   dst.ClearForWrite(vform);
1365   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1366     uint64_t src1_val = src1.Uint(vform, i);
1367     uint64_t src2_val = src2.Uint(vform, i);
1368     uint64_t dst_val;
1369     if (max) {
1370       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1371     } else {
1372       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1373     }
1374     dst.SetUint(vform, i, dst_val);
1375   }
1376   return dst;
1377 }
1378 
1379 
umax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1380 LogicVRegister Simulator::umax(VectorFormat vform,
1381                                LogicVRegister dst,
1382                                const LogicVRegister& src1,
1383                                const LogicVRegister& src2) {
1384   return uminmax(vform, dst, src1, src2, true);
1385 }
1386 
1387 
umin(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1388 LogicVRegister Simulator::umin(VectorFormat vform,
1389                                LogicVRegister dst,
1390                                const LogicVRegister& src1,
1391                                const LogicVRegister& src2) {
1392   return uminmax(vform, dst, src1, src2, false);
1393 }
1394 
1395 
uminmaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max)1396 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1397                                    LogicVRegister dst,
1398                                    const LogicVRegister& src1,
1399                                    const LogicVRegister& src2,
1400                                    bool max) {
1401   unsigned lanes = LaneCountFromFormat(vform);
1402   uint64_t result[kZRegMaxSizeInBytes];
1403   const LogicVRegister* src = &src1;
1404   for (unsigned j = 0; j < 2; j++) {
1405     for (unsigned i = 0; i < lanes; i += 2) {
1406       uint64_t first_val = src->Uint(vform, i);
1407       uint64_t second_val = src->Uint(vform, i + 1);
1408       uint64_t dst_val;
1409       if (max) {
1410         dst_val = (first_val > second_val) ? first_val : second_val;
1411       } else {
1412         dst_val = (first_val < second_val) ? first_val : second_val;
1413       }
1414       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1415       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1416     }
1417     src = &src2;
1418   }
1419   dst.SetUintArray(vform, result);
1420   if (IsSVEFormat(vform)) {
1421     interleave_top_bottom(vform, dst, dst);
1422   }
1423   return dst;
1424 }
1425 
1426 
umaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1427 LogicVRegister Simulator::umaxp(VectorFormat vform,
1428                                 LogicVRegister dst,
1429                                 const LogicVRegister& src1,
1430                                 const LogicVRegister& src2) {
1431   return uminmaxp(vform, dst, src1, src2, true);
1432 }
1433 
1434 
uminp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1435 LogicVRegister Simulator::uminp(VectorFormat vform,
1436                                 LogicVRegister dst,
1437                                 const LogicVRegister& src1,
1438                                 const LogicVRegister& src2) {
1439   return uminmaxp(vform, dst, src1, src2, false);
1440 }
1441 
1442 
uminmaxv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src, bool max)1443 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1444                                    LogicVRegister dst,
1445                                    const LogicPRegister& pg,
1446                                    const LogicVRegister& src,
1447                                    bool max) {
1448   uint64_t dst_val = max ? 0 : UINT64_MAX;
1449   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1450     if (!pg.IsActive(vform, i)) continue;
1451 
1452     uint64_t src_val = src.Uint(vform, i);
1453     if (max) {
1454       dst_val = (src_val > dst_val) ? src_val : dst_val;
1455     } else {
1456       dst_val = (src_val < dst_val) ? src_val : dst_val;
1457     }
1458   }
1459   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1460   dst.SetUint(vform, 0, dst_val);
1461   return dst;
1462 }
1463 
1464 
umaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1465 LogicVRegister Simulator::umaxv(VectorFormat vform,
1466                                 LogicVRegister dst,
1467                                 const LogicVRegister& src) {
1468   uminmaxv(vform, dst, GetPTrue(), src, true);
1469   return dst;
1470 }
1471 
1472 
uminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1473 LogicVRegister Simulator::uminv(VectorFormat vform,
1474                                 LogicVRegister dst,
1475                                 const LogicVRegister& src) {
1476   uminmaxv(vform, dst, GetPTrue(), src, false);
1477   return dst;
1478 }
1479 
1480 
umaxv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1481 LogicVRegister Simulator::umaxv(VectorFormat vform,
1482                                 LogicVRegister dst,
1483                                 const LogicPRegister& pg,
1484                                 const LogicVRegister& src) {
1485   VIXL_ASSERT(IsSVEFormat(vform));
1486   uminmaxv(vform, dst, pg, src, true);
1487   return dst;
1488 }
1489 
1490 
uminv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1491 LogicVRegister Simulator::uminv(VectorFormat vform,
1492                                 LogicVRegister dst,
1493                                 const LogicPRegister& pg,
1494                                 const LogicVRegister& src) {
1495   VIXL_ASSERT(IsSVEFormat(vform));
1496   uminmaxv(vform, dst, pg, src, false);
1497   return dst;
1498 }
1499 
1500 
shl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1501 LogicVRegister Simulator::shl(VectorFormat vform,
1502                               LogicVRegister dst,
1503                               const LogicVRegister& src,
1504                               int shift) {
1505   VIXL_ASSERT(shift >= 0);
1506   SimVRegister temp;
1507   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1508   return ushl(vform, dst, src, shiftreg);
1509 }
1510 
1511 
sshll(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1512 LogicVRegister Simulator::sshll(VectorFormat vform,
1513                                 LogicVRegister dst,
1514                                 const LogicVRegister& src,
1515                                 int shift) {
1516   VIXL_ASSERT(shift >= 0);
1517   SimVRegister temp1, temp2;
1518   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1519   LogicVRegister extendedreg = sxtl(vform, temp2, src);
1520   return sshl(vform, dst, extendedreg, shiftreg);
1521 }
1522 
1523 
sshll2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1524 LogicVRegister Simulator::sshll2(VectorFormat vform,
1525                                  LogicVRegister dst,
1526                                  const LogicVRegister& src,
1527                                  int shift) {
1528   VIXL_ASSERT(shift >= 0);
1529   SimVRegister temp1, temp2;
1530   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1531   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1532   return sshl(vform, dst, extendedreg, shiftreg);
1533 }
1534 
1535 
shll(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1536 LogicVRegister Simulator::shll(VectorFormat vform,
1537                                LogicVRegister dst,
1538                                const LogicVRegister& src) {
1539   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1540   return sshll(vform, dst, src, shift);
1541 }
1542 
1543 
shll2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1544 LogicVRegister Simulator::shll2(VectorFormat vform,
1545                                 LogicVRegister dst,
1546                                 const LogicVRegister& src) {
1547   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1548   return sshll2(vform, dst, src, shift);
1549 }
1550 
1551 
ushll(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1552 LogicVRegister Simulator::ushll(VectorFormat vform,
1553                                 LogicVRegister dst,
1554                                 const LogicVRegister& src,
1555                                 int shift) {
1556   VIXL_ASSERT(shift >= 0);
1557   SimVRegister temp1, temp2;
1558   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1559   LogicVRegister extendedreg = uxtl(vform, temp2, src);
1560   return ushl(vform, dst, extendedreg, shiftreg);
1561 }
1562 
1563 
ushll2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1564 LogicVRegister Simulator::ushll2(VectorFormat vform,
1565                                  LogicVRegister dst,
1566                                  const LogicVRegister& src,
1567                                  int shift) {
1568   VIXL_ASSERT(shift >= 0);
1569   SimVRegister temp1, temp2;
1570   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1571   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1572   return ushl(vform, dst, extendedreg, shiftreg);
1573 }
1574 
clast(VectorFormat vform, const LogicPRegister& pg, const LogicVRegister& src, int offset_from_last_active)1575 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1576                                            const LogicPRegister& pg,
1577                                            const LogicVRegister& src,
1578                                            int offset_from_last_active) {
1579   // Untested for any other values.
1580   VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1581 
1582   int last_active = GetLastActive(vform, pg);
1583   int lane_count = LaneCountFromFormat(vform);
1584   int index =
1585       ((last_active + offset_from_last_active) + lane_count) % lane_count;
1586   return std::make_pair(last_active >= 0, src.Uint(vform, index));
1587 }
1588 
compact(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1589 LogicVRegister Simulator::compact(VectorFormat vform,
1590                                   LogicVRegister dst,
1591                                   const LogicPRegister& pg,
1592                                   const LogicVRegister& src) {
1593   int j = 0;
1594   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1595     if (pg.IsActive(vform, i)) {
1596       dst.SetUint(vform, j++, src.Uint(vform, i));
1597     }
1598   }
1599   for (; j < LaneCountFromFormat(vform); j++) {
1600     dst.SetUint(vform, j, 0);
1601   }
1602   return dst;
1603 }
1604 
splice(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src1, const LogicVRegister& src2)1605 LogicVRegister Simulator::splice(VectorFormat vform,
1606                                  LogicVRegister dst,
1607                                  const LogicPRegister& pg,
1608                                  const LogicVRegister& src1,
1609                                  const LogicVRegister& src2) {
1610   int lane_count = LaneCountFromFormat(vform);
1611   int first_active = GetFirstActive(vform, pg);
1612   int last_active = GetLastActive(vform, pg);
1613   int dst_idx = 0;
1614   uint64_t result[kZRegMaxSizeInBytes];
1615 
1616   if (first_active >= 0) {
1617     VIXL_ASSERT(last_active >= first_active);
1618     VIXL_ASSERT(last_active < lane_count);
1619     for (int i = first_active; i <= last_active; i++) {
1620       result[dst_idx++] = src1.Uint(vform, i);
1621     }
1622   }
1623 
1624   VIXL_ASSERT(dst_idx <= lane_count);
1625   for (int i = dst_idx; i < lane_count; i++) {
1626     result[i] = src2.Uint(vform, i - dst_idx);
1627   }
1628 
1629   dst.SetUintArray(vform, result);
1630 
1631   return dst;
1632 }
1633 
1634 LogicVRegister Simulator::sel(VectorFormat vform,
1635                               LogicVRegister dst,
1636                               const SimPRegister& pg,
1637                               const LogicVRegister& src1,
1638                               const LogicVRegister& src2) {
1639   int p_reg_bits_per_lane =
1640       LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1641   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1642     uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1643                               ? src1.Uint(vform, lane)
1644                               : src2.Uint(vform, lane);
1645     dst.SetUint(vform, lane, lane_value);
1646   }
1647   return dst;
1648 }
1649 
1650 
1651 LogicPRegister Simulator::sel(LogicPRegister dst,
1652                               const LogicPRegister& pg,
1653                               const LogicPRegister& src1,
1654                               const LogicPRegister& src2) {
1655   for (int i = 0; i < dst.GetChunkCount(); i++) {
1656     LogicPRegister::ChunkType mask = pg.GetChunk(i);
1657     LogicPRegister::ChunkType result =
1658         (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1659     dst.SetChunk(i, result);
1660   }
1661   return dst;
1662 }
1663 
1664 
1665 LogicVRegister Simulator::sli(VectorFormat vform,
1666                               LogicVRegister dst,
1667                               const LogicVRegister& src,
1668                               int shift) {
1669   dst.ClearForWrite(vform);
1670   int lane_count = LaneCountFromFormat(vform);
1671   for (int i = 0; i < lane_count; i++) {
1672     uint64_t src_lane = src.Uint(vform, i);
1673     uint64_t dst_lane = dst.Uint(vform, i);
1674     uint64_t shifted = src_lane << shift;
1675     uint64_t mask = MaxUintFromFormat(vform) << shift;
1676     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1677   }
1678   return dst;
1679 }
1680 
1681 
1682 LogicVRegister Simulator::sqshl(VectorFormat vform,
1683                                 LogicVRegister dst,
1684                                 const LogicVRegister& src,
1685                                 int shift) {
1686   VIXL_ASSERT(shift >= 0);
1687   SimVRegister temp;
1688   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1689   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1690 }
1691 
1692 
1693 LogicVRegister Simulator::uqshl(VectorFormat vform,
1694                                 LogicVRegister dst,
1695                                 const LogicVRegister& src,
1696                                 int shift) {
1697   VIXL_ASSERT(shift >= 0);
1698   SimVRegister temp;
1699   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1700   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1701 }
1702 
1703 
1704 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1705                                  LogicVRegister dst,
1706                                  const LogicVRegister& src,
1707                                  int shift) {
1708   VIXL_ASSERT(shift >= 0);
1709   SimVRegister temp;
1710   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1711   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1712 }
1713 
1714 
1715 LogicVRegister Simulator::sri(VectorFormat vform,
1716                               LogicVRegister dst,
1717                               const LogicVRegister& src,
1718                               int shift) {
1719   dst.ClearForWrite(vform);
1720   int lane_count = LaneCountFromFormat(vform);
1721   VIXL_ASSERT((shift > 0) &&
1722               (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1723   for (int i = 0; i < lane_count; i++) {
1724     uint64_t src_lane = src.Uint(vform, i);
1725     uint64_t dst_lane = dst.Uint(vform, i);
1726     uint64_t shifted;
1727     uint64_t mask;
1728     if (shift == 64) {
1729       shifted = 0;
1730       mask = 0;
1731     } else {
1732       shifted = src_lane >> shift;
1733       mask = MaxUintFromFormat(vform) >> shift;
1734     }
1735     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1736   }
1737   return dst;
1738 }
1739 
1740 
1741 LogicVRegister Simulator::ushr(VectorFormat vform,
1742                                LogicVRegister dst,
1743                                const LogicVRegister& src,
1744                                int shift) {
1745   VIXL_ASSERT(shift >= 0);
1746   SimVRegister temp;
1747   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1748   return ushl(vform, dst, src, shiftreg);
1749 }
1750 
1751 
1752 LogicVRegister Simulator::sshr(VectorFormat vform,
1753                                LogicVRegister dst,
1754                                const LogicVRegister& src,
1755                                int shift) {
1756   VIXL_ASSERT(shift >= 0);
1757   SimVRegister temp;
1758   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1759   return sshl(vform, dst, src, shiftreg);
1760 }
1761 
1762 
1763 LogicVRegister Simulator::ssra(VectorFormat vform,
1764                                LogicVRegister dst,
1765                                const LogicVRegister& src,
1766                                int shift) {
1767   SimVRegister temp;
1768   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1769   return add(vform, dst, dst, shifted_reg);
1770 }
1771 
1772 
1773 LogicVRegister Simulator::usra(VectorFormat vform,
1774                                LogicVRegister dst,
1775                                const LogicVRegister& src,
1776                                int shift) {
1777   SimVRegister temp;
1778   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1779   return add(vform, dst, dst, shifted_reg);
1780 }
1781 
1782 
1783 LogicVRegister Simulator::srsra(VectorFormat vform,
1784                                 LogicVRegister dst,
1785                                 const LogicVRegister& src,
1786                                 int shift) {
1787   SimVRegister temp;
1788   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1789   return add(vform, dst, dst, shifted_reg);
1790 }
1791 
1792 
1793 LogicVRegister Simulator::ursra(VectorFormat vform,
1794                                 LogicVRegister dst,
1795                                 const LogicVRegister& src,
1796                                 int shift) {
1797   SimVRegister temp;
1798   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1799   return add(vform, dst, dst, shifted_reg);
1800 }
1801 
1802 
1803 LogicVRegister Simulator::cls(VectorFormat vform,
1804                               LogicVRegister dst,
1805                               const LogicVRegister& src) {
1806   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1807   int lane_count = LaneCountFromFormat(vform);
1808 
1809   // Ensure that we can store one result per lane.
1810   int result[kZRegMaxSizeInBytes];
1811 
1812   for (int i = 0; i < lane_count; i++) {
1813     result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
1814   }
1815 
1816   dst.ClearForWrite(vform);
1817   for (int i = 0; i < lane_count; ++i) {
1818     dst.SetUint(vform, i, result[i]);
1819   }
1820   return dst;
1821 }
1822 
1823 
1824 LogicVRegister Simulator::clz(VectorFormat vform,
1825                               LogicVRegister dst,
1826                               const LogicVRegister& src) {
1827   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1828   int lane_count = LaneCountFromFormat(vform);
1829 
1830   // Ensure that we can store one result per lane.
1831   int result[kZRegMaxSizeInBytes];
1832 
1833   for (int i = 0; i < lane_count; i++) {
1834     result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
1835   }
1836 
1837   dst.ClearForWrite(vform);
1838   for (int i = 0; i < lane_count; ++i) {
1839     dst.SetUint(vform, i, result[i]);
1840   }
1841   return dst;
1842 }
1843 
1844 
1845 LogicVRegister Simulator::cnot(VectorFormat vform,
1846                                LogicVRegister dst,
1847                                const LogicVRegister& src) {
1848   dst.ClearForWrite(vform);
1849   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1850     uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
1851     dst.SetUint(vform, i, value);
1852   }
1853   return dst;
1854 }
1855 
1856 
1857 LogicVRegister Simulator::cnt(VectorFormat vform,
1858                               LogicVRegister dst,
1859                               const LogicVRegister& src) {
1860   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1861   int lane_count = LaneCountFromFormat(vform);
1862 
1863   // Ensure that we can store one result per lane.
1864   int result[kZRegMaxSizeInBytes];
1865 
1866   for (int i = 0; i < lane_count; i++) {
1867     result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
1868   }
1869 
1870   dst.ClearForWrite(vform);
1871   for (int i = 0; i < lane_count; ++i) {
1872     dst.SetUint(vform, i, result[i]);
1873   }
1874   return dst;
1875 }
1876 
1877 static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1878                                             int esize,
1879                                             bool shift_in_ls_byte) {
1880   if (shift_in_ls_byte) {
1881     // Neon uses the least-significant byte of the lane as the shift distance.
1882     shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1883   } else {
1884     // SVE uses a saturated shift distance in the range
1885     //  -(esize + 1) ... (esize + 1).
1886     if (shift_val > (esize + 1)) shift_val = esize + 1;
1887     if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1888   }
1889   return shift_val;
1890 }
1891 
// Signed shift of each lane of `src1` by the signed distance derived from the
// matching lane of `src2`: positive distances shift left, negative distances
// shift right with sign extension.
//
// `shift_in_ls_byte` is forwarded to CalculateSignedShiftDistance() and selects
// how the distance is derived from the `src2` lane (its least-significant byte,
// or the lane value clamped to +/-(esize + 1)).
//
// As well as writing the shifted lanes, this records per-lane signed
// saturation, unsigned saturation and rounding state on `dst`. The value
// written here is the plain shifted value; it is neither saturated nor rounded
// by this function.
LogicVRegister Simulator::sshl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool shift_in_ls_byte) {
  dst.ClearForWrite(vform);
  int esize = LaneSizeInBitsFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
                                                     esize,
                                                     shift_in_ls_byte);

    // Left-justified form of the lane (from IntLeftJustified()); it is only
    // used for the saturation tests below.
    int64_t lj_src_val = src1.IntLeftJustified(vform, i);

    // Set signed saturation state.
    if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
      dst.SetSignedSat(i, lj_src_val >= 0);
    }

    // Set unsigned saturation state.
    if (lj_src_val < 0) {
      dst.SetUnsignedSat(i, false);
    } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
               (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    int64_t src_val = src1.Int(vform, i);
    bool src_is_negative = src_val < 0;
    if (shift_val > 63) {
      // Left shift by 64 or more: the result lane is written as zero.
      dst.SetInt(vform, i, 0);
    } else if (shift_val < -63) {
      // Right shift by 64 or more: the result is all sign bits (-1 or 0), and
      // the rounding flag is set to whether the source was negative.
      dst.SetRounding(i, src_is_negative);
      dst.SetInt(vform, i, src_is_negative ? -1 : 0);
    } else {
      // Use unsigned types for shifts, as behaviour is undefined for signed
      // lhs.
      uint64_t usrc_val = static_cast<uint64_t>(src_val);

      if (shift_val < 0) {
        // Convert to right shift.
        // After negation shift_val is in [1, 63], so every shift count used
        // below is in [0, 63].
        shift_val = -shift_val;

        // Set rounding state by testing most-significant bit shifted out.
        // Rounding only needed on right shifts.
        if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }

        usrc_val >>= shift_val;

        if (src_is_negative) {
          // Simulate sign-extension.
          usrc_val |= (~UINT64_C(0) << (64 - shift_val));
        }
      } else {
        usrc_val <<= shift_val;
      }
      dst.SetUint(vform, i, usrc_val);
    }
  }
  return dst;
}
1955 
1956 
// Unsigned shift of each lane of `src1` by the signed distance derived from the
// matching lane of `src2`: positive distances shift left, negative distances
// shift right (logical).
//
// `shift_in_ls_byte` is forwarded to CalculateSignedShiftDistance() and selects
// how the distance is derived from the `src2` lane (its least-significant byte,
// or the lane value clamped to +/-(esize + 1)).
//
// As well as writing the shifted lanes, this records per-lane unsigned
// saturation and rounding state on `dst`. The value written here is the plain
// shifted value; it is neither saturated nor rounded by this function.
LogicVRegister Simulator::ushl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool shift_in_ls_byte) {
  dst.ClearForWrite(vform);
  int esize = LaneSizeInBitsFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
                                                     esize,
                                                     shift_in_ls_byte);

    // Left-justified form of the lane (from UintLeftJustified()); it is only
    // used for the saturation test below.
    uint64_t lj_src_val = src1.UintLeftJustified(vform, i);

    // Set saturation state.
    if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    uint64_t src_val = src1.Uint(vform, i);
    if ((shift_val > 63) || (shift_val < -64)) {
      // Left shift by 64 or more, or right shift by 65 or more: the result is
      // zero and no rounding state is recorded.
      dst.SetUint(vform, i, 0);
    } else {
      if (shift_val < 0) {
        // Set rounding state. Rounding only needed on right shifts.
        // shift_val is in [-64, -1] here, so the tested bit index is in
        // [0, 63].
        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }

        // A shift by exactly 64 cannot be applied to a uint64_t, so the
        // all-bits-shifted-out result is written directly.
        if (shift_val == -64) {
          src_val = 0;
        } else {
          src_val >>= -shift_val;
        }
      } else {
        src_val <<= shift_val;
      }
      dst.SetUint(vform, i, src_val);
    }
  }
  return dst;
}
1999 
2000 LogicVRegister Simulator::sshr(VectorFormat vform,
2001                                LogicVRegister dst,
2002                                const LogicVRegister& src1,
2003                                const LogicVRegister& src2) {
2004   SimVRegister temp;
2005   // Saturate to sidestep the min-int problem.
2006   neg(vform, temp, src2).SignedSaturate(vform);
2007   sshl(vform, dst, src1, temp, false);
2008   return dst;
2009 }
2010 
2011 LogicVRegister Simulator::ushr(VectorFormat vform,
2012                                LogicVRegister dst,
2013                                const LogicVRegister& src1,
2014                                const LogicVRegister& src2) {
2015   SimVRegister temp;
2016   // Saturate to sidestep the min-int problem.
2017   neg(vform, temp, src2).SignedSaturate(vform);
2018   ushl(vform, dst, src1, temp, false);
2019   return dst;
2020 }
2021 
2022 LogicVRegister Simulator::neg(VectorFormat vform,
2023                               LogicVRegister dst,
2024                               const LogicVRegister& src) {
2025   dst.ClearForWrite(vform);
2026   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2027     // Test for signed saturation.
2028     int64_t sa = src.Int(vform, i);
2029     if (sa == MinIntFromFormat(vform)) {
2030       dst.SetSignedSat(i, true);
2031     }
2032     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2033   }
2034   return dst;
2035 }
2036 
2037 
2038 LogicVRegister Simulator::suqadd(VectorFormat vform,
2039                                  LogicVRegister dst,
2040                                  const LogicVRegister& src1,
2041                                  const LogicVRegister& src2) {
2042   dst.ClearForWrite(vform);
2043   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2044     int64_t sa = src1.IntLeftJustified(vform, i);
2045     uint64_t ub = src2.UintLeftJustified(vform, i);
2046     uint64_t ur = sa + ub;
2047 
2048     int64_t sr;
2049     memcpy(&sr, &ur, sizeof(sr));
2050     if (sr < sa) {  // Test for signed positive saturation.
2051       dst.SetInt(vform, i, MaxIntFromFormat(vform));
2052     } else {
2053       dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
2054     }
2055   }
2056   return dst;
2057 }
2058 
2059 
2060 LogicVRegister Simulator::usqadd(VectorFormat vform,
2061                                  LogicVRegister dst,
2062                                  const LogicVRegister& src1,
2063                                  const LogicVRegister& src2) {
2064   dst.ClearForWrite(vform);
2065   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2066     uint64_t ua = src1.UintLeftJustified(vform, i);
2067     int64_t sb = src2.IntLeftJustified(vform, i);
2068     uint64_t ur = ua + sb;
2069 
2070     if ((sb > 0) && (ur <= ua)) {
2071       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
2072     } else if ((sb < 0) && (ur >= ua)) {
2073       dst.SetUint(vform, i, 0);  // Negative saturation.
2074     } else {
2075       dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
2076     }
2077   }
2078   return dst;
2079 }
2080 
2081 
2082 LogicVRegister Simulator::abs(VectorFormat vform,
2083                               LogicVRegister dst,
2084                               const LogicVRegister& src) {
2085   dst.ClearForWrite(vform);
2086   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2087     // Test for signed saturation.
2088     int64_t sa = src.Int(vform, i);
2089     if (sa == MinIntFromFormat(vform)) {
2090       dst.SetSignedSat(i, true);
2091     }
2092     if (sa < 0) {
2093       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2094     } else {
2095       dst.SetInt(vform, i, sa);
2096     }
2097   }
2098   return dst;
2099 }
2100 
2101 
2102 LogicVRegister Simulator::andv(VectorFormat vform,
2103                                LogicVRegister dst,
2104                                const LogicPRegister& pg,
2105                                const LogicVRegister& src) {
2106   VIXL_ASSERT(IsSVEFormat(vform));
2107   uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2108   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2109     if (!pg.IsActive(vform, i)) continue;
2110 
2111     result &= src.Uint(vform, i);
2112   }
2113   VectorFormat vform_dst =
2114       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2115   dst.ClearForWrite(vform_dst);
2116   dst.SetUint(vform_dst, 0, result);
2117   return dst;
2118 }
2119 
2120 
2121 LogicVRegister Simulator::eorv(VectorFormat vform,
2122                                LogicVRegister dst,
2123                                const LogicPRegister& pg,
2124                                const LogicVRegister& src) {
2125   VIXL_ASSERT(IsSVEFormat(vform));
2126   uint64_t result = 0;
2127   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2128     if (!pg.IsActive(vform, i)) continue;
2129 
2130     result ^= src.Uint(vform, i);
2131   }
2132   VectorFormat vform_dst =
2133       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2134   dst.ClearForWrite(vform_dst);
2135   dst.SetUint(vform_dst, 0, result);
2136   return dst;
2137 }
2138 
2139 
2140 LogicVRegister Simulator::orv(VectorFormat vform,
2141                               LogicVRegister dst,
2142                               const LogicPRegister& pg,
2143                               const LogicVRegister& src) {
2144   VIXL_ASSERT(IsSVEFormat(vform));
2145   uint64_t result = 0;
2146   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2147     if (!pg.IsActive(vform, i)) continue;
2148 
2149     result |= src.Uint(vform, i);
2150   }
2151   VectorFormat vform_dst =
2152       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2153   dst.ClearForWrite(vform_dst);
2154   dst.SetUint(vform_dst, 0, result);
2155   return dst;
2156 }
2157 
2158 
2159 LogicVRegister Simulator::saddv(VectorFormat vform,
2160                                 LogicVRegister dst,
2161                                 const LogicPRegister& pg,
2162                                 const LogicVRegister& src) {
2163   VIXL_ASSERT(IsSVEFormat(vform));
2164   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2165   int64_t result = 0;
2166   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2167     if (!pg.IsActive(vform, i)) continue;
2168 
2169     // The destination register always has D-lane sizes and the source register
2170     // always has S-lanes or smaller, so signed integer overflow -- undefined
2171     // behaviour -- can't occur.
2172     result += src.Int(vform, i);
2173   }
2174 
2175   dst.ClearForWrite(kFormatD);
2176   dst.SetInt(kFormatD, 0, result);
2177   return dst;
2178 }
2179 
2180 
2181 LogicVRegister Simulator::uaddv(VectorFormat vform,
2182                                 LogicVRegister dst,
2183                                 const LogicPRegister& pg,
2184                                 const LogicVRegister& src) {
2185   VIXL_ASSERT(IsSVEFormat(vform));
2186   uint64_t result = 0;
2187   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2188     if (!pg.IsActive(vform, i)) continue;
2189 
2190     result += src.Uint(vform, i);
2191   }
2192 
2193   dst.ClearForWrite(kFormatD);
2194   dst.SetUint(kFormatD, 0, result);
2195   return dst;
2196 }
2197 
2198 
// Narrow each lane of `src` to the lane size of `dstform`, writing the
// truncated values to `dst` and recording per-lane signed and unsigned
// saturation state on `dst`. No saturation is applied here.
//
// The source format is derived from `dstform` by doubling the lane width. When
// `dstform` is 16B, 8H or 4S, the source has half as many lanes as `dstform`,
// the results are written to the upper half of `dst` (starting at lane
// LaneCountFromFormat(dstform) / 2), and `dst` is not cleared. For any other
// `dstform` the results start at lane 0 and ClearForWrite(dstform) is applied
// afterwards.
//
// `src_is_signed` selects whether the source lanes are read as signed or
// unsigned values; `dst_is_signed` selects whether the result is stored with
// SetInt() or SetUint().
LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
                                        LogicVRegister dst,
                                        bool dst_is_signed,
                                        const LogicVRegister& src,
                                        bool src_is_signed) {
  bool upperhalf = false;
  VectorFormat srcform = dstform;
  if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
      (dstform == kFormat4S)) {
    upperhalf = true;
    srcform = VectorFormatHalfLanes(srcform);
  }
  srcform = VectorFormatDoubleWidth(srcform);

  // NOTE(review): presumably taken so that source reads go through a separate
  // LogicVRegister object from `dst` -- confirm whether this copy also guards
  // against `dst` and `src` sharing storage.
  LogicVRegister src_copy = src;

  // Index of the first destination lane to write.
  int offset;
  if (upperhalf) {
    offset = LaneCountFromFormat(dstform) / 2;
  } else {
    offset = 0;
  }

  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
    int64_t ssrc = src_copy.Int(srcform, i);
    uint64_t usrc = src_copy.Uint(srcform, i);

    // Test for signed saturation
    if (ssrc > MaxIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, true);
    } else if (ssrc < MinIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, false);
    }

    // Test for unsigned saturation
    if (src_is_signed) {
      if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
        dst.SetUnsignedSat(offset + i, true);
      } else if (ssrc < 0) {
        dst.SetUnsignedSat(offset + i, false);
      }
    } else {
      if (usrc > MaxUintFromFormat(dstform)) {
        dst.SetUnsignedSat(offset + i, true);
      }
    }

    // Keep only the bits that fit in a destination lane.
    int64_t result;
    if (src_is_signed) {
      result = ssrc & MaxUintFromFormat(dstform);
    } else {
      result = usrc & MaxUintFromFormat(dstform);
    }

    if (dst_is_signed) {
      dst.SetInt(dstform, offset + i, result);
    } else {
      dst.SetUint(dstform, offset + i, result);
    }
  }

  // Only the lower-half forms clear the destination; the upper-half forms
  // leave the rest of `dst` untouched.
  if (!upperhalf) {
    dst.ClearForWrite(dstform);
  }
  return dst;
}
2265 
2266 
2267 LogicVRegister Simulator::xtn(VectorFormat vform,
2268                               LogicVRegister dst,
2269                               const LogicVRegister& src) {
2270   return extractnarrow(vform, dst, true, src, true);
2271 }
2272 
2273 
2274 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2275                                 LogicVRegister dst,
2276                                 const LogicVRegister& src) {
2277   return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2278 }
2279 
2280 
2281 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2282                                  LogicVRegister dst,
2283                                  const LogicVRegister& src) {
2284   return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2285 }
2286 
2287 
2288 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2289                                 LogicVRegister dst,
2290                                 const LogicVRegister& src) {
2291   return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2292 }
2293 
2294 
2295 LogicVRegister Simulator::absdiff(VectorFormat vform,
2296                                   LogicVRegister dst,
2297                                   const LogicVRegister& src1,
2298                                   const LogicVRegister& src2,
2299                                   bool is_signed) {
2300   dst.ClearForWrite(vform);
2301   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2302     bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2303                                   : (src1.Uint(vform, i) > src2.Uint(vform, i));
2304     // Always calculate the answer using unsigned arithmetic, to avoid
2305     // implementation-defined signed overflow.
2306     if (src1_gt_src2) {
2307       dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2308     } else {
2309       dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2310     }
2311   }
2312   return dst;
2313 }
2314 
2315 
2316 LogicVRegister Simulator::saba(VectorFormat vform,
2317                                LogicVRegister dst,
2318                                const LogicVRegister& src1,
2319                                const LogicVRegister& src2) {
2320   SimVRegister temp;
2321   dst.ClearForWrite(vform);
2322   absdiff(vform, temp, src1, src2, true);
2323   add(vform, dst, dst, temp);
2324   return dst;
2325 }
2326 
2327 
2328 LogicVRegister Simulator::uaba(VectorFormat vform,
2329                                LogicVRegister dst,
2330                                const LogicVRegister& src1,
2331                                const LogicVRegister& src2) {
2332   SimVRegister temp;
2333   dst.ClearForWrite(vform);
2334   absdiff(vform, temp, src1, src2, false);
2335   add(vform, dst, dst, temp);
2336   return dst;
2337 }
2338 
2339 
2340 LogicVRegister Simulator::not_(VectorFormat vform,
2341                                LogicVRegister dst,
2342                                const LogicVRegister& src) {
2343   dst.ClearForWrite(vform);
2344   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2345     dst.SetUint(vform, i, ~src.Uint(vform, i));
2346   }
2347   return dst;
2348 }
2349 
2350 
2351 LogicVRegister Simulator::rbit(VectorFormat vform,
2352                                LogicVRegister dst,
2353                                const LogicVRegister& src) {
2354   uint64_t result[kZRegMaxSizeInBytes];
2355   int lane_count = LaneCountFromFormat(vform);
2356   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2357   uint64_t reversed_value;
2358   uint64_t value;
2359   for (int i = 0; i < lane_count; i++) {
2360     value = src.Uint(vform, i);
2361     reversed_value = 0;
2362     for (int j = 0; j < lane_size_in_bits; j++) {
2363       reversed_value = (reversed_value << 1) | (value & 1);
2364       value >>= 1;
2365     }
2366     result[i] = reversed_value;
2367   }
2368 
2369   dst.ClearForWrite(vform);
2370   for (int i = 0; i < lane_count; ++i) {
2371     dst.SetUint(vform, i, result[i]);
2372   }
2373   return dst;
2374 }
2375 
2376 
2377 LogicVRegister Simulator::rev(VectorFormat vform,
2378                               LogicVRegister dst,
2379                               const LogicVRegister& src) {
2380   VIXL_ASSERT(IsSVEFormat(vform));
2381   int lane_count = LaneCountFromFormat(vform);
2382   for (int i = 0; i < lane_count / 2; i++) {
2383     uint64_t t = src.Uint(vform, i);
2384     dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2385     dst.SetUint(vform, lane_count - i - 1, t);
2386   }
2387   return dst;
2388 }
2389 
2390 
2391 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2392                                    LogicVRegister dst,
2393                                    const LogicVRegister& src,
2394                                    int rev_size) {
2395   uint64_t result[kZRegMaxSizeInBytes] = {};
2396   int lane_count = LaneCountFromFormat(vform);
2397   int lane_size = LaneSizeInBytesFromFormat(vform);
2398   int lanes_per_loop = rev_size / lane_size;
2399   for (int i = 0; i < lane_count; i += lanes_per_loop) {
2400     for (int j = 0; j < lanes_per_loop; j++) {
2401       result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2402     }
2403   }
2404   dst.ClearForWrite(vform);
2405   for (int i = 0; i < lane_count; ++i) {
2406     dst.SetUint(vform, i, result[i]);
2407   }
2408   return dst;
2409 }
2410 
2411 
2412 LogicVRegister Simulator::rev16(VectorFormat vform,
2413                                 LogicVRegister dst,
2414                                 const LogicVRegister& src) {
2415   return rev_byte(vform, dst, src, 2);
2416 }
2417 
2418 
2419 LogicVRegister Simulator::rev32(VectorFormat vform,
2420                                 LogicVRegister dst,
2421                                 const LogicVRegister& src) {
2422   return rev_byte(vform, dst, src, 4);
2423 }
2424 
2425 
2426 LogicVRegister Simulator::rev64(VectorFormat vform,
2427                                 LogicVRegister dst,
2428                                 const LogicVRegister& src) {
2429   return rev_byte(vform, dst, src, 8);
2430 }
2431 
2432 LogicVRegister Simulator::addlp(VectorFormat vform,
2433                                 LogicVRegister dst,
2434                                 const LogicVRegister& src,
2435                                 bool is_signed,
2436                                 bool do_accumulate) {
2437   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2438   VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
2439 
2440   uint64_t result[kZRegMaxSizeInBytes];
2441   int lane_count = LaneCountFromFormat(vform);
2442   for (int i = 0; i < lane_count; i++) {
2443     if (is_signed) {
2444       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2445                                         src.Int(vformsrc, 2 * i + 1));
2446     } else {
2447       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2448     }
2449   }
2450 
2451   dst.ClearForWrite(vform);
2452   for (int i = 0; i < lane_count; ++i) {
2453     if (do_accumulate) {
2454       result[i] += dst.Uint(vform, i);
2455     }
2456     dst.SetUint(vform, i, result[i]);
2457   }
2458 
2459   return dst;
2460 }
2461 
2462 
2463 LogicVRegister Simulator::saddlp(VectorFormat vform,
2464                                  LogicVRegister dst,
2465                                  const LogicVRegister& src) {
2466   return addlp(vform, dst, src, true, false);
2467 }
2468 
2469 
2470 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2471                                  LogicVRegister dst,
2472                                  const LogicVRegister& src) {
2473   return addlp(vform, dst, src, false, false);
2474 }
2475 
2476 
2477 LogicVRegister Simulator::sadalp(VectorFormat vform,
2478                                  LogicVRegister dst,
2479                                  const LogicVRegister& src) {
2480   return addlp(vform, dst, src, true, true);
2481 }
2482 
2483 
2484 LogicVRegister Simulator::uadalp(VectorFormat vform,
2485                                  LogicVRegister dst,
2486                                  const LogicVRegister& src) {
2487   return addlp(vform, dst, src, false, true);
2488 }
2489 
2490 LogicVRegister Simulator::ror(VectorFormat vform,
2491                               LogicVRegister dst,
2492                               const LogicVRegister& src,
2493                               int rotation) {
2494   int width = LaneSizeInBitsFromFormat(vform);
2495   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2496     uint64_t value = src.Uint(vform, i);
2497     dst.SetUint(vform, i, RotateRight(value, rotation, width));
2498   }
2499   return dst;
2500 }
2501 
2502 LogicVRegister Simulator::ext(VectorFormat vform,
2503                               LogicVRegister dst,
2504                               const LogicVRegister& src1,
2505                               const LogicVRegister& src2,
2506                               int index) {
2507   uint8_t result[kZRegMaxSizeInBytes] = {};
2508   int lane_count = LaneCountFromFormat(vform);
2509   for (int i = 0; i < lane_count - index; ++i) {
2510     result[i] = src1.Uint(vform, i + index);
2511   }
2512   for (int i = 0; i < index; ++i) {
2513     result[lane_count - index + i] = src2.Uint(vform, i);
2514   }
2515   dst.ClearForWrite(vform);
2516   for (int i = 0; i < lane_count; ++i) {
2517     dst.SetUint(vform, i, result[i]);
2518   }
2519   return dst;
2520 }
2521 
2522 LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
2523                                                 LogicVRegister dst,
2524                                                 const LogicVRegister& src,
2525                                                 int index) {
2526   if (index < 0) index += LaneCountFromFormat(vform);
2527   VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
2528   index *= LaneSizeInBytesFromFormat(vform);
2529   return ext(kFormatVnB, dst, src, src, index);
2530 }
2531 
2532 
2533 template <typename T>
2534 LogicVRegister Simulator::fadda(VectorFormat vform,
2535                                 LogicVRegister acc,
2536                                 const LogicPRegister& pg,
2537                                 const LogicVRegister& src) {
2538   T result = acc.Float<T>(0);
2539   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2540     if (!pg.IsActive(vform, i)) continue;
2541 
2542     result = FPAdd(result, src.Float<T>(i));
2543   }
2544   VectorFormat vform_dst =
2545       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2546   acc.ClearForWrite(vform_dst);
2547   acc.SetFloat(0, result);
2548   return acc;
2549 }
2550 
2551 LogicVRegister Simulator::fadda(VectorFormat vform,
2552                                 LogicVRegister acc,
2553                                 const LogicPRegister& pg,
2554                                 const LogicVRegister& src) {
2555   switch (LaneSizeInBitsFromFormat(vform)) {
2556     case kHRegSize:
2557       fadda<SimFloat16>(vform, acc, pg, src);
2558       break;
2559     case kSRegSize:
2560       fadda<float>(vform, acc, pg, src);
2561       break;
2562     case kDRegSize:
2563       fadda<double>(vform, acc, pg, src);
2564       break;
2565     default:
2566       VIXL_UNREACHABLE();
2567   }
2568   return acc;
2569 }
2570 
2571 template <typename T>
2572 LogicVRegister Simulator::fcadd(VectorFormat vform,
2573                                 LogicVRegister dst,          // d
2574                                 const LogicVRegister& src1,  // n
2575                                 const LogicVRegister& src2,  // m
2576                                 int rot) {
2577   int elements = LaneCountFromFormat(vform);
2578 
2579   T element1, element3;
2580   rot = (rot == 1) ? 270 : 90;
2581 
2582   // Loop example:
2583   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2584   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2585 
2586   for (int e = 0; e <= (elements / 2) - 1; e++) {
2587     switch (rot) {
2588       case 90:
2589         element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2590         element3 = src2.Float<T>(e * 2);
2591         break;
2592       case 270:
2593         element1 = src2.Float<T>(e * 2 + 1);
2594         element3 = FPNeg(src2.Float<T>(e * 2));
2595         break;
2596       default:
2597         VIXL_UNREACHABLE();
2598         return dst;  // prevents "element(n) may be unintialized" errors
2599     }
2600     dst.ClearForWrite(vform);
2601     dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2602     dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2603   }
2604   return dst;
2605 }
2606 
2607 
2608 LogicVRegister Simulator::fcadd(VectorFormat vform,
2609                                 LogicVRegister dst,          // d
2610                                 const LogicVRegister& src1,  // n
2611                                 const LogicVRegister& src2,  // m
2612                                 int rot) {
2613   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2614     fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2615   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2616     fcadd<float>(vform, dst, src1, src2, rot);
2617   } else {
2618     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2619     fcadd<double>(vform, dst, src1, src2, rot);
2620   }
2621   return dst;
2622 }
2623 
2624 template <typename T>
2625 LogicVRegister Simulator::fcmla(VectorFormat vform,
2626                                 LogicVRegister dst,
2627                                 const LogicVRegister& src1,
2628                                 const LogicVRegister& src2,
2629                                 const LogicVRegister& acc,
2630                                 int index,
2631                                 int rot) {
2632   int elements = LaneCountFromFormat(vform);
2633 
2634   T element1, element2, element3, element4;
2635   rot *= 90;
2636 
2637   // Loop example:
2638   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2639   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2640 
2641   for (int e = 0; e <= (elements / 2) - 1; e++) {
2642     // Index == -1 indicates a vector/vector rather than vector/indexed-element
2643     // operation.
2644     int f = (index < 0) ? e : index;
2645 
2646     switch (rot) {
2647       case 0:
2648         element1 = src2.Float<T>(f * 2);
2649         element2 = src1.Float<T>(e * 2);
2650         element3 = src2.Float<T>(f * 2 + 1);
2651         element4 = src1.Float<T>(e * 2);
2652         break;
2653       case 90:
2654         element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2655         element2 = src1.Float<T>(e * 2 + 1);
2656         element3 = src2.Float<T>(f * 2);
2657         element4 = src1.Float<T>(e * 2 + 1);
2658         break;
2659       case 180:
2660         element1 = FPNeg(src2.Float<T>(f * 2));
2661         element2 = src1.Float<T>(e * 2);
2662         element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2663         element4 = src1.Float<T>(e * 2);
2664         break;
2665       case 270:
2666         element1 = src2.Float<T>(f * 2 + 1);
2667         element2 = src1.Float<T>(e * 2 + 1);
2668         element3 = FPNeg(src2.Float<T>(f * 2));
2669         element4 = src1.Float<T>(e * 2 + 1);
2670         break;
2671       default:
2672         VIXL_UNREACHABLE();
2673         return dst;  // prevents "element(n) may be unintialized" errors
2674     }
2675     dst.ClearForWrite(vform);
2676     dst.SetFloat<T>(vform,
2677                     e * 2,
2678                     FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2679     dst.SetFloat<T>(vform,
2680                     e * 2 + 1,
2681                     FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2682   }
2683   return dst;
2684 }
2685 
2686 LogicVRegister Simulator::fcmla(VectorFormat vform,
2687                                 LogicVRegister dst,
2688                                 const LogicVRegister& src1,
2689                                 const LogicVRegister& src2,
2690                                 const LogicVRegister& acc,
2691                                 int rot) {
2692   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2693     fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2694   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2695     fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2696   } else {
2697     fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2698   }
2699   return dst;
2700 }
2701 
2702 
2703 LogicVRegister Simulator::fcmla(VectorFormat vform,
2704                                 LogicVRegister dst,          // d
2705                                 const LogicVRegister& src1,  // n
2706                                 const LogicVRegister& src2,  // m
2707                                 int index,
2708                                 int rot) {
2709   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2710     VIXL_UNIMPLEMENTED();
2711   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2712     fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2713   } else {
2714     fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2715   }
2716   return dst;
2717 }
2718 
2719 LogicVRegister Simulator::cadd(VectorFormat vform,
2720                                LogicVRegister dst,
2721                                const LogicVRegister& src1,
2722                                const LogicVRegister& src2,
2723                                int rot,
2724                                bool saturate) {
2725   SimVRegister src1_r, src1_i;
2726   SimVRegister src2_r, src2_i;
2727   SimVRegister zero;
2728   zero.Clear();
2729   uzp1(vform, src1_r, src1, zero);
2730   uzp2(vform, src1_i, src1, zero);
2731   uzp1(vform, src2_r, src2, zero);
2732   uzp2(vform, src2_i, src2, zero);
2733 
2734   if (rot == 90) {
2735     if (saturate) {
2736       sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2737       add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2738     } else {
2739       sub(vform, src1_r, src1_r, src2_i);
2740       add(vform, src1_i, src1_i, src2_r);
2741     }
2742   } else {
2743     VIXL_ASSERT(rot == 270);
2744     if (saturate) {
2745       add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2746       sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2747     } else {
2748       add(vform, src1_r, src1_r, src2_i);
2749       sub(vform, src1_i, src1_i, src2_r);
2750     }
2751   }
2752 
2753   zip1(vform, dst, src1_r, src1_i);
2754   return dst;
2755 }
2756 
2757 LogicVRegister Simulator::cmla(VectorFormat vform,
2758                                LogicVRegister dst,
2759                                const LogicVRegister& srca,
2760                                const LogicVRegister& src1,
2761                                const LogicVRegister& src2,
2762                                int rot) {
2763   SimVRegister src1_a;
2764   SimVRegister src2_a, src2_b;
2765   SimVRegister srca_i, srca_r;
2766   SimVRegister zero, temp;
2767   zero.Clear();
2768 
2769   if ((rot == 0) || (rot == 180)) {
2770     uzp1(vform, src1_a, src1, zero);
2771     uzp1(vform, src2_a, src2, zero);
2772     uzp2(vform, src2_b, src2, zero);
2773   } else {
2774     uzp2(vform, src1_a, src1, zero);
2775     uzp2(vform, src2_a, src2, zero);
2776     uzp1(vform, src2_b, src2, zero);
2777   }
2778 
2779   uzp1(vform, srca_r, srca, zero);
2780   uzp2(vform, srca_i, srca, zero);
2781 
2782   bool sub_r = (rot == 90) || (rot == 180);
2783   bool sub_i = (rot == 180) || (rot == 270);
2784 
2785   mul(vform, temp, src1_a, src2_a);
2786   if (sub_r) {
2787     sub(vform, srca_r, srca_r, temp);
2788   } else {
2789     add(vform, srca_r, srca_r, temp);
2790   }
2791 
2792   mul(vform, temp, src1_a, src2_b);
2793   if (sub_i) {
2794     sub(vform, srca_i, srca_i, temp);
2795   } else {
2796     add(vform, srca_i, srca_i, temp);
2797   }
2798 
2799   zip1(vform, dst, srca_r, srca_i);
2800   return dst;
2801 }
2802 
2803 LogicVRegister Simulator::cmla(VectorFormat vform,
2804                                LogicVRegister dst,
2805                                const LogicVRegister& srca,
2806                                const LogicVRegister& src1,
2807                                const LogicVRegister& src2,
2808                                int index,
2809                                int rot) {
2810   SimVRegister temp;
2811   dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
2812   return cmla(vform, dst, srca, src1, temp, rot);
2813 }
2814 
2815 LogicVRegister Simulator::bgrp(VectorFormat vform,
2816                                LogicVRegister dst,
2817                                const LogicVRegister& src1,
2818                                const LogicVRegister& src2,
2819                                bool do_bext) {
2820   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2821     uint64_t value = src1.Uint(vform, i);
2822     uint64_t mask = src2.Uint(vform, i);
2823     int high_pos = 0;
2824     int low_pos = 0;
2825     uint64_t result_high = 0;
2826     uint64_t result_low = 0;
2827     for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2828       if ((mask & 1) == 0) {
2829         result_high |= (value & 1) << high_pos;
2830         high_pos++;
2831       } else {
2832         result_low |= (value & 1) << low_pos;
2833         low_pos++;
2834       }
2835       mask >>= 1;
2836       value >>= 1;
2837     }
2838 
2839     if (!do_bext) {
2840       result_low |= result_high << low_pos;
2841     }
2842 
2843     dst.SetUint(vform, i, result_low);
2844   }
2845   return dst;
2846 }
2847 
2848 LogicVRegister Simulator::bdep(VectorFormat vform,
2849                                LogicVRegister dst,
2850                                const LogicVRegister& src1,
2851                                const LogicVRegister& src2) {
2852   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2853     uint64_t value = src1.Uint(vform, i);
2854     uint64_t mask = src2.Uint(vform, i);
2855     uint64_t result = 0;
2856     for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2857       if ((mask & 1) == 1) {
2858         result |= (value & 1) << j;
2859         value >>= 1;
2860       }
2861       mask >>= 1;
2862     }
2863     dst.SetUint(vform, i, result);
2864   }
2865   return dst;
2866 }
2867 
2868 LogicVRegister Simulator::histogram(VectorFormat vform,
2869                                     LogicVRegister dst,
2870                                     const LogicPRegister& pg,
2871                                     const LogicVRegister& src1,
2872                                     const LogicVRegister& src2,
2873                                     bool do_segmented) {
2874   int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
2875   uint64_t result[kZRegMaxSizeInBytes];
2876 
2877   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2878     uint64_t count = 0;
2879     uint64_t value = src1.Uint(vform, i);
2880 
2881     int segment = do_segmented ? (i / elements_per_segment) : 0;
2882     int segment_offset = segment * elements_per_segment;
2883     int hist_limit = do_segmented ? elements_per_segment : (i + 1);
2884     for (int j = 0; j < hist_limit; j++) {
2885       if (pg.IsActive(vform, j) &&
2886           (value == src2.Uint(vform, j + segment_offset))) {
2887         count++;
2888       }
2889     }
2890     result[i] = count;
2891   }
2892   dst.SetUintArray(vform, result);
2893   return dst;
2894 }
2895 
2896 LogicVRegister Simulator::dup_element(VectorFormat vform,
2897                                       LogicVRegister dst,
2898                                       const LogicVRegister& src,
2899                                       int src_index) {
2900   if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
2901     // When duplicating an element larger than 64 bits, split the element into
2902     // 64-bit parts, and duplicate the parts across the destination.
2903     uint64_t d[4];
2904     int count = (vform == kFormatVnQ) ? 2 : 4;
2905     for (int i = 0; i < count; i++) {
2906       d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
2907     }
2908     dst.Clear();
2909     for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
2910       dst.SetUint(kFormatVnD, i, d[i % count]);
2911     }
2912   } else {
2913     int lane_count = LaneCountFromFormat(vform);
2914     uint64_t value = src.Uint(vform, src_index);
2915     dst.ClearForWrite(vform);
2916     for (int i = 0; i < lane_count; ++i) {
2917       dst.SetUint(vform, i, value);
2918     }
2919   }
2920   return dst;
2921 }
2922 
2923 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2924                                                    LogicVRegister dst,
2925                                                    const LogicVRegister& src,
2926                                                    int src_index) {
2927   // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2928   // whereas in NEON, the size of segment is equal to the size of register
2929   // itself.
2930   int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2931   VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2932   int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2933 
2934   VIXL_ASSERT(src_index >= 0);
2935   VIXL_ASSERT(src_index < lanes_per_segment);
2936 
2937   dst.ClearForWrite(vform);
2938   for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2939     uint64_t value = src.Uint(vform, j + src_index);
2940     for (int i = 0; i < lanes_per_segment; i++) {
2941       dst.SetUint(vform, j + i, value);
2942     }
2943   }
2944   return dst;
2945 }
2946 
2947 LogicVRegister Simulator::dup_elements_to_segments(
2948     VectorFormat vform,
2949     LogicVRegister dst,
2950     const std::pair<int, int>& src_and_index) {
2951   return dup_elements_to_segments(vform,
2952                                   dst,
2953                                   ReadVRegister(src_and_index.first),
2954                                   src_and_index.second);
2955 }
2956 
2957 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2958                                         LogicVRegister dst,
2959                                         uint64_t imm) {
2960   int lane_count = LaneCountFromFormat(vform);
2961   uint64_t value = imm & MaxUintFromFormat(vform);
2962   dst.ClearForWrite(vform);
2963   for (int i = 0; i < lane_count; ++i) {
2964     dst.SetUint(vform, i, value);
2965   }
2966   return dst;
2967 }
2968 
2969 
2970 LogicVRegister Simulator::ins_element(VectorFormat vform,
2971                                       LogicVRegister dst,
2972                                       int dst_index,
2973                                       const LogicVRegister& src,
2974                                       int src_index) {
2975   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2976   return dst;
2977 }
2978 
2979 
2980 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2981                                         LogicVRegister dst,
2982                                         int dst_index,
2983                                         uint64_t imm) {
2984   uint64_t value = imm & MaxUintFromFormat(vform);
2985   dst.SetUint(vform, dst_index, value);
2986   return dst;
2987 }
2988 
2989 
2990 LogicVRegister Simulator::index(VectorFormat vform,
2991                                 LogicVRegister dst,
2992                                 uint64_t start,
2993                                 uint64_t step) {
2994   VIXL_ASSERT(IsSVEFormat(vform));
2995   uint64_t value = start;
2996   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2997     dst.SetUint(vform, i, value);
2998     value += step;
2999   }
3000   return dst;
3001 }
3002 
3003 
3004 LogicVRegister Simulator::insr(VectorFormat vform,
3005                                LogicVRegister dst,
3006                                uint64_t imm) {
3007   VIXL_ASSERT(IsSVEFormat(vform));
3008   for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
3009     dst.SetUint(vform, i, dst.Uint(vform, i - 1));
3010   }
3011   dst.SetUint(vform, 0, imm);
3012   return dst;
3013 }
3014 
3015 
3016 LogicVRegister Simulator::mov(VectorFormat vform,
3017                               LogicVRegister dst,
3018                               const LogicVRegister& src) {
3019   dst.ClearForWrite(vform);
3020   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
3021     dst.SetUint(vform, lane, src.Uint(vform, lane));
3022   }
3023   return dst;
3024 }
3025 
3026 
3027 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
3028   // Avoid a copy if the registers already alias.
3029   if (dst.Aliases(src)) return dst;
3030 
3031   for (int i = 0; i < dst.GetChunkCount(); i++) {
3032     dst.SetChunk(i, src.GetChunk(i));
3033   }
3034   return dst;
3035 }
3036 
3037 
3038 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3039                                       LogicVRegister dst,
3040                                       const SimPRegister& pg,
3041                                       const LogicVRegister& src) {
3042   return sel(vform, dst, pg, src, dst);
3043 }
3044 
3045 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3046                                       LogicVRegister dst,
3047                                       const SimPRegister& pg,
3048                                       const LogicVRegister& src) {
3049   SimVRegister zero;
3050   dup_immediate(vform, zero, 0);
3051   return sel(vform, dst, pg, src, zero);
3052 }
3053 
3054 LogicVRegister Simulator::mov_alternating(VectorFormat vform,
3055                                           LogicVRegister dst,
3056                                           const LogicVRegister& src,
3057                                           int start_at) {
3058   VIXL_ASSERT((start_at == 0) || (start_at == 1));
3059   for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
3060     dst.SetUint(vform, i, src.Uint(vform, i));
3061   }
3062   return dst;
3063 }
3064 
3065 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3066                                       const LogicPRegister& pg,
3067                                       const LogicPRegister& src) {
3068   return sel(dst, pg, src, dst);
3069 }
3070 
3071 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3072                                       const LogicPRegister& pg,
3073                                       const LogicPRegister& src) {
3074   SimPRegister all_false;
3075   return sel(dst, pg, src, pfalse(all_false));
3076 }
3077 
3078 LogicVRegister Simulator::movi(VectorFormat vform,
3079                                LogicVRegister dst,
3080                                uint64_t imm) {
3081   int lane_count = LaneCountFromFormat(vform);
3082   dst.ClearForWrite(vform);
3083   for (int i = 0; i < lane_count; ++i) {
3084     dst.SetUint(vform, i, imm);
3085   }
3086   return dst;
3087 }
3088 
3089 
3090 LogicVRegister Simulator::mvni(VectorFormat vform,
3091                                LogicVRegister dst,
3092                                uint64_t imm) {
3093   int lane_count = LaneCountFromFormat(vform);
3094   dst.ClearForWrite(vform);
3095   for (int i = 0; i < lane_count; ++i) {
3096     dst.SetUint(vform, i, ~imm);
3097   }
3098   return dst;
3099 }
3100 
3101 
3102 LogicVRegister Simulator::orr(VectorFormat vform,
3103                               LogicVRegister dst,
3104                               const LogicVRegister& src,
3105                               uint64_t imm) {
3106   uint64_t result[16];
3107   int lane_count = LaneCountFromFormat(vform);
3108   for (int i = 0; i < lane_count; ++i) {
3109     result[i] = src.Uint(vform, i) | imm;
3110   }
3111   dst.ClearForWrite(vform);
3112   for (int i = 0; i < lane_count; ++i) {
3113     dst.SetUint(vform, i, result[i]);
3114   }
3115   return dst;
3116 }
3117 
3118 
3119 LogicVRegister Simulator::uxtl(VectorFormat vform,
3120                                LogicVRegister dst,
3121                                const LogicVRegister& src,
3122                                bool is_2) {
3123   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3124   int lane_count = LaneCountFromFormat(vform);
3125   int src_offset = is_2 ? lane_count : 0;
3126 
3127   dst.ClearForWrite(vform);
3128   for (int i = 0; i < lane_count; i++) {
3129     dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
3130   }
3131   return dst;
3132 }
3133 
3134 
3135 LogicVRegister Simulator::sxtl(VectorFormat vform,
3136                                LogicVRegister dst,
3137                                const LogicVRegister& src,
3138                                bool is_2) {
3139   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3140   int lane_count = LaneCountFromFormat(vform);
3141   int src_offset = is_2 ? lane_count : 0;
3142 
3143   dst.ClearForWrite(vform);
3144   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3145     dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
3146   }
3147   return dst;
3148 }
3149 
3150 
3151 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3152                                 LogicVRegister dst,
3153                                 const LogicVRegister& src) {
3154   return uxtl(vform, dst, src, /* is_2 = */ true);
3155 }
3156 
3157 
3158 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3159                                 LogicVRegister dst,
3160                                 const LogicVRegister& src) {
3161   return sxtl(vform, dst, src, /* is_2 = */ true);
3162 }
3163 
3164 
3165 LogicVRegister Simulator::uxt(VectorFormat vform,
3166                               LogicVRegister dst,
3167                               const LogicVRegister& src,
3168                               unsigned from_size_in_bits) {
3169   int lane_count = LaneCountFromFormat(vform);
3170   uint64_t mask = GetUintMask(from_size_in_bits);
3171 
3172   dst.ClearForWrite(vform);
3173   for (int i = 0; i < lane_count; i++) {
3174     dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3175   }
3176   return dst;
3177 }
3178 
3179 
3180 LogicVRegister Simulator::sxt(VectorFormat vform,
3181                               LogicVRegister dst,
3182                               const LogicVRegister& src,
3183                               unsigned from_size_in_bits) {
3184   int lane_count = LaneCountFromFormat(vform);
3185 
3186   dst.ClearForWrite(vform);
3187   for (int i = 0; i < lane_count; i++) {
3188     uint64_t value =
3189         ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3190     dst.SetInt(vform, i, value);
3191   }
3192   return dst;
3193 }
3194 
3195 
3196 LogicVRegister Simulator::shrn(VectorFormat vform,
3197                                LogicVRegister dst,
3198                                const LogicVRegister& src,
3199                                int shift) {
3200   SimVRegister temp;
3201   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3202   VectorFormat vform_dst = vform;
3203   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3204   return extractnarrow(vform_dst, dst, false, shifted_src, false);
3205 }
3206 
3207 
3208 LogicVRegister Simulator::shrn2(VectorFormat vform,
3209                                 LogicVRegister dst,
3210                                 const LogicVRegister& src,
3211                                 int shift) {
3212   SimVRegister temp;
3213   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3214   VectorFormat vformdst = vform;
3215   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3216   return extractnarrow(vformdst, dst, false, shifted_src, false);
3217 }
3218 
3219 
3220 LogicVRegister Simulator::rshrn(VectorFormat vform,
3221                                 LogicVRegister dst,
3222                                 const LogicVRegister& src,
3223                                 int shift) {
3224   SimVRegister temp;
3225   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3226   VectorFormat vformdst = vform;
3227   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3228   return extractnarrow(vformdst, dst, false, shifted_src, false);
3229 }
3230 
3231 
3232 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3233                                  LogicVRegister dst,
3234                                  const LogicVRegister& src,
3235                                  int shift) {
3236   SimVRegister temp;
3237   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3238   VectorFormat vformdst = vform;
3239   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3240   return extractnarrow(vformdst, dst, false, shifted_src, false);
3241 }
3242 
3243 LogicVRegister Simulator::Table(VectorFormat vform,
3244                                 LogicVRegister dst,
3245                                 const LogicVRegister& ind,
3246                                 bool zero_out_of_bounds,
3247                                 const LogicVRegister* tab1,
3248                                 const LogicVRegister* tab2,
3249                                 const LogicVRegister* tab3,
3250                                 const LogicVRegister* tab4) {
3251   VIXL_ASSERT(tab1 != NULL);
3252   int lane_count = LaneCountFromFormat(vform);
3253   VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
3254   uint64_t table[kZRegMaxSizeInBytes * 2];
3255   uint64_t result[kZRegMaxSizeInBytes];
3256 
3257   // For Neon, the table source registers are always 16B, and Neon allows only
3258   // 8B or 16B vform for the destination, so infer the table format from the
3259   // destination.
3260   VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
3261 
3262   uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
3263   if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
3264   if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
3265   if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
3266 
3267   for (int i = 0; i < lane_count; i++) {
3268     uint64_t index = ind.Uint(vform, i);
3269     result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
3270     if (index < tab_size) result[i] = table[index];
3271   }
3272   dst.SetUintArray(vform, result);
3273   return dst;
3274 }
3275 
3276 LogicVRegister Simulator::tbl(VectorFormat vform,
3277                               LogicVRegister dst,
3278                               const LogicVRegister& tab,
3279                               const LogicVRegister& ind) {
3280   return Table(vform, dst, ind, true, &tab);
3281 }
3282 
3283 
3284 LogicVRegister Simulator::tbl(VectorFormat vform,
3285                               LogicVRegister dst,
3286                               const LogicVRegister& tab,
3287                               const LogicVRegister& tab2,
3288                               const LogicVRegister& ind) {
3289   return Table(vform, dst, ind, true, &tab, &tab2);
3290 }
3291 
3292 
3293 LogicVRegister Simulator::tbl(VectorFormat vform,
3294                               LogicVRegister dst,
3295                               const LogicVRegister& tab,
3296                               const LogicVRegister& tab2,
3297                               const LogicVRegister& tab3,
3298                               const LogicVRegister& ind) {
3299   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3300 }
3301 
3302 
3303 LogicVRegister Simulator::tbl(VectorFormat vform,
3304                               LogicVRegister dst,
3305                               const LogicVRegister& tab,
3306                               const LogicVRegister& tab2,
3307                               const LogicVRegister& tab3,
3308                               const LogicVRegister& tab4,
3309                               const LogicVRegister& ind) {
3310   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3311 }
3312 
3313 
3314 LogicVRegister Simulator::tbx(VectorFormat vform,
3315                               LogicVRegister dst,
3316                               const LogicVRegister& tab,
3317                               const LogicVRegister& ind) {
3318   return Table(vform, dst, ind, false, &tab);
3319 }
3320 
3321 
3322 LogicVRegister Simulator::tbx(VectorFormat vform,
3323                               LogicVRegister dst,
3324                               const LogicVRegister& tab,
3325                               const LogicVRegister& tab2,
3326                               const LogicVRegister& ind) {
3327   return Table(vform, dst, ind, false, &tab, &tab2);
3328 }
3329 
3330 
3331 LogicVRegister Simulator::tbx(VectorFormat vform,
3332                               LogicVRegister dst,
3333                               const LogicVRegister& tab,
3334                               const LogicVRegister& tab2,
3335                               const LogicVRegister& tab3,
3336                               const LogicVRegister& ind) {
3337   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3338 }
3339 
3340 
3341 LogicVRegister Simulator::tbx(VectorFormat vform,
3342                               LogicVRegister dst,
3343                               const LogicVRegister& tab,
3344                               const LogicVRegister& tab2,
3345                               const LogicVRegister& tab3,
3346                               const LogicVRegister& tab4,
3347                               const LogicVRegister& ind) {
3348   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3349 }
3350 
3351 
3352 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3353                                  LogicVRegister dst,
3354                                  const LogicVRegister& src,
3355                                  int shift) {
3356   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3357 }
3358 
3359 
3360 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3361                                   LogicVRegister dst,
3362                                   const LogicVRegister& src,
3363                                   int shift) {
3364   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3365 }
3366 
3367 
3368 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3369                                   LogicVRegister dst,
3370                                   const LogicVRegister& src,
3371                                   int shift) {
3372   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3373 }
3374 
3375 
3376 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3377                                    LogicVRegister dst,
3378                                    const LogicVRegister& src,
3379                                    int shift) {
3380   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3381 }
3382 
3383 
3384 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3385                                  LogicVRegister dst,
3386                                  const LogicVRegister& src,
3387                                  int shift) {
3388   SimVRegister temp;
3389   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3390   VectorFormat vformdst = vform;
3391   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3392   return sqxtn(vformdst, dst, shifted_src);
3393 }
3394 
3395 
3396 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3397                                   LogicVRegister dst,
3398                                   const LogicVRegister& src,
3399                                   int shift) {
3400   SimVRegister temp;
3401   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3402   VectorFormat vformdst = vform;
3403   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3404   return sqxtn(vformdst, dst, shifted_src);
3405 }
3406 
3407 
3408 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3409                                   LogicVRegister dst,
3410                                   const LogicVRegister& src,
3411                                   int shift) {
3412   SimVRegister temp;
3413   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3414   VectorFormat vformdst = vform;
3415   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3416   return sqxtn(vformdst, dst, shifted_src);
3417 }
3418 
3419 
3420 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3421                                    LogicVRegister dst,
3422                                    const LogicVRegister& src,
3423                                    int shift) {
3424   SimVRegister temp;
3425   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3426   VectorFormat vformdst = vform;
3427   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3428   return sqxtn(vformdst, dst, shifted_src);
3429 }
3430 
3431 
3432 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3433                                   LogicVRegister dst,
3434                                   const LogicVRegister& src,
3435                                   int shift) {
3436   SimVRegister temp;
3437   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3438   VectorFormat vformdst = vform;
3439   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3440   return sqxtun(vformdst, dst, shifted_src);
3441 }
3442 
3443 
3444 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3445                                    LogicVRegister dst,
3446                                    const LogicVRegister& src,
3447                                    int shift) {
3448   SimVRegister temp;
3449   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3450   VectorFormat vformdst = vform;
3451   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3452   return sqxtun(vformdst, dst, shifted_src);
3453 }
3454 
3455 
3456 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3457                                    LogicVRegister dst,
3458                                    const LogicVRegister& src,
3459                                    int shift) {
3460   SimVRegister temp;
3461   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3462   VectorFormat vformdst = vform;
3463   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3464   return sqxtun(vformdst, dst, shifted_src);
3465 }
3466 
3467 
3468 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3469                                     LogicVRegister dst,
3470                                     const LogicVRegister& src,
3471                                     int shift) {
3472   SimVRegister temp;
3473   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3474   VectorFormat vformdst = vform;
3475   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3476   return sqxtun(vformdst, dst, shifted_src);
3477 }
3478 
3479 
3480 LogicVRegister Simulator::uaddl(VectorFormat vform,
3481                                 LogicVRegister dst,
3482                                 const LogicVRegister& src1,
3483                                 const LogicVRegister& src2) {
3484   SimVRegister temp1, temp2;
3485   uxtl(vform, temp1, src1);
3486   uxtl(vform, temp2, src2);
3487   add(vform, dst, temp1, temp2);
3488   return dst;
3489 }
3490 
3491 
3492 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3493                                  LogicVRegister dst,
3494                                  const LogicVRegister& src1,
3495                                  const LogicVRegister& src2) {
3496   SimVRegister temp1, temp2;
3497   uxtl2(vform, temp1, src1);
3498   uxtl2(vform, temp2, src2);
3499   add(vform, dst, temp1, temp2);
3500   return dst;
3501 }
3502 
3503 
3504 LogicVRegister Simulator::uaddw(VectorFormat vform,
3505                                 LogicVRegister dst,
3506                                 const LogicVRegister& src1,
3507                                 const LogicVRegister& src2) {
3508   SimVRegister temp;
3509   uxtl(vform, temp, src2);
3510   add(vform, dst, src1, temp);
3511   return dst;
3512 }
3513 
3514 
3515 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3516                                  LogicVRegister dst,
3517                                  const LogicVRegister& src1,
3518                                  const LogicVRegister& src2) {
3519   SimVRegister temp;
3520   uxtl2(vform, temp, src2);
3521   add(vform, dst, src1, temp);
3522   return dst;
3523 }
3524 
3525 
3526 LogicVRegister Simulator::saddl(VectorFormat vform,
3527                                 LogicVRegister dst,
3528                                 const LogicVRegister& src1,
3529                                 const LogicVRegister& src2) {
3530   SimVRegister temp1, temp2;
3531   sxtl(vform, temp1, src1);
3532   sxtl(vform, temp2, src2);
3533   add(vform, dst, temp1, temp2);
3534   return dst;
3535 }
3536 
3537 
3538 LogicVRegister Simulator::saddl2(VectorFormat vform,
3539                                  LogicVRegister dst,
3540                                  const LogicVRegister& src1,
3541                                  const LogicVRegister& src2) {
3542   SimVRegister temp1, temp2;
3543   sxtl2(vform, temp1, src1);
3544   sxtl2(vform, temp2, src2);
3545   add(vform, dst, temp1, temp2);
3546   return dst;
3547 }
3548 
3549 
3550 LogicVRegister Simulator::saddw(VectorFormat vform,
3551                                 LogicVRegister dst,
3552                                 const LogicVRegister& src1,
3553                                 const LogicVRegister& src2) {
3554   SimVRegister temp;
3555   sxtl(vform, temp, src2);
3556   add(vform, dst, src1, temp);
3557   return dst;
3558 }
3559 
3560 
3561 LogicVRegister Simulator::saddw2(VectorFormat vform,
3562                                  LogicVRegister dst,
3563                                  const LogicVRegister& src1,
3564                                  const LogicVRegister& src2) {
3565   SimVRegister temp;
3566   sxtl2(vform, temp, src2);
3567   add(vform, dst, src1, temp);
3568   return dst;
3569 }
3570 
3571 
3572 LogicVRegister Simulator::usubl(VectorFormat vform,
3573                                 LogicVRegister dst,
3574                                 const LogicVRegister& src1,
3575                                 const LogicVRegister& src2) {
3576   SimVRegister temp1, temp2;
3577   uxtl(vform, temp1, src1);
3578   uxtl(vform, temp2, src2);
3579   sub(vform, dst, temp1, temp2);
3580   return dst;
3581 }
3582 
3583 
3584 LogicVRegister Simulator::usubl2(VectorFormat vform,
3585                                  LogicVRegister dst,
3586                                  const LogicVRegister& src1,
3587                                  const LogicVRegister& src2) {
3588   SimVRegister temp1, temp2;
3589   uxtl2(vform, temp1, src1);
3590   uxtl2(vform, temp2, src2);
3591   sub(vform, dst, temp1, temp2);
3592   return dst;
3593 }
3594 
3595 
3596 LogicVRegister Simulator::usubw(VectorFormat vform,
3597                                 LogicVRegister dst,
3598                                 const LogicVRegister& src1,
3599                                 const LogicVRegister& src2) {
3600   SimVRegister temp;
3601   uxtl(vform, temp, src2);
3602   sub(vform, dst, src1, temp);
3603   return dst;
3604 }
3605 
3606 
3607 LogicVRegister Simulator::usubw2(VectorFormat vform,
3608                                  LogicVRegister dst,
3609                                  const LogicVRegister& src1,
3610                                  const LogicVRegister& src2) {
3611   SimVRegister temp;
3612   uxtl2(vform, temp, src2);
3613   sub(vform, dst, src1, temp);
3614   return dst;
3615 }
3616 
3617 
3618 LogicVRegister Simulator::ssubl(VectorFormat vform,
3619                                 LogicVRegister dst,
3620                                 const LogicVRegister& src1,
3621                                 const LogicVRegister& src2) {
3622   SimVRegister temp1, temp2;
3623   sxtl(vform, temp1, src1);
3624   sxtl(vform, temp2, src2);
3625   sub(vform, dst, temp1, temp2);
3626   return dst;
3627 }
3628 
3629 
3630 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3631                                  LogicVRegister dst,
3632                                  const LogicVRegister& src1,
3633                                  const LogicVRegister& src2) {
3634   SimVRegister temp1, temp2;
3635   sxtl2(vform, temp1, src1);
3636   sxtl2(vform, temp2, src2);
3637   sub(vform, dst, temp1, temp2);
3638   return dst;
3639 }
3640 
3641 
3642 LogicVRegister Simulator::ssubw(VectorFormat vform,
3643                                 LogicVRegister dst,
3644                                 const LogicVRegister& src1,
3645                                 const LogicVRegister& src2) {
3646   SimVRegister temp;
3647   sxtl(vform, temp, src2);
3648   sub(vform, dst, src1, temp);
3649   return dst;
3650 }
3651 
3652 
3653 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3654                                  LogicVRegister dst,
3655                                  const LogicVRegister& src1,
3656                                  const LogicVRegister& src2) {
3657   SimVRegister temp;
3658   sxtl2(vform, temp, src2);
3659   sub(vform, dst, src1, temp);
3660   return dst;
3661 }
3662 
3663 
3664 LogicVRegister Simulator::uabal(VectorFormat vform,
3665                                 LogicVRegister dst,
3666                                 const LogicVRegister& src1,
3667                                 const LogicVRegister& src2) {
3668   SimVRegister temp1, temp2;
3669   uxtl(vform, temp1, src1);
3670   uxtl(vform, temp2, src2);
3671   uaba(vform, dst, temp1, temp2);
3672   return dst;
3673 }
3674 
3675 
3676 LogicVRegister Simulator::uabal2(VectorFormat vform,
3677                                  LogicVRegister dst,
3678                                  const LogicVRegister& src1,
3679                                  const LogicVRegister& src2) {
3680   SimVRegister temp1, temp2;
3681   uxtl2(vform, temp1, src1);
3682   uxtl2(vform, temp2, src2);
3683   uaba(vform, dst, temp1, temp2);
3684   return dst;
3685 }
3686 
3687 
3688 LogicVRegister Simulator::sabal(VectorFormat vform,
3689                                 LogicVRegister dst,
3690                                 const LogicVRegister& src1,
3691                                 const LogicVRegister& src2) {
3692   SimVRegister temp1, temp2;
3693   sxtl(vform, temp1, src1);
3694   sxtl(vform, temp2, src2);
3695   saba(vform, dst, temp1, temp2);
3696   return dst;
3697 }
3698 
3699 
3700 LogicVRegister Simulator::sabal2(VectorFormat vform,
3701                                  LogicVRegister dst,
3702                                  const LogicVRegister& src1,
3703                                  const LogicVRegister& src2) {
3704   SimVRegister temp1, temp2;
3705   sxtl2(vform, temp1, src1);
3706   sxtl2(vform, temp2, src2);
3707   saba(vform, dst, temp1, temp2);
3708   return dst;
3709 }
3710 
3711 
3712 LogicVRegister Simulator::uabdl(VectorFormat vform,
3713                                 LogicVRegister dst,
3714                                 const LogicVRegister& src1,
3715                                 const LogicVRegister& src2) {
3716   SimVRegister temp1, temp2;
3717   uxtl(vform, temp1, src1);
3718   uxtl(vform, temp2, src2);
3719   absdiff(vform, dst, temp1, temp2, false);
3720   return dst;
3721 }
3722 
3723 
3724 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3725                                  LogicVRegister dst,
3726                                  const LogicVRegister& src1,
3727                                  const LogicVRegister& src2) {
3728   SimVRegister temp1, temp2;
3729   uxtl2(vform, temp1, src1);
3730   uxtl2(vform, temp2, src2);
3731   absdiff(vform, dst, temp1, temp2, false);
3732   return dst;
3733 }
3734 
3735 
3736 LogicVRegister Simulator::sabdl(VectorFormat vform,
3737                                 LogicVRegister dst,
3738                                 const LogicVRegister& src1,
3739                                 const LogicVRegister& src2) {
3740   SimVRegister temp1, temp2;
3741   sxtl(vform, temp1, src1);
3742   sxtl(vform, temp2, src2);
3743   absdiff(vform, dst, temp1, temp2, true);
3744   return dst;
3745 }
3746 
3747 
3748 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3749                                  LogicVRegister dst,
3750                                  const LogicVRegister& src1,
3751                                  const LogicVRegister& src2) {
3752   SimVRegister temp1, temp2;
3753   sxtl2(vform, temp1, src1);
3754   sxtl2(vform, temp2, src2);
3755   absdiff(vform, dst, temp1, temp2, true);
3756   return dst;
3757 }
3758 
3759 
3760 LogicVRegister Simulator::umull(VectorFormat vform,
3761                                 LogicVRegister dst,
3762                                 const LogicVRegister& src1,
3763                                 const LogicVRegister& src2,
3764                                 bool is_2) {
3765   SimVRegister temp1, temp2;
3766   uxtl(vform, temp1, src1, is_2);
3767   uxtl(vform, temp2, src2, is_2);
3768   mul(vform, dst, temp1, temp2);
3769   return dst;
3770 }
3771 
3772 
3773 LogicVRegister Simulator::umull2(VectorFormat vform,
3774                                  LogicVRegister dst,
3775                                  const LogicVRegister& src1,
3776                                  const LogicVRegister& src2) {
3777   return umull(vform, dst, src1, src2, /* is_2 = */ true);
3778 }
3779 
3780 
3781 LogicVRegister Simulator::smull(VectorFormat vform,
3782                                 LogicVRegister dst,
3783                                 const LogicVRegister& src1,
3784                                 const LogicVRegister& src2,
3785                                 bool is_2) {
3786   SimVRegister temp1, temp2;
3787   sxtl(vform, temp1, src1, is_2);
3788   sxtl(vform, temp2, src2, is_2);
3789   mul(vform, dst, temp1, temp2);
3790   return dst;
3791 }
3792 
3793 
3794 LogicVRegister Simulator::smull2(VectorFormat vform,
3795                                  LogicVRegister dst,
3796                                  const LogicVRegister& src1,
3797                                  const LogicVRegister& src2) {
3798   return smull(vform, dst, src1, src2, /* is_2 = */ true);
3799 }
3800 
3801 
3802 LogicVRegister Simulator::umlsl(VectorFormat vform,
3803                                 LogicVRegister dst,
3804                                 const LogicVRegister& src1,
3805                                 const LogicVRegister& src2,
3806                                 bool is_2) {
3807   SimVRegister temp1, temp2;
3808   uxtl(vform, temp1, src1, is_2);
3809   uxtl(vform, temp2, src2, is_2);
3810   mls(vform, dst, dst, temp1, temp2);
3811   return dst;
3812 }
3813 
3814 
3815 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3816                                  LogicVRegister dst,
3817                                  const LogicVRegister& src1,
3818                                  const LogicVRegister& src2) {
3819   return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
3820 }
3821 
3822 
3823 LogicVRegister Simulator::smlsl(VectorFormat vform,
3824                                 LogicVRegister dst,
3825                                 const LogicVRegister& src1,
3826                                 const LogicVRegister& src2,
3827                                 bool is_2) {
3828   SimVRegister temp1, temp2;
3829   sxtl(vform, temp1, src1, is_2);
3830   sxtl(vform, temp2, src2, is_2);
3831   mls(vform, dst, dst, temp1, temp2);
3832   return dst;
3833 }
3834 
3835 
3836 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3837                                  LogicVRegister dst,
3838                                  const LogicVRegister& src1,
3839                                  const LogicVRegister& src2) {
3840   return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
3841 }
3842 
3843 
3844 LogicVRegister Simulator::umlal(VectorFormat vform,
3845                                 LogicVRegister dst,
3846                                 const LogicVRegister& src1,
3847                                 const LogicVRegister& src2,
3848                                 bool is_2) {
3849   SimVRegister temp1, temp2;
3850   uxtl(vform, temp1, src1, is_2);
3851   uxtl(vform, temp2, src2, is_2);
3852   mla(vform, dst, dst, temp1, temp2);
3853   return dst;
3854 }
3855 
3856 
3857 LogicVRegister Simulator::umlal2(VectorFormat vform,
3858                                  LogicVRegister dst,
3859                                  const LogicVRegister& src1,
3860                                  const LogicVRegister& src2) {
3861   return umlal(vform, dst, src1, src2, /* is_2 = */ true);
3862 }
3863 
3864 
3865 LogicVRegister Simulator::smlal(VectorFormat vform,
3866                                 LogicVRegister dst,
3867                                 const LogicVRegister& src1,
3868                                 const LogicVRegister& src2,
3869                                 bool is_2) {
3870   SimVRegister temp1, temp2;
3871   sxtl(vform, temp1, src1, is_2);
3872   sxtl(vform, temp2, src2, is_2);
3873   mla(vform, dst, dst, temp1, temp2);
3874   return dst;
3875 }
3876 
3877 
3878 LogicVRegister Simulator::smlal2(VectorFormat vform,
3879                                  LogicVRegister dst,
3880                                  const LogicVRegister& src1,
3881                                  const LogicVRegister& src2) {
3882   return smlal(vform, dst, src1, src2, /* is_2 = */ true);
3883 }
3884 
3885 
3886 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3887                                   LogicVRegister dst,
3888                                   const LogicVRegister& src1,
3889                                   const LogicVRegister& src2,
3890                                   bool is_2) {
3891   SimVRegister temp;
3892   LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3893   return add(vform, dst, dst, product).SignedSaturate(vform);
3894 }
3895 
3896 
3897 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3898                                    LogicVRegister dst,
3899                                    const LogicVRegister& src1,
3900                                    const LogicVRegister& src2) {
3901   return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
3902 }
3903 
3904 
3905 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3906                                   LogicVRegister dst,
3907                                   const LogicVRegister& src1,
3908                                   const LogicVRegister& src2,
3909                                   bool is_2) {
3910   SimVRegister temp;
3911   LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3912   return sub(vform, dst, dst, product).SignedSaturate(vform);
3913 }
3914 
3915 
3916 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3917                                    LogicVRegister dst,
3918                                    const LogicVRegister& src1,
3919                                    const LogicVRegister& src2) {
3920   return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
3921 }
3922 
3923 
3924 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3925                                   LogicVRegister dst,
3926                                   const LogicVRegister& src1,
3927                                   const LogicVRegister& src2,
3928                                   bool is_2) {
3929   SimVRegister temp;
3930   LogicVRegister product = smull(vform, temp, src1, src2, is_2);
3931   return add(vform, dst, product, product).SignedSaturate(vform);
3932 }
3933 
3934 
3935 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3936                                    LogicVRegister dst,
3937                                    const LogicVRegister& src1,
3938                                    const LogicVRegister& src2) {
3939   return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
3940 }
3941 
3942 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3943                                    LogicVRegister dst,
3944                                    const LogicVRegister& src1,
3945                                    const LogicVRegister& src2,
3946                                    bool round) {
3947   int esize = LaneSizeInBitsFromFormat(vform);
3948 
3949   SimVRegister temp_lo, temp_hi;
3950 
3951   // Compute low and high multiplication results.
3952   mul(vform, temp_lo, src1, src2);
3953   smulh(vform, temp_hi, src1, src2);
3954 
3955   // Double by shifting high half, and adding in most-significant bit of low
3956   // half.
3957   shl(vform, temp_hi, temp_hi, 1);
3958   usra(vform, temp_hi, temp_lo, esize - 1);
3959 
3960   if (round) {
3961     // Add the second (due to doubling) most-significant bit of the low half
3962     // into the result.
3963     shl(vform, temp_lo, temp_lo, 1);
3964     usra(vform, temp_hi, temp_lo, esize - 1);
3965   }
3966 
3967   SimPRegister not_sat;
3968   LogicPRegister ptemp(not_sat);
3969   dst.ClearForWrite(vform);
3970   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3971     // Saturation only occurs when src1 = src2 = minimum representable value.
3972     // Check this as a special case.
3973     ptemp.SetActive(vform, i, true);
3974     if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
3975         (src2.Int(vform, i) == MinIntFromFormat(vform))) {
3976       ptemp.SetActive(vform, i, false);
3977     }
3978     dst.SetInt(vform, i, MaxIntFromFormat(vform));
3979   }
3980 
3981   mov_merging(vform, dst, not_sat, temp_hi);
3982   return dst;
3983 }
3984 
3985 
3986 LogicVRegister Simulator::dot(VectorFormat vform,
3987                               LogicVRegister dst,
3988                               const LogicVRegister& src1,
3989                               const LogicVRegister& src2,
3990                               bool is_src1_signed,
3991                               bool is_src2_signed) {
3992   VectorFormat quarter_vform =
3993       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3994 
3995   dst.ClearForWrite(vform);
3996   for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3997     uint64_t result = 0;
3998     int64_t element1, element2;
3999     for (int i = 0; i < 4; i++) {
4000       int index = 4 * e + i;
4001       if (is_src1_signed) {
4002         element1 = src1.Int(quarter_vform, index);
4003       } else {
4004         element1 = src1.Uint(quarter_vform, index);
4005       }
4006       if (is_src2_signed) {
4007         element2 = src2.Int(quarter_vform, index);
4008       } else {
4009         element2 = src2.Uint(quarter_vform, index);
4010       }
4011       result += element1 * element2;
4012     }
4013     dst.SetUint(vform, e, result + dst.Uint(vform, e));
4014   }
4015   return dst;
4016 }
4017 
4018 
4019 LogicVRegister Simulator::sdot(VectorFormat vform,
4020                                LogicVRegister dst,
4021                                const LogicVRegister& src1,
4022                                const LogicVRegister& src2) {
4023   return dot(vform, dst, src1, src2, true, true);
4024 }
4025 
4026 
4027 LogicVRegister Simulator::udot(VectorFormat vform,
4028                                LogicVRegister dst,
4029                                const LogicVRegister& src1,
4030                                const LogicVRegister& src2) {
4031   return dot(vform, dst, src1, src2, false, false);
4032 }
4033 
4034 LogicVRegister Simulator::usdot(VectorFormat vform,
4035                                 LogicVRegister dst,
4036                                 const LogicVRegister& src1,
4037                                 const LogicVRegister& src2) {
4038   return dot(vform, dst, src1, src2, false, true);
4039 }
4040 
4041 LogicVRegister Simulator::cdot(VectorFormat vform,
4042                                LogicVRegister dst,
4043                                const LogicVRegister& acc,
4044                                const LogicVRegister& src1,
4045                                const LogicVRegister& src2,
4046                                int rot) {
4047   VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
4048   VectorFormat quarter_vform =
4049       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4050 
4051   int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
4052   int sel_b = 1 - sel_a;
4053   int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
4054 
4055   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4056     int64_t result = acc.Int(vform, i);
4057     for (int j = 0; j < 2; j++) {
4058       int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
4059       int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
4060       int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
4061       int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
4062       result += (r1 * r2) + (sub_i * i1 * i2);
4063     }
4064     dst.SetInt(vform, i, result);
4065   }
4066   return dst;
4067 }
4068 
4069 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4070                                     LogicVRegister dst,
4071                                     const LogicVRegister& srca,
4072                                     const LogicVRegister& src1,
4073                                     const LogicVRegister& src2,
4074                                     int rot) {
4075   SimVRegister src1_a, src1_b;
4076   SimVRegister src2_a, src2_b;
4077   SimVRegister srca_i, srca_r;
4078   SimVRegister zero, temp;
4079   zero.Clear();
4080 
4081   if ((rot == 0) || (rot == 180)) {
4082     uzp1(vform, src1_a, src1, zero);
4083     uzp1(vform, src2_a, src2, zero);
4084     uzp2(vform, src2_b, src2, zero);
4085   } else {
4086     uzp2(vform, src1_a, src1, zero);
4087     uzp2(vform, src2_a, src2, zero);
4088     uzp1(vform, src2_b, src2, zero);
4089   }
4090 
4091   uzp1(vform, srca_r, srca, zero);
4092   uzp2(vform, srca_i, srca, zero);
4093 
4094   bool sub_r = (rot == 90) || (rot == 180);
4095   bool sub_i = (rot == 180) || (rot == 270);
4096 
4097   const bool round = true;
4098   sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
4099   sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
4100   zip1(vform, dst, srca_r, srca_i);
4101   return dst;
4102 }
4103 
4104 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4105                                     LogicVRegister dst,
4106                                     const LogicVRegister& srca,
4107                                     const LogicVRegister& src1,
4108                                     const LogicVRegister& src2,
4109                                     int index,
4110                                     int rot) {
4111   SimVRegister temp;
4112   dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
4113   return sqrdcmlah(vform, dst, srca, src1, temp, rot);
4114 }
4115 
4116 LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
4117                                       LogicVRegister dst,
4118                                       const LogicVRegister& src1,
4119                                       const LogicVRegister& src2,
4120                                       bool round,
4121                                       bool sub_op) {
4122   // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
4123   // To avoid this, we use:
4124   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4125   // which is same as:
4126   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4127 
4128   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4129   int esize = kDRegSize;
4130   vixl_uint128_t round_const, accum;
4131   round_const.first = 0;
4132   if (round) {
4133     round_const.second = UINT64_C(1) << (esize - 2);
4134   } else {
4135     round_const.second = 0;
4136   }
4137 
4138   dst.ClearForWrite(vform);
4139   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4140     // Shift the whole value left by `esize - 1` bits.
4141     accum.first = dst.Int(vform, i) >> 1;
4142     accum.second = dst.Int(vform, i) << (esize - 1);
4143 
4144     vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));
4145 
4146     if (sub_op) {
4147       product = Neg128(product);
4148     }
4149     accum = Add128(accum, product);
4150 
4151     // Perform rounding.
4152     accum = Add128(accum, round_const);
4153 
4154     // Arithmetic shift the whole value right by `esize - 1` bits.
4155     accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
4156     accum.first = -(accum.first >> (esize - 1));
4157 
4158     // Perform saturation.
4159     bool is_pos = (accum.first == 0) ? true : false;
4160     if (is_pos &&
4161         (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
4162       accum.second = MaxIntFromFormat(vform);
4163     } else if (!is_pos && (accum.second <
4164                            static_cast<uint64_t>(MinIntFromFormat(vform)))) {
4165       accum.second = MinIntFromFormat(vform);
4166     }
4167 
4168     dst.SetInt(vform, i, accum.second);
4169   }
4170 
4171   return dst;
4172 }
4173 
4174 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4175                                     LogicVRegister dst,
4176                                     const LogicVRegister& src1,
4177                                     const LogicVRegister& src2,
4178                                     bool round,
4179                                     bool sub_op) {
4180   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4181   // To avoid this, we use:
4182   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4183   // which is same as:
4184   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4185 
4186   if (vform == kFormatVnD) {
4187     return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
4188   }
4189 
4190   int esize = LaneSizeInBitsFromFormat(vform);
4191   int round_const = round ? (1 << (esize - 2)) : 0;
4192   int64_t accum;
4193 
4194   dst.ClearForWrite(vform);
4195   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4196     accum = dst.Int(vform, i) << (esize - 1);
4197     if (sub_op) {
4198       accum -= src1.Int(vform, i) * src2.Int(vform, i);
4199     } else {
4200       accum += src1.Int(vform, i) * src2.Int(vform, i);
4201     }
4202     accum += round_const;
4203     accum = accum >> (esize - 1);
4204 
4205     if (accum > MaxIntFromFormat(vform)) {
4206       accum = MaxIntFromFormat(vform);
4207     } else if (accum < MinIntFromFormat(vform)) {
4208       accum = MinIntFromFormat(vform);
4209     }
4210     dst.SetInt(vform, i, accum);
4211   }
4212   return dst;
4213 }
4214 
4215 
4216 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4217                                    LogicVRegister dst,
4218                                    const LogicVRegister& src1,
4219                                    const LogicVRegister& src2,
4220                                    bool round) {
4221   return sqrdmlash(vform, dst, src1, src2, round, false);
4222 }
4223 
4224 
4225 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4226                                    LogicVRegister dst,
4227                                    const LogicVRegister& src1,
4228                                    const LogicVRegister& src2,
4229                                    bool round) {
4230   return sqrdmlash(vform, dst, src1, src2, round, true);
4231 }
4232 
4233 
4234 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4235                                   LogicVRegister dst,
4236                                   const LogicVRegister& src1,
4237                                   const LogicVRegister& src2) {
4238   return sqrdmulh(vform, dst, src1, src2, false);
4239 }
4240 
4241 
4242 LogicVRegister Simulator::addhn(VectorFormat vform,
4243                                 LogicVRegister dst,
4244                                 const LogicVRegister& src1,
4245                                 const LogicVRegister& src2) {
4246   SimVRegister temp;
4247   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4248   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4249   return dst;
4250 }
4251 
4252 
4253 LogicVRegister Simulator::addhn2(VectorFormat vform,
4254                                  LogicVRegister dst,
4255                                  const LogicVRegister& src1,
4256                                  const LogicVRegister& src2) {
4257   SimVRegister temp;
4258   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4259   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4260   return dst;
4261 }
4262 
4263 
4264 LogicVRegister Simulator::raddhn(VectorFormat vform,
4265                                  LogicVRegister dst,
4266                                  const LogicVRegister& src1,
4267                                  const LogicVRegister& src2) {
4268   SimVRegister temp;
4269   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4270   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4271   return dst;
4272 }
4273 
4274 
4275 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4276                                   LogicVRegister dst,
4277                                   const LogicVRegister& src1,
4278                                   const LogicVRegister& src2) {
4279   SimVRegister temp;
4280   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4281   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4282   return dst;
4283 }
4284 
4285 
4286 LogicVRegister Simulator::subhn(VectorFormat vform,
4287                                 LogicVRegister dst,
4288                                 const LogicVRegister& src1,
4289                                 const LogicVRegister& src2) {
4290   SimVRegister temp;
4291   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4292   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4293   return dst;
4294 }
4295 
4296 
4297 LogicVRegister Simulator::subhn2(VectorFormat vform,
4298                                  LogicVRegister dst,
4299                                  const LogicVRegister& src1,
4300                                  const LogicVRegister& src2) {
4301   SimVRegister temp;
4302   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4303   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4304   return dst;
4305 }
4306 
4307 
4308 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4309                                  LogicVRegister dst,
4310                                  const LogicVRegister& src1,
4311                                  const LogicVRegister& src2) {
4312   SimVRegister temp;
4313   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4314   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4315   return dst;
4316 }
4317 
4318 
4319 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4320                                   LogicVRegister dst,
4321                                   const LogicVRegister& src1,
4322                                   const LogicVRegister& src2) {
4323   SimVRegister temp;
4324   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4325   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4326   return dst;
4327 }
4328 
4329 
4330 LogicVRegister Simulator::trn1(VectorFormat vform,
4331                                LogicVRegister dst,
4332                                const LogicVRegister& src1,
4333                                const LogicVRegister& src2) {
4334   uint64_t result[kZRegMaxSizeInBytes] = {};
4335   int lane_count = LaneCountFromFormat(vform);
4336   int pairs = lane_count / 2;
4337   for (int i = 0; i < pairs; ++i) {
4338     result[2 * i] = src1.Uint(vform, 2 * i);
4339     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4340   }
4341 
4342   dst.ClearForWrite(vform);
4343   for (int i = 0; i < lane_count; ++i) {
4344     dst.SetUint(vform, i, result[i]);
4345   }
4346   return dst;
4347 }
4348 
4349 
4350 LogicVRegister Simulator::trn2(VectorFormat vform,
4351                                LogicVRegister dst,
4352                                const LogicVRegister& src1,
4353                                const LogicVRegister& src2) {
4354   uint64_t result[kZRegMaxSizeInBytes] = {};
4355   int lane_count = LaneCountFromFormat(vform);
4356   int pairs = lane_count / 2;
4357   for (int i = 0; i < pairs; ++i) {
4358     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4359     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4360   }
4361 
4362   dst.ClearForWrite(vform);
4363   for (int i = 0; i < lane_count; ++i) {
4364     dst.SetUint(vform, i, result[i]);
4365   }
4366   return dst;
4367 }
4368 
4369 
4370 LogicVRegister Simulator::zip1(VectorFormat vform,
4371                                LogicVRegister dst,
4372                                const LogicVRegister& src1,
4373                                const LogicVRegister& src2) {
4374   uint64_t result[kZRegMaxSizeInBytes] = {};
4375   int lane_count = LaneCountFromFormat(vform);
4376   int pairs = lane_count / 2;
4377   for (int i = 0; i < pairs; ++i) {
4378     result[2 * i] = src1.Uint(vform, i);
4379     result[(2 * i) + 1] = src2.Uint(vform, i);
4380   }
4381 
4382   dst.ClearForWrite(vform);
4383   for (int i = 0; i < lane_count; ++i) {
4384     dst.SetUint(vform, i, result[i]);
4385   }
4386   return dst;
4387 }
4388 
4389 
4390 LogicVRegister Simulator::zip2(VectorFormat vform,
4391                                LogicVRegister dst,
4392                                const LogicVRegister& src1,
4393                                const LogicVRegister& src2) {
4394   uint64_t result[kZRegMaxSizeInBytes] = {};
4395   int lane_count = LaneCountFromFormat(vform);
4396   int pairs = lane_count / 2;
4397   for (int i = 0; i < pairs; ++i) {
4398     result[2 * i] = src1.Uint(vform, pairs + i);
4399     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4400   }
4401 
4402   dst.ClearForWrite(vform);
4403   for (int i = 0; i < lane_count; ++i) {
4404     dst.SetUint(vform, i, result[i]);
4405   }
4406   return dst;
4407 }
4408 
4409 
4410 LogicVRegister Simulator::uzp1(VectorFormat vform,
4411                                LogicVRegister dst,
4412                                const LogicVRegister& src1,
4413                                const LogicVRegister& src2) {
4414   uint64_t result[kZRegMaxSizeInBytes * 2];
4415   int lane_count = LaneCountFromFormat(vform);
4416   for (int i = 0; i < lane_count; ++i) {
4417     result[i] = src1.Uint(vform, i);
4418     result[lane_count + i] = src2.Uint(vform, i);
4419   }
4420 
4421   dst.ClearForWrite(vform);
4422   for (int i = 0; i < lane_count; ++i) {
4423     dst.SetUint(vform, i, result[2 * i]);
4424   }
4425   return dst;
4426 }
4427 
4428 
4429 LogicVRegister Simulator::uzp2(VectorFormat vform,
4430                                LogicVRegister dst,
4431                                const LogicVRegister& src1,
4432                                const LogicVRegister& src2) {
4433   uint64_t result[kZRegMaxSizeInBytes * 2];
4434   int lane_count = LaneCountFromFormat(vform);
4435   for (int i = 0; i < lane_count; ++i) {
4436     result[i] = src1.Uint(vform, i);
4437     result[lane_count + i] = src2.Uint(vform, i);
4438   }
4439 
4440   dst.ClearForWrite(vform);
4441   for (int i = 0; i < lane_count; ++i) {
4442     dst.SetUint(vform, i, result[(2 * i) + 1]);
4443   }
4444   return dst;
4445 }
4446 
4447 LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
4448                                                 LogicVRegister dst,
4449                                                 const LogicVRegister& src) {
4450   // Interleave the top and bottom half of a vector, ie. for a vector:
4451   //
4452   //   [ ... | F | D | B | ... | E | C | A ]
4453   //
4454   // where B is the first element in the top half of the vector, produce a
4455   // result vector:
4456   //
4457   //   [ ... | ... | F | E | D | C | B | A ]
4458 
4459   uint64_t result[kZRegMaxSizeInBytes] = {};
4460   int lane_count = LaneCountFromFormat(vform);
4461   for (int i = 0; i < lane_count; i += 2) {
4462     result[i] = src.Uint(vform, i / 2);
4463     result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
4464   }
4465   dst.SetUintArray(vform, result);
4466   return dst;
4467 }
4468 
4469 template <typename T>
4470 T Simulator::FPNeg(T op) {
4471   return -op;
4472 }
4473 
4474 template <typename T>
4475 T Simulator::FPAdd(T op1, T op2) {
4476   T result = FPProcessNaNs(op1, op2);
4477   if (IsNaN(result)) {
4478     return result;
4479   }
4480 
4481   if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4482     // inf + -inf returns the default NaN.
4483     FPProcessException();
4484     return FPDefaultNaN<T>();
4485   } else {
4486     // Other cases should be handled by standard arithmetic.
4487     return op1 + op2;
4488   }
4489 }
4490 
4491 
4492 template <typename T>
4493 T Simulator::FPSub(T op1, T op2) {
4494   // NaNs should be handled elsewhere.
4495   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4496 
4497   if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4498     // inf - inf returns the default NaN.
4499     FPProcessException();
4500     return FPDefaultNaN<T>();
4501   } else {
4502     // Other cases should be handled by standard arithmetic.
4503     return op1 - op2;
4504   }
4505 }
4506 
4507 template <typename T>
4508 T Simulator::FPMulNaNs(T op1, T op2) {
4509   T result = FPProcessNaNs(op1, op2);
4510   return IsNaN(result) ? result : FPMul(op1, op2);
4511 }
4512 
4513 template <typename T>
4514 T Simulator::FPMul(T op1, T op2) {
4515   // NaNs should be handled elsewhere.
4516   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4517 
4518   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4519     // inf * 0.0 returns the default NaN.
4520     FPProcessException();
4521     return FPDefaultNaN<T>();
4522   } else {
4523     // Other cases should be handled by standard arithmetic.
4524     return op1 * op2;
4525   }
4526 }
4527 
4528 
4529 template <typename T>
4530 T Simulator::FPMulx(T op1, T op2) {
4531   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4532     // inf * 0.0 returns +/-2.0.
4533     T two = 2.0;
4534     return copysign(1.0, op1) * copysign(1.0, op2) * two;
4535   }
4536   return FPMul(op1, op2);
4537 }
4538 
4539 
4540 template <typename T>
4541 T Simulator::FPMulAdd(T a, T op1, T op2) {
4542   T result = FPProcessNaNs3(a, op1, op2);
4543 
4544   T sign_a = copysign(1.0, a);
4545   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4546   bool isinf_prod = IsInf(op1) || IsInf(op2);
4547   bool operation_generates_nan =
4548       (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
4549       (IsInf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
4550       (IsInf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
4551 
4552   if (IsNaN(result)) {
4553     // Generated NaNs override quiet NaNs propagated from a.
4554     if (operation_generates_nan && IsQuietNaN(a)) {
4555       FPProcessException();
4556       return FPDefaultNaN<T>();
4557     } else {
4558       return result;
4559     }
4560   }
4561 
4562   // If the operation would produce a NaN, return the default NaN.
4563   if (operation_generates_nan) {
4564     FPProcessException();
4565     return FPDefaultNaN<T>();
4566   }
4567 
4568   // Work around broken fma implementations for exact zero results: The sign of
4569   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4570   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4571     return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4572   }
4573 
4574   result = FusedMultiplyAdd(op1, op2, a);
4575   VIXL_ASSERT(!IsNaN(result));
4576 
4577   // Work around broken fma implementations for rounded zero results: If a is
4578   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4579   if ((a == 0.0) && (result == 0.0)) {
4580     return copysign(0.0, sign_prod);
4581   }
4582 
4583   return result;
4584 }
4585 
4586 template float Simulator::FPMulAdd(float a, float op1, float op2);
4587 
4588 template double Simulator::FPMulAdd(double a, double op1, double op2);
4589 
4590 template <typename T>
4591 T Simulator::FPDiv(T op1, T op2) {
4592   // NaNs should be handled elsewhere.
4593   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4594 
4595   if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4596     // inf / inf and 0.0 / 0.0 return the default NaN.
4597     FPProcessException();
4598     return FPDefaultNaN<T>();
4599   } else {
4600     if (op2 == 0.0) {
4601       FPProcessException();
4602       if (!IsNaN(op1)) {
4603         double op1_sign = copysign(1.0, op1);
4604         double op2_sign = copysign(1.0, op2);
4605         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4606       }
4607     }
4608 
4609     // Other cases should be handled by standard arithmetic.
4610     return op1 / op2;
4611   }
4612 }
4613 
4614 
4615 template <typename T>
4616 T Simulator::FPSqrt(T op) {
4617   if (IsNaN(op)) {
4618     return FPProcessNaN(op);
4619   } else if (op < T(0.0)) {
4620     FPProcessException();
4621     return FPDefaultNaN<T>();
4622   } else {
4623     return sqrt(op);
4624   }
4625 }
4626 
4627 
4628 template <typename T>
4629 T Simulator::FPMax(T a, T b) {
4630   T result = FPProcessNaNs(a, b);
4631   if (IsNaN(result)) return result;
4632 
4633   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4634     // a and b are zero, and the sign differs: return +0.0.
4635     return 0.0;
4636   } else {
4637     return (a > b) ? a : b;
4638   }
4639 }
4640 
4641 
4642 template <typename T>
4643 T Simulator::FPMaxNM(T a, T b) {
4644   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4645     a = kFP64NegativeInfinity;
4646   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4647     b = kFP64NegativeInfinity;
4648   }
4649 
4650   T result = FPProcessNaNs(a, b);
4651   return IsNaN(result) ? result : FPMax(a, b);
4652 }
4653 
4654 
4655 template <typename T>
4656 T Simulator::FPMin(T a, T b) {
4657   T result = FPProcessNaNs(a, b);
4658   if (IsNaN(result)) return result;
4659 
4660   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4661     // a and b are zero, and the sign differs: return -0.0.
4662     return -0.0;
4663   } else {
4664     return (a < b) ? a : b;
4665   }
4666 }
4667 
4668 
4669 template <typename T>
4670 T Simulator::FPMinNM(T a, T b) {
4671   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4672     a = kFP64PositiveInfinity;
4673   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4674     b = kFP64PositiveInfinity;
4675   }
4676 
4677   T result = FPProcessNaNs(a, b);
4678   return IsNaN(result) ? result : FPMin(a, b);
4679 }
4680 
4681 
4682 template <typename T>
4683 T Simulator::FPRecipStepFused(T op1, T op2) {
4684   const T two = 2.0;
4685   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4686     return two;
4687   } else if (IsInf(op1) || IsInf(op2)) {
4688     // Return +inf if signs match, otherwise -inf.
4689     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4690                                           : kFP64NegativeInfinity;
4691   } else {
4692     return FusedMultiplyAdd(op1, op2, two);
4693   }
4694 }
4695 
// Generic case: defers to std::isnormal() to decide whether `value` is a
// normal floating-point number.
template <typename T>
bool IsNormal(T value) {
  const bool is_normal = std::isnormal(value);
  return is_normal;
}
4700 
4701 template <>
4702 bool IsNormal(SimFloat16 value) {
4703   uint16_t rawbits = Float16ToRawbits(value);
4704   uint16_t exp_mask = 0x7c00;
4705   // Check that the exponent is neither all zeroes or all ones.
4706   return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4707 }
4708 
4709 
4710 template <typename T>
4711 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4712   const T one_point_five = 1.5;
4713   const T two = 2.0;
4714 
4715   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4716     return one_point_five;
4717   } else if (IsInf(op1) || IsInf(op2)) {
4718     // Return +inf if signs match, otherwise -inf.
4719     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4720                                           : kFP64NegativeInfinity;
4721   } else {
4722     // The multiply-add-halve operation must be fully fused, so avoid interim
4723     // rounding by checking which operand can be losslessly divided by two
4724     // before doing the multiply-add.
4725     if (IsNormal(op1 / two)) {
4726       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4727     } else if (IsNormal(op2 / two)) {
4728       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4729     } else {
4730       // Neither operand is normal after halving: the result is dominated by
4731       // the addition term, so just return that.
4732       return one_point_five;
4733     }
4734   }
4735 }
4736 
4737 int32_t Simulator::FPToFixedJS(double value) {
4738   // The Z-flag is set when the conversion from double precision floating-point
4739   // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4740   // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4741   // Z-flag is unset.
4742   int Z = 1;
4743   int32_t result;
4744 
4745   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4746       (value == kFP64NegativeInfinity)) {
4747     // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4748     // unset the Z-flag.
4749     result = 0.0;
4750     if ((value != 0.0) || std::signbit(value)) {
4751       Z = 0;
4752     }
4753   } else if (std::isnan(value)) {
4754     // NaN values unset the Z-flag and set the result to 0.
4755     FPProcessNaN(value);
4756     result = 0;
4757     Z = 0;
4758   } else {
4759     // All other values are converted to an integer representation, rounded
4760     // toward zero.
4761     double int_result = std::floor(value);
4762     double error = value - int_result;
4763 
4764     if ((error != 0.0) && (int_result < 0.0)) {
4765       int_result++;
4766     }
4767 
4768     // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4769     // write a one-liner with std::round, but the behaviour on ties is incorrect
4770     // for our purposes.
4771     double mod_const = static_cast<double>(UINT64_C(1) << 32);
4772     double mod_error =
4773         (int_result / mod_const) - std::floor(int_result / mod_const);
4774     double constrained;
4775     if (mod_error == 0.5) {
4776       constrained = INT32_MIN;
4777     } else {
4778       constrained = int_result - mod_const * round(int_result / mod_const);
4779     }
4780 
4781     VIXL_ASSERT(std::floor(constrained) == constrained);
4782     VIXL_ASSERT(constrained >= INT32_MIN);
4783     VIXL_ASSERT(constrained <= INT32_MAX);
4784 
4785     // Take the bottom 32 bits of the result as a 32-bit integer.
4786     result = static_cast<int32_t>(constrained);
4787 
4788     if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4789         (error != 0.0)) {
4790       // If the integer result is out of range or the conversion isn't exact,
4791       // take exception and unset the Z-flag.
4792       FPProcessException();
4793       Z = 0;
4794     }
4795   }
4796 
4797   ReadNzcv().SetN(0);
4798   ReadNzcv().SetZ(Z);
4799   ReadNzcv().SetC(0);
4800   ReadNzcv().SetV(0);
4801 
4802   return result;
4803 }
4804 
4805 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4806   VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4807               (value != kFP64NegativeInfinity));
4808   VIXL_ASSERT(!IsNaN(value));
4809 
4810   double int_result = std::floor(value);
4811   double error = value - int_result;
4812   switch (round_mode) {
4813     case FPTieAway: {
4814       // Take care of correctly handling the range ]-0.5, -0.0], which must
4815       // yield -0.0.
4816       if ((-0.5 < value) && (value < 0.0)) {
4817         int_result = -0.0;
4818 
4819       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4820         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4821         // result is positive, round up.
4822         int_result++;
4823       }
4824       break;
4825     }
4826     case FPTieEven: {
4827       // Take care of correctly handling the range [-0.5, -0.0], which must
4828       // yield -0.0.
4829       if ((-0.5 <= value) && (value < 0.0)) {
4830         int_result = -0.0;
4831 
4832         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4833         // result is odd, round up.
4834       } else if ((error > 0.5) ||
4835                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4836         int_result++;
4837       }
4838       break;
4839     }
4840     case FPZero: {
4841       // If value>0 then we take floor(value)
4842       // otherwise, ceil(value).
4843       if (value < 0) {
4844         int_result = ceil(value);
4845       }
4846       break;
4847     }
4848     case FPNegativeInfinity: {
4849       // We always use floor(value).
4850       break;
4851     }
4852     case FPPositiveInfinity: {
4853       // Take care of correctly handling the range ]-1.0, -0.0], which must
4854       // yield -0.0.
4855       if ((-1.0 < value) && (value < 0.0)) {
4856         int_result = -0.0;
4857 
4858         // If the error is non-zero, round up.
4859       } else if (error > 0.0) {
4860         int_result++;
4861       }
4862       break;
4863     }
4864     default:
4865       VIXL_UNIMPLEMENTED();
4866   }
4867   return int_result;
4868 }
4869 
4870 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4871   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4872       (value == kFP64NegativeInfinity)) {
4873     return value;
4874   } else if (IsNaN(value)) {
4875     return FPProcessNaN(value);
4876   }
4877   return FPRoundIntCommon(value, round_mode);
4878 }
4879 
4880 double Simulator::FPRoundInt(double value,
4881                              FPRounding round_mode,
4882                              FrintMode frint_mode) {
4883   if (frint_mode == kFrintToInteger) {
4884     return FPRoundInt(value, round_mode);
4885   }
4886 
4887   VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4888 
4889   if (value == 0.0) {
4890     return value;
4891   }
4892 
4893   if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4894       IsNaN(value)) {
4895     if (frint_mode == kFrintToInt32) {
4896       return INT32_MIN;
4897     } else {
4898       return INT64_MIN;
4899     }
4900   }
4901 
4902   double result = FPRoundIntCommon(value, round_mode);
4903 
4904   // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4905   // representable as a double, and is rounded to (INT64_MAX + 1) when
4906   // converted. To avoid this, we compare `result >= int64_max_plus_one`
4907   // instead; this is safe because `result` is known to be integral, and
4908   // `int64_max_plus_one` is exactly representable as a double.
4909   constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4910   VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4911                          int64_max_plus_one)) == int64_max_plus_one);
4912 
4913   if (frint_mode == kFrintToInt32) {
4914     if ((result > INT32_MAX) || (result < INT32_MIN)) {
4915       return INT32_MIN;
4916     }
4917   } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4918     return INT64_MIN;
4919   }
4920 
4921   return result;
4922 }
4923 
4924 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4925   value = FPRoundInt(value, rmode);
4926   if (value >= kHMaxInt) {
4927     return kHMaxInt;
4928   } else if (value < kHMinInt) {
4929     return kHMinInt;
4930   }
4931   return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4932 }
4933 
4934 
4935 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4936   value = FPRoundInt(value, rmode);
4937   if (value >= kWMaxInt) {
4938     return kWMaxInt;
4939   } else if (value < kWMinInt) {
4940     return kWMinInt;
4941   }
4942   return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4943 }
4944 
4945 
4946 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4947   value = FPRoundInt(value, rmode);
4948   // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4949   // as a result of kMaxInt not being representable as a double.
4950   if (value >= 9223372036854775808.) {
4951     return kXMaxInt;
4952   } else if (value < kXMinInt) {
4953     return kXMinInt;
4954   }
4955   return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4956 }
4957 
4958 
4959 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4960   value = FPRoundInt(value, rmode);
4961   if (value >= kHMaxUInt) {
4962     return kHMaxUInt;
4963   } else if (value < 0.0) {
4964     return 0;
4965   }
4966   return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4967 }
4968 
4969 
4970 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4971   value = FPRoundInt(value, rmode);
4972   if (value >= kWMaxUInt) {
4973     return kWMaxUInt;
4974   } else if (value < 0.0) {
4975     return 0;
4976   }
4977   return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4978 }
4979 
4980 
4981 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4982   value = FPRoundInt(value, rmode);
4983   // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
4984   // as a result of kMaxUInt not being representable as a double.
4985   if (value >= 18446744073709551616.) {
4986     return kXMaxUInt;
4987   } else if (value < 0.0) {
4988     return 0;
4989   }
4990   return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4991 }
4992 
4993 
// Define the two-source, lane-wise floating-point vector operations.
//
// NEON_FP3SAME_LIST (defined elsewhere) invokes this macro once per
// (FN, OP, PROCNAN) entry. Each expansion produces:
//
//  * A templated Simulator::FN<T> that clears `dst` for writing and then, for
//    every lane of `vform`, combines the matching lanes of `src1` and `src2`.
//    When PROCNAN is true the operands first go through FPProcessNaNs(); if
//    that yields a NaN it becomes the lane result, otherwise OP(op1, op2) is
//    used. When PROCNAN is false, OP(op1, op2) is used directly.
//
//  * A non-template Simulator::FN that picks T from the lane size of `vform`
//    (SimFloat16 for H lanes, float for S lanes, double for D lanes) and
//    forwards to the template.
//
// Only this header comment is added; a `//` comment inside the macro body
// would swallow the line-continuation backslash.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
  template <typename T>                                          \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    dst.ClearForWrite(vform);                                    \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
      T op1 = src1.Float<T>(i);                                  \
      T op2 = src2.Float<T>(i);                                  \
      T result;                                                  \
      if (PROCNAN) {                                             \
        result = FPProcessNaNs(op1, op2);                        \
        if (!IsNaN(result)) {                                    \
          result = OP(op1, op2);                                 \
        }                                                        \
      } else {                                                   \
        result = OP(op1, op2);                                   \
      }                                                          \
      dst.SetFloat(vform, i, result);                            \
    }                                                            \
    return dst;                                                  \
  }                                                              \
                                                                 \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {          \
      FN<SimFloat16>(vform, dst, src1, src2);                    \
    } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {   \
      FN<float>(vform, dst, src1, src2);                         \
    } else {                                                     \
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
      FN<double>(vform, dst, src1, src2);                        \
    }                                                            \
    return dst;                                                  \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
5034 
5035 
5036 LogicVRegister Simulator::fnmul(VectorFormat vform,
5037                                 LogicVRegister dst,
5038                                 const LogicVRegister& src1,
5039                                 const LogicVRegister& src2) {
5040   SimVRegister temp;
5041   LogicVRegister product = fmul(vform, temp, src1, src2);
5042   return fneg(vform, dst, product);
5043 }
5044 
5045 
5046 template <typename T>
5047 LogicVRegister Simulator::frecps(VectorFormat vform,
5048                                  LogicVRegister dst,
5049                                  const LogicVRegister& src1,
5050                                  const LogicVRegister& src2) {
5051   dst.ClearForWrite(vform);
5052   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5053     T op1 = -src1.Float<T>(i);
5054     T op2 = src2.Float<T>(i);
5055     T result = FPProcessNaNs(op1, op2);
5056     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5057   }
5058   return dst;
5059 }
5060 
5061 
5062 LogicVRegister Simulator::frecps(VectorFormat vform,
5063                                  LogicVRegister dst,
5064                                  const LogicVRegister& src1,
5065                                  const LogicVRegister& src2) {
5066   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5067     frecps<SimFloat16>(vform, dst, src1, src2);
5068   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5069     frecps<float>(vform, dst, src1, src2);
5070   } else {
5071     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5072     frecps<double>(vform, dst, src1, src2);
5073   }
5074   return dst;
5075 }
5076 
5077 
5078 template <typename T>
5079 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5080                                   LogicVRegister dst,
5081                                   const LogicVRegister& src1,
5082                                   const LogicVRegister& src2) {
5083   dst.ClearForWrite(vform);
5084   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5085     T op1 = -src1.Float<T>(i);
5086     T op2 = src2.Float<T>(i);
5087     T result = FPProcessNaNs(op1, op2);
5088     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5089   }
5090   return dst;
5091 }
5092 
5093 
5094 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5095                                   LogicVRegister dst,
5096                                   const LogicVRegister& src1,
5097                                   const LogicVRegister& src2) {
5098   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5099     frsqrts<SimFloat16>(vform, dst, src1, src2);
5100   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5101     frsqrts<float>(vform, dst, src1, src2);
5102   } else {
5103     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5104     frsqrts<double>(vform, dst, src1, src2);
5105   }
5106   return dst;
5107 }
5108 
5109 
5110 template <typename T>
5111 LogicVRegister Simulator::fcmp(VectorFormat vform,
5112                                LogicVRegister dst,
5113                                const LogicVRegister& src1,
5114                                const LogicVRegister& src2,
5115                                Condition cond) {
5116   dst.ClearForWrite(vform);
5117   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5118     bool result = false;
5119     T op1 = src1.Float<T>(i);
5120     T op2 = src2.Float<T>(i);
5121     bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5122 
5123     switch (cond) {
5124       case eq:
5125         result = (op1 == op2);
5126         break;
5127       case ge:
5128         result = (op1 >= op2);
5129         break;
5130       case gt:
5131         result = (op1 > op2);
5132         break;
5133       case le:
5134         result = (op1 <= op2);
5135         break;
5136       case lt:
5137         result = (op1 < op2);
5138         break;
5139       case ne:
5140         result = (op1 != op2);
5141         break;
5142       case uo:
5143         result = unordered;
5144         break;
5145       default:
5146         // Other conditions are defined in terms of those above.
5147         VIXL_UNREACHABLE();
5148         break;
5149     }
5150 
5151     if (result && unordered) {
5152       // Only `uo` and `ne` can be true for unordered comparisons.
5153       VIXL_ASSERT((cond == uo) || (cond == ne));
5154     }
5155 
5156     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5157   }
5158   return dst;
5159 }
5160 
5161 
5162 LogicVRegister Simulator::fcmp(VectorFormat vform,
5163                                LogicVRegister dst,
5164                                const LogicVRegister& src1,
5165                                const LogicVRegister& src2,
5166                                Condition cond) {
5167   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5168     fcmp<SimFloat16>(vform, dst, src1, src2, cond);
5169   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5170     fcmp<float>(vform, dst, src1, src2, cond);
5171   } else {
5172     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5173     fcmp<double>(vform, dst, src1, src2, cond);
5174   }
5175   return dst;
5176 }
5177 
5178 
5179 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
5180                                     LogicVRegister dst,
5181                                     const LogicVRegister& src,
5182                                     Condition cond) {
5183   SimVRegister temp;
5184   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5185     LogicVRegister zero_reg =
5186         dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
5187     fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
5188   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5189     LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5190     fcmp<float>(vform, dst, src, zero_reg, cond);
5191   } else {
5192     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5193     LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5194     fcmp<double>(vform, dst, src, zero_reg, cond);
5195   }
5196   return dst;
5197 }
5198 
5199 
5200 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5201                                   LogicVRegister dst,
5202                                   const LogicVRegister& src1,
5203                                   const LogicVRegister& src2,
5204                                   Condition cond) {
5205   SimVRegister temp1, temp2;
5206   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5207     LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5208     LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5209     fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5210   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5211     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5212     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5213     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5214   } else {
5215     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5216     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5217     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5218     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5219   }
5220   return dst;
5221 }
5222 
5223 
5224 template <typename T>
5225 LogicVRegister Simulator::fmla(VectorFormat vform,
5226                                LogicVRegister dst,
5227                                const LogicVRegister& srca,
5228                                const LogicVRegister& src1,
5229                                const LogicVRegister& src2) {
5230   dst.ClearForWrite(vform);
5231   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5232     T op1 = src1.Float<T>(i);
5233     T op2 = src2.Float<T>(i);
5234     T acc = srca.Float<T>(i);
5235     T result = FPMulAdd(acc, op1, op2);
5236     dst.SetFloat(vform, i, result);
5237   }
5238   return dst;
5239 }
5240 
5241 
5242 LogicVRegister Simulator::fmla(VectorFormat vform,
5243                                LogicVRegister dst,
5244                                const LogicVRegister& srca,
5245                                const LogicVRegister& src1,
5246                                const LogicVRegister& src2) {
5247   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5248     fmla<SimFloat16>(vform, dst, srca, src1, src2);
5249   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5250     fmla<float>(vform, dst, srca, src1, src2);
5251   } else {
5252     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5253     fmla<double>(vform, dst, srca, src1, src2);
5254   }
5255   return dst;
5256 }
5257 
5258 
5259 template <typename T>
5260 LogicVRegister Simulator::fmls(VectorFormat vform,
5261                                LogicVRegister dst,
5262                                const LogicVRegister& srca,
5263                                const LogicVRegister& src1,
5264                                const LogicVRegister& src2) {
5265   dst.ClearForWrite(vform);
5266   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5267     T op1 = -src1.Float<T>(i);
5268     T op2 = src2.Float<T>(i);
5269     T acc = srca.Float<T>(i);
5270     T result = FPMulAdd(acc, op1, op2);
5271     dst.SetFloat(i, result);
5272   }
5273   return dst;
5274 }
5275 
5276 
5277 LogicVRegister Simulator::fmls(VectorFormat vform,
5278                                LogicVRegister dst,
5279                                const LogicVRegister& srca,
5280                                const LogicVRegister& src1,
5281                                const LogicVRegister& src2) {
5282   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5283     fmls<SimFloat16>(vform, dst, srca, src1, src2);
5284   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5285     fmls<float>(vform, dst, srca, src1, src2);
5286   } else {
5287     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5288     fmls<double>(vform, dst, srca, src1, src2);
5289   }
5290   return dst;
5291 }
5292 
5293 
5294 LogicVRegister Simulator::fmlal(VectorFormat vform,
5295                                 LogicVRegister dst,
5296                                 const LogicVRegister& src1,
5297                                 const LogicVRegister& src2) {
5298   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5299   dst.ClearForWrite(vform);
5300   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5301     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5302     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5303     float acc = dst.Float<float>(i);
5304     float result = FPMulAdd(acc, op1, op2);
5305     dst.SetFloat(i, result);
5306   }
5307   return dst;
5308 }
5309 
5310 
5311 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5312                                  LogicVRegister dst,
5313                                  const LogicVRegister& src1,
5314                                  const LogicVRegister& src2) {
5315   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5316   dst.ClearForWrite(vform);
5317   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5318     int src = i + LaneCountFromFormat(vform);
5319     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5320     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5321     float acc = dst.Float<float>(i);
5322     float result = FPMulAdd(acc, op1, op2);
5323     dst.SetFloat(i, result);
5324   }
5325   return dst;
5326 }
5327 
5328 
5329 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5330                                 LogicVRegister dst,
5331                                 const LogicVRegister& src1,
5332                                 const LogicVRegister& src2) {
5333   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5334   dst.ClearForWrite(vform);
5335   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5336     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5337     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5338     float acc = dst.Float<float>(i);
5339     float result = FPMulAdd(acc, op1, op2);
5340     dst.SetFloat(i, result);
5341   }
5342   return dst;
5343 }
5344 
5345 
5346 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5347                                  LogicVRegister dst,
5348                                  const LogicVRegister& src1,
5349                                  const LogicVRegister& src2) {
5350   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5351   dst.ClearForWrite(vform);
5352   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5353     int src = i + LaneCountFromFormat(vform);
5354     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5355     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5356     float acc = dst.Float<float>(i);
5357     float result = FPMulAdd(acc, op1, op2);
5358     dst.SetFloat(i, result);
5359   }
5360   return dst;
5361 }
5362 
5363 
5364 LogicVRegister Simulator::fmlal(VectorFormat vform,
5365                                 LogicVRegister dst,
5366                                 const LogicVRegister& src1,
5367                                 const LogicVRegister& src2,
5368                                 int index) {
5369   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5370   dst.ClearForWrite(vform);
5371   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5372   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5373     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5374     float acc = dst.Float<float>(i);
5375     float result = FPMulAdd(acc, op1, op2);
5376     dst.SetFloat(i, result);
5377   }
5378   return dst;
5379 }
5380 
5381 
5382 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5383                                  LogicVRegister dst,
5384                                  const LogicVRegister& src1,
5385                                  const LogicVRegister& src2,
5386                                  int index) {
5387   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5388   dst.ClearForWrite(vform);
5389   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5390   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5391     int src = i + LaneCountFromFormat(vform);
5392     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5393     float acc = dst.Float<float>(i);
5394     float result = FPMulAdd(acc, op1, op2);
5395     dst.SetFloat(i, result);
5396   }
5397   return dst;
5398 }
5399 
5400 
5401 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5402                                 LogicVRegister dst,
5403                                 const LogicVRegister& src1,
5404                                 const LogicVRegister& src2,
5405                                 int index) {
5406   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5407   dst.ClearForWrite(vform);
5408   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5409   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5410     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5411     float acc = dst.Float<float>(i);
5412     float result = FPMulAdd(acc, op1, op2);
5413     dst.SetFloat(i, result);
5414   }
5415   return dst;
5416 }
5417 
5418 
5419 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5420                                  LogicVRegister dst,
5421                                  const LogicVRegister& src1,
5422                                  const LogicVRegister& src2,
5423                                  int index) {
5424   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5425   dst.ClearForWrite(vform);
5426   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5427   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5428     int src = i + LaneCountFromFormat(vform);
5429     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5430     float acc = dst.Float<float>(i);
5431     float result = FPMulAdd(acc, op1, op2);
5432     dst.SetFloat(i, result);
5433   }
5434   return dst;
5435 }
5436 
5437 
5438 template <typename T>
5439 LogicVRegister Simulator::fneg(VectorFormat vform,
5440                                LogicVRegister dst,
5441                                const LogicVRegister& src) {
5442   dst.ClearForWrite(vform);
5443   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5444     T op = src.Float<T>(i);
5445     op = -op;
5446     dst.SetFloat(i, op);
5447   }
5448   return dst;
5449 }
5450 
5451 
5452 LogicVRegister Simulator::fneg(VectorFormat vform,
5453                                LogicVRegister dst,
5454                                const LogicVRegister& src) {
5455   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5456     fneg<SimFloat16>(vform, dst, src);
5457   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5458     fneg<float>(vform, dst, src);
5459   } else {
5460     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5461     fneg<double>(vform, dst, src);
5462   }
5463   return dst;
5464 }
5465 
5466 
5467 template <typename T>
5468 LogicVRegister Simulator::fabs_(VectorFormat vform,
5469                                 LogicVRegister dst,
5470                                 const LogicVRegister& src) {
5471   dst.ClearForWrite(vform);
5472   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5473     T op = src.Float<T>(i);
5474     if (copysign(1.0, op) < 0.0) {
5475       op = -op;
5476     }
5477     dst.SetFloat(i, op);
5478   }
5479   return dst;
5480 }
5481 
5482 
5483 LogicVRegister Simulator::fabs_(VectorFormat vform,
5484                                 LogicVRegister dst,
5485                                 const LogicVRegister& src) {
5486   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5487     fabs_<SimFloat16>(vform, dst, src);
5488   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5489     fabs_<float>(vform, dst, src);
5490   } else {
5491     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5492     fabs_<double>(vform, dst, src);
5493   }
5494   return dst;
5495 }
5496 
5497 
5498 LogicVRegister Simulator::fabd(VectorFormat vform,
5499                                LogicVRegister dst,
5500                                const LogicVRegister& src1,
5501                                const LogicVRegister& src2) {
5502   SimVRegister temp;
5503   fsub(vform, temp, src1, src2);
5504   fabs_(vform, dst, temp);
5505   return dst;
5506 }
5507 
5508 
5509 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5510                                 LogicVRegister dst,
5511                                 const LogicVRegister& src) {
5512   dst.ClearForWrite(vform);
5513   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5514     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5515       SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5516       dst.SetFloat(i, result);
5517     }
5518   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5519     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5520       float result = FPSqrt(src.Float<float>(i));
5521       dst.SetFloat(i, result);
5522     }
5523   } else {
5524     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5525     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5526       double result = FPSqrt(src.Float<double>(i));
5527       dst.SetFloat(i, result);
5528     }
5529   }
5530   return dst;
5531 }
5532 
5533 
// Define the pairwise floating-point operations.
//
// NEON_FPPAIRWISE_LIST (defined elsewhere) invokes this macro once per
// (FNP, FN, OP) entry. Each expansion produces two overloads of
// Simulator::FNP:
//
//  * Vector form (src1, src2): runs uzp1 and uzp2 on the two sources into
//    scratch registers (presumably separating the even- and odd-indexed
//    elements - confirm against uzp1/uzp2), then applies the lane-wise
//    operation FN to the two scratch registers. For SVE formats the result is
//    additionally passed through interleave_top_bottom().
//
//  * Scalar form (src): applies the scalar operation OP to elements 0 and 1
//    of `src` and stores the result in element 0 of `dst`. `vform` is
//    expected to be kFormatH, kFormatS or kFormatD (asserted for the final
//    case). The H case moves the values as raw bits via
//    RawbitsToFloat16/Float16ToRawbits. ClearForWrite(vform) is called after
//    the result has been written.
//
// Only this header comment is added; a `//` comment inside the macro body
// would swallow the line-continuation backslash.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                    \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src1,                    \
                                const LogicVRegister& src2) {                  \
    SimVRegister temp1, temp2;                                                 \
    uzp1(vform, temp1, src1, src2);                                            \
    uzp2(vform, temp2, src1, src2);                                            \
    FN(vform, dst, temp1, temp2);                                              \
    if (IsSVEFormat(vform)) {                                                  \
      interleave_top_bottom(vform, dst, dst);                                  \
    }                                                                          \
    return dst;                                                                \
  }                                                                            \
                                                                               \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src) {                   \
    if (vform == kFormatH) {                                                   \
      SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))),   \
                           SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \
      dst.SetUint(vform, 0, Float16ToRawbits(result));                         \
    } else if (vform == kFormatS) {                                            \
      float result = OP(src.Float<float>(0), src.Float<float>(1));             \
      dst.SetFloat(0, result);                                                 \
    } else {                                                                   \
      VIXL_ASSERT(vform == kFormatD);                                          \
      double result = OP(src.Float<double>(0), src.Float<double>(1));          \
      dst.SetFloat(0, result);                                                 \
    }                                                                          \
    dst.ClearForWrite(vform);                                                  \
    return dst;                                                                \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5569 
5570 template <typename T>
5571 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5572                                                LogicVRegister dst,
5573                                                const LogicVRegister& src,
5574                                                typename TFPPairOp<T>::type fn,
5575                                                uint64_t inactive_value) {
5576   int lane_count = LaneCountFromFormat(vform);
5577   T result[kZRegMaxSizeInBytes / sizeof(T)];
5578   // Copy the source vector into a working array. Initialise the unused elements
5579   // at the end of the array to the same value that a false predicate would set.
5580   for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5581     result[i] = (i < lane_count)
5582                     ? src.Float<T>(i)
5583                     : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5584   }
5585 
5586   // Pairwise reduce the elements to a single value, using the pair op function
5587   // argument.
5588   for (int step = 1; step < lane_count; step *= 2) {
5589     for (int i = 0; i < lane_count; i += step * 2) {
5590       result[i] = (this->*fn)(result[i], result[i + step]);
5591     }
5592   }
5593   dst.ClearForWrite(ScalarFormatFromFormat(vform));
5594   dst.SetFloat<T>(0, result[0]);
5595   return dst;
5596 }
5597 
5598 LogicVRegister Simulator::FPPairedAcrossHelper(
5599     VectorFormat vform,
5600     LogicVRegister dst,
5601     const LogicVRegister& src,
5602     typename TFPPairOp<SimFloat16>::type fn16,
5603     typename TFPPairOp<float>::type fn32,
5604     typename TFPPairOp<double>::type fn64,
5605     uint64_t inactive_value) {
5606   switch (LaneSizeInBitsFromFormat(vform)) {
5607     case kHRegSize:
5608       return FPPairedAcrossHelper<SimFloat16>(vform,
5609                                               dst,
5610                                               src,
5611                                               fn16,
5612                                               inactive_value);
5613     case kSRegSize:
5614       return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5615     default:
5616       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5617       return FPPairedAcrossHelper<double>(vform,
5618                                           dst,
5619                                           src,
5620                                           fn64,
5621                                           inactive_value);
5622   }
5623 }
5624 
5625 LogicVRegister Simulator::faddv(VectorFormat vform,
5626                                 LogicVRegister dst,
5627                                 const LogicVRegister& src) {
5628   return FPPairedAcrossHelper(vform,
5629                               dst,
5630                               src,
5631                               &Simulator::FPAdd<SimFloat16>,
5632                               &Simulator::FPAdd<float>,
5633                               &Simulator::FPAdd<double>,
5634                               0);
5635 }
5636 
5637 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5638                                 LogicVRegister dst,
5639                                 const LogicVRegister& src) {
5640   int lane_size = LaneSizeInBitsFromFormat(vform);
5641   uint64_t inactive_value =
5642       FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5643   return FPPairedAcrossHelper(vform,
5644                               dst,
5645                               src,
5646                               &Simulator::FPMax<SimFloat16>,
5647                               &Simulator::FPMax<float>,
5648                               &Simulator::FPMax<double>,
5649                               inactive_value);
5650 }
5651 
5652 
5653 LogicVRegister Simulator::fminv(VectorFormat vform,
5654                                 LogicVRegister dst,
5655                                 const LogicVRegister& src) {
5656   int lane_size = LaneSizeInBitsFromFormat(vform);
5657   uint64_t inactive_value =
5658       FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5659   return FPPairedAcrossHelper(vform,
5660                               dst,
5661                               src,
5662                               &Simulator::FPMin<SimFloat16>,
5663                               &Simulator::FPMin<float>,
5664                               &Simulator::FPMin<double>,
5665                               inactive_value);
5666 }
5667 
5668 
5669 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5670                                   LogicVRegister dst,
5671                                   const LogicVRegister& src) {
5672   int lane_size = LaneSizeInBitsFromFormat(vform);
5673   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5674   return FPPairedAcrossHelper(vform,
5675                               dst,
5676                               src,
5677                               &Simulator::FPMaxNM<SimFloat16>,
5678                               &Simulator::FPMaxNM<float>,
5679                               &Simulator::FPMaxNM<double>,
5680                               inactive_value);
5681 }
5682 
5683 
5684 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5685                                   LogicVRegister dst,
5686                                   const LogicVRegister& src) {
5687   int lane_size = LaneSizeInBitsFromFormat(vform);
5688   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5689   return FPPairedAcrossHelper(vform,
5690                               dst,
5691                               src,
5692                               &Simulator::FPMinNM<SimFloat16>,
5693                               &Simulator::FPMinNM<float>,
5694                               &Simulator::FPMinNM<double>,
5695                               inactive_value);
5696 }
5697 
5698 
5699 LogicVRegister Simulator::fmul(VectorFormat vform,
5700                                LogicVRegister dst,
5701                                const LogicVRegister& src1,
5702                                const LogicVRegister& src2,
5703                                int index) {
5704   dst.ClearForWrite(vform);
5705   SimVRegister temp;
5706   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5707     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5708     fmul<SimFloat16>(vform, dst, src1, index_reg);
5709   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5710     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5711     fmul<float>(vform, dst, src1, index_reg);
5712   } else {
5713     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5714     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5715     fmul<double>(vform, dst, src1, index_reg);
5716   }
5717   return dst;
5718 }
5719 
5720 
5721 LogicVRegister Simulator::fmla(VectorFormat vform,
5722                                LogicVRegister dst,
5723                                const LogicVRegister& src1,
5724                                const LogicVRegister& src2,
5725                                int index) {
5726   dst.ClearForWrite(vform);
5727   SimVRegister temp;
5728   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5729     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5730     fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5731   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5732     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5733     fmla<float>(vform, dst, dst, src1, index_reg);
5734   } else {
5735     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5736     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5737     fmla<double>(vform, dst, dst, src1, index_reg);
5738   }
5739   return dst;
5740 }
5741 
5742 
5743 LogicVRegister Simulator::fmls(VectorFormat vform,
5744                                LogicVRegister dst,
5745                                const LogicVRegister& src1,
5746                                const LogicVRegister& src2,
5747                                int index) {
5748   dst.ClearForWrite(vform);
5749   SimVRegister temp;
5750   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5751     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5752     fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5753   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5754     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5755     fmls<float>(vform, dst, dst, src1, index_reg);
5756   } else {
5757     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5758     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5759     fmls<double>(vform, dst, dst, src1, index_reg);
5760   }
5761   return dst;
5762 }
5763 
5764 
5765 LogicVRegister Simulator::fmulx(VectorFormat vform,
5766                                 LogicVRegister dst,
5767                                 const LogicVRegister& src1,
5768                                 const LogicVRegister& src2,
5769                                 int index) {
5770   dst.ClearForWrite(vform);
5771   SimVRegister temp;
5772   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5773     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5774     fmulx<SimFloat16>(vform, dst, src1, index_reg);
5775   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5776     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5777     fmulx<float>(vform, dst, src1, index_reg);
5778   } else {
5779     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5780     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5781     fmulx<double>(vform, dst, src1, index_reg);
5782   }
5783   return dst;
5784 }
5785 
5786 
5787 LogicVRegister Simulator::frint(VectorFormat vform,
5788                                 LogicVRegister dst,
5789                                 const LogicVRegister& src,
5790                                 FPRounding rounding_mode,
5791                                 bool inexact_exception,
5792                                 FrintMode frint_mode) {
5793   dst.ClearForWrite(vform);
5794   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5795     VIXL_ASSERT(frint_mode == kFrintToInteger);
5796     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5797       SimFloat16 input = src.Float<SimFloat16>(i);
5798       SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5799       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5800         FPProcessException();
5801       }
5802       dst.SetFloat<SimFloat16>(i, rounded);
5803     }
5804   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5805     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5806       float input = src.Float<float>(i);
5807       float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5808 
5809       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5810         FPProcessException();
5811       }
5812       dst.SetFloat<float>(i, rounded);
5813     }
5814   } else {
5815     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5816     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5817       double input = src.Float<double>(i);
5818       double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5819       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5820         FPProcessException();
5821       }
5822       dst.SetFloat<double>(i, rounded);
5823     }
5824   }
5825   return dst;
5826 }
5827 
5828 LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
5829                                VectorFormat src_vform,
5830                                LogicVRegister dst,
5831                                const LogicPRegister& pg,
5832                                const LogicVRegister& src) {
5833   unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
5834   unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
5835   VectorFormat vform = SVEFormatFromLaneSizeInBits(
5836       std::max(dst_data_size_in_bits, src_data_size_in_bits));
5837 
5838   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5839     if (!pg.IsActive(vform, i)) continue;
5840 
5841     uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5842                                                       0,
5843                                                       src.Uint(vform, i));
5844     double dst_value =
5845         RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5846 
5847     uint64_t dst_raw_bits =
5848         FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5849 
5850     dst.SetUint(vform, i, dst_raw_bits);
5851   }
5852 
5853   return dst;
5854 }
5855 
5856 LogicVRegister Simulator::fcvts(VectorFormat vform,
5857                                 unsigned dst_data_size_in_bits,
5858                                 unsigned src_data_size_in_bits,
5859                                 LogicVRegister dst,
5860                                 const LogicPRegister& pg,
5861                                 const LogicVRegister& src,
5862                                 FPRounding round,
5863                                 int fbits) {
5864   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5865   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5866 
5867   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5868     if (!pg.IsActive(vform, i)) continue;
5869 
5870     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5871                                                0,
5872                                                src.Uint(vform, i));
5873     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5874                     std::pow(2.0, fbits);
5875 
5876     switch (dst_data_size_in_bits) {
5877       case kHRegSize:
5878         dst.SetInt(vform, i, FPToInt16(result, round));
5879         break;
5880       case kSRegSize:
5881         dst.SetInt(vform, i, FPToInt32(result, round));
5882         break;
5883       case kDRegSize:
5884         dst.SetInt(vform, i, FPToInt64(result, round));
5885         break;
5886       default:
5887         VIXL_UNIMPLEMENTED();
5888         break;
5889     }
5890   }
5891 
5892   return dst;
5893 }
5894 
5895 LogicVRegister Simulator::fcvts(VectorFormat vform,
5896                                 LogicVRegister dst,
5897                                 const LogicVRegister& src,
5898                                 FPRounding round,
5899                                 int fbits) {
5900   dst.ClearForWrite(vform);
5901   return fcvts(vform,
5902                LaneSizeInBitsFromFormat(vform),
5903                LaneSizeInBitsFromFormat(vform),
5904                dst,
5905                GetPTrue(),
5906                src,
5907                round,
5908                fbits);
5909 }
5910 
5911 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5912                                 unsigned dst_data_size_in_bits,
5913                                 unsigned src_data_size_in_bits,
5914                                 LogicVRegister dst,
5915                                 const LogicPRegister& pg,
5916                                 const LogicVRegister& src,
5917                                 FPRounding round,
5918                                 int fbits) {
5919   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5920   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5921 
5922   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5923     if (!pg.IsActive(vform, i)) continue;
5924 
5925     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5926                                                0,
5927                                                src.Uint(vform, i));
5928     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5929                     std::pow(2.0, fbits);
5930 
5931     switch (dst_data_size_in_bits) {
5932       case kHRegSize:
5933         dst.SetUint(vform, i, FPToUInt16(result, round));
5934         break;
5935       case kSRegSize:
5936         dst.SetUint(vform, i, FPToUInt32(result, round));
5937         break;
5938       case kDRegSize:
5939         dst.SetUint(vform, i, FPToUInt64(result, round));
5940         break;
5941       default:
5942         VIXL_UNIMPLEMENTED();
5943         break;
5944     }
5945   }
5946 
5947   return dst;
5948 }
5949 
5950 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5951                                 LogicVRegister dst,
5952                                 const LogicVRegister& src,
5953                                 FPRounding round,
5954                                 int fbits) {
5955   dst.ClearForWrite(vform);
5956   return fcvtu(vform,
5957                LaneSizeInBitsFromFormat(vform),
5958                LaneSizeInBitsFromFormat(vform),
5959                dst,
5960                GetPTrue(),
5961                src,
5962                round,
5963                fbits);
5964 }
5965 
5966 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5967                                 LogicVRegister dst,
5968                                 const LogicVRegister& src) {
5969   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5970     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5971       // TODO: Full support for SimFloat16 in SimRegister(s).
5972       dst.SetFloat(i,
5973                    FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5974                              ReadDN()));
5975     }
5976   } else {
5977     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5978     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5979       dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
5980     }
5981   }
5982   return dst;
5983 }
5984 
5985 
5986 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5987                                  LogicVRegister dst,
5988                                  const LogicVRegister& src) {
5989   int lane_count = LaneCountFromFormat(vform);
5990   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5991     for (int i = 0; i < lane_count; i++) {
5992       // TODO: Full support for SimFloat16 in SimRegister(s).
5993       dst.SetFloat(i,
5994                    FPToFloat(RawbitsToFloat16(
5995                                  src.Float<uint16_t>(i + lane_count)),
5996                              ReadDN()));
5997     }
5998   } else {
5999     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6000     for (int i = 0; i < lane_count; i++) {
6001       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
6002     }
6003   }
6004   return dst;
6005 }
6006 
6007 
6008 LogicVRegister Simulator::fcvtn(VectorFormat vform,
6009                                 LogicVRegister dst,
6010                                 const LogicVRegister& src) {
6011   SimVRegister tmp;
6012   LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6013   dst.ClearForWrite(vform);
6014   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6015     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6016       dst.SetFloat(i,
6017                    Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
6018                                                 FPTieEven,
6019                                                 ReadDN())));
6020     }
6021   } else {
6022     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6023     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6024       dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
6025     }
6026   }
6027   return dst;
6028 }
6029 
6030 
6031 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
6032                                  LogicVRegister dst,
6033                                  const LogicVRegister& src) {
6034   int lane_count = LaneCountFromFormat(vform) / 2;
6035   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6036     for (int i = lane_count - 1; i >= 0; i--) {
6037       dst.SetFloat(i + lane_count,
6038                    Float16ToRawbits(
6039                        FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
6040     }
6041   } else {
6042     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6043     for (int i = lane_count - 1; i >= 0; i--) {
6044       dst.SetFloat(i + lane_count,
6045                    FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
6046     }
6047   }
6048   return dst;
6049 }
6050 
6051 
6052 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
6053                                  LogicVRegister dst,
6054                                  const LogicVRegister& src) {
6055   SimVRegister tmp;
6056   LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6057   int input_lane_count = LaneCountFromFormat(vform);
6058   if (IsSVEFormat(vform)) {
6059     mov(kFormatVnB, tmp, src);
6060     input_lane_count /= 2;
6061   }
6062 
6063   dst.ClearForWrite(vform);
6064   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6065 
6066   for (int i = 0; i < input_lane_count; i++) {
6067     dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
6068   }
6069   return dst;
6070 }
6071 
6072 
6073 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
6074                                   LogicVRegister dst,
6075                                   const LogicVRegister& src) {
6076   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6077   int lane_count = LaneCountFromFormat(vform) / 2;
6078   for (int i = lane_count - 1; i >= 0; i--) {
6079     dst.SetFloat(i + lane_count,
6080                  FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
6081   }
6082   return dst;
6083 }
6084 
6085 
6086 // Based on reference C function recip_sqrt_estimate from ARM ARM.
6087 double Simulator::recip_sqrt_estimate(double a) {
6088   int quot0, quot1, s;
6089   double r;
6090   if (a < 0.5) {
6091     quot0 = static_cast<int>(a * 512.0);
6092     r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6093   } else {
6094     quot1 = static_cast<int>(a * 256.0);
6095     r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6096   }
6097   s = static_cast<int>(256.0 * r + 0.5);
6098   return static_cast<double>(s) / 256.0;
6099 }
6100 
6101 
6102 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6103   return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6104 }
6105 
6106 
6107 template <typename T>
6108 T Simulator::FPRecipSqrtEstimate(T op) {
6109   if (IsNaN(op)) {
6110     return FPProcessNaN(op);
6111   } else if (op == 0.0) {
6112     if (copysign(1.0, op) < 0.0) {
6113       return kFP64NegativeInfinity;
6114     } else {
6115       return kFP64PositiveInfinity;
6116     }
6117   } else if (copysign(1.0, op) < 0.0) {
6118     FPProcessException();
6119     return FPDefaultNaN<T>();
6120   } else if (IsInf(op)) {
6121     return 0.0;
6122   } else {
6123     uint64_t fraction;
6124     int exp, result_exp;
6125 
6126     if (IsFloat16<T>()) {
6127       exp = Float16Exp(op);
6128       fraction = Float16Mantissa(op);
6129       fraction <<= 42;
6130     } else if (IsFloat32<T>()) {
6131       exp = FloatExp(op);
6132       fraction = FloatMantissa(op);
6133       fraction <<= 29;
6134     } else {
6135       VIXL_ASSERT(IsFloat64<T>());
6136       exp = DoubleExp(op);
6137       fraction = DoubleMantissa(op);
6138     }
6139 
6140     if (exp == 0) {
6141       while (Bits(fraction, 51, 51) == 0) {
6142         fraction = Bits(fraction, 50, 0) << 1;
6143         exp -= 1;
6144       }
6145       fraction = Bits(fraction, 50, 0) << 1;
6146     }
6147 
6148     double scaled;
6149     if (Bits(exp, 0, 0) == 0) {
6150       scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6151     } else {
6152       scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
6153     }
6154 
6155     if (IsFloat16<T>()) {
6156       result_exp = (44 - exp) / 2;
6157     } else if (IsFloat32<T>()) {
6158       result_exp = (380 - exp) / 2;
6159     } else {
6160       VIXL_ASSERT(IsFloat64<T>());
6161       result_exp = (3068 - exp) / 2;
6162     }
6163 
6164     uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
6165 
6166     if (IsFloat16<T>()) {
6167       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6168       uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
6169       return Float16Pack(0, exp_bits, est_bits);
6170     } else if (IsFloat32<T>()) {
6171       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6172       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
6173       return FloatPack(0, exp_bits, est_bits);
6174     } else {
6175       VIXL_ASSERT(IsFloat64<T>());
6176       return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
6177     }
6178   }
6179 }
6180 
6181 
6182 LogicVRegister Simulator::frsqrte(VectorFormat vform,
6183                                   LogicVRegister dst,
6184                                   const LogicVRegister& src) {
6185   dst.ClearForWrite(vform);
6186   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6187     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6188       SimFloat16 input = src.Float<SimFloat16>(i);
6189       dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
6190     }
6191   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6192     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6193       float input = src.Float<float>(i);
6194       dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
6195     }
6196   } else {
6197     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6198     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6199       double input = src.Float<double>(i);
6200       dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
6201     }
6202   }
6203   return dst;
6204 }
6205 
6206 template <typename T>
6207 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6208   uint32_t sign;
6209 
6210   if (IsFloat16<T>()) {
6211     sign = Float16Sign(op);
6212   } else if (IsFloat32<T>()) {
6213     sign = FloatSign(op);
6214   } else {
6215     VIXL_ASSERT(IsFloat64<T>());
6216     sign = DoubleSign(op);
6217   }
6218 
6219   if (IsNaN(op)) {
6220     return FPProcessNaN(op);
6221   } else if (IsInf(op)) {
6222     return (sign == 1) ? -0.0 : 0.0;
6223   } else if (op == 0.0) {
6224     FPProcessException();  // FPExc_DivideByZero exception.
6225     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6226   } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6227              (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6228              (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6229     bool overflow_to_inf = false;
6230     switch (rounding) {
6231       case FPTieEven:
6232         overflow_to_inf = true;
6233         break;
6234       case FPPositiveInfinity:
6235         overflow_to_inf = (sign == 0);
6236         break;
6237       case FPNegativeInfinity:
6238         overflow_to_inf = (sign == 1);
6239         break;
6240       case FPZero:
6241         overflow_to_inf = false;
6242         break;
6243       default:
6244         break;
6245     }
6246     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
6247     if (overflow_to_inf) {
6248       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6249     } else {
6250       // Return FPMaxNormal(sign).
6251       if (IsFloat16<T>()) {
6252         return Float16Pack(sign, 0x1f, 0x3ff);
6253       } else if (IsFloat32<T>()) {
6254         return FloatPack(sign, 0xfe, 0x07fffff);
6255       } else {
6256         VIXL_ASSERT(IsFloat64<T>());
6257         return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6258       }
6259     }
6260   } else {
6261     uint64_t fraction;
6262     int exp, result_exp;
6263 
6264     if (IsFloat16<T>()) {
6265       sign = Float16Sign(op);
6266       exp = Float16Exp(op);
6267       fraction = Float16Mantissa(op);
6268       fraction <<= 42;
6269     } else if (IsFloat32<T>()) {
6270       sign = FloatSign(op);
6271       exp = FloatExp(op);
6272       fraction = FloatMantissa(op);
6273       fraction <<= 29;
6274     } else {
6275       VIXL_ASSERT(IsFloat64<T>());
6276       sign = DoubleSign(op);
6277       exp = DoubleExp(op);
6278       fraction = DoubleMantissa(op);
6279     }
6280 
6281     if (exp == 0) {
6282       if (Bits(fraction, 51, 51) == 0) {
6283         exp -= 1;
6284         fraction = Bits(fraction, 49, 0) << 2;
6285       } else {
6286         fraction = Bits(fraction, 50, 0) << 1;
6287       }
6288     }
6289 
6290     double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6291 
6292     if (IsFloat16<T>()) {
6293       result_exp = (29 - exp);  // In range 29-30 = -1 to 29+1 = 30.
6294     } else if (IsFloat32<T>()) {
6295       result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
6296     } else {
6297       VIXL_ASSERT(IsFloat64<T>());
6298       result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
6299     }
6300 
6301     double estimate = recip_estimate(scaled);
6302 
6303     fraction = DoubleMantissa(estimate);
6304     if (result_exp == 0) {
6305       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6306     } else if (result_exp == -1) {
6307       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6308       result_exp = 0;
6309     }
6310     if (IsFloat16<T>()) {
6311       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6312       uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6313       return Float16Pack(sign, exp_bits, frac_bits);
6314     } else if (IsFloat32<T>()) {
6315       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6316       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6317       return FloatPack(sign, exp_bits, frac_bits);
6318     } else {
6319       VIXL_ASSERT(IsFloat64<T>());
6320       return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6321     }
6322   }
6323 }
6324 
6325 
6326 LogicVRegister Simulator::frecpe(VectorFormat vform,
6327                                  LogicVRegister dst,
6328                                  const LogicVRegister& src,
6329                                  FPRounding round) {
6330   dst.ClearForWrite(vform);
6331   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6332     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6333       SimFloat16 input = src.Float<SimFloat16>(i);
6334       dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6335     }
6336   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6337     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6338       float input = src.Float<float>(i);
6339       dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6340     }
6341   } else {
6342     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6343     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6344       double input = src.Float<double>(i);
6345       dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6346     }
6347   }
6348   return dst;
6349 }
6350 
6351 
6352 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6353                                   LogicVRegister dst,
6354                                   const LogicVRegister& src) {
6355   dst.ClearForWrite(vform);
6356   uint64_t operand;
6357   uint32_t result;
6358   double dp_operand, dp_result;
6359   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6360     operand = src.Uint(vform, i);
6361     if (operand <= 0x3FFFFFFF) {
6362       result = 0xFFFFFFFF;
6363     } else {
6364       dp_operand = operand * std::pow(2.0, -32);
6365       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6366       result = static_cast<uint32_t>(dp_result);
6367     }
6368     dst.SetUint(vform, i, result);
6369   }
6370   return dst;
6371 }
6372 
6373 
6374 // Based on reference C function recip_estimate from ARM ARM.
6375 double Simulator::recip_estimate(double a) {
6376   int q, s;
6377   double r;
6378   q = static_cast<int>(a * 512.0);
6379   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6380   s = static_cast<int>(256.0 * r + 0.5);
6381   return static_cast<double>(s) / 256.0;
6382 }
6383 
6384 
6385 LogicVRegister Simulator::urecpe(VectorFormat vform,
6386                                  LogicVRegister dst,
6387                                  const LogicVRegister& src) {
6388   dst.ClearForWrite(vform);
6389   uint64_t operand;
6390   uint32_t result;
6391   double dp_operand, dp_result;
6392   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6393     operand = src.Uint(vform, i);
6394     if (operand <= 0x7FFFFFFF) {
6395       result = 0xFFFFFFFF;
6396     } else {
6397       dp_operand = operand * std::pow(2.0, -32);
6398       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6399       result = static_cast<uint32_t>(dp_result);
6400     }
6401     dst.SetUint(vform, i, result);
6402   }
6403   return dst;
6404 }
6405 
// Clears the predicate register `dst` with LogicPRegister::Clear() and returns
// it.
// NOTE(review): presumably Clear() leaves every predicate lane inactive -
// confirm against LogicPRegister.
LogicPRegister Simulator::pfalse(LogicPRegister dst) {
  dst.Clear();
  return dst;
}
6410 
6411 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6412                                  const LogicPRegister& pg,
6413                                  const LogicPRegister& src) {
6414   int first_pg = GetFirstActive(kFormatVnB, pg);
6415   VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6416   mov(dst, src);
6417   if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6418   return dst;
6419 }
6420 
6421 LogicPRegister Simulator::ptrue(VectorFormat vform,
6422                                 LogicPRegister dst,
6423                                 int pattern) {
6424   int count = GetPredicateConstraintLaneCount(vform, pattern);
6425   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6426     dst.SetActive(vform, i, i < count);
6427   }
6428   return dst;
6429 }
6430 
6431 LogicPRegister Simulator::pnext(VectorFormat vform,
6432                                 LogicPRegister dst,
6433                                 const LogicPRegister& pg,
6434                                 const LogicPRegister& src) {
6435   int next = GetLastActive(vform, src) + 1;
6436   while (next < LaneCountFromFormat(vform)) {
6437     if (pg.IsActive(vform, next)) break;
6438     next++;
6439   }
6440 
6441   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6442     dst.SetActive(vform, i, (i == next));
6443   }
6444   return dst;
6445 }
6446 
6447 template <typename T>
6448 LogicVRegister Simulator::frecpx(VectorFormat vform,
6449                                  LogicVRegister dst,
6450                                  const LogicVRegister& src) {
6451   dst.ClearForWrite(vform);
6452   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6453     T op = src.Float<T>(i);
6454     T result;
6455     if (IsNaN(op)) {
6456       result = FPProcessNaN(op);
6457     } else {
6458       int exp;
6459       uint32_t sign;
6460       if (IsFloat16<T>()) {
6461         sign = Float16Sign(op);
6462         exp = Float16Exp(op);
6463         exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6464         result = Float16Pack(sign, exp, 0);
6465       } else if (IsFloat32<T>()) {
6466         sign = FloatSign(op);
6467         exp = FloatExp(op);
6468         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6469         result = FloatPack(sign, exp, 0);
6470       } else {
6471         VIXL_ASSERT(IsFloat64<T>());
6472         sign = DoubleSign(op);
6473         exp = DoubleExp(op);
6474         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6475         result = DoublePack(sign, exp, 0);
6476       }
6477     }
6478     dst.SetFloat(i, result);
6479   }
6480   return dst;
6481 }
6482 
6483 
6484 LogicVRegister Simulator::frecpx(VectorFormat vform,
6485                                  LogicVRegister dst,
6486                                  const LogicVRegister& src) {
6487   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6488     frecpx<SimFloat16>(vform, dst, src);
6489   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6490     frecpx<float>(vform, dst, src);
6491   } else {
6492     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6493     frecpx<double>(vform, dst, src);
6494   }
6495   return dst;
6496 }
6497 
6498 LogicVRegister Simulator::flogb(VectorFormat vform,
6499                                 LogicVRegister dst,
6500                                 const LogicVRegister& src) {
6501   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6502     double op = 0.0;
6503     switch (vform) {
6504       case kFormatVnH:
6505         op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
6506         break;
6507       case kFormatVnS:
6508         op = src.Float<float>(i);
6509         break;
6510       case kFormatVnD:
6511         op = src.Float<double>(i);
6512         break;
6513       default:
6514         VIXL_UNREACHABLE();
6515     }
6516 
6517     switch (std::fpclassify(op)) {
6518       case FP_INFINITE:
6519         dst.SetInt(vform, i, MaxIntFromFormat(vform));
6520         break;
6521       case FP_NAN:
6522       case FP_ZERO:
6523         dst.SetInt(vform, i, MinIntFromFormat(vform));
6524         break;
6525       case FP_SUBNORMAL: {
6526         // DoubleMantissa returns the mantissa of its input, leaving 12 zero
6527         // bits where the sign and exponent would be. We subtract 12 to
6528         // find the number of leading zero bits in the mantissa itself.
6529         int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
6530         // Log2 of a subnormal is the lowest exponent a normal number can
6531         // represent, together with the zeros in the mantissa.
6532         dst.SetInt(vform, i, -1023 - mant_zero_count);
6533         break;
6534       }
6535       case FP_NORMAL:
6536         // Log2 of a normal number is the exponent minus the bias.
6537         dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
6538         break;
6539     }
6540   }
6541   return dst;
6542 }
6543 
6544 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6545                                  LogicVRegister dst,
6546                                  const LogicVRegister& src1,
6547                                  const LogicVRegister& src2) {
6548   SimVRegister maybe_neg_src1;
6549 
6550   // The bottom bit of src2 controls the sign of the result. Use it to
6551   // conditionally invert the sign of one `fmul` operand.
6552   shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6553   eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6554 
6555   // Multiply src1 by the modified neg_src1, which is potentially its negation.
6556   // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6557   // rather than neg_src1, must be the first source argument.
6558   fmul(vform, dst, src1, maybe_neg_src1);
6559 
6560   return dst;
6561 }
6562 
6563 LogicVRegister Simulator::ftssel(VectorFormat vform,
6564                                  LogicVRegister dst,
6565                                  const LogicVRegister& src1,
6566                                  const LogicVRegister& src2) {
6567   unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6568   uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6569   uint64_t one;
6570 
6571   if (lane_bits == kHRegSize) {
6572     one = Float16ToRawbits(Float16(1.0));
6573   } else if (lane_bits == kSRegSize) {
6574     one = FloatToRawbits(1.0);
6575   } else {
6576     VIXL_ASSERT(lane_bits == kDRegSize);
6577     one = DoubleToRawbits(1.0);
6578   }
6579 
6580   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6581     // Use integer accessors for this operation, as this is a data manipulation
6582     // task requiring no calculation.
6583     uint64_t op = src1.Uint(vform, i);
6584 
6585     // Only the bottom two bits of the src2 register are significant, indicating
6586     // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6587     // determines the sign of the value written to dst.
6588     uint64_t q = src2.Uint(vform, i);
6589     if ((q & 1) == 1) op = one;
6590     if ((q & 2) == 2) op ^= sign_bit;
6591 
6592     dst.SetUint(vform, i, op);
6593   }
6594 
6595   return dst;
6596 }
6597 
6598 template <typename T>
6599 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6600                                        LogicVRegister dst,
6601                                        const LogicVRegister& src1,
6602                                        const LogicVRegister& src2,
6603                                        uint64_t coeff_pos,
6604                                        uint64_t coeff_neg) {
6605   SimVRegister zero;
6606   dup_immediate(kFormatVnB, zero, 0);
6607 
6608   SimVRegister cf;
6609   SimVRegister cfn;
6610   dup_immediate(vform, cf, coeff_pos);
6611   dup_immediate(vform, cfn, coeff_neg);
6612 
6613   // The specification requires testing the top bit of the raw value, rather
6614   // than the sign of the floating point number, so use an integer comparison
6615   // here.
6616   SimPRegister is_neg;
6617   SVEIntCompareVectorsHelper(lt,
6618                              vform,
6619                              is_neg,
6620                              GetPTrue(),
6621                              src2,
6622                              zero,
6623                              false,
6624                              LeaveFlags);
6625   mov_merging(vform, cf, is_neg, cfn);
6626 
6627   SimVRegister temp;
6628   fabs_<T>(vform, temp, src2);
6629   fmla<T>(vform, cf, cf, src1, temp);
6630   mov(vform, dst, cf);
6631   return dst;
6632 }
6633 
6634 
6635 LogicVRegister Simulator::ftmad(VectorFormat vform,
6636                                 LogicVRegister dst,
6637                                 const LogicVRegister& src1,
6638                                 const LogicVRegister& src2,
6639                                 unsigned index) {
6640   static const uint64_t ftmad_coeff16[] = {0x3c00,
6641                                            0xb155,
6642                                            0x2030,
6643                                            0x0000,
6644                                            0x0000,
6645                                            0x0000,
6646                                            0x0000,
6647                                            0x0000,
6648                                            0x3c00,
6649                                            0xb800,
6650                                            0x293a,
6651                                            0x0000,
6652                                            0x0000,
6653                                            0x0000,
6654                                            0x0000,
6655                                            0x0000};
6656 
6657   static const uint64_t ftmad_coeff32[] = {0x3f800000,
6658                                            0xbe2aaaab,
6659                                            0x3c088886,
6660                                            0xb95008b9,
6661                                            0x36369d6d,
6662                                            0x00000000,
6663                                            0x00000000,
6664                                            0x00000000,
6665                                            0x3f800000,
6666                                            0xbf000000,
6667                                            0x3d2aaaa6,
6668                                            0xbab60705,
6669                                            0x37cd37cc,
6670                                            0x00000000,
6671                                            0x00000000,
6672                                            0x00000000};
6673 
6674   static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6675                                            0xbfc5555555555543,
6676                                            0x3f8111111110f30c,
6677                                            0xbf2a01a019b92fc6,
6678                                            0x3ec71de351f3d22b,
6679                                            0xbe5ae5e2b60f7b91,
6680                                            0x3de5d8408868552f,
6681                                            0x0000000000000000,
6682                                            0x3ff0000000000000,
6683                                            0xbfe0000000000000,
6684                                            0x3fa5555555555536,
6685                                            0xbf56c16c16c13a0b,
6686                                            0x3efa01a019b1e8d8,
6687                                            0xbe927e4f7282f468,
6688                                            0x3e21ee96d2641b13,
6689                                            0xbda8f76380fbb401};
6690   VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6691   VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6692   VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6693 
6694   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6695     FTMaddHelper<SimFloat16>(vform,
6696                              dst,
6697                              src1,
6698                              src2,
6699                              ftmad_coeff16[index],
6700                              ftmad_coeff16[index + 8]);
6701   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6702     FTMaddHelper<float>(vform,
6703                         dst,
6704                         src1,
6705                         src2,
6706                         ftmad_coeff32[index],
6707                         ftmad_coeff32[index + 8]);
6708   } else {
6709     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6710     FTMaddHelper<double>(vform,
6711                          dst,
6712                          src1,
6713                          src2,
6714                          ftmad_coeff64[index],
6715                          ftmad_coeff64[index + 8]);
6716   }
6717   return dst;
6718 }
6719 
6720 LogicVRegister Simulator::fexpa(VectorFormat vform,
6721                                 LogicVRegister dst,
6722                                 const LogicVRegister& src) {
6723   static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6724                                            0x005d, 0x0075, 0x008e, 0x00a8,
6725                                            0x00c2, 0x00dc, 0x00f8, 0x0114,
6726                                            0x0130, 0x014d, 0x016b, 0x0189,
6727                                            0x01a8, 0x01c8, 0x01e8, 0x0209,
6728                                            0x022b, 0x024e, 0x0271, 0x0295,
6729                                            0x02ba, 0x02e0, 0x0306, 0x032e,
6730                                            0x0356, 0x037f, 0x03a9, 0x03d4};
6731 
6732   static const uint64_t fexpa_coeff32[] =
6733       {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6734        0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6735        0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6736        0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6737        0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6738        0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6739        0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6740        0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6741        0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6742        0x7d3e0c};
6743 
6744   static const uint64_t fexpa_coeff64[] =
6745       {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6746        0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6747        0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6748        0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6749        0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6750        0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6751        0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6752        0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6753        0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6754        0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6755        0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6756        0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6757        0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6758        0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6759        0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6760        0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6761 
6762   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6763   int index_highbit = 5;
6764   int op_highbit, op_shift;
6765   const uint64_t* fexpa_coeff;
6766 
6767   if (lane_size == kHRegSize) {
6768     index_highbit = 4;
6769     VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6770     fexpa_coeff = fexpa_coeff16;
6771     op_highbit = 9;
6772     op_shift = 10;
6773   } else if (lane_size == kSRegSize) {
6774     VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6775     fexpa_coeff = fexpa_coeff32;
6776     op_highbit = 13;
6777     op_shift = 23;
6778   } else {
6779     VIXL_ASSERT(lane_size == kDRegSize);
6780     VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6781     fexpa_coeff = fexpa_coeff64;
6782     op_highbit = 16;
6783     op_shift = 52;
6784   }
6785 
6786   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6787     uint64_t op = src.Uint(vform, i);
6788     uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6789     result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6790     dst.SetUint(vform, i, result);
6791   }
6792   return dst;
6793 }
6794 
6795 template <typename T>
6796 LogicVRegister Simulator::fscale(VectorFormat vform,
6797                                  LogicVRegister dst,
6798                                  const LogicVRegister& src1,
6799                                  const LogicVRegister& src2) {
6800   T two = T(2.0);
6801   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6802     T src1_val = src1.Float<T>(i);
6803     if (!IsNaN(src1_val)) {
6804       int64_t scale = src2.Int(vform, i);
6805       // TODO: this is a low-performance implementation, but it's simple and
6806       // less likely to be buggy. Consider replacing it with something faster.
6807 
6808       // Scales outside of these bounds become infinity or zero, so there's no
6809       // point iterating further.
6810       scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6811 
6812       // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6813       // decrement scale until it's zero.
6814       while (scale-- > 0) {
6815         src1_val = FPMul(src1_val, two);
6816       }
6817 
6818       // If scale is negative, divide by two and increment scale until it's
6819       // zero. Initially, scale is (src2 - 1), so we pre-increment.
6820       while (++scale < 0) {
6821         src1_val = FPDiv(src1_val, two);
6822       }
6823     }
6824     dst.SetFloat<T>(i, src1_val);
6825   }
6826   return dst;
6827 }
6828 
6829 LogicVRegister Simulator::fscale(VectorFormat vform,
6830                                  LogicVRegister dst,
6831                                  const LogicVRegister& src1,
6832                                  const LogicVRegister& src2) {
6833   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6834     fscale<SimFloat16>(vform, dst, src1, src2);
6835   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6836     fscale<float>(vform, dst, src1, src2);
6837   } else {
6838     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6839     fscale<double>(vform, dst, src1, src2);
6840   }
6841   return dst;
6842 }
6843 
6844 LogicVRegister Simulator::scvtf(VectorFormat vform,
6845                                 unsigned dst_data_size_in_bits,
6846                                 unsigned src_data_size_in_bits,
6847                                 LogicVRegister dst,
6848                                 const LogicPRegister& pg,
6849                                 const LogicVRegister& src,
6850                                 FPRounding round,
6851                                 int fbits) {
6852   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6853   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6854   dst.ClearForWrite(vform);
6855 
6856   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6857     if (!pg.IsActive(vform, i)) continue;
6858 
6859     int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6860                                             0,
6861                                             src.Uint(vform, i));
6862 
6863     switch (dst_data_size_in_bits) {
6864       case kHRegSize: {
6865         SimFloat16 result = FixedToFloat16(value, fbits, round);
6866         dst.SetUint(vform, i, Float16ToRawbits(result));
6867         break;
6868       }
6869       case kSRegSize: {
6870         float result = FixedToFloat(value, fbits, round);
6871         dst.SetUint(vform, i, FloatToRawbits(result));
6872         break;
6873       }
6874       case kDRegSize: {
6875         double result = FixedToDouble(value, fbits, round);
6876         dst.SetUint(vform, i, DoubleToRawbits(result));
6877         break;
6878       }
6879       default:
6880         VIXL_UNIMPLEMENTED();
6881         break;
6882     }
6883   }
6884 
6885   return dst;
6886 }
6887 
6888 LogicVRegister Simulator::scvtf(VectorFormat vform,
6889                                 LogicVRegister dst,
6890                                 const LogicVRegister& src,
6891                                 int fbits,
6892                                 FPRounding round) {
6893   return scvtf(vform,
6894                LaneSizeInBitsFromFormat(vform),
6895                LaneSizeInBitsFromFormat(vform),
6896                dst,
6897                GetPTrue(),
6898                src,
6899                round,
6900                fbits);
6901 }
6902 
6903 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6904                                 unsigned dst_data_size_in_bits,
6905                                 unsigned src_data_size_in_bits,
6906                                 LogicVRegister dst,
6907                                 const LogicPRegister& pg,
6908                                 const LogicVRegister& src,
6909                                 FPRounding round,
6910                                 int fbits) {
6911   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6912   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6913   dst.ClearForWrite(vform);
6914 
6915   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6916     if (!pg.IsActive(vform, i)) continue;
6917 
6918     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6919                                                0,
6920                                                src.Uint(vform, i));
6921 
6922     switch (dst_data_size_in_bits) {
6923       case kHRegSize: {
6924         SimFloat16 result = UFixedToFloat16(value, fbits, round);
6925         dst.SetUint(vform, i, Float16ToRawbits(result));
6926         break;
6927       }
6928       case kSRegSize: {
6929         float result = UFixedToFloat(value, fbits, round);
6930         dst.SetUint(vform, i, FloatToRawbits(result));
6931         break;
6932       }
6933       case kDRegSize: {
6934         double result = UFixedToDouble(value, fbits, round);
6935         dst.SetUint(vform, i, DoubleToRawbits(result));
6936         break;
6937       }
6938       default:
6939         VIXL_UNIMPLEMENTED();
6940         break;
6941     }
6942   }
6943 
6944   return dst;
6945 }
6946 
6947 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6948                                 LogicVRegister dst,
6949                                 const LogicVRegister& src,
6950                                 int fbits,
6951                                 FPRounding round) {
6952   return ucvtf(vform,
6953                LaneSizeInBitsFromFormat(vform),
6954                LaneSizeInBitsFromFormat(vform),
6955                dst,
6956                GetPTrue(),
6957                src,
6958                round,
6959                fbits);
6960 }
6961 
6962 LogicVRegister Simulator::unpk(VectorFormat vform,
6963                                LogicVRegister dst,
6964                                const LogicVRegister& src,
6965                                UnpackType unpack_type,
6966                                ExtendType extend_type) {
6967   VectorFormat vform_half = VectorFormatHalfWidth(vform);
6968   const int lane_count = LaneCountFromFormat(vform);
6969   const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6970 
6971   switch (extend_type) {
6972     case kSignedExtend: {
6973       int64_t result[kZRegMaxSizeInBytes];
6974       for (int i = 0; i < lane_count; ++i) {
6975         result[i] = src.Int(vform_half, i + src_start_lane);
6976       }
6977       for (int i = 0; i < lane_count; ++i) {
6978         dst.SetInt(vform, i, result[i]);
6979       }
6980       break;
6981     }
6982     case kUnsignedExtend: {
6983       uint64_t result[kZRegMaxSizeInBytes];
6984       for (int i = 0; i < lane_count; ++i) {
6985         result[i] = src.Uint(vform_half, i + src_start_lane);
6986       }
6987       for (int i = 0; i < lane_count; ++i) {
6988         dst.SetUint(vform, i, result[i]);
6989       }
6990       break;
6991     }
6992     default:
6993       VIXL_UNREACHABLE();
6994   }
6995   return dst;
6996 }
6997 
6998 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
6999                                                      VectorFormat vform,
7000                                                      LogicPRegister dst,
7001                                                      const LogicPRegister& mask,
7002                                                      const LogicVRegister& src1,
7003                                                      const LogicVRegister& src2,
7004                                                      bool is_wide_elements,
7005                                                      FlagsUpdate flags) {
7006   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7007     bool result = false;
7008     if (mask.IsActive(vform, lane)) {
7009       int64_t op1 = 0xbadbeef;
7010       int64_t op2 = 0xbadbeef;
7011       int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
7012       switch (cond) {
7013         case eq:
7014         case ge:
7015         case gt:
7016         case lt:
7017         case le:
7018         case ne:
7019           op1 = src1.Int(vform, lane);
7020           op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
7021                                  : src2.Int(vform, lane);
7022           break;
7023         case hi:
7024         case hs:
7025         case ls:
7026         case lo:
7027           op1 = src1.Uint(vform, lane);
7028           op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
7029                                  : src2.Uint(vform, lane);
7030           break;
7031         default:
7032           VIXL_UNREACHABLE();
7033       }
7034 
7035       switch (cond) {
7036         case eq:
7037           result = (op1 == op2);
7038           break;
7039         case ne:
7040           result = (op1 != op2);
7041           break;
7042         case ge:
7043           result = (op1 >= op2);
7044           break;
7045         case gt:
7046           result = (op1 > op2);
7047           break;
7048         case le:
7049           result = (op1 <= op2);
7050           break;
7051         case lt:
7052           result = (op1 < op2);
7053           break;
7054         case hs:
7055           result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
7056           break;
7057         case hi:
7058           result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
7059           break;
7060         case ls:
7061           result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
7062           break;
7063         case lo:
7064           result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
7065           break;
7066         default:
7067           VIXL_UNREACHABLE();
7068       }
7069     }
7070     dst.SetActive(vform, lane, result);
7071   }
7072 
7073   if (flags == SetFlags) PredTest(vform, mask, dst);
7074 
7075   return dst;
7076 }
7077 
7078 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
7079                                                 VectorFormat vform,
7080                                                 LogicVRegister dst,
7081                                                 const LogicVRegister& src1,
7082                                                 const LogicVRegister& src2,
7083                                                 bool is_wide_elements) {
7084   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
7085   VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
7086 
7087   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7088     int shift_src_lane = lane;
7089     if (is_wide_elements) {
7090       // If the shift amount comes from wide elements, select the D-sized lane
7091       // which occupies the corresponding lanes of the value to be shifted.
7092       shift_src_lane = (lane * lane_size) / kDRegSize;
7093     }
7094     uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
7095 
7096     // Saturate shift_amount to the size of the lane that will be shifted.
7097     if (shift_amount > lane_size) shift_amount = lane_size;
7098 
7099     uint64_t value = src1.Uint(vform, lane);
7100     int64_t result = ShiftOperand(lane_size,
7101                                   value,
7102                                   shift_op,
7103                                   static_cast<unsigned>(shift_amount));
7104     dst.SetUint(vform, lane, result);
7105   }
7106 
7107   return dst;
7108 }
7109 
7110 LogicVRegister Simulator::asrd(VectorFormat vform,
7111                                LogicVRegister dst,
7112                                const LogicVRegister& src1,
7113                                int shift) {
7114   VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
7115                               LaneSizeInBitsFromFormat(vform)));
7116 
7117   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7118     int64_t value = src1.Int(vform, i);
7119     if (shift <= 63) {
7120       if (value < 0) {
7121         // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
7122         // cast to int64_t, and cannot cause signed overflow in the result.
7123         value = value + GetUintMask(shift);
7124       }
7125       value = ShiftOperand(kDRegSize, value, ASR, shift);
7126     } else {
7127       value = 0;
7128     }
7129     dst.SetInt(vform, i, value);
7130   }
7131   return dst;
7132 }
7133 
7134 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
7135     LogicalOp logical_op,
7136     VectorFormat vform,
7137     LogicVRegister zd,
7138     const LogicVRegister& zn,
7139     const LogicVRegister& zm) {
7140   VIXL_ASSERT(IsSVEFormat(vform));
7141   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7142     uint64_t op1 = zn.Uint(vform, i);
7143     uint64_t op2 = zm.Uint(vform, i);
7144     uint64_t result = 0;
7145     switch (logical_op) {
7146       case AND:
7147         result = op1 & op2;
7148         break;
7149       case BIC:
7150         result = op1 & ~op2;
7151         break;
7152       case EOR:
7153         result = op1 ^ op2;
7154         break;
7155       case ORR:
7156         result = op1 | op2;
7157         break;
7158       default:
7159         VIXL_UNIMPLEMENTED();
7160     }
7161     zd.SetUint(vform, i, result);
7162   }
7163 
7164   return zd;
7165 }
7166 
7167 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
7168                                                     LogicPRegister pd,
7169                                                     const LogicPRegister& pn,
7170                                                     const LogicPRegister& pm) {
7171   for (int i = 0; i < pn.GetChunkCount(); i++) {
7172     LogicPRegister::ChunkType op1 = pn.GetChunk(i);
7173     LogicPRegister::ChunkType op2 = pm.GetChunk(i);
7174     LogicPRegister::ChunkType result = 0;
7175     switch (op) {
7176       case ANDS_p_p_pp_z:
7177       case AND_p_p_pp_z:
7178         result = op1 & op2;
7179         break;
7180       case BICS_p_p_pp_z:
7181       case BIC_p_p_pp_z:
7182         result = op1 & ~op2;
7183         break;
7184       case EORS_p_p_pp_z:
7185       case EOR_p_p_pp_z:
7186         result = op1 ^ op2;
7187         break;
7188       case NANDS_p_p_pp_z:
7189       case NAND_p_p_pp_z:
7190         result = ~(op1 & op2);
7191         break;
7192       case NORS_p_p_pp_z:
7193       case NOR_p_p_pp_z:
7194         result = ~(op1 | op2);
7195         break;
7196       case ORNS_p_p_pp_z:
7197       case ORN_p_p_pp_z:
7198         result = op1 | ~op2;
7199         break;
7200       case ORRS_p_p_pp_z:
7201       case ORR_p_p_pp_z:
7202         result = op1 | op2;
7203         break;
7204       default:
7205         VIXL_UNIMPLEMENTED();
7206     }
7207     pd.SetChunk(i, result);
7208   }
7209   return pd;
7210 }
7211 
7212 LogicVRegister Simulator::SVEBitwiseImmHelper(
7213     SVEBitwiseLogicalWithImm_UnpredicatedOp op,
7214     VectorFormat vform,
7215     LogicVRegister zd,
7216     uint64_t imm) {
7217   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7218     uint64_t op1 = zd.Uint(vform, i);
7219     uint64_t result = 0;
7220     switch (op) {
7221       case AND_z_zi:
7222         result = op1 & imm;
7223         break;
7224       case EOR_z_zi:
7225         result = op1 ^ imm;
7226         break;
7227       case ORR_z_zi:
7228         result = op1 | imm;
7229         break;
7230       default:
7231         VIXL_UNIMPLEMENTED();
7232     }
7233     zd.SetUint(vform, i, result);
7234   }
7235 
7236   return zd;
7237 }
7238 
7239 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7240                                          const LogicPRegister& pg,
7241                                          unsigned zt_code,
7242                                          const LogicSVEAddressVector& addr) {
7243   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7244 
7245   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7246   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7247   int msize_in_bytes = addr.GetMsizeInBytes();
7248   int reg_count = addr.GetRegCount();
7249 
7250   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7251   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7252 
7253   unsigned zt_codes[4] = {zt_code,
7254                           (zt_code + 1) % kNumberOfZRegisters,
7255                           (zt_code + 2) % kNumberOfZRegisters,
7256                           (zt_code + 3) % kNumberOfZRegisters};
7257 
7258   LogicVRegister zt[4] = {
7259       ReadVRegister(zt_codes[0]),
7260       ReadVRegister(zt_codes[1]),
7261       ReadVRegister(zt_codes[2]),
7262       ReadVRegister(zt_codes[3]),
7263   };
7264 
7265   // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7266   // are ignored, so read the source register using the VectorFormat that
7267   // corresponds with the storage format, and multiply the index accordingly.
7268   VectorFormat unpack_vform =
7269       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7270   int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7271 
7272   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7273     if (!pg.IsActive(vform, i)) continue;
7274 
7275     for (int r = 0; r < reg_count; r++) {
7276       uint64_t element_address = addr.GetElementAddress(i, r);
7277       StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
7278     }
7279   }
7280 
7281   if (ShouldTraceWrites()) {
7282     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7283     if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7284       // Use an FP format where it's likely that we're accessing FP data.
7285       format = GetPrintRegisterFormatTryFP(format);
7286     }
7287     // Stores don't represent a change to the source register's value, so only
7288     // print the relevant part of the value.
7289     format = GetPrintRegPartial(format);
7290 
7291     PrintZStructAccess(zt_code,
7292                        reg_count,
7293                        pg,
7294                        format,
7295                        msize_in_bytes,
7296                        "->",
7297                        addr);
7298   }
7299 }
7300 
7301 void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7302                                         const LogicPRegister& pg,
7303                                         unsigned zt_code,
7304                                         const LogicSVEAddressVector& addr,
7305                                         bool is_signed) {
7306   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7307   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7308   int msize_in_bytes = addr.GetMsizeInBytes();
7309   int reg_count = addr.GetRegCount();
7310 
7311   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7312   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7313   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7314 
7315   unsigned zt_codes[4] = {zt_code,
7316                           (zt_code + 1) % kNumberOfZRegisters,
7317                           (zt_code + 2) % kNumberOfZRegisters,
7318                           (zt_code + 3) % kNumberOfZRegisters};
7319   LogicVRegister zt[4] = {
7320       ReadVRegister(zt_codes[0]),
7321       ReadVRegister(zt_codes[1]),
7322       ReadVRegister(zt_codes[2]),
7323       ReadVRegister(zt_codes[3]),
7324   };
7325 
7326   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7327     for (int r = 0; r < reg_count; r++) {
7328       uint64_t element_address = addr.GetElementAddress(i, r);
7329 
7330       if (!pg.IsActive(vform, i)) {
7331         zt[r].SetUint(vform, i, 0);
7332         continue;
7333       }
7334 
7335       if (is_signed) {
7336         LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
7337       } else {
7338         LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
7339       }
7340     }
7341   }
7342 
7343   if (ShouldTraceVRegs()) {
7344     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7345     if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7346       // Use an FP format where it's likely that we're accessing FP data.
7347       format = GetPrintRegisterFormatTryFP(format);
7348     }
7349     PrintZStructAccess(zt_code,
7350                        reg_count,
7351                        pg,
7352                        format,
7353                        msize_in_bytes,
7354                        "<-",
7355                        addr);
7356   }
7357 }
7358 
7359 LogicPRegister Simulator::brka(LogicPRegister pd,
7360                                const LogicPRegister& pg,
7361                                const LogicPRegister& pn) {
7362   bool break_ = false;
7363   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7364     if (pg.IsActive(kFormatVnB, i)) {
7365       pd.SetActive(kFormatVnB, i, !break_);
7366       break_ |= pn.IsActive(kFormatVnB, i);
7367     }
7368   }
7369 
7370   return pd;
7371 }
7372 
7373 LogicPRegister Simulator::brkb(LogicPRegister pd,
7374                                const LogicPRegister& pg,
7375                                const LogicPRegister& pn) {
7376   bool break_ = false;
7377   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7378     if (pg.IsActive(kFormatVnB, i)) {
7379       break_ |= pn.IsActive(kFormatVnB, i);
7380       pd.SetActive(kFormatVnB, i, !break_);
7381     }
7382   }
7383 
7384   return pd;
7385 }
7386 
7387 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7388                                const LogicPRegister& pg,
7389                                const LogicPRegister& pn) {
7390   if (!IsLastActive(kFormatVnB, pg, pn)) {
7391     pfalse(pdm);
7392   }
7393   return pdm;
7394 }
7395 
7396 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7397                                 const LogicPRegister& pg,
7398                                 const LogicPRegister& pn,
7399                                 const LogicPRegister& pm) {
7400   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7401 
7402   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7403     bool active = false;
7404     if (pg.IsActive(kFormatVnB, i)) {
7405       active = last_active;
7406       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7407     }
7408     pd.SetActive(kFormatVnB, i, active);
7409   }
7410 
7411   return pd;
7412 }
7413 
7414 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7415                                 const LogicPRegister& pg,
7416                                 const LogicPRegister& pn,
7417                                 const LogicPRegister& pm) {
7418   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7419 
7420   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7421     bool active = false;
7422     if (pg.IsActive(kFormatVnB, i)) {
7423       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7424       active = last_active;
7425     }
7426     pd.SetActive(kFormatVnB, i, active);
7427   }
7428 
7429   return pd;
7430 }
7431 
7432 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7433                                            const LogicPRegister& pg,
7434                                            unsigned zt_code,
7435                                            const LogicSVEAddressVector& addr,
7436                                            SVEFaultTolerantLoadType type,
7437                                            bool is_signed) {
7438   int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7439   int msize_in_bits = addr.GetMsizeInBits();
7440   int msize_in_bytes = addr.GetMsizeInBytes();
7441 
7442   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7443   VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7444   VIXL_ASSERT(addr.GetRegCount() == 1);
7445 
7446   LogicVRegister zt = ReadVRegister(zt_code);
7447   LogicPRegister ffr = ReadFFR();
7448 
7449   // Non-faulting loads are allowed to fail arbitrarily. To stress user
7450   // code, fail a random element in roughly one in eight full-vector loads.
7451   uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7452   int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7453 
7454   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7455     uint64_t value = 0;
7456 
7457     if (pg.IsActive(vform, i)) {
7458       uint64_t element_address = addr.GetElementAddress(i, 0);
7459 
7460       if (type == kSVEFirstFaultLoad) {
7461         // First-faulting loads always load the first active element, regardless
7462         // of FFR. The result will be discarded if its FFR lane is inactive, but
7463         // it could still generate a fault.
7464         value = MemReadUint(msize_in_bytes, element_address);
7465         // All subsequent elements have non-fault semantics.
7466         type = kSVENonFaultLoad;
7467 
7468       } else if (ffr.IsActive(vform, i)) {
7469         // Simulation of fault-tolerant loads relies on system calls, and is
7470         // likely to be relatively slow, so we only actually perform the load if
7471         // its FFR lane is active.
7472 
7473         bool can_read = (i < fake_fault_at_lane) &&
7474                         CanReadMemory(element_address, msize_in_bytes);
7475         if (can_read) {
7476           value = MemReadUint(msize_in_bytes, element_address);
7477         } else {
7478           // Propagate the fault to the end of FFR.
7479           for (int j = i; j < LaneCountFromFormat(vform); j++) {
7480             ffr.SetActive(vform, j, false);
7481           }
7482         }
7483       }
7484     }
7485 
7486     // The architecture permits a few possible results for inactive FFR lanes
7487     // (including those caused by a fault in this instruction). We choose to
7488     // leave the register value unchanged (like merging predication) because
7489     // no other input to this instruction can have the same behaviour.
7490     //
7491     // Note that this behaviour takes precedence over pg's zeroing predication.
7492 
7493     if (ffr.IsActive(vform, i)) {
7494       int msb = msize_in_bits - 1;
7495       if (is_signed) {
7496         zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7497       } else {
7498         zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7499       }
7500     }
7501   }
7502 
7503   if (ShouldTraceVRegs()) {
7504     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7505     if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7506       // Use an FP format where it's likely that we're accessing FP data.
7507       format = GetPrintRegisterFormatTryFP(format);
7508     }
7509     // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7510     // expects a single mask, so combine the two predicates.
7511     SimPRegister mask;
7512     SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7513     PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7514   }
7515 }
7516 
7517 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7518                                                     VectorFormat vform,
7519                                                     SVEOffsetModifier mod) {
7520   bool is_signed = instr->ExtractBit(14) == 0;
7521   bool is_ff = instr->ExtractBit(13) == 1;
7522   // Note that these instructions don't use the Dtype encoding.
7523   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7524   int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7525   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
7526   LogicSVEAddressVector addr(base,
7527                              &ReadVRegister(instr->GetRm()),
7528                              vform,
7529                              mod,
7530                              scale);
7531   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7532   if (is_ff) {
7533     SVEFaultTolerantLoadHelper(vform,
7534                                ReadPRegister(instr->GetPgLow8()),
7535                                instr->GetRt(),
7536                                addr,
7537                                kSVEFirstFaultLoad,
7538                                is_signed);
7539   } else {
7540     SVEStructuredLoadHelper(vform,
7541                             ReadPRegister(instr->GetPgLow8()),
7542                             instr->GetRt(),
7543                             addr,
7544                             is_signed);
7545   }
7546 }
7547 
7548 int Simulator::GetFirstActive(VectorFormat vform,
7549                               const LogicPRegister& pg) const {
7550   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7551     if (pg.IsActive(vform, i)) return i;
7552   }
7553   return -1;
7554 }
7555 
7556 int Simulator::GetLastActive(VectorFormat vform,
7557                              const LogicPRegister& pg) const {
7558   for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7559     if (pg.IsActive(vform, i)) return i;
7560   }
7561   return -1;
7562 }
7563 
7564 int Simulator::CountActiveLanes(VectorFormat vform,
7565                                 const LogicPRegister& pg) const {
7566   int count = 0;
7567   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7568     count += pg.IsActive(vform, i) ? 1 : 0;
7569   }
7570   return count;
7571 }
7572 
7573 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7574                                        const LogicPRegister& pg,
7575                                        const LogicPRegister& pn) const {
7576   int count = 0;
7577   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7578     count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7579   }
7580   return count;
7581 }
7582 
7583 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7584                                                int pattern) const {
7585   VIXL_ASSERT(IsSVEFormat(vform));
7586   int all = LaneCountFromFormat(vform);
7587   VIXL_ASSERT(all > 0);
7588 
7589   switch (pattern) {
7590     case SVE_VL1:
7591     case SVE_VL2:
7592     case SVE_VL3:
7593     case SVE_VL4:
7594     case SVE_VL5:
7595     case SVE_VL6:
7596     case SVE_VL7:
7597     case SVE_VL8:
7598       // VL1-VL8 are encoded directly.
7599       VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7600       VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7601       return (pattern <= all) ? pattern : 0;
7602     case SVE_VL16:
7603     case SVE_VL32:
7604     case SVE_VL64:
7605     case SVE_VL128:
7606     case SVE_VL256: {
7607       // VL16-VL256 are encoded as log2(N) + c.
7608       int min = 16 << (pattern - SVE_VL16);
7609       return (min <= all) ? min : 0;
7610     }
7611     // Special cases.
7612     case SVE_POW2:
7613       return 1 << HighestSetBitPosition(all);
7614     case SVE_MUL4:
7615       return all - (all % 4);
7616     case SVE_MUL3:
7617       return all - (all % 3);
7618     case SVE_ALL:
7619       return all;
7620   }
7621   // Unnamed cases architecturally return 0.
7622   return 0;
7623 }
7624 
7625 LogicPRegister Simulator::match(VectorFormat vform,
7626                                 LogicPRegister dst,
7627                                 const LogicVRegister& haystack,
7628                                 const LogicVRegister& needles,
7629                                 bool negate_match) {
7630   SimVRegister ztemp;
7631   SimPRegister ptemp;
7632 
7633   pfalse(dst);
7634   int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
7635   for (int i = 0; i < lanes_per_segment; i++) {
7636     dup_elements_to_segments(vform, ztemp, needles, i);
7637     SVEIntCompareVectorsHelper(eq,
7638                                vform,
7639                                ptemp,
7640                                GetPTrue(),
7641                                haystack,
7642                                ztemp,
7643                                false,
7644                                LeaveFlags);
7645     SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
7646   }
7647   if (negate_match) {
7648     ptrue(vform, ptemp, SVE_ALL);
7649     SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
7650   }
7651   return dst;
7652 }
7653 
7654 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7655   if (IsContiguous()) {
7656     return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7657   }
7658 
7659   VIXL_ASSERT(IsScatterGather());
7660   VIXL_ASSERT(vector_ != NULL);
7661 
7662   // For scatter-gather accesses, we need to extract the offset from vector_,
7663   // and apply modifiers.
7664 
7665   uint64_t offset = 0;
7666   switch (vector_form_) {
7667     case kFormatVnS:
7668       offset = vector_->GetLane<uint32_t>(lane);
7669       break;
7670     case kFormatVnD:
7671       offset = vector_->GetLane<uint64_t>(lane);
7672       break;
7673     default:
7674       VIXL_UNIMPLEMENTED();
7675       break;
7676   }
7677 
7678   switch (vector_mod_) {
7679     case SVE_MUL_VL:
7680       VIXL_UNIMPLEMENTED();
7681       break;
7682     case SVE_LSL:
7683       // We apply the shift below. There's nothing to do here.
7684       break;
7685     case NO_SVE_OFFSET_MODIFIER:
7686       VIXL_ASSERT(vector_shift_ == 0);
7687       break;
7688     case SVE_UXTW:
7689       offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7690       break;
7691     case SVE_SXTW:
7692       offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7693       break;
7694   }
7695 
7696   return base_ + (offset << vector_shift_);
7697 }
7698 
7699 LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
7700                                             LogicVRegister dst,
7701                                             const LogicVRegister& src) {
7702   SimVRegister zero;
7703   zero.Clear();
7704   return uzp2(vform, dst, src, zero);
7705 }
7706 
7707 LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
7708                                              LogicVRegister dst,
7709                                              const LogicVRegister& src) {
7710   SimVRegister zero;
7711   zero.Clear();
7712   return uzp1(vform, dst, src, zero);
7713 }
7714 
7715 LogicVRegister Simulator::adcl(VectorFormat vform,
7716                                LogicVRegister dst,
7717                                const LogicVRegister& src1,
7718                                const LogicVRegister& src2,
7719                                bool top) {
7720   unsigned reg_size = LaneSizeInBitsFromFormat(vform);
7721   VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
7722 
7723   for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
7724     uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
7725     uint64_t right = dst.Uint(vform, i);
7726     unsigned carry_in = src2.Uint(vform, i + 1) & 1;
7727     std::pair<uint64_t, uint8_t> val_and_flags =
7728         AddWithCarry(reg_size, left, right, carry_in);
7729 
7730     // Set even lanes to the result of the addition.
7731     dst.SetUint(vform, i, val_and_flags.first);
7732 
7733     // Set odd lanes to the carry flag from the addition.
7734     uint64_t carry_out = (val_and_flags.second >> 1) & 1;
7735     dst.SetUint(vform, i + 1, carry_out);
7736   }
7737   return dst;
7738 }
7739 
7740 // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7741 // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7742 //
7743 // Matrices of the form:
7744 //
7745 //  src1 = ( a b c d e f g h )  src2 = ( A B )
7746 //         ( i j k l m n o p )         ( C D )
7747 //                                     ( E F )
7748 //                                     ( G H )
7749 //                                     ( I J )
7750 //                                     ( K L )
7751 //                                     ( M N )
7752 //                                     ( O P )
7753 //
7754 // Are stored in the input vector registers as:
7755 //
7756 //           15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
7757 //  src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7758 //  src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7759 //
7760 LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7761                                  LogicVRegister srcdst,
7762                                  const LogicVRegister& src1,
7763                                  const LogicVRegister& src2,
7764                                  bool src1_signed,
7765                                  bool src2_signed) {
7766   // Two destination forms are supported: Q register containing four S-sized
7767   // elements (4S) and Z register containing n S-sized elements (VnS).
7768   VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7769   VectorFormat vform_src = kFormatVnB;
7770   int b_per_segment = kQRegSize / kBRegSize;
7771   int s_per_segment = kQRegSize / kSRegSize;
7772   int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7773   int segment_count = LaneCountFromFormat(vform_dst) / 4;
7774   for (int seg = 0; seg < segment_count; seg++) {
7775     for (int i = 0; i < 2; i++) {
7776       for (int j = 0; j < 2; j++) {
7777         int dstidx = (2 * i) + j + (seg * s_per_segment);
7778         int64_t sum = srcdst.Int(vform_dst, dstidx);
7779         for (int k = 0; k < 8; k++) {
7780           int idx1 = (8 * i) + k + (seg * b_per_segment);
7781           int idx2 = (8 * j) + k + (seg * b_per_segment);
7782           int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7783                                    : src1.Uint(vform_src, idx1);
7784           int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7785                                    : src2.Uint(vform_src, idx2);
7786           sum += e1 * e2;
7787         }
7788         result[dstidx] = sum;
7789       }
7790     }
7791   }
7792   srcdst.SetIntArray(vform_dst, result);
7793   return srcdst;
7794 }
7795 
7796 // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7797 // result to the matrix in srcdst, and write back to srcdst.
7798 //
7799 // Matrices of the form:
7800 //
7801 //  src1 = ( a b )  src2 = ( A B )
7802 //         ( c d )         ( C D )
7803 //
7804 // Are stored in the input vector registers as:
7805 //
7806 //           3   2   1   0
7807 //  src1 = [ d | c | b | a ]
7808 //  src2 = [ D | B | C | A ]
7809 //
7810 template <typename T>
7811 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7812                                   LogicVRegister srcdst,
7813                                   const LogicVRegister& src1,
7814                                   const LogicVRegister& src2) {
7815   T result[kZRegMaxSizeInBytes / sizeof(T)];
7816   int T_per_segment = 4;
7817   int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7818   for (int seg = 0; seg < segment_count; seg++) {
7819     int segoff = seg * T_per_segment;
7820     for (int i = 0; i < 2; i++) {
7821       for (int j = 0; j < 2; j++) {
7822         T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7823                             src2.Float<T>(2 * j + 0 + segoff));
7824         T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7825                             src2.Float<T>(2 * j + 1 + segoff));
7826         T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7827         result[2 * i + j + segoff] = FPAdd(sum, prod1);
7828       }
7829     }
7830   }
7831   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7832     // Elements outside a multiple of 4T are set to zero. This happens only
7833     // for double precision operations, when the VL is a multiple of 128 bits,
7834     // but not a multiple of 256 bits.
7835     T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7836     srcdst.SetFloat<T>(vform, i, value);
7837   }
7838   return srcdst;
7839 }
7840 
7841 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7842                                   LogicVRegister dst,
7843                                   const LogicVRegister& src1,
7844                                   const LogicVRegister& src2) {
7845   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
7846     fmatmul<float>(vform, dst, src1, src2);
7847   } else {
7848     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
7849     fmatmul<double>(vform, dst, src1, src2);
7850   }
7851   return dst;
7852 }
7853 
7854 }  // namespace aarch64
7855 }  // namespace vixl
7856 
7857 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
7858