1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29 #include <cmath>
30
31 #include "simulator-aarch64.h"
32
33 namespace vixl {
34 namespace aarch64 {
35
36 using vixl::internal::SimFloat16;
37
// Type predicate: tells whether the template argument is `double`.
// The primary template covers every type and answers "no".
template <class T>
bool IsFloat64() {
  return false;
}
// Explicit specialization for `double`, the only type that answers "yes".
template <>
bool IsFloat64<double>() {
  return true;
}
46
// Type predicate: tells whether the template argument is `float`.
// The primary template covers every type and answers "no".
template <class T>
bool IsFloat32() {
  return false;
}
// Explicit specialization for `float`, the only type that answers "yes".
template <>
bool IsFloat32<float>() {
  return true;
}
55
56 template <typename T>
IsFloat16()57 bool IsFloat16() {
58 return false;
59 }
60 template <>
IsFloat16()61 bool IsFloat16<Float16>() {
62 return true;
63 }
64 template <>
IsFloat16()65 bool IsFloat16<SimFloat16>() {
66 return true;
67 }
68
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71 return kFP64DefaultNaN;
72 }
73
74
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77 return kFP32DefaultNaN;
78 }
79
80
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83 return SimFloat16(kFP16DefaultNaN);
84 }
85
86
FixedToDouble(int64_t src, int fbits, FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88 if (src >= 0) {
89 return UFixedToDouble(src, fbits, round);
90 } else if (src == INT64_MIN) {
91 return -UFixedToDouble(src, fbits, round);
92 } else {
93 return -UFixedToDouble(-src, fbits, round);
94 }
95 }
96
97
UFixedToDouble(uint64_t src, int fbits, FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99 // An input of 0 is a special case because the result is effectively
100 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101 if (src == 0) {
102 return 0.0;
103 }
104
105 // Calculate the exponent. The highest significant bit will have the value
106 // 2^exponent.
107 const int highest_significant_bit = 63 - CountLeadingZeros(src);
108 const int64_t exponent = highest_significant_bit - fbits;
109
110 return FPRoundToDouble(0, exponent, src, round);
111 }
112
113
FixedToFloat(int64_t src, int fbits, FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115 if (src >= 0) {
116 return UFixedToFloat(src, fbits, round);
117 } else if (src == INT64_MIN) {
118 return -UFixedToFloat(src, fbits, round);
119 } else {
120 return -UFixedToFloat(-src, fbits, round);
121 }
122 }
123
124
UFixedToFloat(uint64_t src, int fbits, FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126 // An input of 0 is a special case because the result is effectively
127 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128 if (src == 0) {
129 return 0.0f;
130 }
131
132 // Calculate the exponent. The highest significant bit will have the value
133 // 2^exponent.
134 const int highest_significant_bit = 63 - CountLeadingZeros(src);
135 const int32_t exponent = highest_significant_bit - fbits;
136
137 return FPRoundToFloat(0, exponent, src, round);
138 }
139
140
FixedToFloat16(int64_t src, int fbits, FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142 if (src >= 0) {
143 return UFixedToFloat16(src, fbits, round);
144 } else if (src == INT64_MIN) {
145 return -UFixedToFloat16(src, fbits, round);
146 } else {
147 return -UFixedToFloat16(-src, fbits, round);
148 }
149 }
150
151
UFixedToFloat16(uint64_t src, int fbits, FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153 int fbits,
154 FPRounding round) {
155 // An input of 0 is a special case because the result is effectively
156 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157 if (src == 0) {
158 return 0.0f;
159 }
160
161 // Calculate the exponent. The highest significant bit will have the value
162 // 2^exponent.
163 const int highest_significant_bit = 63 - CountLeadingZeros(src);
164 const int16_t exponent = highest_significant_bit - fbits;
165
166 return FPRoundToFloat16(0, exponent, src, round);
167 }
168
169
GenerateRandomTag(uint16_t exclude)170 uint64_t Simulator::GenerateRandomTag(uint16_t exclude) {
171 uint64_t rtag = nrand48(rand_state_) >> 28;
172 VIXL_ASSERT(IsUint4(rtag));
173
174 if (exclude == 0) {
175 exclude = nrand48(rand_state_) >> 27;
176 }
177
178 // TODO: implement this to better match the specification, which calls for a
179 // true random mode, and a pseudo-random mode with state (EL1.TAG) modified by
180 // PRNG.
181 return ChooseNonExcludedTag(rtag, 0, exclude);
182 }
183
184
ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr)185 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
186 dst.ClearForWrite(vform);
187 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
188 LoadLane(dst, vform, i, addr);
189 addr += LaneSizeInBytesFromFormat(vform);
190 }
191 }
192
193
ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr)194 void Simulator::ld1(VectorFormat vform,
195 LogicVRegister dst,
196 int index,
197 uint64_t addr) {
198 LoadLane(dst, vform, index, addr);
199 }
200
201
ld1r(VectorFormat vform, VectorFormat unpack_vform, LogicVRegister dst, uint64_t addr, bool is_signed)202 void Simulator::ld1r(VectorFormat vform,
203 VectorFormat unpack_vform,
204 LogicVRegister dst,
205 uint64_t addr,
206 bool is_signed) {
207 unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
208 dst.ClearForWrite(vform);
209 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
210 if (is_signed) {
211 LoadIntToLane(dst, vform, unpack_size, i, addr);
212 } else {
213 LoadUintToLane(dst, vform, unpack_size, i, addr);
214 }
215 }
216 }
217
218
ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr)219 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
220 ld1r(vform, vform, dst, addr);
221 }
222
223
ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, uint64_t addr1)224 void Simulator::ld2(VectorFormat vform,
225 LogicVRegister dst1,
226 LogicVRegister dst2,
227 uint64_t addr1) {
228 dst1.ClearForWrite(vform);
229 dst2.ClearForWrite(vform);
230 int esize = LaneSizeInBytesFromFormat(vform);
231 uint64_t addr2 = addr1 + esize;
232 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
233 LoadLane(dst1, vform, i, addr1);
234 LoadLane(dst2, vform, i, addr2);
235 addr1 += 2 * esize;
236 addr2 += 2 * esize;
237 }
238 }
239
240
ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, int index, uint64_t addr1)241 void Simulator::ld2(VectorFormat vform,
242 LogicVRegister dst1,
243 LogicVRegister dst2,
244 int index,
245 uint64_t addr1) {
246 dst1.ClearForWrite(vform);
247 dst2.ClearForWrite(vform);
248 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
249 LoadLane(dst1, vform, index, addr1);
250 LoadLane(dst2, vform, index, addr2);
251 }
252
253
ld2r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, uint64_t addr)254 void Simulator::ld2r(VectorFormat vform,
255 LogicVRegister dst1,
256 LogicVRegister dst2,
257 uint64_t addr) {
258 dst1.ClearForWrite(vform);
259 dst2.ClearForWrite(vform);
260 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
261 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
262 LoadLane(dst1, vform, i, addr);
263 LoadLane(dst2, vform, i, addr2);
264 }
265 }
266
267
ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1)268 void Simulator::ld3(VectorFormat vform,
269 LogicVRegister dst1,
270 LogicVRegister dst2,
271 LogicVRegister dst3,
272 uint64_t addr1) {
273 dst1.ClearForWrite(vform);
274 dst2.ClearForWrite(vform);
275 dst3.ClearForWrite(vform);
276 int esize = LaneSizeInBytesFromFormat(vform);
277 uint64_t addr2 = addr1 + esize;
278 uint64_t addr3 = addr2 + esize;
279 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
280 LoadLane(dst1, vform, i, addr1);
281 LoadLane(dst2, vform, i, addr2);
282 LoadLane(dst3, vform, i, addr3);
283 addr1 += 3 * esize;
284 addr2 += 3 * esize;
285 addr3 += 3 * esize;
286 }
287 }
288
289
ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, int index, uint64_t addr1)290 void Simulator::ld3(VectorFormat vform,
291 LogicVRegister dst1,
292 LogicVRegister dst2,
293 LogicVRegister dst3,
294 int index,
295 uint64_t addr1) {
296 dst1.ClearForWrite(vform);
297 dst2.ClearForWrite(vform);
298 dst3.ClearForWrite(vform);
299 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
300 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
301 LoadLane(dst1, vform, index, addr1);
302 LoadLane(dst2, vform, index, addr2);
303 LoadLane(dst3, vform, index, addr3);
304 }
305
306
ld3r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, uint64_t addr)307 void Simulator::ld3r(VectorFormat vform,
308 LogicVRegister dst1,
309 LogicVRegister dst2,
310 LogicVRegister dst3,
311 uint64_t addr) {
312 dst1.ClearForWrite(vform);
313 dst2.ClearForWrite(vform);
314 dst3.ClearForWrite(vform);
315 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
316 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
317 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
318 LoadLane(dst1, vform, i, addr);
319 LoadLane(dst2, vform, i, addr2);
320 LoadLane(dst3, vform, i, addr3);
321 }
322 }
323
324
ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, uint64_t addr1)325 void Simulator::ld4(VectorFormat vform,
326 LogicVRegister dst1,
327 LogicVRegister dst2,
328 LogicVRegister dst3,
329 LogicVRegister dst4,
330 uint64_t addr1) {
331 dst1.ClearForWrite(vform);
332 dst2.ClearForWrite(vform);
333 dst3.ClearForWrite(vform);
334 dst4.ClearForWrite(vform);
335 int esize = LaneSizeInBytesFromFormat(vform);
336 uint64_t addr2 = addr1 + esize;
337 uint64_t addr3 = addr2 + esize;
338 uint64_t addr4 = addr3 + esize;
339 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
340 LoadLane(dst1, vform, i, addr1);
341 LoadLane(dst2, vform, i, addr2);
342 LoadLane(dst3, vform, i, addr3);
343 LoadLane(dst4, vform, i, addr4);
344 addr1 += 4 * esize;
345 addr2 += 4 * esize;
346 addr3 += 4 * esize;
347 addr4 += 4 * esize;
348 }
349 }
350
351
ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, int index, uint64_t addr1)352 void Simulator::ld4(VectorFormat vform,
353 LogicVRegister dst1,
354 LogicVRegister dst2,
355 LogicVRegister dst3,
356 LogicVRegister dst4,
357 int index,
358 uint64_t addr1) {
359 dst1.ClearForWrite(vform);
360 dst2.ClearForWrite(vform);
361 dst3.ClearForWrite(vform);
362 dst4.ClearForWrite(vform);
363 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
364 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
365 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
366 LoadLane(dst1, vform, index, addr1);
367 LoadLane(dst2, vform, index, addr2);
368 LoadLane(dst3, vform, index, addr3);
369 LoadLane(dst4, vform, index, addr4);
370 }
371
372
ld4r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, uint64_t addr)373 void Simulator::ld4r(VectorFormat vform,
374 LogicVRegister dst1,
375 LogicVRegister dst2,
376 LogicVRegister dst3,
377 LogicVRegister dst4,
378 uint64_t addr) {
379 dst1.ClearForWrite(vform);
380 dst2.ClearForWrite(vform);
381 dst3.ClearForWrite(vform);
382 dst4.ClearForWrite(vform);
383 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
384 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
385 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
386 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
387 LoadLane(dst1, vform, i, addr);
388 LoadLane(dst2, vform, i, addr2);
389 LoadLane(dst3, vform, i, addr3);
390 LoadLane(dst4, vform, i, addr4);
391 }
392 }
393
394
st1(VectorFormat vform, LogicVRegister src, uint64_t addr)395 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
396 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
397 StoreLane(src, vform, i, addr);
398 addr += LaneSizeInBytesFromFormat(vform);
399 }
400 }
401
402
st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr)403 void Simulator::st1(VectorFormat vform,
404 LogicVRegister src,
405 int index,
406 uint64_t addr) {
407 StoreLane(src, vform, index, addr);
408 }
409
410
st2(VectorFormat vform, LogicVRegister src, LogicVRegister src2, uint64_t addr)411 void Simulator::st2(VectorFormat vform,
412 LogicVRegister src,
413 LogicVRegister src2,
414 uint64_t addr) {
415 int esize = LaneSizeInBytesFromFormat(vform);
416 uint64_t addr2 = addr + esize;
417 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
418 StoreLane(src, vform, i, addr);
419 StoreLane(src2, vform, i, addr2);
420 addr += 2 * esize;
421 addr2 += 2 * esize;
422 }
423 }
424
425
st2(VectorFormat vform, LogicVRegister src, LogicVRegister src2, int index, uint64_t addr)426 void Simulator::st2(VectorFormat vform,
427 LogicVRegister src,
428 LogicVRegister src2,
429 int index,
430 uint64_t addr) {
431 int esize = LaneSizeInBytesFromFormat(vform);
432 StoreLane(src, vform, index, addr);
433 StoreLane(src2, vform, index, addr + 1 * esize);
434 }
435
436
st3(VectorFormat vform, LogicVRegister src, LogicVRegister src2, LogicVRegister src3, uint64_t addr)437 void Simulator::st3(VectorFormat vform,
438 LogicVRegister src,
439 LogicVRegister src2,
440 LogicVRegister src3,
441 uint64_t addr) {
442 int esize = LaneSizeInBytesFromFormat(vform);
443 uint64_t addr2 = addr + esize;
444 uint64_t addr3 = addr2 + esize;
445 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
446 StoreLane(src, vform, i, addr);
447 StoreLane(src2, vform, i, addr2);
448 StoreLane(src3, vform, i, addr3);
449 addr += 3 * esize;
450 addr2 += 3 * esize;
451 addr3 += 3 * esize;
452 }
453 }
454
455
st3(VectorFormat vform, LogicVRegister src, LogicVRegister src2, LogicVRegister src3, int index, uint64_t addr)456 void Simulator::st3(VectorFormat vform,
457 LogicVRegister src,
458 LogicVRegister src2,
459 LogicVRegister src3,
460 int index,
461 uint64_t addr) {
462 int esize = LaneSizeInBytesFromFormat(vform);
463 StoreLane(src, vform, index, addr);
464 StoreLane(src2, vform, index, addr + 1 * esize);
465 StoreLane(src3, vform, index, addr + 2 * esize);
466 }
467
468
st4(VectorFormat vform, LogicVRegister src, LogicVRegister src2, LogicVRegister src3, LogicVRegister src4, uint64_t addr)469 void Simulator::st4(VectorFormat vform,
470 LogicVRegister src,
471 LogicVRegister src2,
472 LogicVRegister src3,
473 LogicVRegister src4,
474 uint64_t addr) {
475 int esize = LaneSizeInBytesFromFormat(vform);
476 uint64_t addr2 = addr + esize;
477 uint64_t addr3 = addr2 + esize;
478 uint64_t addr4 = addr3 + esize;
479 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
480 StoreLane(src, vform, i, addr);
481 StoreLane(src2, vform, i, addr2);
482 StoreLane(src3, vform, i, addr3);
483 StoreLane(src4, vform, i, addr4);
484 addr += 4 * esize;
485 addr2 += 4 * esize;
486 addr3 += 4 * esize;
487 addr4 += 4 * esize;
488 }
489 }
490
491
st4(VectorFormat vform, LogicVRegister src, LogicVRegister src2, LogicVRegister src3, LogicVRegister src4, int index, uint64_t addr)492 void Simulator::st4(VectorFormat vform,
493 LogicVRegister src,
494 LogicVRegister src2,
495 LogicVRegister src3,
496 LogicVRegister src4,
497 int index,
498 uint64_t addr) {
499 int esize = LaneSizeInBytesFromFormat(vform);
500 StoreLane(src, vform, index, addr);
501 StoreLane(src2, vform, index, addr + 1 * esize);
502 StoreLane(src3, vform, index, addr + 2 * esize);
503 StoreLane(src4, vform, index, addr + 3 * esize);
504 }
505
506
cmp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, Condition cond)507 LogicVRegister Simulator::cmp(VectorFormat vform,
508 LogicVRegister dst,
509 const LogicVRegister& src1,
510 const LogicVRegister& src2,
511 Condition cond) {
512 dst.ClearForWrite(vform);
513 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
514 int64_t sa = src1.Int(vform, i);
515 int64_t sb = src2.Int(vform, i);
516 uint64_t ua = src1.Uint(vform, i);
517 uint64_t ub = src2.Uint(vform, i);
518 bool result = false;
519 switch (cond) {
520 case eq:
521 result = (ua == ub);
522 break;
523 case ge:
524 result = (sa >= sb);
525 break;
526 case gt:
527 result = (sa > sb);
528 break;
529 case hi:
530 result = (ua > ub);
531 break;
532 case hs:
533 result = (ua >= ub);
534 break;
535 case lt:
536 result = (sa < sb);
537 break;
538 case le:
539 result = (sa <= sb);
540 break;
541 default:
542 VIXL_UNREACHABLE();
543 break;
544 }
545 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
546 }
547 return dst;
548 }
549
550
cmp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, int imm, Condition cond)551 LogicVRegister Simulator::cmp(VectorFormat vform,
552 LogicVRegister dst,
553 const LogicVRegister& src1,
554 int imm,
555 Condition cond) {
556 SimVRegister temp;
557 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
558 return cmp(vform, dst, src1, imm_reg, cond);
559 }
560
561
cmptst(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)562 LogicVRegister Simulator::cmptst(VectorFormat vform,
563 LogicVRegister dst,
564 const LogicVRegister& src1,
565 const LogicVRegister& src2) {
566 dst.ClearForWrite(vform);
567 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
568 uint64_t ua = src1.Uint(vform, i);
569 uint64_t ub = src2.Uint(vform, i);
570 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
571 }
572 return dst;
573 }
574
575
add(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)576 LogicVRegister Simulator::add(VectorFormat vform,
577 LogicVRegister dst,
578 const LogicVRegister& src1,
579 const LogicVRegister& src2) {
580 int lane_size = LaneSizeInBitsFromFormat(vform);
581 dst.ClearForWrite(vform);
582
583 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
584 // Test for unsigned saturation.
585 uint64_t ua = src1.UintLeftJustified(vform, i);
586 uint64_t ub = src2.UintLeftJustified(vform, i);
587 uint64_t ur = ua + ub;
588 if (ur < ua) {
589 dst.SetUnsignedSat(i, true);
590 }
591
592 // Test for signed saturation.
593 bool pos_a = (ua >> 63) == 0;
594 bool pos_b = (ub >> 63) == 0;
595 bool pos_r = (ur >> 63) == 0;
596 // If the signs of the operands are the same, but different from the result,
597 // there was an overflow.
598 if ((pos_a == pos_b) && (pos_a != pos_r)) {
599 dst.SetSignedSat(i, pos_a);
600 }
601 dst.SetInt(vform, i, ur >> (64 - lane_size));
602 }
603 return dst;
604 }
605
add_uint(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, uint64_t value)606 LogicVRegister Simulator::add_uint(VectorFormat vform,
607 LogicVRegister dst,
608 const LogicVRegister& src1,
609 uint64_t value) {
610 int lane_size = LaneSizeInBitsFromFormat(vform);
611 VIXL_ASSERT(IsUintN(lane_size, value));
612 dst.ClearForWrite(vform);
613 // Left-justify `value`.
614 uint64_t ub = value << (64 - lane_size);
615 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
616 // Test for unsigned saturation.
617 uint64_t ua = src1.UintLeftJustified(vform, i);
618 uint64_t ur = ua + ub;
619 if (ur < ua) {
620 dst.SetUnsignedSat(i, true);
621 }
622
623 // Test for signed saturation.
624 // `value` is always positive, so we have an overflow if the (signed) result
625 // is smaller than the first operand.
626 if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
627 dst.SetSignedSat(i, true);
628 }
629
630 dst.SetInt(vform, i, ur >> (64 - lane_size));
631 }
632 return dst;
633 }
634
addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)635 LogicVRegister Simulator::addp(VectorFormat vform,
636 LogicVRegister dst,
637 const LogicVRegister& src1,
638 const LogicVRegister& src2) {
639 SimVRegister temp1, temp2;
640 uzp1(vform, temp1, src1, src2);
641 uzp2(vform, temp2, src1, src2);
642 add(vform, dst, temp1, temp2);
643 if (IsSVEFormat(vform)) {
644 interleave_top_bottom(vform, dst, dst);
645 }
646 return dst;
647 }
648
sdiv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)649 LogicVRegister Simulator::sdiv(VectorFormat vform,
650 LogicVRegister dst,
651 const LogicVRegister& src1,
652 const LogicVRegister& src2) {
653 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
654
655 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
656 int64_t val1 = src1.Int(vform, i);
657 int64_t val2 = src2.Int(vform, i);
658 int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
659 int64_t quotient = 0;
660 if ((val1 == min_int) && (val2 == -1)) {
661 quotient = min_int;
662 } else if (val2 != 0) {
663 quotient = val1 / val2;
664 }
665 dst.SetInt(vform, i, quotient);
666 }
667
668 return dst;
669 }
670
udiv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)671 LogicVRegister Simulator::udiv(VectorFormat vform,
672 LogicVRegister dst,
673 const LogicVRegister& src1,
674 const LogicVRegister& src2) {
675 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
676
677 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
678 uint64_t val1 = src1.Uint(vform, i);
679 uint64_t val2 = src2.Uint(vform, i);
680 uint64_t quotient = 0;
681 if (val2 != 0) {
682 quotient = val1 / val2;
683 }
684 dst.SetUint(vform, i, quotient);
685 }
686
687 return dst;
688 }
689
690
mla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2)691 LogicVRegister Simulator::mla(VectorFormat vform,
692 LogicVRegister dst,
693 const LogicVRegister& srca,
694 const LogicVRegister& src1,
695 const LogicVRegister& src2) {
696 SimVRegister temp;
697 mul(vform, temp, src1, src2);
698 add(vform, dst, srca, temp);
699 return dst;
700 }
701
702
mls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2)703 LogicVRegister Simulator::mls(VectorFormat vform,
704 LogicVRegister dst,
705 const LogicVRegister& srca,
706 const LogicVRegister& src1,
707 const LogicVRegister& src2) {
708 SimVRegister temp;
709 mul(vform, temp, src1, src2);
710 sub(vform, dst, srca, temp);
711 return dst;
712 }
713
714
mul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)715 LogicVRegister Simulator::mul(VectorFormat vform,
716 LogicVRegister dst,
717 const LogicVRegister& src1,
718 const LogicVRegister& src2) {
719 dst.ClearForWrite(vform);
720
721 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
722 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
723 }
724 return dst;
725 }
726
727
mul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)728 LogicVRegister Simulator::mul(VectorFormat vform,
729 LogicVRegister dst,
730 const LogicVRegister& src1,
731 const LogicVRegister& src2,
732 int index) {
733 SimVRegister temp;
734 VectorFormat indexform = VectorFormatFillQ(vform);
735 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
736 }
737
738
smulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)739 LogicVRegister Simulator::smulh(VectorFormat vform,
740 LogicVRegister dst,
741 const LogicVRegister& src1,
742 const LogicVRegister& src2) {
743 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
744 int64_t dst_val = 0xbadbeef;
745 int64_t val1 = src1.Int(vform, i);
746 int64_t val2 = src2.Int(vform, i);
747 switch (LaneSizeInBitsFromFormat(vform)) {
748 case 8:
749 dst_val = internal::MultiplyHigh<8>(val1, val2);
750 break;
751 case 16:
752 dst_val = internal::MultiplyHigh<16>(val1, val2);
753 break;
754 case 32:
755 dst_val = internal::MultiplyHigh<32>(val1, val2);
756 break;
757 case 64:
758 dst_val = internal::MultiplyHigh<64>(val1, val2);
759 break;
760 default:
761 VIXL_UNREACHABLE();
762 break;
763 }
764 dst.SetInt(vform, i, dst_val);
765 }
766 return dst;
767 }
768
769
umulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)770 LogicVRegister Simulator::umulh(VectorFormat vform,
771 LogicVRegister dst,
772 const LogicVRegister& src1,
773 const LogicVRegister& src2) {
774 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
775 uint64_t dst_val = 0xbadbeef;
776 uint64_t val1 = src1.Uint(vform, i);
777 uint64_t val2 = src2.Uint(vform, i);
778 switch (LaneSizeInBitsFromFormat(vform)) {
779 case 8:
780 dst_val = internal::MultiplyHigh<8>(val1, val2);
781 break;
782 case 16:
783 dst_val = internal::MultiplyHigh<16>(val1, val2);
784 break;
785 case 32:
786 dst_val = internal::MultiplyHigh<32>(val1, val2);
787 break;
788 case 64:
789 dst_val = internal::MultiplyHigh<64>(val1, val2);
790 break;
791 default:
792 VIXL_UNREACHABLE();
793 break;
794 }
795 dst.SetUint(vform, i, dst_val);
796 }
797 return dst;
798 }
799
800
mla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)801 LogicVRegister Simulator::mla(VectorFormat vform,
802 LogicVRegister dst,
803 const LogicVRegister& src1,
804 const LogicVRegister& src2,
805 int index) {
806 SimVRegister temp;
807 VectorFormat indexform = VectorFormatFillQ(vform);
808 return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
809 }
810
811
mls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)812 LogicVRegister Simulator::mls(VectorFormat vform,
813 LogicVRegister dst,
814 const LogicVRegister& src1,
815 const LogicVRegister& src2,
816 int index) {
817 SimVRegister temp;
818 VectorFormat indexform = VectorFormatFillQ(vform);
819 return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
820 }
821
sqdmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)822 LogicVRegister Simulator::sqdmull(VectorFormat vform,
823 LogicVRegister dst,
824 const LogicVRegister& src1,
825 const LogicVRegister& src2,
826 int index) {
827 SimVRegister temp;
828 VectorFormat indexform =
829 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
830 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
831 }
832
sqdmlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)833 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
834 LogicVRegister dst,
835 const LogicVRegister& src1,
836 const LogicVRegister& src2,
837 int index) {
838 SimVRegister temp;
839 VectorFormat indexform =
840 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
841 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
842 }
843
sqdmlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)844 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
845 LogicVRegister dst,
846 const LogicVRegister& src1,
847 const LogicVRegister& src2,
848 int index) {
849 SimVRegister temp;
850 VectorFormat indexform =
851 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
852 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
853 }
854
sqdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)855 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
856 LogicVRegister dst,
857 const LogicVRegister& src1,
858 const LogicVRegister& src2,
859 int index) {
860 SimVRegister temp;
861 VectorFormat indexform = VectorFormatFillQ(vform);
862 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
863 }
864
865
sqrdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)866 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
867 LogicVRegister dst,
868 const LogicVRegister& src1,
869 const LogicVRegister& src2,
870 int index) {
871 SimVRegister temp;
872 VectorFormat indexform = VectorFormatFillQ(vform);
873 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
874 }
875
876
sqrdmlah(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)877 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
878 LogicVRegister dst,
879 const LogicVRegister& src1,
880 const LogicVRegister& src2,
881 int index) {
882 SimVRegister temp;
883 VectorFormat indexform = VectorFormatFillQ(vform);
884 return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
885 }
886
887
sqrdmlsh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index)888 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
889 LogicVRegister dst,
890 const LogicVRegister& src1,
891 const LogicVRegister& src2,
892 int index) {
893 SimVRegister temp;
894 VectorFormat indexform = VectorFormatFillQ(vform);
895 return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
896 }
897
898
PolynomialMult(uint64_t op1, uint64_t op2, int lane_size_in_bits) const899 uint64_t Simulator::PolynomialMult(uint64_t op1,
900 uint64_t op2,
901 int lane_size_in_bits) const {
902 VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize);
903 VIXL_ASSERT(IsUintN(lane_size_in_bits, op1));
904 VIXL_ASSERT(IsUintN(lane_size_in_bits, op2));
905 uint64_t result = 0;
906 for (int i = 0; i < lane_size_in_bits; ++i) {
907 if ((op1 >> i) & 1) {
908 result = result ^ (op2 << i);
909 }
910 }
911 return result;
912 }
913
914
pmul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)915 LogicVRegister Simulator::pmul(VectorFormat vform,
916 LogicVRegister dst,
917 const LogicVRegister& src1,
918 const LogicVRegister& src2) {
919 dst.ClearForWrite(vform);
920 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
921 dst.SetUint(vform,
922 i,
923 PolynomialMult(src1.Uint(vform, i),
924 src2.Uint(vform, i),
925 LaneSizeInBitsFromFormat(vform)));
926 }
927 return dst;
928 }
929
930
pmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)931 LogicVRegister Simulator::pmull(VectorFormat vform,
932 LogicVRegister dst,
933 const LogicVRegister& src1,
934 const LogicVRegister& src2) {
935 dst.ClearForWrite(vform);
936
937 VectorFormat vform_src = VectorFormatHalfWidth(vform);
938 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
939 dst.SetUint(vform,
940 i,
941 PolynomialMult(src1.Uint(vform_src, i),
942 src2.Uint(vform_src, i),
943 LaneSizeInBitsFromFormat(vform_src)));
944 }
945
946 return dst;
947 }
948
949
pmull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)950 LogicVRegister Simulator::pmull2(VectorFormat vform,
951 LogicVRegister dst,
952 const LogicVRegister& src1,
953 const LogicVRegister& src2) {
954 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
955 dst.ClearForWrite(vform);
956 int lane_count = LaneCountFromFormat(vform);
957 for (int i = 0; i < lane_count; i++) {
958 dst.SetUint(vform,
959 i,
960 PolynomialMult(src1.Uint(vform_src, lane_count + i),
961 src2.Uint(vform_src, lane_count + i),
962 LaneSizeInBitsFromFormat(vform_src)));
963 }
964 return dst;
965 }
966
967
sub(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)968 LogicVRegister Simulator::sub(VectorFormat vform,
969 LogicVRegister dst,
970 const LogicVRegister& src1,
971 const LogicVRegister& src2) {
972 int lane_size = LaneSizeInBitsFromFormat(vform);
973 dst.ClearForWrite(vform);
974 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
975 // Test for unsigned saturation.
976 uint64_t ua = src1.UintLeftJustified(vform, i);
977 uint64_t ub = src2.UintLeftJustified(vform, i);
978 uint64_t ur = ua - ub;
979 if (ub > ua) {
980 dst.SetUnsignedSat(i, false);
981 }
982
983 // Test for signed saturation.
984 bool pos_a = (ua >> 63) == 0;
985 bool pos_b = (ub >> 63) == 0;
986 bool pos_r = (ur >> 63) == 0;
987 // If the signs of the operands are different, and the sign of the first
988 // operand doesn't match the result, there was an overflow.
989 if ((pos_a != pos_b) && (pos_a != pos_r)) {
990 dst.SetSignedSat(i, pos_a);
991 }
992
993 dst.SetInt(vform, i, ur >> (64 - lane_size));
994 }
995 return dst;
996 }
997
sub_uint(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, uint64_t value)998 LogicVRegister Simulator::sub_uint(VectorFormat vform,
999 LogicVRegister dst,
1000 const LogicVRegister& src1,
1001 uint64_t value) {
1002 int lane_size = LaneSizeInBitsFromFormat(vform);
1003 VIXL_ASSERT(IsUintN(lane_size, value));
1004 dst.ClearForWrite(vform);
1005 // Left-justify `value`.
1006 uint64_t ub = value << (64 - lane_size);
1007 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1008 // Test for unsigned saturation.
1009 uint64_t ua = src1.UintLeftJustified(vform, i);
1010 uint64_t ur = ua - ub;
1011 if (ub > ua) {
1012 dst.SetUnsignedSat(i, false);
1013 }
1014
1015 // Test for signed saturation.
1016 // `value` is always positive, so we have an overflow if the (signed) result
1017 // is greater than the first operand.
1018 if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1019 dst.SetSignedSat(i, false);
1020 }
1021
1022 dst.SetInt(vform, i, ur >> (64 - lane_size));
1023 }
1024 return dst;
1025 }
1026
and_(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1027 LogicVRegister Simulator::and_(VectorFormat vform,
1028 LogicVRegister dst,
1029 const LogicVRegister& src1,
1030 const LogicVRegister& src2) {
1031 dst.ClearForWrite(vform);
1032 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1033 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1034 }
1035 return dst;
1036 }
1037
1038
orr(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1039 LogicVRegister Simulator::orr(VectorFormat vform,
1040 LogicVRegister dst,
1041 const LogicVRegister& src1,
1042 const LogicVRegister& src2) {
1043 dst.ClearForWrite(vform);
1044 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1045 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1046 }
1047 return dst;
1048 }
1049
1050
orn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1051 LogicVRegister Simulator::orn(VectorFormat vform,
1052 LogicVRegister dst,
1053 const LogicVRegister& src1,
1054 const LogicVRegister& src2) {
1055 dst.ClearForWrite(vform);
1056 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1057 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1058 }
1059 return dst;
1060 }
1061
1062
eor(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1063 LogicVRegister Simulator::eor(VectorFormat vform,
1064 LogicVRegister dst,
1065 const LogicVRegister& src1,
1066 const LogicVRegister& src2) {
1067 dst.ClearForWrite(vform);
1068 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1069 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1070 }
1071 return dst;
1072 }
1073
1074
bic(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1075 LogicVRegister Simulator::bic(VectorFormat vform,
1076 LogicVRegister dst,
1077 const LogicVRegister& src1,
1078 const LogicVRegister& src2) {
1079 dst.ClearForWrite(vform);
1080 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1081 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1082 }
1083 return dst;
1084 }
1085
1086
bic(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, uint64_t imm)1087 LogicVRegister Simulator::bic(VectorFormat vform,
1088 LogicVRegister dst,
1089 const LogicVRegister& src,
1090 uint64_t imm) {
1091 uint64_t result[16];
1092 int lane_count = LaneCountFromFormat(vform);
1093 for (int i = 0; i < lane_count; ++i) {
1094 result[i] = src.Uint(vform, i) & ~imm;
1095 }
1096 dst.ClearForWrite(vform);
1097 for (int i = 0; i < lane_count; ++i) {
1098 dst.SetUint(vform, i, result[i]);
1099 }
1100 return dst;
1101 }
1102
1103
bif(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1104 LogicVRegister Simulator::bif(VectorFormat vform,
1105 LogicVRegister dst,
1106 const LogicVRegister& src1,
1107 const LogicVRegister& src2) {
1108 dst.ClearForWrite(vform);
1109 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1110 uint64_t operand1 = dst.Uint(vform, i);
1111 uint64_t operand2 = ~src2.Uint(vform, i);
1112 uint64_t operand3 = src1.Uint(vform, i);
1113 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1114 dst.SetUint(vform, i, result);
1115 }
1116 return dst;
1117 }
1118
1119
bit(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1120 LogicVRegister Simulator::bit(VectorFormat vform,
1121 LogicVRegister dst,
1122 const LogicVRegister& src1,
1123 const LogicVRegister& src2) {
1124 dst.ClearForWrite(vform);
1125 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1126 uint64_t operand1 = dst.Uint(vform, i);
1127 uint64_t operand2 = src2.Uint(vform, i);
1128 uint64_t operand3 = src1.Uint(vform, i);
1129 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1130 dst.SetUint(vform, i, result);
1131 }
1132 return dst;
1133 }
1134
1135
bsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src_mask, const LogicVRegister& src1, const LogicVRegister& src2)1136 LogicVRegister Simulator::bsl(VectorFormat vform,
1137 LogicVRegister dst,
1138 const LogicVRegister& src_mask,
1139 const LogicVRegister& src1,
1140 const LogicVRegister& src2) {
1141 dst.ClearForWrite(vform);
1142 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1143 uint64_t operand1 = src2.Uint(vform, i);
1144 uint64_t operand2 = src_mask.Uint(vform, i);
1145 uint64_t operand3 = src1.Uint(vform, i);
1146 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1147 dst.SetUint(vform, i, result);
1148 }
1149 return dst;
1150 }
1151
1152
sminmax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max)1153 LogicVRegister Simulator::sminmax(VectorFormat vform,
1154 LogicVRegister dst,
1155 const LogicVRegister& src1,
1156 const LogicVRegister& src2,
1157 bool max) {
1158 dst.ClearForWrite(vform);
1159 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1160 int64_t src1_val = src1.Int(vform, i);
1161 int64_t src2_val = src2.Int(vform, i);
1162 int64_t dst_val;
1163 if (max) {
1164 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1165 } else {
1166 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1167 }
1168 dst.SetInt(vform, i, dst_val);
1169 }
1170 return dst;
1171 }
1172
1173
smax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1174 LogicVRegister Simulator::smax(VectorFormat vform,
1175 LogicVRegister dst,
1176 const LogicVRegister& src1,
1177 const LogicVRegister& src2) {
1178 return sminmax(vform, dst, src1, src2, true);
1179 }
1180
1181
smin(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1182 LogicVRegister Simulator::smin(VectorFormat vform,
1183 LogicVRegister dst,
1184 const LogicVRegister& src1,
1185 const LogicVRegister& src2) {
1186 return sminmax(vform, dst, src1, src2, false);
1187 }
1188
1189
sminmaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max)1190 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1191 LogicVRegister dst,
1192 const LogicVRegister& src1,
1193 const LogicVRegister& src2,
1194 bool max) {
1195 unsigned lanes = LaneCountFromFormat(vform);
1196 int64_t result[kZRegMaxSizeInBytes];
1197 const LogicVRegister* src = &src1;
1198 for (unsigned j = 0; j < 2; j++) {
1199 for (unsigned i = 0; i < lanes; i += 2) {
1200 int64_t first_val = src->Int(vform, i);
1201 int64_t second_val = src->Int(vform, i + 1);
1202 int64_t dst_val;
1203 if (max) {
1204 dst_val = (first_val > second_val) ? first_val : second_val;
1205 } else {
1206 dst_val = (first_val < second_val) ? first_val : second_val;
1207 }
1208 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1209 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1210 }
1211 src = &src2;
1212 }
1213 dst.SetIntArray(vform, result);
1214 if (IsSVEFormat(vform)) {
1215 interleave_top_bottom(vform, dst, dst);
1216 }
1217 return dst;
1218 }
1219
1220
smaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1221 LogicVRegister Simulator::smaxp(VectorFormat vform,
1222 LogicVRegister dst,
1223 const LogicVRegister& src1,
1224 const LogicVRegister& src2) {
1225 return sminmaxp(vform, dst, src1, src2, true);
1226 }
1227
1228
sminp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1229 LogicVRegister Simulator::sminp(VectorFormat vform,
1230 LogicVRegister dst,
1231 const LogicVRegister& src1,
1232 const LogicVRegister& src2) {
1233 return sminmaxp(vform, dst, src1, src2, false);
1234 }
1235
1236
addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1237 LogicVRegister Simulator::addp(VectorFormat vform,
1238 LogicVRegister dst,
1239 const LogicVRegister& src) {
1240 VIXL_ASSERT(vform == kFormatD);
1241
1242 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1243 dst.ClearForWrite(vform);
1244 dst.SetUint(vform, 0, dst_val);
1245 return dst;
1246 }
1247
1248
addv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1249 LogicVRegister Simulator::addv(VectorFormat vform,
1250 LogicVRegister dst,
1251 const LogicVRegister& src) {
1252 VectorFormat vform_dst =
1253 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1254
1255
1256 int64_t dst_val = 0;
1257 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1258 dst_val += src.Int(vform, i);
1259 }
1260
1261 dst.ClearForWrite(vform_dst);
1262 dst.SetInt(vform_dst, 0, dst_val);
1263 return dst;
1264 }
1265
1266
saddlv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1267 LogicVRegister Simulator::saddlv(VectorFormat vform,
1268 LogicVRegister dst,
1269 const LogicVRegister& src) {
1270 VectorFormat vform_dst =
1271 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1272
1273 int64_t dst_val = 0;
1274 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1275 dst_val += src.Int(vform, i);
1276 }
1277
1278 dst.ClearForWrite(vform_dst);
1279 dst.SetInt(vform_dst, 0, dst_val);
1280 return dst;
1281 }
1282
1283
uaddlv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1284 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1285 LogicVRegister dst,
1286 const LogicVRegister& src) {
1287 VectorFormat vform_dst =
1288 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1289
1290 uint64_t dst_val = 0;
1291 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1292 dst_val += src.Uint(vform, i);
1293 }
1294
1295 dst.ClearForWrite(vform_dst);
1296 dst.SetUint(vform_dst, 0, dst_val);
1297 return dst;
1298 }
1299
1300
sminmaxv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src, bool max)1301 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1302 LogicVRegister dst,
1303 const LogicPRegister& pg,
1304 const LogicVRegister& src,
1305 bool max) {
1306 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1307 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1308 if (!pg.IsActive(vform, i)) continue;
1309
1310 int64_t src_val = src.Int(vform, i);
1311 if (max) {
1312 dst_val = (src_val > dst_val) ? src_val : dst_val;
1313 } else {
1314 dst_val = (src_val < dst_val) ? src_val : dst_val;
1315 }
1316 }
1317 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1318 dst.SetInt(vform, 0, dst_val);
1319 return dst;
1320 }
1321
1322
smaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1323 LogicVRegister Simulator::smaxv(VectorFormat vform,
1324 LogicVRegister dst,
1325 const LogicVRegister& src) {
1326 sminmaxv(vform, dst, GetPTrue(), src, true);
1327 return dst;
1328 }
1329
1330
sminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1331 LogicVRegister Simulator::sminv(VectorFormat vform,
1332 LogicVRegister dst,
1333 const LogicVRegister& src) {
1334 sminmaxv(vform, dst, GetPTrue(), src, false);
1335 return dst;
1336 }
1337
1338
smaxv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1339 LogicVRegister Simulator::smaxv(VectorFormat vform,
1340 LogicVRegister dst,
1341 const LogicPRegister& pg,
1342 const LogicVRegister& src) {
1343 VIXL_ASSERT(IsSVEFormat(vform));
1344 sminmaxv(vform, dst, pg, src, true);
1345 return dst;
1346 }
1347
1348
sminv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1349 LogicVRegister Simulator::sminv(VectorFormat vform,
1350 LogicVRegister dst,
1351 const LogicPRegister& pg,
1352 const LogicVRegister& src) {
1353 VIXL_ASSERT(IsSVEFormat(vform));
1354 sminmaxv(vform, dst, pg, src, false);
1355 return dst;
1356 }
1357
1358
uminmax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max)1359 LogicVRegister Simulator::uminmax(VectorFormat vform,
1360 LogicVRegister dst,
1361 const LogicVRegister& src1,
1362 const LogicVRegister& src2,
1363 bool max) {
1364 dst.ClearForWrite(vform);
1365 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1366 uint64_t src1_val = src1.Uint(vform, i);
1367 uint64_t src2_val = src2.Uint(vform, i);
1368 uint64_t dst_val;
1369 if (max) {
1370 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1371 } else {
1372 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1373 }
1374 dst.SetUint(vform, i, dst_val);
1375 }
1376 return dst;
1377 }
1378
1379
umax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1380 LogicVRegister Simulator::umax(VectorFormat vform,
1381 LogicVRegister dst,
1382 const LogicVRegister& src1,
1383 const LogicVRegister& src2) {
1384 return uminmax(vform, dst, src1, src2, true);
1385 }
1386
1387
umin(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1388 LogicVRegister Simulator::umin(VectorFormat vform,
1389 LogicVRegister dst,
1390 const LogicVRegister& src1,
1391 const LogicVRegister& src2) {
1392 return uminmax(vform, dst, src1, src2, false);
1393 }
1394
1395
uminmaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max)1396 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1397 LogicVRegister dst,
1398 const LogicVRegister& src1,
1399 const LogicVRegister& src2,
1400 bool max) {
1401 unsigned lanes = LaneCountFromFormat(vform);
1402 uint64_t result[kZRegMaxSizeInBytes];
1403 const LogicVRegister* src = &src1;
1404 for (unsigned j = 0; j < 2; j++) {
1405 for (unsigned i = 0; i < lanes; i += 2) {
1406 uint64_t first_val = src->Uint(vform, i);
1407 uint64_t second_val = src->Uint(vform, i + 1);
1408 uint64_t dst_val;
1409 if (max) {
1410 dst_val = (first_val > second_val) ? first_val : second_val;
1411 } else {
1412 dst_val = (first_val < second_val) ? first_val : second_val;
1413 }
1414 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1415 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1416 }
1417 src = &src2;
1418 }
1419 dst.SetUintArray(vform, result);
1420 if (IsSVEFormat(vform)) {
1421 interleave_top_bottom(vform, dst, dst);
1422 }
1423 return dst;
1424 }
1425
1426
umaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1427 LogicVRegister Simulator::umaxp(VectorFormat vform,
1428 LogicVRegister dst,
1429 const LogicVRegister& src1,
1430 const LogicVRegister& src2) {
1431 return uminmaxp(vform, dst, src1, src2, true);
1432 }
1433
1434
uminp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2)1435 LogicVRegister Simulator::uminp(VectorFormat vform,
1436 LogicVRegister dst,
1437 const LogicVRegister& src1,
1438 const LogicVRegister& src2) {
1439 return uminmaxp(vform, dst, src1, src2, false);
1440 }
1441
1442
uminmaxv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src, bool max)1443 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1444 LogicVRegister dst,
1445 const LogicPRegister& pg,
1446 const LogicVRegister& src,
1447 bool max) {
1448 uint64_t dst_val = max ? 0 : UINT64_MAX;
1449 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1450 if (!pg.IsActive(vform, i)) continue;
1451
1452 uint64_t src_val = src.Uint(vform, i);
1453 if (max) {
1454 dst_val = (src_val > dst_val) ? src_val : dst_val;
1455 } else {
1456 dst_val = (src_val < dst_val) ? src_val : dst_val;
1457 }
1458 }
1459 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1460 dst.SetUint(vform, 0, dst_val);
1461 return dst;
1462 }
1463
1464
umaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1465 LogicVRegister Simulator::umaxv(VectorFormat vform,
1466 LogicVRegister dst,
1467 const LogicVRegister& src) {
1468 uminmaxv(vform, dst, GetPTrue(), src, true);
1469 return dst;
1470 }
1471
1472
uminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1473 LogicVRegister Simulator::uminv(VectorFormat vform,
1474 LogicVRegister dst,
1475 const LogicVRegister& src) {
1476 uminmaxv(vform, dst, GetPTrue(), src, false);
1477 return dst;
1478 }
1479
1480
umaxv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1481 LogicVRegister Simulator::umaxv(VectorFormat vform,
1482 LogicVRegister dst,
1483 const LogicPRegister& pg,
1484 const LogicVRegister& src) {
1485 VIXL_ASSERT(IsSVEFormat(vform));
1486 uminmaxv(vform, dst, pg, src, true);
1487 return dst;
1488 }
1489
1490
uminv(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1491 LogicVRegister Simulator::uminv(VectorFormat vform,
1492 LogicVRegister dst,
1493 const LogicPRegister& pg,
1494 const LogicVRegister& src) {
1495 VIXL_ASSERT(IsSVEFormat(vform));
1496 uminmaxv(vform, dst, pg, src, false);
1497 return dst;
1498 }
1499
1500
shl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1501 LogicVRegister Simulator::shl(VectorFormat vform,
1502 LogicVRegister dst,
1503 const LogicVRegister& src,
1504 int shift) {
1505 VIXL_ASSERT(shift >= 0);
1506 SimVRegister temp;
1507 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1508 return ushl(vform, dst, src, shiftreg);
1509 }
1510
1511
sshll(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1512 LogicVRegister Simulator::sshll(VectorFormat vform,
1513 LogicVRegister dst,
1514 const LogicVRegister& src,
1515 int shift) {
1516 VIXL_ASSERT(shift >= 0);
1517 SimVRegister temp1, temp2;
1518 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1519 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1520 return sshl(vform, dst, extendedreg, shiftreg);
1521 }
1522
1523
sshll2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1524 LogicVRegister Simulator::sshll2(VectorFormat vform,
1525 LogicVRegister dst,
1526 const LogicVRegister& src,
1527 int shift) {
1528 VIXL_ASSERT(shift >= 0);
1529 SimVRegister temp1, temp2;
1530 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1531 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1532 return sshl(vform, dst, extendedreg, shiftreg);
1533 }
1534
1535
shll(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1536 LogicVRegister Simulator::shll(VectorFormat vform,
1537 LogicVRegister dst,
1538 const LogicVRegister& src) {
1539 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1540 return sshll(vform, dst, src, shift);
1541 }
1542
1543
shll2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src)1544 LogicVRegister Simulator::shll2(VectorFormat vform,
1545 LogicVRegister dst,
1546 const LogicVRegister& src) {
1547 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1548 return sshll2(vform, dst, src, shift);
1549 }
1550
1551
ushll(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1552 LogicVRegister Simulator::ushll(VectorFormat vform,
1553 LogicVRegister dst,
1554 const LogicVRegister& src,
1555 int shift) {
1556 VIXL_ASSERT(shift >= 0);
1557 SimVRegister temp1, temp2;
1558 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1559 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1560 return ushl(vform, dst, extendedreg, shiftreg);
1561 }
1562
1563
ushll2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift)1564 LogicVRegister Simulator::ushll2(VectorFormat vform,
1565 LogicVRegister dst,
1566 const LogicVRegister& src,
1567 int shift) {
1568 VIXL_ASSERT(shift >= 0);
1569 SimVRegister temp1, temp2;
1570 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1571 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1572 return ushl(vform, dst, extendedreg, shiftreg);
1573 }
1574
clast(VectorFormat vform, const LogicPRegister& pg, const LogicVRegister& src, int offset_from_last_active)1575 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1576 const LogicPRegister& pg,
1577 const LogicVRegister& src,
1578 int offset_from_last_active) {
1579 // Untested for any other values.
1580 VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1581
1582 int last_active = GetLastActive(vform, pg);
1583 int lane_count = LaneCountFromFormat(vform);
1584 int index =
1585 ((last_active + offset_from_last_active) + lane_count) % lane_count;
1586 return std::make_pair(last_active >= 0, src.Uint(vform, index));
1587 }
1588
compact(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src)1589 LogicVRegister Simulator::compact(VectorFormat vform,
1590 LogicVRegister dst,
1591 const LogicPRegister& pg,
1592 const LogicVRegister& src) {
1593 int j = 0;
1594 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1595 if (pg.IsActive(vform, i)) {
1596 dst.SetUint(vform, j++, src.Uint(vform, i));
1597 }
1598 }
1599 for (; j < LaneCountFromFormat(vform); j++) {
1600 dst.SetUint(vform, j, 0);
1601 }
1602 return dst;
1603 }
1604
splice(VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg, const LogicVRegister& src1, const LogicVRegister& src2)1605 LogicVRegister Simulator::splice(VectorFormat vform,
1606 LogicVRegister dst,
1607 const LogicPRegister& pg,
1608 const LogicVRegister& src1,
1609 const LogicVRegister& src2) {
1610 int lane_count = LaneCountFromFormat(vform);
1611 int first_active = GetFirstActive(vform, pg);
1612 int last_active = GetLastActive(vform, pg);
1613 int dst_idx = 0;
1614 uint64_t result[kZRegMaxSizeInBytes];
1615
1616 if (first_active >= 0) {
1617 VIXL_ASSERT(last_active >= first_active);
1618 VIXL_ASSERT(last_active < lane_count);
1619 for (int i = first_active; i <= last_active; i++) {
1620 result[dst_idx++] = src1.Uint(vform, i);
1621 }
1622 }
1623
1624 VIXL_ASSERT(dst_idx <= lane_count);
1625 for (int i = dst_idx; i < lane_count; i++) {
1626 result[i] = src2.Uint(vform, i - dst_idx);
1627 }
1628
1629 dst.SetUintArray(vform, result);
1630
1631 return dst;
1632 }
1633
1634 LogicVRegister Simulator::sel(VectorFormat vform,
1635 LogicVRegister dst,
1636 const SimPRegister& pg,
1637 const LogicVRegister& src1,
1638 const LogicVRegister& src2) {
1639 int p_reg_bits_per_lane =
1640 LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1641 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1642 uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1643 ? src1.Uint(vform, lane)
1644 : src2.Uint(vform, lane);
1645 dst.SetUint(vform, lane, lane_value);
1646 }
1647 return dst;
1648 }
1649
1650
1651 LogicPRegister Simulator::sel(LogicPRegister dst,
1652 const LogicPRegister& pg,
1653 const LogicPRegister& src1,
1654 const LogicPRegister& src2) {
1655 for (int i = 0; i < dst.GetChunkCount(); i++) {
1656 LogicPRegister::ChunkType mask = pg.GetChunk(i);
1657 LogicPRegister::ChunkType result =
1658 (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1659 dst.SetChunk(i, result);
1660 }
1661 return dst;
1662 }
1663
1664
1665 LogicVRegister Simulator::sli(VectorFormat vform,
1666 LogicVRegister dst,
1667 const LogicVRegister& src,
1668 int shift) {
1669 dst.ClearForWrite(vform);
1670 int lane_count = LaneCountFromFormat(vform);
1671 for (int i = 0; i < lane_count; i++) {
1672 uint64_t src_lane = src.Uint(vform, i);
1673 uint64_t dst_lane = dst.Uint(vform, i);
1674 uint64_t shifted = src_lane << shift;
1675 uint64_t mask = MaxUintFromFormat(vform) << shift;
1676 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1677 }
1678 return dst;
1679 }
1680
1681
1682 LogicVRegister Simulator::sqshl(VectorFormat vform,
1683 LogicVRegister dst,
1684 const LogicVRegister& src,
1685 int shift) {
1686 VIXL_ASSERT(shift >= 0);
1687 SimVRegister temp;
1688 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1689 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1690 }
1691
1692
1693 LogicVRegister Simulator::uqshl(VectorFormat vform,
1694 LogicVRegister dst,
1695 const LogicVRegister& src,
1696 int shift) {
1697 VIXL_ASSERT(shift >= 0);
1698 SimVRegister temp;
1699 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1700 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1701 }
1702
1703
1704 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1705 LogicVRegister dst,
1706 const LogicVRegister& src,
1707 int shift) {
1708 VIXL_ASSERT(shift >= 0);
1709 SimVRegister temp;
1710 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1711 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1712 }
1713
1714
1715 LogicVRegister Simulator::sri(VectorFormat vform,
1716 LogicVRegister dst,
1717 const LogicVRegister& src,
1718 int shift) {
1719 dst.ClearForWrite(vform);
1720 int lane_count = LaneCountFromFormat(vform);
1721 VIXL_ASSERT((shift > 0) &&
1722 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1723 for (int i = 0; i < lane_count; i++) {
1724 uint64_t src_lane = src.Uint(vform, i);
1725 uint64_t dst_lane = dst.Uint(vform, i);
1726 uint64_t shifted;
1727 uint64_t mask;
1728 if (shift == 64) {
1729 shifted = 0;
1730 mask = 0;
1731 } else {
1732 shifted = src_lane >> shift;
1733 mask = MaxUintFromFormat(vform) >> shift;
1734 }
1735 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1736 }
1737 return dst;
1738 }
1739
1740
1741 LogicVRegister Simulator::ushr(VectorFormat vform,
1742 LogicVRegister dst,
1743 const LogicVRegister& src,
1744 int shift) {
1745 VIXL_ASSERT(shift >= 0);
1746 SimVRegister temp;
1747 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1748 return ushl(vform, dst, src, shiftreg);
1749 }
1750
1751
1752 LogicVRegister Simulator::sshr(VectorFormat vform,
1753 LogicVRegister dst,
1754 const LogicVRegister& src,
1755 int shift) {
1756 VIXL_ASSERT(shift >= 0);
1757 SimVRegister temp;
1758 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1759 return sshl(vform, dst, src, shiftreg);
1760 }
1761
1762
1763 LogicVRegister Simulator::ssra(VectorFormat vform,
1764 LogicVRegister dst,
1765 const LogicVRegister& src,
1766 int shift) {
1767 SimVRegister temp;
1768 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1769 return add(vform, dst, dst, shifted_reg);
1770 }
1771
1772
1773 LogicVRegister Simulator::usra(VectorFormat vform,
1774 LogicVRegister dst,
1775 const LogicVRegister& src,
1776 int shift) {
1777 SimVRegister temp;
1778 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1779 return add(vform, dst, dst, shifted_reg);
1780 }
1781
1782
1783 LogicVRegister Simulator::srsra(VectorFormat vform,
1784 LogicVRegister dst,
1785 const LogicVRegister& src,
1786 int shift) {
1787 SimVRegister temp;
1788 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1789 return add(vform, dst, dst, shifted_reg);
1790 }
1791
1792
1793 LogicVRegister Simulator::ursra(VectorFormat vform,
1794 LogicVRegister dst,
1795 const LogicVRegister& src,
1796 int shift) {
1797 SimVRegister temp;
1798 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1799 return add(vform, dst, dst, shifted_reg);
1800 }
1801
1802
1803 LogicVRegister Simulator::cls(VectorFormat vform,
1804 LogicVRegister dst,
1805 const LogicVRegister& src) {
1806 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1807 int lane_count = LaneCountFromFormat(vform);
1808
1809 // Ensure that we can store one result per lane.
1810 int result[kZRegMaxSizeInBytes];
1811
1812 for (int i = 0; i < lane_count; i++) {
1813 result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
1814 }
1815
1816 dst.ClearForWrite(vform);
1817 for (int i = 0; i < lane_count; ++i) {
1818 dst.SetUint(vform, i, result[i]);
1819 }
1820 return dst;
1821 }
1822
1823
1824 LogicVRegister Simulator::clz(VectorFormat vform,
1825 LogicVRegister dst,
1826 const LogicVRegister& src) {
1827 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1828 int lane_count = LaneCountFromFormat(vform);
1829
1830 // Ensure that we can store one result per lane.
1831 int result[kZRegMaxSizeInBytes];
1832
1833 for (int i = 0; i < lane_count; i++) {
1834 result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
1835 }
1836
1837 dst.ClearForWrite(vform);
1838 for (int i = 0; i < lane_count; ++i) {
1839 dst.SetUint(vform, i, result[i]);
1840 }
1841 return dst;
1842 }
1843
1844
1845 LogicVRegister Simulator::cnot(VectorFormat vform,
1846 LogicVRegister dst,
1847 const LogicVRegister& src) {
1848 dst.ClearForWrite(vform);
1849 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1850 uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
1851 dst.SetUint(vform, i, value);
1852 }
1853 return dst;
1854 }
1855
1856
1857 LogicVRegister Simulator::cnt(VectorFormat vform,
1858 LogicVRegister dst,
1859 const LogicVRegister& src) {
1860 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1861 int lane_count = LaneCountFromFormat(vform);
1862
1863 // Ensure that we can store one result per lane.
1864 int result[kZRegMaxSizeInBytes];
1865
1866 for (int i = 0; i < lane_count; i++) {
1867 result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
1868 }
1869
1870 dst.ClearForWrite(vform);
1871 for (int i = 0; i < lane_count; ++i) {
1872 dst.SetUint(vform, i, result[i]);
1873 }
1874 return dst;
1875 }
1876
1877 static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1878 int esize,
1879 bool shift_in_ls_byte) {
1880 if (shift_in_ls_byte) {
1881 // Neon uses the least-significant byte of the lane as the shift distance.
1882 shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1883 } else {
1884 // SVE uses a saturated shift distance in the range
1885 // -(esize + 1) ... (esize + 1).
1886 if (shift_val > (esize + 1)) shift_val = esize + 1;
1887 if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1888 }
1889 return shift_val;
1890 }
1891
// Signed shift of each lane of `src1` by the signed distance held in the
// matching lane of `src2`. Positive distances shift left, negative distances
// shift right with sign extension.
//
// `shift_in_ls_byte` selects how the distance is derived from the `src2` lane
// (see CalculateSignedShiftDistance).
//
// Besides writing the shifted value, this records per-lane signed saturation,
// unsigned saturation and rounding state on `dst`.
LogicVRegister Simulator::sshl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool shift_in_ls_byte) {
  dst.ClearForWrite(vform);
  int esize = LaneSizeInBitsFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
                                                     esize,
                                                     shift_in_ls_byte);

    // The saturation tests below use the left-justified lane value, so the
    // leading-bit counts are taken over a 64-bit quantity.
    int64_t lj_src_val = src1.IntLeftJustified(vform, i);

    // Set signed saturation state: a non-zero value saturates when it is
    // shifted left by more than its count of leading sign bits. The flag
    // argument is true for non-negative sources.
    if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
      dst.SetSignedSat(i, lj_src_val >= 0);
    }

    // Set unsigned saturation state: negative sources always set the state
    // (with `false`); non-negative, non-zero sources set it (with `true`) when
    // shifted left by more than their count of leading zeros.
    if (lj_src_val < 0) {
      dst.SetUnsignedSat(i, false);
    } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
               (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    int64_t src_val = src1.Int(vform, i);
    bool src_is_negative = src_val < 0;
    if (shift_val > 63) {
      // Left shift by 64 or more: every bit is shifted out.
      dst.SetInt(vform, i, 0);
    } else if (shift_val < -63) {
      // Right shift by 64 or more: only copies of the sign bit remain, and
      // the rounding state is set to the sign of the source.
      dst.SetRounding(i, src_is_negative);
      dst.SetInt(vform, i, src_is_negative ? -1 : 0);
    } else {
      // Use unsigned types for shifts, as behaviour is undefined for signed
      // lhs.
      uint64_t usrc_val = static_cast<uint64_t>(src_val);

      if (shift_val < 0) {
        // Convert to right shift. Here shift_val becomes 1 ... 63.
        shift_val = -shift_val;

        // Set rounding state by testing most-significant bit shifted out.
        // Rounding only needed on right shifts.
        if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }

        usrc_val >>= shift_val;

        if (src_is_negative) {
          // Simulate sign-extension by filling the vacated top bits with
          // ones.
          usrc_val |= (~UINT64_C(0) << (64 - shift_val));
        }
      } else {
        usrc_val <<= shift_val;
      }
      dst.SetUint(vform, i, usrc_val);
    }
  }
  return dst;
}
1955
1956
1957 LogicVRegister Simulator::ushl(VectorFormat vform,
1958 LogicVRegister dst,
1959 const LogicVRegister& src1,
1960 const LogicVRegister& src2,
1961 bool shift_in_ls_byte) {
1962 dst.ClearForWrite(vform);
1963 int esize = LaneSizeInBitsFromFormat(vform);
1964 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1965 int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1966 esize,
1967 shift_in_ls_byte);
1968
1969 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1970
1971 // Set saturation state.
1972 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1973 dst.SetUnsignedSat(i, true);
1974 }
1975
1976 uint64_t src_val = src1.Uint(vform, i);
1977 if ((shift_val > 63) || (shift_val < -64)) {
1978 dst.SetUint(vform, i, 0);
1979 } else {
1980 if (shift_val < 0) {
1981 // Set rounding state. Rounding only needed on right shifts.
1982 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1983 dst.SetRounding(i, true);
1984 }
1985
1986 if (shift_val == -64) {
1987 src_val = 0;
1988 } else {
1989 src_val >>= -shift_val;
1990 }
1991 } else {
1992 src_val <<= shift_val;
1993 }
1994 dst.SetUint(vform, i, src_val);
1995 }
1996 }
1997 return dst;
1998 }
1999
2000 LogicVRegister Simulator::sshr(VectorFormat vform,
2001 LogicVRegister dst,
2002 const LogicVRegister& src1,
2003 const LogicVRegister& src2) {
2004 SimVRegister temp;
2005 // Saturate to sidestep the min-int problem.
2006 neg(vform, temp, src2).SignedSaturate(vform);
2007 sshl(vform, dst, src1, temp, false);
2008 return dst;
2009 }
2010
2011 LogicVRegister Simulator::ushr(VectorFormat vform,
2012 LogicVRegister dst,
2013 const LogicVRegister& src1,
2014 const LogicVRegister& src2) {
2015 SimVRegister temp;
2016 // Saturate to sidestep the min-int problem.
2017 neg(vform, temp, src2).SignedSaturate(vform);
2018 ushl(vform, dst, src1, temp, false);
2019 return dst;
2020 }
2021
2022 LogicVRegister Simulator::neg(VectorFormat vform,
2023 LogicVRegister dst,
2024 const LogicVRegister& src) {
2025 dst.ClearForWrite(vform);
2026 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2027 // Test for signed saturation.
2028 int64_t sa = src.Int(vform, i);
2029 if (sa == MinIntFromFormat(vform)) {
2030 dst.SetSignedSat(i, true);
2031 }
2032 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2033 }
2034 return dst;
2035 }
2036
2037
2038 LogicVRegister Simulator::suqadd(VectorFormat vform,
2039 LogicVRegister dst,
2040 const LogicVRegister& src1,
2041 const LogicVRegister& src2) {
2042 dst.ClearForWrite(vform);
2043 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2044 int64_t sa = src1.IntLeftJustified(vform, i);
2045 uint64_t ub = src2.UintLeftJustified(vform, i);
2046 uint64_t ur = sa + ub;
2047
2048 int64_t sr;
2049 memcpy(&sr, &ur, sizeof(sr));
2050 if (sr < sa) { // Test for signed positive saturation.
2051 dst.SetInt(vform, i, MaxIntFromFormat(vform));
2052 } else {
2053 dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
2054 }
2055 }
2056 return dst;
2057 }
2058
2059
2060 LogicVRegister Simulator::usqadd(VectorFormat vform,
2061 LogicVRegister dst,
2062 const LogicVRegister& src1,
2063 const LogicVRegister& src2) {
2064 dst.ClearForWrite(vform);
2065 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2066 uint64_t ua = src1.UintLeftJustified(vform, i);
2067 int64_t sb = src2.IntLeftJustified(vform, i);
2068 uint64_t ur = ua + sb;
2069
2070 if ((sb > 0) && (ur <= ua)) {
2071 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2072 } else if ((sb < 0) && (ur >= ua)) {
2073 dst.SetUint(vform, i, 0); // Negative saturation.
2074 } else {
2075 dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
2076 }
2077 }
2078 return dst;
2079 }
2080
2081
2082 LogicVRegister Simulator::abs(VectorFormat vform,
2083 LogicVRegister dst,
2084 const LogicVRegister& src) {
2085 dst.ClearForWrite(vform);
2086 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2087 // Test for signed saturation.
2088 int64_t sa = src.Int(vform, i);
2089 if (sa == MinIntFromFormat(vform)) {
2090 dst.SetSignedSat(i, true);
2091 }
2092 if (sa < 0) {
2093 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2094 } else {
2095 dst.SetInt(vform, i, sa);
2096 }
2097 }
2098 return dst;
2099 }
2100
2101
2102 LogicVRegister Simulator::andv(VectorFormat vform,
2103 LogicVRegister dst,
2104 const LogicPRegister& pg,
2105 const LogicVRegister& src) {
2106 VIXL_ASSERT(IsSVEFormat(vform));
2107 uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2108 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2109 if (!pg.IsActive(vform, i)) continue;
2110
2111 result &= src.Uint(vform, i);
2112 }
2113 VectorFormat vform_dst =
2114 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2115 dst.ClearForWrite(vform_dst);
2116 dst.SetUint(vform_dst, 0, result);
2117 return dst;
2118 }
2119
2120
2121 LogicVRegister Simulator::eorv(VectorFormat vform,
2122 LogicVRegister dst,
2123 const LogicPRegister& pg,
2124 const LogicVRegister& src) {
2125 VIXL_ASSERT(IsSVEFormat(vform));
2126 uint64_t result = 0;
2127 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2128 if (!pg.IsActive(vform, i)) continue;
2129
2130 result ^= src.Uint(vform, i);
2131 }
2132 VectorFormat vform_dst =
2133 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2134 dst.ClearForWrite(vform_dst);
2135 dst.SetUint(vform_dst, 0, result);
2136 return dst;
2137 }
2138
2139
2140 LogicVRegister Simulator::orv(VectorFormat vform,
2141 LogicVRegister dst,
2142 const LogicPRegister& pg,
2143 const LogicVRegister& src) {
2144 VIXL_ASSERT(IsSVEFormat(vform));
2145 uint64_t result = 0;
2146 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2147 if (!pg.IsActive(vform, i)) continue;
2148
2149 result |= src.Uint(vform, i);
2150 }
2151 VectorFormat vform_dst =
2152 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2153 dst.ClearForWrite(vform_dst);
2154 dst.SetUint(vform_dst, 0, result);
2155 return dst;
2156 }
2157
2158
2159 LogicVRegister Simulator::saddv(VectorFormat vform,
2160 LogicVRegister dst,
2161 const LogicPRegister& pg,
2162 const LogicVRegister& src) {
2163 VIXL_ASSERT(IsSVEFormat(vform));
2164 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2165 int64_t result = 0;
2166 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2167 if (!pg.IsActive(vform, i)) continue;
2168
2169 // The destination register always has D-lane sizes and the source register
2170 // always has S-lanes or smaller, so signed integer overflow -- undefined
2171 // behaviour -- can't occur.
2172 result += src.Int(vform, i);
2173 }
2174
2175 dst.ClearForWrite(kFormatD);
2176 dst.SetInt(kFormatD, 0, result);
2177 return dst;
2178 }
2179
2180
2181 LogicVRegister Simulator::uaddv(VectorFormat vform,
2182 LogicVRegister dst,
2183 const LogicPRegister& pg,
2184 const LogicVRegister& src) {
2185 VIXL_ASSERT(IsSVEFormat(vform));
2186 uint64_t result = 0;
2187 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2188 if (!pg.IsActive(vform, i)) continue;
2189
2190 result += src.Uint(vform, i);
2191 }
2192
2193 dst.ClearForWrite(kFormatD);
2194 dst.SetUint(kFormatD, 0, result);
2195 return dst;
2196 }
2197
2198
// Narrow each lane of `src` to the lane size of `dstform` by truncation, and
// record per-lane signed and unsigned saturation state on `dst` so that a
// caller can saturate the result afterwards.
//
// The source format is derived from `dstform`: it has lanes twice as wide.
// For the 16B, 8H and 4S destination formats the results are written to the
// upper half of `dst` and the rest of `dst` is left as it was; for other
// formats the results are written from lane 0.
//
// `src_is_signed` selects whether source lanes are interpreted as signed or
// unsigned; `dst_is_signed` selects whether results are written with SetInt
// or SetUint.
LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
                                        LogicVRegister dst,
                                        bool dst_is_signed,
                                        const LogicVRegister& src,
                                        bool src_is_signed) {
  bool upperhalf = false;
  VectorFormat srcform = dstform;
  if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
      (dstform == kFormat4S)) {
    // Only half of the destination lanes are produced by this operation.
    upperhalf = true;
    srcform = VectorFormatHalfLanes(srcform);
  }
  srcform = VectorFormatDoubleWidth(srcform);

  // NOTE(review): presumably intended to protect against dst aliasing src;
  // confirm whether copying a LogicVRegister copies the lane data.
  LogicVRegister src_copy = src;

  // Index of the first destination lane to write.
  int offset;
  if (upperhalf) {
    offset = LaneCountFromFormat(dstform) / 2;
  } else {
    offset = 0;
  }

  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
    int64_t ssrc = src_copy.Int(srcform, i);
    uint64_t usrc = src_copy.Uint(srcform, i);

    // Test for signed saturation: the signed source value lies outside the
    // signed range of the destination lane.
    if (ssrc > MaxIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, true);
    } else if (ssrc < MinIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, false);
    }

    // Test for unsigned saturation: the source value lies outside the
    // unsigned range of the destination lane. Only signed sources can
    // saturate in the negative direction.
    if (src_is_signed) {
      if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
        dst.SetUnsignedSat(offset + i, true);
      } else if (ssrc < 0) {
        dst.SetUnsignedSat(offset + i, false);
      }
    } else {
      if (usrc > MaxUintFromFormat(dstform)) {
        dst.SetUnsignedSat(offset + i, true);
      }
    }

    // Truncate to the destination lane size.
    int64_t result;
    if (src_is_signed) {
      result = ssrc & MaxUintFromFormat(dstform);
    } else {
      result = usrc & MaxUintFromFormat(dstform);
    }

    if (dst_is_signed) {
      dst.SetInt(dstform, offset + i, result);
    } else {
      dst.SetUint(dstform, offset + i, result);
    }
  }

  // When writing the upper half, the lower half of dst must be preserved, so
  // ClearForWrite is only applied in the lower-half case.
  if (!upperhalf) {
    dst.ClearForWrite(dstform);
  }
  return dst;
}
2265
2266
2267 LogicVRegister Simulator::xtn(VectorFormat vform,
2268 LogicVRegister dst,
2269 const LogicVRegister& src) {
2270 return extractnarrow(vform, dst, true, src, true);
2271 }
2272
2273
2274 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2275 LogicVRegister dst,
2276 const LogicVRegister& src) {
2277 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2278 }
2279
2280
2281 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2282 LogicVRegister dst,
2283 const LogicVRegister& src) {
2284 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2285 }
2286
2287
2288 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2289 LogicVRegister dst,
2290 const LogicVRegister& src) {
2291 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2292 }
2293
2294
2295 LogicVRegister Simulator::absdiff(VectorFormat vform,
2296 LogicVRegister dst,
2297 const LogicVRegister& src1,
2298 const LogicVRegister& src2,
2299 bool is_signed) {
2300 dst.ClearForWrite(vform);
2301 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2302 bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2303 : (src1.Uint(vform, i) > src2.Uint(vform, i));
2304 // Always calculate the answer using unsigned arithmetic, to avoid
2305 // implementation-defined signed overflow.
2306 if (src1_gt_src2) {
2307 dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2308 } else {
2309 dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2310 }
2311 }
2312 return dst;
2313 }
2314
2315
2316 LogicVRegister Simulator::saba(VectorFormat vform,
2317 LogicVRegister dst,
2318 const LogicVRegister& src1,
2319 const LogicVRegister& src2) {
2320 SimVRegister temp;
2321 dst.ClearForWrite(vform);
2322 absdiff(vform, temp, src1, src2, true);
2323 add(vform, dst, dst, temp);
2324 return dst;
2325 }
2326
2327
2328 LogicVRegister Simulator::uaba(VectorFormat vform,
2329 LogicVRegister dst,
2330 const LogicVRegister& src1,
2331 const LogicVRegister& src2) {
2332 SimVRegister temp;
2333 dst.ClearForWrite(vform);
2334 absdiff(vform, temp, src1, src2, false);
2335 add(vform, dst, dst, temp);
2336 return dst;
2337 }
2338
2339
2340 LogicVRegister Simulator::not_(VectorFormat vform,
2341 LogicVRegister dst,
2342 const LogicVRegister& src) {
2343 dst.ClearForWrite(vform);
2344 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2345 dst.SetUint(vform, i, ~src.Uint(vform, i));
2346 }
2347 return dst;
2348 }
2349
2350
2351 LogicVRegister Simulator::rbit(VectorFormat vform,
2352 LogicVRegister dst,
2353 const LogicVRegister& src) {
2354 uint64_t result[kZRegMaxSizeInBytes];
2355 int lane_count = LaneCountFromFormat(vform);
2356 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2357 uint64_t reversed_value;
2358 uint64_t value;
2359 for (int i = 0; i < lane_count; i++) {
2360 value = src.Uint(vform, i);
2361 reversed_value = 0;
2362 for (int j = 0; j < lane_size_in_bits; j++) {
2363 reversed_value = (reversed_value << 1) | (value & 1);
2364 value >>= 1;
2365 }
2366 result[i] = reversed_value;
2367 }
2368
2369 dst.ClearForWrite(vform);
2370 for (int i = 0; i < lane_count; ++i) {
2371 dst.SetUint(vform, i, result[i]);
2372 }
2373 return dst;
2374 }
2375
2376
2377 LogicVRegister Simulator::rev(VectorFormat vform,
2378 LogicVRegister dst,
2379 const LogicVRegister& src) {
2380 VIXL_ASSERT(IsSVEFormat(vform));
2381 int lane_count = LaneCountFromFormat(vform);
2382 for (int i = 0; i < lane_count / 2; i++) {
2383 uint64_t t = src.Uint(vform, i);
2384 dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2385 dst.SetUint(vform, lane_count - i - 1, t);
2386 }
2387 return dst;
2388 }
2389
2390
2391 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2392 LogicVRegister dst,
2393 const LogicVRegister& src,
2394 int rev_size) {
2395 uint64_t result[kZRegMaxSizeInBytes] = {};
2396 int lane_count = LaneCountFromFormat(vform);
2397 int lane_size = LaneSizeInBytesFromFormat(vform);
2398 int lanes_per_loop = rev_size / lane_size;
2399 for (int i = 0; i < lane_count; i += lanes_per_loop) {
2400 for (int j = 0; j < lanes_per_loop; j++) {
2401 result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2402 }
2403 }
2404 dst.ClearForWrite(vform);
2405 for (int i = 0; i < lane_count; ++i) {
2406 dst.SetUint(vform, i, result[i]);
2407 }
2408 return dst;
2409 }
2410
2411
2412 LogicVRegister Simulator::rev16(VectorFormat vform,
2413 LogicVRegister dst,
2414 const LogicVRegister& src) {
2415 return rev_byte(vform, dst, src, 2);
2416 }
2417
2418
2419 LogicVRegister Simulator::rev32(VectorFormat vform,
2420 LogicVRegister dst,
2421 const LogicVRegister& src) {
2422 return rev_byte(vform, dst, src, 4);
2423 }
2424
2425
2426 LogicVRegister Simulator::rev64(VectorFormat vform,
2427 LogicVRegister dst,
2428 const LogicVRegister& src) {
2429 return rev_byte(vform, dst, src, 8);
2430 }
2431
2432 LogicVRegister Simulator::addlp(VectorFormat vform,
2433 LogicVRegister dst,
2434 const LogicVRegister& src,
2435 bool is_signed,
2436 bool do_accumulate) {
2437 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2438 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
2439
2440 uint64_t result[kZRegMaxSizeInBytes];
2441 int lane_count = LaneCountFromFormat(vform);
2442 for (int i = 0; i < lane_count; i++) {
2443 if (is_signed) {
2444 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2445 src.Int(vformsrc, 2 * i + 1));
2446 } else {
2447 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2448 }
2449 }
2450
2451 dst.ClearForWrite(vform);
2452 for (int i = 0; i < lane_count; ++i) {
2453 if (do_accumulate) {
2454 result[i] += dst.Uint(vform, i);
2455 }
2456 dst.SetUint(vform, i, result[i]);
2457 }
2458
2459 return dst;
2460 }
2461
2462
2463 LogicVRegister Simulator::saddlp(VectorFormat vform,
2464 LogicVRegister dst,
2465 const LogicVRegister& src) {
2466 return addlp(vform, dst, src, true, false);
2467 }
2468
2469
2470 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2471 LogicVRegister dst,
2472 const LogicVRegister& src) {
2473 return addlp(vform, dst, src, false, false);
2474 }
2475
2476
2477 LogicVRegister Simulator::sadalp(VectorFormat vform,
2478 LogicVRegister dst,
2479 const LogicVRegister& src) {
2480 return addlp(vform, dst, src, true, true);
2481 }
2482
2483
2484 LogicVRegister Simulator::uadalp(VectorFormat vform,
2485 LogicVRegister dst,
2486 const LogicVRegister& src) {
2487 return addlp(vform, dst, src, false, true);
2488 }
2489
2490 LogicVRegister Simulator::ror(VectorFormat vform,
2491 LogicVRegister dst,
2492 const LogicVRegister& src,
2493 int rotation) {
2494 int width = LaneSizeInBitsFromFormat(vform);
2495 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2496 uint64_t value = src.Uint(vform, i);
2497 dst.SetUint(vform, i, RotateRight(value, rotation, width));
2498 }
2499 return dst;
2500 }
2501
2502 LogicVRegister Simulator::ext(VectorFormat vform,
2503 LogicVRegister dst,
2504 const LogicVRegister& src1,
2505 const LogicVRegister& src2,
2506 int index) {
2507 uint8_t result[kZRegMaxSizeInBytes] = {};
2508 int lane_count = LaneCountFromFormat(vform);
2509 for (int i = 0; i < lane_count - index; ++i) {
2510 result[i] = src1.Uint(vform, i + index);
2511 }
2512 for (int i = 0; i < index; ++i) {
2513 result[lane_count - index + i] = src2.Uint(vform, i);
2514 }
2515 dst.ClearForWrite(vform);
2516 for (int i = 0; i < lane_count; ++i) {
2517 dst.SetUint(vform, i, result[i]);
2518 }
2519 return dst;
2520 }
2521
2522 LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
2523 LogicVRegister dst,
2524 const LogicVRegister& src,
2525 int index) {
2526 if (index < 0) index += LaneCountFromFormat(vform);
2527 VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
2528 index *= LaneSizeInBytesFromFormat(vform);
2529 return ext(kFormatVnB, dst, src, src, index);
2530 }
2531
2532
2533 template <typename T>
2534 LogicVRegister Simulator::fadda(VectorFormat vform,
2535 LogicVRegister acc,
2536 const LogicPRegister& pg,
2537 const LogicVRegister& src) {
2538 T result = acc.Float<T>(0);
2539 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2540 if (!pg.IsActive(vform, i)) continue;
2541
2542 result = FPAdd(result, src.Float<T>(i));
2543 }
2544 VectorFormat vform_dst =
2545 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2546 acc.ClearForWrite(vform_dst);
2547 acc.SetFloat(0, result);
2548 return acc;
2549 }
2550
2551 LogicVRegister Simulator::fadda(VectorFormat vform,
2552 LogicVRegister acc,
2553 const LogicPRegister& pg,
2554 const LogicVRegister& src) {
2555 switch (LaneSizeInBitsFromFormat(vform)) {
2556 case kHRegSize:
2557 fadda<SimFloat16>(vform, acc, pg, src);
2558 break;
2559 case kSRegSize:
2560 fadda<float>(vform, acc, pg, src);
2561 break;
2562 case kDRegSize:
2563 fadda<double>(vform, acc, pg, src);
2564 break;
2565 default:
2566 VIXL_UNREACHABLE();
2567 }
2568 return acc;
2569 }
2570
2571 template <typename T>
2572 LogicVRegister Simulator::fcadd(VectorFormat vform,
2573 LogicVRegister dst, // d
2574 const LogicVRegister& src1, // n
2575 const LogicVRegister& src2, // m
2576 int rot) {
2577 int elements = LaneCountFromFormat(vform);
2578
2579 T element1, element3;
2580 rot = (rot == 1) ? 270 : 90;
2581
2582 // Loop example:
2583 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2584 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2585
2586 for (int e = 0; e <= (elements / 2) - 1; e++) {
2587 switch (rot) {
2588 case 90:
2589 element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2590 element3 = src2.Float<T>(e * 2);
2591 break;
2592 case 270:
2593 element1 = src2.Float<T>(e * 2 + 1);
2594 element3 = FPNeg(src2.Float<T>(e * 2));
2595 break;
2596 default:
2597 VIXL_UNREACHABLE();
2598 return dst; // prevents "element(n) may be unintialized" errors
2599 }
2600 dst.ClearForWrite(vform);
2601 dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2602 dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2603 }
2604 return dst;
2605 }
2606
2607
2608 LogicVRegister Simulator::fcadd(VectorFormat vform,
2609 LogicVRegister dst, // d
2610 const LogicVRegister& src1, // n
2611 const LogicVRegister& src2, // m
2612 int rot) {
2613 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2614 fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2615 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2616 fcadd<float>(vform, dst, src1, src2, rot);
2617 } else {
2618 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2619 fcadd<double>(vform, dst, src1, src2, rot);
2620 }
2621 return dst;
2622 }
2623
// Floating-point complex multiply-accumulate with rotation. Lanes are treated
// as pairs: lanes (2e, 2e + 1) hold the two components of complex number e.
//
// For each complex number e, one component of the src1 number is multiplied
// by both (possibly negated, possibly swapped) components of a src2 number,
// and the two products are combined with the matching lanes of `acc` using
// FPMulAdd before being written to `dst`.
//
// `index` selects the src2 complex number: a negative value means "use the
// number at the same position as e" (vector form); otherwise the number at
// `index` is used for every e (indexed-element form).
//
// `rot` is the rotation in units of 90 degrees (0 ... 3).
template <typename T>
LogicVRegister Simulator::fcmla(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                const LogicVRegister& acc,
                                int index,
                                int rot) {
  int elements = LaneCountFromFormat(vform);

  // element1/element3 come from src2 and element2/element4 from src1;
  // (element2, element1) feed the even lane and (element4, element3) feed the
  // odd lane.
  T element1, element2, element3, element4;
  // Convert the rotation to degrees.
  rot *= 90;

  // Loop example:
  // 2S --> (2/2 = 1) - 1 = 0 --> 1 x Complex Number (2x components: r+i)
  // 4S --> (4/2 = 2) - 1 = 1 --> 2 x Complex Number (2x2 components: r+i)

  for (int e = 0; e <= (elements / 2) - 1; e++) {
    // A negative index indicates a vector/vector rather than
    // vector/indexed-element operation.
    int f = (index < 0) ? e : index;

    switch (rot) {
      case 0:
        // Even lane of src1 times (even, odd) of src2.
        element1 = src2.Float<T>(f * 2);
        element2 = src1.Float<T>(e * 2);
        element3 = src2.Float<T>(f * 2 + 1);
        element4 = src1.Float<T>(e * 2);
        break;
      case 90:
        // Odd lane of src1 times (-odd, even) of src2.
        element1 = FPNeg(src2.Float<T>(f * 2 + 1));
        element2 = src1.Float<T>(e * 2 + 1);
        element3 = src2.Float<T>(f * 2);
        element4 = src1.Float<T>(e * 2 + 1);
        break;
      case 180:
        // Even lane of src1 times (-even, -odd) of src2.
        element1 = FPNeg(src2.Float<T>(f * 2));
        element2 = src1.Float<T>(e * 2);
        element3 = FPNeg(src2.Float<T>(f * 2 + 1));
        element4 = src1.Float<T>(e * 2);
        break;
      case 270:
        // Odd lane of src1 times (odd, -even) of src2.
        element1 = src2.Float<T>(f * 2 + 1);
        element2 = src1.Float<T>(e * 2 + 1);
        element3 = FPNeg(src2.Float<T>(f * 2));
        element4 = src1.Float<T>(e * 2 + 1);
        break;
      default:
        VIXL_UNREACHABLE();
        return dst;  // prevents "element(n) may be uninitialized" errors
    }
    dst.ClearForWrite(vform);
    // NOTE(review): presumably FPMulAdd(a, x, y) computes a + (x * y) --
    // confirm against its definition.
    dst.SetFloat<T>(vform,
                    e * 2,
                    FPMulAdd(acc.Float<T>(e * 2), element2, element1));
    dst.SetFloat<T>(vform,
                    e * 2 + 1,
                    FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
  }
  return dst;
}
2685
2686 LogicVRegister Simulator::fcmla(VectorFormat vform,
2687 LogicVRegister dst,
2688 const LogicVRegister& src1,
2689 const LogicVRegister& src2,
2690 const LogicVRegister& acc,
2691 int rot) {
2692 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2693 fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2694 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2695 fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2696 } else {
2697 fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2698 }
2699 return dst;
2700 }
2701
2702
2703 LogicVRegister Simulator::fcmla(VectorFormat vform,
2704 LogicVRegister dst, // d
2705 const LogicVRegister& src1, // n
2706 const LogicVRegister& src2, // m
2707 int index,
2708 int rot) {
2709 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2710 VIXL_UNIMPLEMENTED();
2711 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2712 fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2713 } else {
2714 fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2715 }
2716 return dst;
2717 }
2718
2719 LogicVRegister Simulator::cadd(VectorFormat vform,
2720 LogicVRegister dst,
2721 const LogicVRegister& src1,
2722 const LogicVRegister& src2,
2723 int rot,
2724 bool saturate) {
2725 SimVRegister src1_r, src1_i;
2726 SimVRegister src2_r, src2_i;
2727 SimVRegister zero;
2728 zero.Clear();
2729 uzp1(vform, src1_r, src1, zero);
2730 uzp2(vform, src1_i, src1, zero);
2731 uzp1(vform, src2_r, src2, zero);
2732 uzp2(vform, src2_i, src2, zero);
2733
2734 if (rot == 90) {
2735 if (saturate) {
2736 sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2737 add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2738 } else {
2739 sub(vform, src1_r, src1_r, src2_i);
2740 add(vform, src1_i, src1_i, src2_r);
2741 }
2742 } else {
2743 VIXL_ASSERT(rot == 270);
2744 if (saturate) {
2745 add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2746 sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2747 } else {
2748 add(vform, src1_r, src1_r, src2_i);
2749 sub(vform, src1_i, src1_i, src2_r);
2750 }
2751 }
2752
2753 zip1(vform, dst, src1_r, src1_i);
2754 return dst;
2755 }
2756
2757 LogicVRegister Simulator::cmla(VectorFormat vform,
2758 LogicVRegister dst,
2759 const LogicVRegister& srca,
2760 const LogicVRegister& src1,
2761 const LogicVRegister& src2,
2762 int rot) {
2763 SimVRegister src1_a;
2764 SimVRegister src2_a, src2_b;
2765 SimVRegister srca_i, srca_r;
2766 SimVRegister zero, temp;
2767 zero.Clear();
2768
2769 if ((rot == 0) || (rot == 180)) {
2770 uzp1(vform, src1_a, src1, zero);
2771 uzp1(vform, src2_a, src2, zero);
2772 uzp2(vform, src2_b, src2, zero);
2773 } else {
2774 uzp2(vform, src1_a, src1, zero);
2775 uzp2(vform, src2_a, src2, zero);
2776 uzp1(vform, src2_b, src2, zero);
2777 }
2778
2779 uzp1(vform, srca_r, srca, zero);
2780 uzp2(vform, srca_i, srca, zero);
2781
2782 bool sub_r = (rot == 90) || (rot == 180);
2783 bool sub_i = (rot == 180) || (rot == 270);
2784
2785 mul(vform, temp, src1_a, src2_a);
2786 if (sub_r) {
2787 sub(vform, srca_r, srca_r, temp);
2788 } else {
2789 add(vform, srca_r, srca_r, temp);
2790 }
2791
2792 mul(vform, temp, src1_a, src2_b);
2793 if (sub_i) {
2794 sub(vform, srca_i, srca_i, temp);
2795 } else {
2796 add(vform, srca_i, srca_i, temp);
2797 }
2798
2799 zip1(vform, dst, srca_r, srca_i);
2800 return dst;
2801 }
2802
2803 LogicVRegister Simulator::cmla(VectorFormat vform,
2804 LogicVRegister dst,
2805 const LogicVRegister& srca,
2806 const LogicVRegister& src1,
2807 const LogicVRegister& src2,
2808 int index,
2809 int rot) {
2810 SimVRegister temp;
2811 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
2812 return cmla(vform, dst, srca, src1, temp, rot);
2813 }
2814
2815 LogicVRegister Simulator::bgrp(VectorFormat vform,
2816 LogicVRegister dst,
2817 const LogicVRegister& src1,
2818 const LogicVRegister& src2,
2819 bool do_bext) {
2820 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2821 uint64_t value = src1.Uint(vform, i);
2822 uint64_t mask = src2.Uint(vform, i);
2823 int high_pos = 0;
2824 int low_pos = 0;
2825 uint64_t result_high = 0;
2826 uint64_t result_low = 0;
2827 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2828 if ((mask & 1) == 0) {
2829 result_high |= (value & 1) << high_pos;
2830 high_pos++;
2831 } else {
2832 result_low |= (value & 1) << low_pos;
2833 low_pos++;
2834 }
2835 mask >>= 1;
2836 value >>= 1;
2837 }
2838
2839 if (!do_bext) {
2840 result_low |= result_high << low_pos;
2841 }
2842
2843 dst.SetUint(vform, i, result_low);
2844 }
2845 return dst;
2846 }
2847
2848 LogicVRegister Simulator::bdep(VectorFormat vform,
2849 LogicVRegister dst,
2850 const LogicVRegister& src1,
2851 const LogicVRegister& src2) {
2852 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2853 uint64_t value = src1.Uint(vform, i);
2854 uint64_t mask = src2.Uint(vform, i);
2855 uint64_t result = 0;
2856 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2857 if ((mask & 1) == 1) {
2858 result |= (value & 1) << j;
2859 value >>= 1;
2860 }
2861 mask >>= 1;
2862 }
2863 dst.SetUint(vform, i, result);
2864 }
2865 return dst;
2866 }
2867
2868 LogicVRegister Simulator::histogram(VectorFormat vform,
2869 LogicVRegister dst,
2870 const LogicPRegister& pg,
2871 const LogicVRegister& src1,
2872 const LogicVRegister& src2,
2873 bool do_segmented) {
2874 int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
2875 uint64_t result[kZRegMaxSizeInBytes];
2876
2877 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2878 uint64_t count = 0;
2879 uint64_t value = src1.Uint(vform, i);
2880
2881 int segment = do_segmented ? (i / elements_per_segment) : 0;
2882 int segment_offset = segment * elements_per_segment;
2883 int hist_limit = do_segmented ? elements_per_segment : (i + 1);
2884 for (int j = 0; j < hist_limit; j++) {
2885 if (pg.IsActive(vform, j) &&
2886 (value == src2.Uint(vform, j + segment_offset))) {
2887 count++;
2888 }
2889 }
2890 result[i] = count;
2891 }
2892 dst.SetUintArray(vform, result);
2893 return dst;
2894 }
2895
2896 LogicVRegister Simulator::dup_element(VectorFormat vform,
2897 LogicVRegister dst,
2898 const LogicVRegister& src,
2899 int src_index) {
2900 if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
2901 // When duplicating an element larger than 64 bits, split the element into
2902 // 64-bit parts, and duplicate the parts across the destination.
2903 uint64_t d[4];
2904 int count = (vform == kFormatVnQ) ? 2 : 4;
2905 for (int i = 0; i < count; i++) {
2906 d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
2907 }
2908 dst.Clear();
2909 for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
2910 dst.SetUint(kFormatVnD, i, d[i % count]);
2911 }
2912 } else {
2913 int lane_count = LaneCountFromFormat(vform);
2914 uint64_t value = src.Uint(vform, src_index);
2915 dst.ClearForWrite(vform);
2916 for (int i = 0; i < lane_count; ++i) {
2917 dst.SetUint(vform, i, value);
2918 }
2919 }
2920 return dst;
2921 }
2922
2923 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2924 LogicVRegister dst,
2925 const LogicVRegister& src,
2926 int src_index) {
2927 // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2928 // whereas in NEON, the size of segment is equal to the size of register
2929 // itself.
2930 int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2931 VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2932 int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2933
2934 VIXL_ASSERT(src_index >= 0);
2935 VIXL_ASSERT(src_index < lanes_per_segment);
2936
2937 dst.ClearForWrite(vform);
2938 for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2939 uint64_t value = src.Uint(vform, j + src_index);
2940 for (int i = 0; i < lanes_per_segment; i++) {
2941 dst.SetUint(vform, j + i, value);
2942 }
2943 }
2944 return dst;
2945 }
2946
2947 LogicVRegister Simulator::dup_elements_to_segments(
2948 VectorFormat vform,
2949 LogicVRegister dst,
2950 const std::pair<int, int>& src_and_index) {
2951 return dup_elements_to_segments(vform,
2952 dst,
2953 ReadVRegister(src_and_index.first),
2954 src_and_index.second);
2955 }
2956
2957 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2958 LogicVRegister dst,
2959 uint64_t imm) {
2960 int lane_count = LaneCountFromFormat(vform);
2961 uint64_t value = imm & MaxUintFromFormat(vform);
2962 dst.ClearForWrite(vform);
2963 for (int i = 0; i < lane_count; ++i) {
2964 dst.SetUint(vform, i, value);
2965 }
2966 return dst;
2967 }
2968
2969
2970 LogicVRegister Simulator::ins_element(VectorFormat vform,
2971 LogicVRegister dst,
2972 int dst_index,
2973 const LogicVRegister& src,
2974 int src_index) {
2975 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2976 return dst;
2977 }
2978
2979
2980 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2981 LogicVRegister dst,
2982 int dst_index,
2983 uint64_t imm) {
2984 uint64_t value = imm & MaxUintFromFormat(vform);
2985 dst.SetUint(vform, dst_index, value);
2986 return dst;
2987 }
2988
2989
2990 LogicVRegister Simulator::index(VectorFormat vform,
2991 LogicVRegister dst,
2992 uint64_t start,
2993 uint64_t step) {
2994 VIXL_ASSERT(IsSVEFormat(vform));
2995 uint64_t value = start;
2996 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2997 dst.SetUint(vform, i, value);
2998 value += step;
2999 }
3000 return dst;
3001 }
3002
3003
3004 LogicVRegister Simulator::insr(VectorFormat vform,
3005 LogicVRegister dst,
3006 uint64_t imm) {
3007 VIXL_ASSERT(IsSVEFormat(vform));
3008 for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
3009 dst.SetUint(vform, i, dst.Uint(vform, i - 1));
3010 }
3011 dst.SetUint(vform, 0, imm);
3012 return dst;
3013 }
3014
3015
3016 LogicVRegister Simulator::mov(VectorFormat vform,
3017 LogicVRegister dst,
3018 const LogicVRegister& src) {
3019 dst.ClearForWrite(vform);
3020 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
3021 dst.SetUint(vform, lane, src.Uint(vform, lane));
3022 }
3023 return dst;
3024 }
3025
3026
3027 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
3028 // Avoid a copy if the registers already alias.
3029 if (dst.Aliases(src)) return dst;
3030
3031 for (int i = 0; i < dst.GetChunkCount(); i++) {
3032 dst.SetChunk(i, src.GetChunk(i));
3033 }
3034 return dst;
3035 }
3036
3037
3038 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3039 LogicVRegister dst,
3040 const SimPRegister& pg,
3041 const LogicVRegister& src) {
3042 return sel(vform, dst, pg, src, dst);
3043 }
3044
3045 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3046 LogicVRegister dst,
3047 const SimPRegister& pg,
3048 const LogicVRegister& src) {
3049 SimVRegister zero;
3050 dup_immediate(vform, zero, 0);
3051 return sel(vform, dst, pg, src, zero);
3052 }
3053
3054 LogicVRegister Simulator::mov_alternating(VectorFormat vform,
3055 LogicVRegister dst,
3056 const LogicVRegister& src,
3057 int start_at) {
3058 VIXL_ASSERT((start_at == 0) || (start_at == 1));
3059 for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
3060 dst.SetUint(vform, i, src.Uint(vform, i));
3061 }
3062 return dst;
3063 }
3064
3065 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3066 const LogicPRegister& pg,
3067 const LogicPRegister& src) {
3068 return sel(dst, pg, src, dst);
3069 }
3070
3071 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3072 const LogicPRegister& pg,
3073 const LogicPRegister& src) {
3074 SimPRegister all_false;
3075 return sel(dst, pg, src, pfalse(all_false));
3076 }
3077
3078 LogicVRegister Simulator::movi(VectorFormat vform,
3079 LogicVRegister dst,
3080 uint64_t imm) {
3081 int lane_count = LaneCountFromFormat(vform);
3082 dst.ClearForWrite(vform);
3083 for (int i = 0; i < lane_count; ++i) {
3084 dst.SetUint(vform, i, imm);
3085 }
3086 return dst;
3087 }
3088
3089
3090 LogicVRegister Simulator::mvni(VectorFormat vform,
3091 LogicVRegister dst,
3092 uint64_t imm) {
3093 int lane_count = LaneCountFromFormat(vform);
3094 dst.ClearForWrite(vform);
3095 for (int i = 0; i < lane_count; ++i) {
3096 dst.SetUint(vform, i, ~imm);
3097 }
3098 return dst;
3099 }
3100
3101
3102 LogicVRegister Simulator::orr(VectorFormat vform,
3103 LogicVRegister dst,
3104 const LogicVRegister& src,
3105 uint64_t imm) {
3106 uint64_t result[16];
3107 int lane_count = LaneCountFromFormat(vform);
3108 for (int i = 0; i < lane_count; ++i) {
3109 result[i] = src.Uint(vform, i) | imm;
3110 }
3111 dst.ClearForWrite(vform);
3112 for (int i = 0; i < lane_count; ++i) {
3113 dst.SetUint(vform, i, result[i]);
3114 }
3115 return dst;
3116 }
3117
3118
3119 LogicVRegister Simulator::uxtl(VectorFormat vform,
3120 LogicVRegister dst,
3121 const LogicVRegister& src,
3122 bool is_2) {
3123 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3124 int lane_count = LaneCountFromFormat(vform);
3125 int src_offset = is_2 ? lane_count : 0;
3126
3127 dst.ClearForWrite(vform);
3128 for (int i = 0; i < lane_count; i++) {
3129 dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
3130 }
3131 return dst;
3132 }
3133
3134
3135 LogicVRegister Simulator::sxtl(VectorFormat vform,
3136 LogicVRegister dst,
3137 const LogicVRegister& src,
3138 bool is_2) {
3139 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3140 int lane_count = LaneCountFromFormat(vform);
3141 int src_offset = is_2 ? lane_count : 0;
3142
3143 dst.ClearForWrite(vform);
3144 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3145 dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
3146 }
3147 return dst;
3148 }
3149
3150
3151 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3152 LogicVRegister dst,
3153 const LogicVRegister& src) {
3154 return uxtl(vform, dst, src, /* is_2 = */ true);
3155 }
3156
3157
3158 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3159 LogicVRegister dst,
3160 const LogicVRegister& src) {
3161 return sxtl(vform, dst, src, /* is_2 = */ true);
3162 }
3163
3164
3165 LogicVRegister Simulator::uxt(VectorFormat vform,
3166 LogicVRegister dst,
3167 const LogicVRegister& src,
3168 unsigned from_size_in_bits) {
3169 int lane_count = LaneCountFromFormat(vform);
3170 uint64_t mask = GetUintMask(from_size_in_bits);
3171
3172 dst.ClearForWrite(vform);
3173 for (int i = 0; i < lane_count; i++) {
3174 dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3175 }
3176 return dst;
3177 }
3178
3179
3180 LogicVRegister Simulator::sxt(VectorFormat vform,
3181 LogicVRegister dst,
3182 const LogicVRegister& src,
3183 unsigned from_size_in_bits) {
3184 int lane_count = LaneCountFromFormat(vform);
3185
3186 dst.ClearForWrite(vform);
3187 for (int i = 0; i < lane_count; i++) {
3188 uint64_t value =
3189 ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3190 dst.SetInt(vform, i, value);
3191 }
3192 return dst;
3193 }
3194
3195
3196 LogicVRegister Simulator::shrn(VectorFormat vform,
3197 LogicVRegister dst,
3198 const LogicVRegister& src,
3199 int shift) {
3200 SimVRegister temp;
3201 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3202 VectorFormat vform_dst = vform;
3203 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3204 return extractnarrow(vform_dst, dst, false, shifted_src, false);
3205 }
3206
3207
3208 LogicVRegister Simulator::shrn2(VectorFormat vform,
3209 LogicVRegister dst,
3210 const LogicVRegister& src,
3211 int shift) {
3212 SimVRegister temp;
3213 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3214 VectorFormat vformdst = vform;
3215 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3216 return extractnarrow(vformdst, dst, false, shifted_src, false);
3217 }
3218
3219
3220 LogicVRegister Simulator::rshrn(VectorFormat vform,
3221 LogicVRegister dst,
3222 const LogicVRegister& src,
3223 int shift) {
3224 SimVRegister temp;
3225 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3226 VectorFormat vformdst = vform;
3227 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3228 return extractnarrow(vformdst, dst, false, shifted_src, false);
3229 }
3230
3231
3232 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3233 LogicVRegister dst,
3234 const LogicVRegister& src,
3235 int shift) {
3236 SimVRegister temp;
3237 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3238 VectorFormat vformdst = vform;
3239 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3240 return extractnarrow(vformdst, dst, false, shifted_src, false);
3241 }
3242
3243 LogicVRegister Simulator::Table(VectorFormat vform,
3244 LogicVRegister dst,
3245 const LogicVRegister& ind,
3246 bool zero_out_of_bounds,
3247 const LogicVRegister* tab1,
3248 const LogicVRegister* tab2,
3249 const LogicVRegister* tab3,
3250 const LogicVRegister* tab4) {
3251 VIXL_ASSERT(tab1 != NULL);
3252 int lane_count = LaneCountFromFormat(vform);
3253 VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
3254 uint64_t table[kZRegMaxSizeInBytes * 2];
3255 uint64_t result[kZRegMaxSizeInBytes];
3256
3257 // For Neon, the table source registers are always 16B, and Neon allows only
3258 // 8B or 16B vform for the destination, so infer the table format from the
3259 // destination.
3260 VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
3261
3262 uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
3263 if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
3264 if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
3265 if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
3266
3267 for (int i = 0; i < lane_count; i++) {
3268 uint64_t index = ind.Uint(vform, i);
3269 result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
3270 if (index < tab_size) result[i] = table[index];
3271 }
3272 dst.SetUintArray(vform, result);
3273 return dst;
3274 }
3275
3276 LogicVRegister Simulator::tbl(VectorFormat vform,
3277 LogicVRegister dst,
3278 const LogicVRegister& tab,
3279 const LogicVRegister& ind) {
3280 return Table(vform, dst, ind, true, &tab);
3281 }
3282
3283
3284 LogicVRegister Simulator::tbl(VectorFormat vform,
3285 LogicVRegister dst,
3286 const LogicVRegister& tab,
3287 const LogicVRegister& tab2,
3288 const LogicVRegister& ind) {
3289 return Table(vform, dst, ind, true, &tab, &tab2);
3290 }
3291
3292
3293 LogicVRegister Simulator::tbl(VectorFormat vform,
3294 LogicVRegister dst,
3295 const LogicVRegister& tab,
3296 const LogicVRegister& tab2,
3297 const LogicVRegister& tab3,
3298 const LogicVRegister& ind) {
3299 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3300 }
3301
3302
3303 LogicVRegister Simulator::tbl(VectorFormat vform,
3304 LogicVRegister dst,
3305 const LogicVRegister& tab,
3306 const LogicVRegister& tab2,
3307 const LogicVRegister& tab3,
3308 const LogicVRegister& tab4,
3309 const LogicVRegister& ind) {
3310 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3311 }
3312
3313
3314 LogicVRegister Simulator::tbx(VectorFormat vform,
3315 LogicVRegister dst,
3316 const LogicVRegister& tab,
3317 const LogicVRegister& ind) {
3318 return Table(vform, dst, ind, false, &tab);
3319 }
3320
3321
3322 LogicVRegister Simulator::tbx(VectorFormat vform,
3323 LogicVRegister dst,
3324 const LogicVRegister& tab,
3325 const LogicVRegister& tab2,
3326 const LogicVRegister& ind) {
3327 return Table(vform, dst, ind, false, &tab, &tab2);
3328 }
3329
3330
3331 LogicVRegister Simulator::tbx(VectorFormat vform,
3332 LogicVRegister dst,
3333 const LogicVRegister& tab,
3334 const LogicVRegister& tab2,
3335 const LogicVRegister& tab3,
3336 const LogicVRegister& ind) {
3337 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3338 }
3339
3340
3341 LogicVRegister Simulator::tbx(VectorFormat vform,
3342 LogicVRegister dst,
3343 const LogicVRegister& tab,
3344 const LogicVRegister& tab2,
3345 const LogicVRegister& tab3,
3346 const LogicVRegister& tab4,
3347 const LogicVRegister& ind) {
3348 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3349 }
3350
3351
3352 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3353 LogicVRegister dst,
3354 const LogicVRegister& src,
3355 int shift) {
3356 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3357 }
3358
3359
3360 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3361 LogicVRegister dst,
3362 const LogicVRegister& src,
3363 int shift) {
3364 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3365 }
3366
3367
3368 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3369 LogicVRegister dst,
3370 const LogicVRegister& src,
3371 int shift) {
3372 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3373 }
3374
3375
3376 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3377 LogicVRegister dst,
3378 const LogicVRegister& src,
3379 int shift) {
3380 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3381 }
3382
3383
3384 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3385 LogicVRegister dst,
3386 const LogicVRegister& src,
3387 int shift) {
3388 SimVRegister temp;
3389 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3390 VectorFormat vformdst = vform;
3391 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3392 return sqxtn(vformdst, dst, shifted_src);
3393 }
3394
3395
3396 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3397 LogicVRegister dst,
3398 const LogicVRegister& src,
3399 int shift) {
3400 SimVRegister temp;
3401 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3402 VectorFormat vformdst = vform;
3403 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3404 return sqxtn(vformdst, dst, shifted_src);
3405 }
3406
3407
3408 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3409 LogicVRegister dst,
3410 const LogicVRegister& src,
3411 int shift) {
3412 SimVRegister temp;
3413 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3414 VectorFormat vformdst = vform;
3415 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3416 return sqxtn(vformdst, dst, shifted_src);
3417 }
3418
3419
3420 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3421 LogicVRegister dst,
3422 const LogicVRegister& src,
3423 int shift) {
3424 SimVRegister temp;
3425 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3426 VectorFormat vformdst = vform;
3427 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3428 return sqxtn(vformdst, dst, shifted_src);
3429 }
3430
3431
3432 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3433 LogicVRegister dst,
3434 const LogicVRegister& src,
3435 int shift) {
3436 SimVRegister temp;
3437 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3438 VectorFormat vformdst = vform;
3439 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3440 return sqxtun(vformdst, dst, shifted_src);
3441 }
3442
3443
3444 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3445 LogicVRegister dst,
3446 const LogicVRegister& src,
3447 int shift) {
3448 SimVRegister temp;
3449 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3450 VectorFormat vformdst = vform;
3451 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3452 return sqxtun(vformdst, dst, shifted_src);
3453 }
3454
3455
3456 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3457 LogicVRegister dst,
3458 const LogicVRegister& src,
3459 int shift) {
3460 SimVRegister temp;
3461 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3462 VectorFormat vformdst = vform;
3463 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3464 return sqxtun(vformdst, dst, shifted_src);
3465 }
3466
3467
3468 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3469 LogicVRegister dst,
3470 const LogicVRegister& src,
3471 int shift) {
3472 SimVRegister temp;
3473 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3474 VectorFormat vformdst = vform;
3475 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3476 return sqxtun(vformdst, dst, shifted_src);
3477 }
3478
3479
3480 LogicVRegister Simulator::uaddl(VectorFormat vform,
3481 LogicVRegister dst,
3482 const LogicVRegister& src1,
3483 const LogicVRegister& src2) {
3484 SimVRegister temp1, temp2;
3485 uxtl(vform, temp1, src1);
3486 uxtl(vform, temp2, src2);
3487 add(vform, dst, temp1, temp2);
3488 return dst;
3489 }
3490
3491
3492 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3493 LogicVRegister dst,
3494 const LogicVRegister& src1,
3495 const LogicVRegister& src2) {
3496 SimVRegister temp1, temp2;
3497 uxtl2(vform, temp1, src1);
3498 uxtl2(vform, temp2, src2);
3499 add(vform, dst, temp1, temp2);
3500 return dst;
3501 }
3502
3503
3504 LogicVRegister Simulator::uaddw(VectorFormat vform,
3505 LogicVRegister dst,
3506 const LogicVRegister& src1,
3507 const LogicVRegister& src2) {
3508 SimVRegister temp;
3509 uxtl(vform, temp, src2);
3510 add(vform, dst, src1, temp);
3511 return dst;
3512 }
3513
3514
3515 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3516 LogicVRegister dst,
3517 const LogicVRegister& src1,
3518 const LogicVRegister& src2) {
3519 SimVRegister temp;
3520 uxtl2(vform, temp, src2);
3521 add(vform, dst, src1, temp);
3522 return dst;
3523 }
3524
3525
3526 LogicVRegister Simulator::saddl(VectorFormat vform,
3527 LogicVRegister dst,
3528 const LogicVRegister& src1,
3529 const LogicVRegister& src2) {
3530 SimVRegister temp1, temp2;
3531 sxtl(vform, temp1, src1);
3532 sxtl(vform, temp2, src2);
3533 add(vform, dst, temp1, temp2);
3534 return dst;
3535 }
3536
3537
3538 LogicVRegister Simulator::saddl2(VectorFormat vform,
3539 LogicVRegister dst,
3540 const LogicVRegister& src1,
3541 const LogicVRegister& src2) {
3542 SimVRegister temp1, temp2;
3543 sxtl2(vform, temp1, src1);
3544 sxtl2(vform, temp2, src2);
3545 add(vform, dst, temp1, temp2);
3546 return dst;
3547 }
3548
3549
3550 LogicVRegister Simulator::saddw(VectorFormat vform,
3551 LogicVRegister dst,
3552 const LogicVRegister& src1,
3553 const LogicVRegister& src2) {
3554 SimVRegister temp;
3555 sxtl(vform, temp, src2);
3556 add(vform, dst, src1, temp);
3557 return dst;
3558 }
3559
3560
3561 LogicVRegister Simulator::saddw2(VectorFormat vform,
3562 LogicVRegister dst,
3563 const LogicVRegister& src1,
3564 const LogicVRegister& src2) {
3565 SimVRegister temp;
3566 sxtl2(vform, temp, src2);
3567 add(vform, dst, src1, temp);
3568 return dst;
3569 }
3570
3571
3572 LogicVRegister Simulator::usubl(VectorFormat vform,
3573 LogicVRegister dst,
3574 const LogicVRegister& src1,
3575 const LogicVRegister& src2) {
3576 SimVRegister temp1, temp2;
3577 uxtl(vform, temp1, src1);
3578 uxtl(vform, temp2, src2);
3579 sub(vform, dst, temp1, temp2);
3580 return dst;
3581 }
3582
3583
3584 LogicVRegister Simulator::usubl2(VectorFormat vform,
3585 LogicVRegister dst,
3586 const LogicVRegister& src1,
3587 const LogicVRegister& src2) {
3588 SimVRegister temp1, temp2;
3589 uxtl2(vform, temp1, src1);
3590 uxtl2(vform, temp2, src2);
3591 sub(vform, dst, temp1, temp2);
3592 return dst;
3593 }
3594
3595
3596 LogicVRegister Simulator::usubw(VectorFormat vform,
3597 LogicVRegister dst,
3598 const LogicVRegister& src1,
3599 const LogicVRegister& src2) {
3600 SimVRegister temp;
3601 uxtl(vform, temp, src2);
3602 sub(vform, dst, src1, temp);
3603 return dst;
3604 }
3605
3606
3607 LogicVRegister Simulator::usubw2(VectorFormat vform,
3608 LogicVRegister dst,
3609 const LogicVRegister& src1,
3610 const LogicVRegister& src2) {
3611 SimVRegister temp;
3612 uxtl2(vform, temp, src2);
3613 sub(vform, dst, src1, temp);
3614 return dst;
3615 }
3616
3617
3618 LogicVRegister Simulator::ssubl(VectorFormat vform,
3619 LogicVRegister dst,
3620 const LogicVRegister& src1,
3621 const LogicVRegister& src2) {
3622 SimVRegister temp1, temp2;
3623 sxtl(vform, temp1, src1);
3624 sxtl(vform, temp2, src2);
3625 sub(vform, dst, temp1, temp2);
3626 return dst;
3627 }
3628
3629
3630 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3631 LogicVRegister dst,
3632 const LogicVRegister& src1,
3633 const LogicVRegister& src2) {
3634 SimVRegister temp1, temp2;
3635 sxtl2(vform, temp1, src1);
3636 sxtl2(vform, temp2, src2);
3637 sub(vform, dst, temp1, temp2);
3638 return dst;
3639 }
3640
3641
3642 LogicVRegister Simulator::ssubw(VectorFormat vform,
3643 LogicVRegister dst,
3644 const LogicVRegister& src1,
3645 const LogicVRegister& src2) {
3646 SimVRegister temp;
3647 sxtl(vform, temp, src2);
3648 sub(vform, dst, src1, temp);
3649 return dst;
3650 }
3651
3652
3653 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3654 LogicVRegister dst,
3655 const LogicVRegister& src1,
3656 const LogicVRegister& src2) {
3657 SimVRegister temp;
3658 sxtl2(vform, temp, src2);
3659 sub(vform, dst, src1, temp);
3660 return dst;
3661 }
3662
3663
3664 LogicVRegister Simulator::uabal(VectorFormat vform,
3665 LogicVRegister dst,
3666 const LogicVRegister& src1,
3667 const LogicVRegister& src2) {
3668 SimVRegister temp1, temp2;
3669 uxtl(vform, temp1, src1);
3670 uxtl(vform, temp2, src2);
3671 uaba(vform, dst, temp1, temp2);
3672 return dst;
3673 }
3674
3675
3676 LogicVRegister Simulator::uabal2(VectorFormat vform,
3677 LogicVRegister dst,
3678 const LogicVRegister& src1,
3679 const LogicVRegister& src2) {
3680 SimVRegister temp1, temp2;
3681 uxtl2(vform, temp1, src1);
3682 uxtl2(vform, temp2, src2);
3683 uaba(vform, dst, temp1, temp2);
3684 return dst;
3685 }
3686
3687
3688 LogicVRegister Simulator::sabal(VectorFormat vform,
3689 LogicVRegister dst,
3690 const LogicVRegister& src1,
3691 const LogicVRegister& src2) {
3692 SimVRegister temp1, temp2;
3693 sxtl(vform, temp1, src1);
3694 sxtl(vform, temp2, src2);
3695 saba(vform, dst, temp1, temp2);
3696 return dst;
3697 }
3698
3699
3700 LogicVRegister Simulator::sabal2(VectorFormat vform,
3701 LogicVRegister dst,
3702 const LogicVRegister& src1,
3703 const LogicVRegister& src2) {
3704 SimVRegister temp1, temp2;
3705 sxtl2(vform, temp1, src1);
3706 sxtl2(vform, temp2, src2);
3707 saba(vform, dst, temp1, temp2);
3708 return dst;
3709 }
3710
3711
3712 LogicVRegister Simulator::uabdl(VectorFormat vform,
3713 LogicVRegister dst,
3714 const LogicVRegister& src1,
3715 const LogicVRegister& src2) {
3716 SimVRegister temp1, temp2;
3717 uxtl(vform, temp1, src1);
3718 uxtl(vform, temp2, src2);
3719 absdiff(vform, dst, temp1, temp2, false);
3720 return dst;
3721 }
3722
3723
3724 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3725 LogicVRegister dst,
3726 const LogicVRegister& src1,
3727 const LogicVRegister& src2) {
3728 SimVRegister temp1, temp2;
3729 uxtl2(vform, temp1, src1);
3730 uxtl2(vform, temp2, src2);
3731 absdiff(vform, dst, temp1, temp2, false);
3732 return dst;
3733 }
3734
3735
3736 LogicVRegister Simulator::sabdl(VectorFormat vform,
3737 LogicVRegister dst,
3738 const LogicVRegister& src1,
3739 const LogicVRegister& src2) {
3740 SimVRegister temp1, temp2;
3741 sxtl(vform, temp1, src1);
3742 sxtl(vform, temp2, src2);
3743 absdiff(vform, dst, temp1, temp2, true);
3744 return dst;
3745 }
3746
3747
3748 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3749 LogicVRegister dst,
3750 const LogicVRegister& src1,
3751 const LogicVRegister& src2) {
3752 SimVRegister temp1, temp2;
3753 sxtl2(vform, temp1, src1);
3754 sxtl2(vform, temp2, src2);
3755 absdiff(vform, dst, temp1, temp2, true);
3756 return dst;
3757 }
3758
3759
3760 LogicVRegister Simulator::umull(VectorFormat vform,
3761 LogicVRegister dst,
3762 const LogicVRegister& src1,
3763 const LogicVRegister& src2,
3764 bool is_2) {
3765 SimVRegister temp1, temp2;
3766 uxtl(vform, temp1, src1, is_2);
3767 uxtl(vform, temp2, src2, is_2);
3768 mul(vform, dst, temp1, temp2);
3769 return dst;
3770 }
3771
3772
3773 LogicVRegister Simulator::umull2(VectorFormat vform,
3774 LogicVRegister dst,
3775 const LogicVRegister& src1,
3776 const LogicVRegister& src2) {
3777 return umull(vform, dst, src1, src2, /* is_2 = */ true);
3778 }
3779
3780
3781 LogicVRegister Simulator::smull(VectorFormat vform,
3782 LogicVRegister dst,
3783 const LogicVRegister& src1,
3784 const LogicVRegister& src2,
3785 bool is_2) {
3786 SimVRegister temp1, temp2;
3787 sxtl(vform, temp1, src1, is_2);
3788 sxtl(vform, temp2, src2, is_2);
3789 mul(vform, dst, temp1, temp2);
3790 return dst;
3791 }
3792
3793
3794 LogicVRegister Simulator::smull2(VectorFormat vform,
3795 LogicVRegister dst,
3796 const LogicVRegister& src1,
3797 const LogicVRegister& src2) {
3798 return smull(vform, dst, src1, src2, /* is_2 = */ true);
3799 }
3800
3801
3802 LogicVRegister Simulator::umlsl(VectorFormat vform,
3803 LogicVRegister dst,
3804 const LogicVRegister& src1,
3805 const LogicVRegister& src2,
3806 bool is_2) {
3807 SimVRegister temp1, temp2;
3808 uxtl(vform, temp1, src1, is_2);
3809 uxtl(vform, temp2, src2, is_2);
3810 mls(vform, dst, dst, temp1, temp2);
3811 return dst;
3812 }
3813
3814
3815 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3816 LogicVRegister dst,
3817 const LogicVRegister& src1,
3818 const LogicVRegister& src2) {
3819 return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
3820 }
3821
3822
3823 LogicVRegister Simulator::smlsl(VectorFormat vform,
3824 LogicVRegister dst,
3825 const LogicVRegister& src1,
3826 const LogicVRegister& src2,
3827 bool is_2) {
3828 SimVRegister temp1, temp2;
3829 sxtl(vform, temp1, src1, is_2);
3830 sxtl(vform, temp2, src2, is_2);
3831 mls(vform, dst, dst, temp1, temp2);
3832 return dst;
3833 }
3834
3835
3836 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3837 LogicVRegister dst,
3838 const LogicVRegister& src1,
3839 const LogicVRegister& src2) {
3840 return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
3841 }
3842
3843
3844 LogicVRegister Simulator::umlal(VectorFormat vform,
3845 LogicVRegister dst,
3846 const LogicVRegister& src1,
3847 const LogicVRegister& src2,
3848 bool is_2) {
3849 SimVRegister temp1, temp2;
3850 uxtl(vform, temp1, src1, is_2);
3851 uxtl(vform, temp2, src2, is_2);
3852 mla(vform, dst, dst, temp1, temp2);
3853 return dst;
3854 }
3855
3856
3857 LogicVRegister Simulator::umlal2(VectorFormat vform,
3858 LogicVRegister dst,
3859 const LogicVRegister& src1,
3860 const LogicVRegister& src2) {
3861 return umlal(vform, dst, src1, src2, /* is_2 = */ true);
3862 }
3863
3864
3865 LogicVRegister Simulator::smlal(VectorFormat vform,
3866 LogicVRegister dst,
3867 const LogicVRegister& src1,
3868 const LogicVRegister& src2,
3869 bool is_2) {
3870 SimVRegister temp1, temp2;
3871 sxtl(vform, temp1, src1, is_2);
3872 sxtl(vform, temp2, src2, is_2);
3873 mla(vform, dst, dst, temp1, temp2);
3874 return dst;
3875 }
3876
3877
3878 LogicVRegister Simulator::smlal2(VectorFormat vform,
3879 LogicVRegister dst,
3880 const LogicVRegister& src1,
3881 const LogicVRegister& src2) {
3882 return smlal(vform, dst, src1, src2, /* is_2 = */ true);
3883 }
3884
3885
3886 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3887 LogicVRegister dst,
3888 const LogicVRegister& src1,
3889 const LogicVRegister& src2,
3890 bool is_2) {
3891 SimVRegister temp;
3892 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3893 return add(vform, dst, dst, product).SignedSaturate(vform);
3894 }
3895
3896
3897 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3898 LogicVRegister dst,
3899 const LogicVRegister& src1,
3900 const LogicVRegister& src2) {
3901 return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
3902 }
3903
3904
3905 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3906 LogicVRegister dst,
3907 const LogicVRegister& src1,
3908 const LogicVRegister& src2,
3909 bool is_2) {
3910 SimVRegister temp;
3911 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3912 return sub(vform, dst, dst, product).SignedSaturate(vform);
3913 }
3914
3915
3916 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3917 LogicVRegister dst,
3918 const LogicVRegister& src1,
3919 const LogicVRegister& src2) {
3920 return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
3921 }
3922
3923
3924 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3925 LogicVRegister dst,
3926 const LogicVRegister& src1,
3927 const LogicVRegister& src2,
3928 bool is_2) {
3929 SimVRegister temp;
3930 LogicVRegister product = smull(vform, temp, src1, src2, is_2);
3931 return add(vform, dst, product, product).SignedSaturate(vform);
3932 }
3933
3934
3935 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3936 LogicVRegister dst,
3937 const LogicVRegister& src1,
3938 const LogicVRegister& src2) {
3939 return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
3940 }
3941
// Per-lane doubling multiply keeping the high half, with optional rounding
// (`round`) and saturation.
//
// The low and high halves of each lane's product are computed separately. The
// high half is then doubled, with the bits carried out of the low half added
// in. Lanes where both inputs equal MinIntFromFormat(vform) receive
// MaxIntFromFormat(vform) instead of the computed value.
LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  int esize = LaneSizeInBitsFromFormat(vform);

  SimVRegister temp_lo, temp_hi;

  // Compute low and high multiplication results.
  mul(vform, temp_lo, src1, src2);
  smulh(vform, temp_hi, src1, src2);

  // Double by shifting high half, and adding in most-significant bit of low
  // half.
  shl(vform, temp_hi, temp_hi, 1);
  usra(vform, temp_hi, temp_lo, esize - 1);

  if (round) {
    // Add the second (due to doubling) most-significant bit of the low half
    // into the result.
    shl(vform, temp_lo, temp_lo, 1);
    usra(vform, temp_hi, temp_lo, esize - 1);
  }

  // `not_sat` collects, per lane, whether the computed value should be used.
  // `ptemp` is the handle through which its lanes are set.
  SimPRegister not_sat;
  LogicPRegister ptemp(not_sat);
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Saturation only occurs when src1 = src2 = minimum representable value.
    // Check this as a special case.
    ptemp.SetActive(vform, i, true);
    if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
        (src2.Int(vform, i) == MinIntFromFormat(vform))) {
      ptemp.SetActive(vform, i, false);
    }
    // Pre-fill every lane with the saturated value. The sources for lane i
    // have already been read above, before this write.
    dst.SetInt(vform, i, MaxIntFromFormat(vform));
  }

  // Merge the computed results over the pre-filled values, governed by
  // `not_sat`, so that the saturating lanes keep the maximum value.
  mov_merging(vform, dst, not_sat, temp_hi);
  return dst;
}
3984
3985
3986 LogicVRegister Simulator::dot(VectorFormat vform,
3987 LogicVRegister dst,
3988 const LogicVRegister& src1,
3989 const LogicVRegister& src2,
3990 bool is_src1_signed,
3991 bool is_src2_signed) {
3992 VectorFormat quarter_vform =
3993 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3994
3995 dst.ClearForWrite(vform);
3996 for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3997 uint64_t result = 0;
3998 int64_t element1, element2;
3999 for (int i = 0; i < 4; i++) {
4000 int index = 4 * e + i;
4001 if (is_src1_signed) {
4002 element1 = src1.Int(quarter_vform, index);
4003 } else {
4004 element1 = src1.Uint(quarter_vform, index);
4005 }
4006 if (is_src2_signed) {
4007 element2 = src2.Int(quarter_vform, index);
4008 } else {
4009 element2 = src2.Uint(quarter_vform, index);
4010 }
4011 result += element1 * element2;
4012 }
4013 dst.SetUint(vform, e, result + dst.Uint(vform, e));
4014 }
4015 return dst;
4016 }
4017
4018
4019 LogicVRegister Simulator::sdot(VectorFormat vform,
4020 LogicVRegister dst,
4021 const LogicVRegister& src1,
4022 const LogicVRegister& src2) {
4023 return dot(vform, dst, src1, src2, true, true);
4024 }
4025
4026
4027 LogicVRegister Simulator::udot(VectorFormat vform,
4028 LogicVRegister dst,
4029 const LogicVRegister& src1,
4030 const LogicVRegister& src2) {
4031 return dot(vform, dst, src1, src2, false, false);
4032 }
4033
4034 LogicVRegister Simulator::usdot(VectorFormat vform,
4035 LogicVRegister dst,
4036 const LogicVRegister& src1,
4037 const LogicVRegister& src2) {
4038 return dot(vform, dst, src1, src2, false, true);
4039 }
4040
4041 LogicVRegister Simulator::cdot(VectorFormat vform,
4042 LogicVRegister dst,
4043 const LogicVRegister& acc,
4044 const LogicVRegister& src1,
4045 const LogicVRegister& src2,
4046 int rot) {
4047 VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
4048 VectorFormat quarter_vform =
4049 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4050
4051 int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
4052 int sel_b = 1 - sel_a;
4053 int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
4054
4055 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4056 int64_t result = acc.Int(vform, i);
4057 for (int j = 0; j < 2; j++) {
4058 int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
4059 int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
4060 int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
4061 int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
4062 result += (r1 * r2) + (sub_i * i1 * i2);
4063 }
4064 dst.SetInt(vform, i, result);
4065 }
4066 return dst;
4067 }
4068
4069 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4070 LogicVRegister dst,
4071 const LogicVRegister& srca,
4072 const LogicVRegister& src1,
4073 const LogicVRegister& src2,
4074 int rot) {
4075 SimVRegister src1_a, src1_b;
4076 SimVRegister src2_a, src2_b;
4077 SimVRegister srca_i, srca_r;
4078 SimVRegister zero, temp;
4079 zero.Clear();
4080
4081 if ((rot == 0) || (rot == 180)) {
4082 uzp1(vform, src1_a, src1, zero);
4083 uzp1(vform, src2_a, src2, zero);
4084 uzp2(vform, src2_b, src2, zero);
4085 } else {
4086 uzp2(vform, src1_a, src1, zero);
4087 uzp2(vform, src2_a, src2, zero);
4088 uzp1(vform, src2_b, src2, zero);
4089 }
4090
4091 uzp1(vform, srca_r, srca, zero);
4092 uzp2(vform, srca_i, srca, zero);
4093
4094 bool sub_r = (rot == 90) || (rot == 180);
4095 bool sub_i = (rot == 180) || (rot == 270);
4096
4097 const bool round = true;
4098 sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
4099 sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
4100 zip1(vform, dst, srca_r, srca_i);
4101 return dst;
4102 }
4103
4104 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4105 LogicVRegister dst,
4106 const LogicVRegister& srca,
4107 const LogicVRegister& src1,
4108 const LogicVRegister& src2,
4109 int index,
4110 int rot) {
4111 SimVRegister temp;
4112 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
4113 return sqrdcmlah(vform, dst, srca, src1, temp, rot);
4114 }
4115
// Saturating (optionally rounding) doubling multiply-accumulate/subtract for
// 64-bit lanes.
//
// For each lane, the existing value of `dst` is combined with the product of
// `src1` and `src2` (negated when `sub_op` is set) using 128-bit intermediate
// arithmetic, optionally rounded, shifted back down and saturated to the
// signed 64-bit range before being written to `dst`.
//
// `vform` must have 64-bit lanes. The 128-bit values are held in a
// `vixl_uint128_t`, which is used here with `first` as the high 64 bits and
// `second` as the low 64 bits.
LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
                                      LogicVRegister dst,
                                      const LogicVRegister& src1,
                                      const LogicVRegister& src2,
                                      bool round,
                                      bool sub_op) {
  // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
  // To avoid this, we use:
  // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
  // which is same as:
  // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.

  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
  int esize = kDRegSize;
  vixl_uint128_t round_const, accum;
  // The rounding constant is 1 << (esize - 2), which fits in the low half.
  round_const.first = 0;
  if (round) {
    round_const.second = UINT64_C(1) << (esize - 2);
  } else {
    round_const.second = 0;
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Shift the whole value left by `esize - 1` bits.
    accum.first = dst.Int(vform, i) >> 1;
    accum.second = dst.Int(vform, i) << (esize - 1);

    vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));

    if (sub_op) {
      product = Neg128(product);
    }
    accum = Add128(accum, product);

    // Perform rounding.
    accum = Add128(accum, round_const);

    // Arithmetic shift the whole value right by `esize - 1` bits.
    // After this, `first` is zero for a non-negative result and non-zero
    // otherwise; it is only used as a sign indicator below.
    accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
    accum.first = -(accum.first >> (esize - 1));

    // Perform saturation.
    bool is_pos = (accum.first == 0) ? true : false;
    if (is_pos &&
        (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
      accum.second = MaxIntFromFormat(vform);
    } else if (!is_pos && (accum.second <
                           static_cast<uint64_t>(MinIntFromFormat(vform)))) {
      accum.second = MinIntFromFormat(vform);
    }

    dst.SetInt(vform, i, accum.second);
  }

  return dst;
}
4173
4174 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4175 LogicVRegister dst,
4176 const LogicVRegister& src1,
4177 const LogicVRegister& src2,
4178 bool round,
4179 bool sub_op) {
4180 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4181 // To avoid this, we use:
4182 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4183 // which is same as:
4184 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4185
4186 if (vform == kFormatVnD) {
4187 return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
4188 }
4189
4190 int esize = LaneSizeInBitsFromFormat(vform);
4191 int round_const = round ? (1 << (esize - 2)) : 0;
4192 int64_t accum;
4193
4194 dst.ClearForWrite(vform);
4195 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4196 accum = dst.Int(vform, i) << (esize - 1);
4197 if (sub_op) {
4198 accum -= src1.Int(vform, i) * src2.Int(vform, i);
4199 } else {
4200 accum += src1.Int(vform, i) * src2.Int(vform, i);
4201 }
4202 accum += round_const;
4203 accum = accum >> (esize - 1);
4204
4205 if (accum > MaxIntFromFormat(vform)) {
4206 accum = MaxIntFromFormat(vform);
4207 } else if (accum < MinIntFromFormat(vform)) {
4208 accum = MinIntFromFormat(vform);
4209 }
4210 dst.SetInt(vform, i, accum);
4211 }
4212 return dst;
4213 }
4214
4215
4216 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4217 LogicVRegister dst,
4218 const LogicVRegister& src1,
4219 const LogicVRegister& src2,
4220 bool round) {
4221 return sqrdmlash(vform, dst, src1, src2, round, false);
4222 }
4223
4224
4225 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4226 LogicVRegister dst,
4227 const LogicVRegister& src1,
4228 const LogicVRegister& src2,
4229 bool round) {
4230 return sqrdmlash(vform, dst, src1, src2, round, true);
4231 }
4232
4233
4234 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4235 LogicVRegister dst,
4236 const LogicVRegister& src1,
4237 const LogicVRegister& src2) {
4238 return sqrdmulh(vform, dst, src1, src2, false);
4239 }
4240
4241
4242 LogicVRegister Simulator::addhn(VectorFormat vform,
4243 LogicVRegister dst,
4244 const LogicVRegister& src1,
4245 const LogicVRegister& src2) {
4246 SimVRegister temp;
4247 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4248 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4249 return dst;
4250 }
4251
4252
4253 LogicVRegister Simulator::addhn2(VectorFormat vform,
4254 LogicVRegister dst,
4255 const LogicVRegister& src1,
4256 const LogicVRegister& src2) {
4257 SimVRegister temp;
4258 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4259 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4260 return dst;
4261 }
4262
4263
4264 LogicVRegister Simulator::raddhn(VectorFormat vform,
4265 LogicVRegister dst,
4266 const LogicVRegister& src1,
4267 const LogicVRegister& src2) {
4268 SimVRegister temp;
4269 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4270 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4271 return dst;
4272 }
4273
4274
4275 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4276 LogicVRegister dst,
4277 const LogicVRegister& src1,
4278 const LogicVRegister& src2) {
4279 SimVRegister temp;
4280 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4281 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4282 return dst;
4283 }
4284
4285
4286 LogicVRegister Simulator::subhn(VectorFormat vform,
4287 LogicVRegister dst,
4288 const LogicVRegister& src1,
4289 const LogicVRegister& src2) {
4290 SimVRegister temp;
4291 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4292 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4293 return dst;
4294 }
4295
4296
4297 LogicVRegister Simulator::subhn2(VectorFormat vform,
4298 LogicVRegister dst,
4299 const LogicVRegister& src1,
4300 const LogicVRegister& src2) {
4301 SimVRegister temp;
4302 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4303 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4304 return dst;
4305 }
4306
4307
4308 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4309 LogicVRegister dst,
4310 const LogicVRegister& src1,
4311 const LogicVRegister& src2) {
4312 SimVRegister temp;
4313 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4314 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4315 return dst;
4316 }
4317
4318
4319 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4320 LogicVRegister dst,
4321 const LogicVRegister& src1,
4322 const LogicVRegister& src2) {
4323 SimVRegister temp;
4324 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4325 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4326 return dst;
4327 }
4328
4329
4330 LogicVRegister Simulator::trn1(VectorFormat vform,
4331 LogicVRegister dst,
4332 const LogicVRegister& src1,
4333 const LogicVRegister& src2) {
4334 uint64_t result[kZRegMaxSizeInBytes] = {};
4335 int lane_count = LaneCountFromFormat(vform);
4336 int pairs = lane_count / 2;
4337 for (int i = 0; i < pairs; ++i) {
4338 result[2 * i] = src1.Uint(vform, 2 * i);
4339 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4340 }
4341
4342 dst.ClearForWrite(vform);
4343 for (int i = 0; i < lane_count; ++i) {
4344 dst.SetUint(vform, i, result[i]);
4345 }
4346 return dst;
4347 }
4348
4349
4350 LogicVRegister Simulator::trn2(VectorFormat vform,
4351 LogicVRegister dst,
4352 const LogicVRegister& src1,
4353 const LogicVRegister& src2) {
4354 uint64_t result[kZRegMaxSizeInBytes] = {};
4355 int lane_count = LaneCountFromFormat(vform);
4356 int pairs = lane_count / 2;
4357 for (int i = 0; i < pairs; ++i) {
4358 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4359 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4360 }
4361
4362 dst.ClearForWrite(vform);
4363 for (int i = 0; i < lane_count; ++i) {
4364 dst.SetUint(vform, i, result[i]);
4365 }
4366 return dst;
4367 }
4368
4369
4370 LogicVRegister Simulator::zip1(VectorFormat vform,
4371 LogicVRegister dst,
4372 const LogicVRegister& src1,
4373 const LogicVRegister& src2) {
4374 uint64_t result[kZRegMaxSizeInBytes] = {};
4375 int lane_count = LaneCountFromFormat(vform);
4376 int pairs = lane_count / 2;
4377 for (int i = 0; i < pairs; ++i) {
4378 result[2 * i] = src1.Uint(vform, i);
4379 result[(2 * i) + 1] = src2.Uint(vform, i);
4380 }
4381
4382 dst.ClearForWrite(vform);
4383 for (int i = 0; i < lane_count; ++i) {
4384 dst.SetUint(vform, i, result[i]);
4385 }
4386 return dst;
4387 }
4388
4389
4390 LogicVRegister Simulator::zip2(VectorFormat vform,
4391 LogicVRegister dst,
4392 const LogicVRegister& src1,
4393 const LogicVRegister& src2) {
4394 uint64_t result[kZRegMaxSizeInBytes] = {};
4395 int lane_count = LaneCountFromFormat(vform);
4396 int pairs = lane_count / 2;
4397 for (int i = 0; i < pairs; ++i) {
4398 result[2 * i] = src1.Uint(vform, pairs + i);
4399 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4400 }
4401
4402 dst.ClearForWrite(vform);
4403 for (int i = 0; i < lane_count; ++i) {
4404 dst.SetUint(vform, i, result[i]);
4405 }
4406 return dst;
4407 }
4408
4409
4410 LogicVRegister Simulator::uzp1(VectorFormat vform,
4411 LogicVRegister dst,
4412 const LogicVRegister& src1,
4413 const LogicVRegister& src2) {
4414 uint64_t result[kZRegMaxSizeInBytes * 2];
4415 int lane_count = LaneCountFromFormat(vform);
4416 for (int i = 0; i < lane_count; ++i) {
4417 result[i] = src1.Uint(vform, i);
4418 result[lane_count + i] = src2.Uint(vform, i);
4419 }
4420
4421 dst.ClearForWrite(vform);
4422 for (int i = 0; i < lane_count; ++i) {
4423 dst.SetUint(vform, i, result[2 * i]);
4424 }
4425 return dst;
4426 }
4427
4428
4429 LogicVRegister Simulator::uzp2(VectorFormat vform,
4430 LogicVRegister dst,
4431 const LogicVRegister& src1,
4432 const LogicVRegister& src2) {
4433 uint64_t result[kZRegMaxSizeInBytes * 2];
4434 int lane_count = LaneCountFromFormat(vform);
4435 for (int i = 0; i < lane_count; ++i) {
4436 result[i] = src1.Uint(vform, i);
4437 result[lane_count + i] = src2.Uint(vform, i);
4438 }
4439
4440 dst.ClearForWrite(vform);
4441 for (int i = 0; i < lane_count; ++i) {
4442 dst.SetUint(vform, i, result[(2 * i) + 1]);
4443 }
4444 return dst;
4445 }
4446
4447 LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
4448 LogicVRegister dst,
4449 const LogicVRegister& src) {
4450 // Interleave the top and bottom half of a vector, ie. for a vector:
4451 //
4452 // [ ... | F | D | B | ... | E | C | A ]
4453 //
4454 // where B is the first element in the top half of the vector, produce a
4455 // result vector:
4456 //
4457 // [ ... | ... | F | E | D | C | B | A ]
4458
4459 uint64_t result[kZRegMaxSizeInBytes] = {};
4460 int lane_count = LaneCountFromFormat(vform);
4461 for (int i = 0; i < lane_count; i += 2) {
4462 result[i] = src.Uint(vform, i / 2);
4463 result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
4464 }
4465 dst.SetUintArray(vform, result);
4466 return dst;
4467 }
4468
4469 template <typename T>
4470 T Simulator::FPNeg(T op) {
4471 return -op;
4472 }
4473
4474 template <typename T>
4475 T Simulator::FPAdd(T op1, T op2) {
4476 T result = FPProcessNaNs(op1, op2);
4477 if (IsNaN(result)) {
4478 return result;
4479 }
4480
4481 if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4482 // inf + -inf returns the default NaN.
4483 FPProcessException();
4484 return FPDefaultNaN<T>();
4485 } else {
4486 // Other cases should be handled by standard arithmetic.
4487 return op1 + op2;
4488 }
4489 }
4490
4491
4492 template <typename T>
4493 T Simulator::FPSub(T op1, T op2) {
4494 // NaNs should be handled elsewhere.
4495 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4496
4497 if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4498 // inf - inf returns the default NaN.
4499 FPProcessException();
4500 return FPDefaultNaN<T>();
4501 } else {
4502 // Other cases should be handled by standard arithmetic.
4503 return op1 - op2;
4504 }
4505 }
4506
4507 template <typename T>
4508 T Simulator::FPMulNaNs(T op1, T op2) {
4509 T result = FPProcessNaNs(op1, op2);
4510 return IsNaN(result) ? result : FPMul(op1, op2);
4511 }
4512
4513 template <typename T>
4514 T Simulator::FPMul(T op1, T op2) {
4515 // NaNs should be handled elsewhere.
4516 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4517
4518 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4519 // inf * 0.0 returns the default NaN.
4520 FPProcessException();
4521 return FPDefaultNaN<T>();
4522 } else {
4523 // Other cases should be handled by standard arithmetic.
4524 return op1 * op2;
4525 }
4526 }
4527
4528
4529 template <typename T>
4530 T Simulator::FPMulx(T op1, T op2) {
4531 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4532 // inf * 0.0 returns +/-2.0.
4533 T two = 2.0;
4534 return copysign(1.0, op1) * copysign(1.0, op2) * two;
4535 }
4536 return FPMul(op1, op2);
4537 }
4538
4539
// Fused multiply-add: computes `a + (op1 * op2)` with explicit handling of NaN
// operands, invalid operations and the sign of zero results.
//
// NaN operands are resolved with FPProcessNaNs3(). An operation that would
// itself generate a NaN (inf * 0.0, or an infinite product added to an
// infinite `a` of the opposite sign) calls FPProcessException() and returns
// the default NaN; this also takes priority over a quiet NaN in `a`. Zero
// results have their sign fixed up explicitly rather than trusting the host
// fma implementation.
template <typename T>
T Simulator::FPMulAdd(T a, T op1, T op2) {
  T result = FPProcessNaNs3(a, op1, op2);

  // Signs are extracted with copysign so that signed zeroes are honoured.
  T sign_a = copysign(1.0, a);
  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
  bool isinf_prod = IsInf(op1) || IsInf(op2);
  bool operation_generates_nan =
      (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
      (IsInf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
      (IsInf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf

  if (IsNaN(result)) {
    // Generated NaNs override quiet NaNs propagated from a.
    if (operation_generates_nan && IsQuietNaN(a)) {
      FPProcessException();
      return FPDefaultNaN<T>();
    } else {
      return result;
    }
  }

  // If the operation would produce a NaN, return the default NaN.
  if (operation_generates_nan) {
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Work around broken fma implementations for exact zero results: The sign of
  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
    return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
  }

  result = FusedMultiplyAdd(op1, op2, a);
  VIXL_ASSERT(!IsNaN(result));

  // Work around broken fma implementations for rounded zero results: If a is
  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
  if ((a == 0.0) && (result == 0.0)) {
    return copysign(0.0, sign_prod);
  }

  return result;
}
4585
// Explicit instantiations of FPMulAdd for single- and double-precision
// operands.
template float Simulator::FPMulAdd(float a, float op1, float op2);

template double Simulator::FPMulAdd(double a, double op1, double op2);
4589
4590 template <typename T>
4591 T Simulator::FPDiv(T op1, T op2) {
4592 // NaNs should be handled elsewhere.
4593 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4594
4595 if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4596 // inf / inf and 0.0 / 0.0 return the default NaN.
4597 FPProcessException();
4598 return FPDefaultNaN<T>();
4599 } else {
4600 if (op2 == 0.0) {
4601 FPProcessException();
4602 if (!IsNaN(op1)) {
4603 double op1_sign = copysign(1.0, op1);
4604 double op2_sign = copysign(1.0, op2);
4605 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4606 }
4607 }
4608
4609 // Other cases should be handled by standard arithmetic.
4610 return op1 / op2;
4611 }
4612 }
4613
4614
4615 template <typename T>
4616 T Simulator::FPSqrt(T op) {
4617 if (IsNaN(op)) {
4618 return FPProcessNaN(op);
4619 } else if (op < T(0.0)) {
4620 FPProcessException();
4621 return FPDefaultNaN<T>();
4622 } else {
4623 return sqrt(op);
4624 }
4625 }
4626
4627
4628 template <typename T>
4629 T Simulator::FPMax(T a, T b) {
4630 T result = FPProcessNaNs(a, b);
4631 if (IsNaN(result)) return result;
4632
4633 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4634 // a and b are zero, and the sign differs: return +0.0.
4635 return 0.0;
4636 } else {
4637 return (a > b) ? a : b;
4638 }
4639 }
4640
4641
4642 template <typename T>
4643 T Simulator::FPMaxNM(T a, T b) {
4644 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4645 a = kFP64NegativeInfinity;
4646 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4647 b = kFP64NegativeInfinity;
4648 }
4649
4650 T result = FPProcessNaNs(a, b);
4651 return IsNaN(result) ? result : FPMax(a, b);
4652 }
4653
4654
4655 template <typename T>
4656 T Simulator::FPMin(T a, T b) {
4657 T result = FPProcessNaNs(a, b);
4658 if (IsNaN(result)) return result;
4659
4660 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4661 // a and b are zero, and the sign differs: return -0.0.
4662 return -0.0;
4663 } else {
4664 return (a < b) ? a : b;
4665 }
4666 }
4667
4668
4669 template <typename T>
4670 T Simulator::FPMinNM(T a, T b) {
4671 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4672 a = kFP64PositiveInfinity;
4673 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4674 b = kFP64PositiveInfinity;
4675 }
4676
4677 T result = FPProcessNaNs(a, b);
4678 return IsNaN(result) ? result : FPMin(a, b);
4679 }
4680
4681
4682 template <typename T>
4683 T Simulator::FPRecipStepFused(T op1, T op2) {
4684 const T two = 2.0;
4685 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4686 return two;
4687 } else if (IsInf(op1) || IsInf(op2)) {
4688 // Return +inf if signs match, otherwise -inf.
4689 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4690 : kFP64NegativeInfinity;
4691 } else {
4692 return FusedMultiplyAdd(op1, op2, two);
4693 }
4694 }
4695
// Returns true if `value` is a normal floating-point number, i.e. it is not
// zero, subnormal, infinite or NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
4700
4701 template <>
4702 bool IsNormal(SimFloat16 value) {
4703 uint16_t rawbits = Float16ToRawbits(value);
4704 uint16_t exp_mask = 0x7c00;
4705 // Check that the exponent is neither all zeroes or all ones.
4706 return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4707 }
4708
4709
4710 template <typename T>
4711 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4712 const T one_point_five = 1.5;
4713 const T two = 2.0;
4714
4715 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4716 return one_point_five;
4717 } else if (IsInf(op1) || IsInf(op2)) {
4718 // Return +inf if signs match, otherwise -inf.
4719 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4720 : kFP64NegativeInfinity;
4721 } else {
4722 // The multiply-add-halve operation must be fully fused, so avoid interim
4723 // rounding by checking which operand can be losslessly divided by two
4724 // before doing the multiply-add.
4725 if (IsNormal(op1 / two)) {
4726 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4727 } else if (IsNormal(op2 / two)) {
4728 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4729 } else {
4730 // Neither operand is normal after halving: the result is dominated by
4731 // the addition term, so just return that.
4732 return one_point_five;
4733 }
4734 }
4735 }
4736
// Converts a double to a 32-bit integer, rounding toward zero and wrapping
// out-of-range values modulo 2^32, and updates the NZCV flags.
//
// Returns the converted value; zeroes, infinities and NaNs all return 0.
// On exit N, C and V are cleared and Z is set only if the conversion was
// exact (see the comment below for the precise conditions).
// NOTE(review): the name suggests this backs the JavaScript conversion
// instruction (FJCVTZS) - confirm against the caller.
int32_t Simulator::FPToFixedJS(double value) {
  // The Z-flag is set when the conversion from double precision floating-point
  // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
  // outside the bounds of a 32-bit integer, or isn't an exact integer then the
  // Z-flag is unset.
  int Z = 1;
  int32_t result;

  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
      (value == kFP64NegativeInfinity)) {
    // +/- zero and infinity all return zero, however -0 and +/- Infinity also
    // unset the Z-flag.
    result = 0.0;
    if ((value != 0.0) || std::signbit(value)) {
      Z = 0;
    }
  } else if (std::isnan(value)) {
    // NaN values unset the Z-flag and set the result to 0.
    FPProcessNaN(value);
    result = 0;
    Z = 0;
  } else {
    // All other values are converted to an integer representation, rounded
    // toward zero.
    double int_result = std::floor(value);
    double error = value - int_result;

    // floor() rounded a negative non-integer away from zero; step back up.
    if ((error != 0.0) && (int_result < 0.0)) {
      int_result++;
    }

    // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
    // write a one-liner with std::round, but the behaviour on ties is incorrect
    // for our purposes.
    double mod_const = static_cast<double>(UINT64_C(1) << 32);
    double mod_error =
        (int_result / mod_const) - std::floor(int_result / mod_const);
    double constrained;
    if (mod_error == 0.5) {
      // Exactly half way between two multiples of 2^32.
      constrained = INT32_MIN;
    } else {
      constrained = int_result - mod_const * round(int_result / mod_const);
    }

    VIXL_ASSERT(std::floor(constrained) == constrained);
    VIXL_ASSERT(constrained >= INT32_MIN);
    VIXL_ASSERT(constrained <= INT32_MAX);

    // Take the bottom 32 bits of the result as a 32-bit integer.
    result = static_cast<int32_t>(constrained);

    if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
        (error != 0.0)) {
      // If the integer result is out of range or the conversion isn't exact,
      // take exception and unset the Z-flag.
      FPProcessException();
      Z = 0;
    }
  }

  ReadNzcv().SetN(0);
  ReadNzcv().SetZ(Z);
  ReadNzcv().SetC(0);
  ReadNzcv().SetV(0);

  return result;
}
4804
// Rounds a finite, non-NaN double to an integral value according to
// `round_mode`.
//
// The computation starts from floor(value) and adjusts it per mode. Negative
// inputs that round to zero yield -0.0. Rounding modes other than those
// listed in the switch reach VIXL_UNIMPLEMENTED().
double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
  VIXL_ASSERT((value != kFP64PositiveInfinity) &&
              (value != kFP64NegativeInfinity));
  VIXL_ASSERT(!IsNaN(value));

  // `error` is the fractional part discarded by floor(); it is in [0, 1).
  double int_result = std::floor(value);
  double error = value - int_result;
  switch (round_mode) {
    case FPTieAway: {
      // Take care of correctly handling the range ]-0.5, -0.0], which must
      // yield -0.0.
      if ((-0.5 < value) && (value < 0.0)) {
        int_result = -0.0;

      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
        // If the error is greater than 0.5, or is equal to 0.5 and the integer
        // result is positive, round up.
        int_result++;
      }
      break;
    }
    case FPTieEven: {
      // Take care of correctly handling the range [-0.5, -0.0], which must
      // yield -0.0.
      if ((-0.5 <= value) && (value < 0.0)) {
        int_result = -0.0;

        // If the error is greater than 0.5, or is equal to 0.5 and the integer
        // result is odd, round up.
      } else if ((error > 0.5) ||
                 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
        int_result++;
      }
      break;
    }
    case FPZero: {
      // If value>0 then we take floor(value)
      // otherwise, ceil(value).
      if (value < 0) {
        int_result = ceil(value);
      }
      break;
    }
    case FPNegativeInfinity: {
      // We always use floor(value).
      break;
    }
    case FPPositiveInfinity: {
      // Take care of correctly handling the range ]-1.0, -0.0], which must
      // yield -0.0.
      if ((-1.0 < value) && (value < 0.0)) {
        int_result = -0.0;

        // If the error is non-zero, round up.
      } else if (error > 0.0) {
        int_result++;
      }
      break;
    }
    default:
      VIXL_UNIMPLEMENTED();
  }
  return int_result;
}
4869
4870 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4871 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4872 (value == kFP64NegativeInfinity)) {
4873 return value;
4874 } else if (IsNaN(value)) {
4875 return FPProcessNaN(value);
4876 }
4877 return FPRoundIntCommon(value, round_mode);
4878 }
4879
4880 double Simulator::FPRoundInt(double value,
4881 FPRounding round_mode,
4882 FrintMode frint_mode) {
4883 if (frint_mode == kFrintToInteger) {
4884 return FPRoundInt(value, round_mode);
4885 }
4886
4887 VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4888
4889 if (value == 0.0) {
4890 return value;
4891 }
4892
4893 if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4894 IsNaN(value)) {
4895 if (frint_mode == kFrintToInt32) {
4896 return INT32_MIN;
4897 } else {
4898 return INT64_MIN;
4899 }
4900 }
4901
4902 double result = FPRoundIntCommon(value, round_mode);
4903
4904 // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4905 // representable as a double, and is rounded to (INT64_MAX + 1) when
4906 // converted. To avoid this, we compare `result >= int64_max_plus_one`
4907 // instead; this is safe because `result` is known to be integral, and
4908 // `int64_max_plus_one` is exactly representable as a double.
4909 constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4910 VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4911 int64_max_plus_one)) == int64_max_plus_one);
4912
4913 if (frint_mode == kFrintToInt32) {
4914 if ((result > INT32_MAX) || (result < INT32_MIN)) {
4915 return INT32_MIN;
4916 }
4917 } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4918 return INT64_MIN;
4919 }
4920
4921 return result;
4922 }
4923
4924 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4925 value = FPRoundInt(value, rmode);
4926 if (value >= kHMaxInt) {
4927 return kHMaxInt;
4928 } else if (value < kHMinInt) {
4929 return kHMinInt;
4930 }
4931 return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4932 }
4933
4934
4935 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4936 value = FPRoundInt(value, rmode);
4937 if (value >= kWMaxInt) {
4938 return kWMaxInt;
4939 } else if (value < kWMinInt) {
4940 return kWMinInt;
4941 }
4942 return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4943 }
4944
4945
4946 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4947 value = FPRoundInt(value, rmode);
4948 // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4949 // as a result of kMaxInt not being representable as a double.
4950 if (value >= 9223372036854775808.) {
4951 return kXMaxInt;
4952 } else if (value < kXMinInt) {
4953 return kXMinInt;
4954 }
4955 return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4956 }
4957
4958
4959 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4960 value = FPRoundInt(value, rmode);
4961 if (value >= kHMaxUInt) {
4962 return kHMaxUInt;
4963 } else if (value < 0.0) {
4964 return 0;
4965 }
4966 return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4967 }
4968
4969
4970 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4971 value = FPRoundInt(value, rmode);
4972 if (value >= kWMaxUInt) {
4973 return kWMaxUInt;
4974 } else if (value < 0.0) {
4975 return 0;
4976 }
4977 return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4978 }
4979
4980
4981 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4982 value = FPRoundInt(value, rmode);
4983 // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
4984 // as a result of kMaxUInt not being representable as a double.
4985 if (value >= 18446744073709551616.) {
4986 return kXMaxUInt;
4987 } else if (value < 0.0) {
4988 return 0;
4989 }
4990 return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4991 }
4992
4993
// Generates the element-wise two-source floating-point vector operations.
//
// For each (FN, OP, PROCNAN) entry supplied by NEON_FP3SAME_LIST this defines:
//  - a templated Simulator::FN<T> that applies the scalar helper OP to every
//    pair of lanes of `src1` and `src2` and stores the result in `dst`. When
//    PROCNAN is true the operands first go through FPProcessNaNs() and OP is
//    only called if that does not yield a NaN;
//  - a non-template Simulator::FN that selects SimFloat16, float or double
//    from the lane size of `vform` and forwards to the template.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
  template <typename T>                                          \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    dst.ClearForWrite(vform);                                    \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
      T op1 = src1.Float<T>(i);                                  \
      T op2 = src2.Float<T>(i);                                  \
      T result;                                                  \
      if (PROCNAN) {                                             \
        result = FPProcessNaNs(op1, op2);                        \
        if (!IsNaN(result)) {                                    \
          result = OP(op1, op2);                                 \
        }                                                        \
      } else {                                                   \
        result = OP(op1, op2);                                   \
      }                                                          \
      dst.SetFloat(vform, i, result);                            \
    }                                                            \
    return dst;                                                  \
  }                                                              \
                                                                 \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {          \
      FN<SimFloat16>(vform, dst, src1, src2);                    \
    } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {   \
      FN<float>(vform, dst, src1, src2);                         \
    } else {                                                     \
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
      FN<double>(vform, dst, src1, src2);                        \
    }                                                            \
    return dst;                                                  \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
5034
5035
5036 LogicVRegister Simulator::fnmul(VectorFormat vform,
5037 LogicVRegister dst,
5038 const LogicVRegister& src1,
5039 const LogicVRegister& src2) {
5040 SimVRegister temp;
5041 LogicVRegister product = fmul(vform, temp, src1, src2);
5042 return fneg(vform, dst, product);
5043 }
5044
5045
5046 template <typename T>
5047 LogicVRegister Simulator::frecps(VectorFormat vform,
5048 LogicVRegister dst,
5049 const LogicVRegister& src1,
5050 const LogicVRegister& src2) {
5051 dst.ClearForWrite(vform);
5052 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5053 T op1 = -src1.Float<T>(i);
5054 T op2 = src2.Float<T>(i);
5055 T result = FPProcessNaNs(op1, op2);
5056 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5057 }
5058 return dst;
5059 }
5060
5061
5062 LogicVRegister Simulator::frecps(VectorFormat vform,
5063 LogicVRegister dst,
5064 const LogicVRegister& src1,
5065 const LogicVRegister& src2) {
5066 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5067 frecps<SimFloat16>(vform, dst, src1, src2);
5068 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5069 frecps<float>(vform, dst, src1, src2);
5070 } else {
5071 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5072 frecps<double>(vform, dst, src1, src2);
5073 }
5074 return dst;
5075 }
5076
5077
5078 template <typename T>
5079 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5080 LogicVRegister dst,
5081 const LogicVRegister& src1,
5082 const LogicVRegister& src2) {
5083 dst.ClearForWrite(vform);
5084 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5085 T op1 = -src1.Float<T>(i);
5086 T op2 = src2.Float<T>(i);
5087 T result = FPProcessNaNs(op1, op2);
5088 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5089 }
5090 return dst;
5091 }
5092
5093
5094 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5095 LogicVRegister dst,
5096 const LogicVRegister& src1,
5097 const LogicVRegister& src2) {
5098 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5099 frsqrts<SimFloat16>(vform, dst, src1, src2);
5100 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5101 frsqrts<float>(vform, dst, src1, src2);
5102 } else {
5103 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5104 frsqrts<double>(vform, dst, src1, src2);
5105 }
5106 return dst;
5107 }
5108
5109
5110 template <typename T>
5111 LogicVRegister Simulator::fcmp(VectorFormat vform,
5112 LogicVRegister dst,
5113 const LogicVRegister& src1,
5114 const LogicVRegister& src2,
5115 Condition cond) {
5116 dst.ClearForWrite(vform);
5117 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5118 bool result = false;
5119 T op1 = src1.Float<T>(i);
5120 T op2 = src2.Float<T>(i);
5121 bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5122
5123 switch (cond) {
5124 case eq:
5125 result = (op1 == op2);
5126 break;
5127 case ge:
5128 result = (op1 >= op2);
5129 break;
5130 case gt:
5131 result = (op1 > op2);
5132 break;
5133 case le:
5134 result = (op1 <= op2);
5135 break;
5136 case lt:
5137 result = (op1 < op2);
5138 break;
5139 case ne:
5140 result = (op1 != op2);
5141 break;
5142 case uo:
5143 result = unordered;
5144 break;
5145 default:
5146 // Other conditions are defined in terms of those above.
5147 VIXL_UNREACHABLE();
5148 break;
5149 }
5150
5151 if (result && unordered) {
5152 // Only `uo` and `ne` can be true for unordered comparisons.
5153 VIXL_ASSERT((cond == uo) || (cond == ne));
5154 }
5155
5156 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5157 }
5158 return dst;
5159 }
5160
5161
5162 LogicVRegister Simulator::fcmp(VectorFormat vform,
5163 LogicVRegister dst,
5164 const LogicVRegister& src1,
5165 const LogicVRegister& src2,
5166 Condition cond) {
5167 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5168 fcmp<SimFloat16>(vform, dst, src1, src2, cond);
5169 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5170 fcmp<float>(vform, dst, src1, src2, cond);
5171 } else {
5172 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5173 fcmp<double>(vform, dst, src1, src2, cond);
5174 }
5175 return dst;
5176 }
5177
5178
5179 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
5180 LogicVRegister dst,
5181 const LogicVRegister& src,
5182 Condition cond) {
5183 SimVRegister temp;
5184 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5185 LogicVRegister zero_reg =
5186 dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
5187 fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
5188 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5189 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5190 fcmp<float>(vform, dst, src, zero_reg, cond);
5191 } else {
5192 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5193 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5194 fcmp<double>(vform, dst, src, zero_reg, cond);
5195 }
5196 return dst;
5197 }
5198
5199
5200 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5201 LogicVRegister dst,
5202 const LogicVRegister& src1,
5203 const LogicVRegister& src2,
5204 Condition cond) {
5205 SimVRegister temp1, temp2;
5206 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5207 LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5208 LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5209 fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5210 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5211 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5212 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5213 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5214 } else {
5215 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5216 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5217 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5218 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5219 }
5220 return dst;
5221 }
5222
5223
5224 template <typename T>
5225 LogicVRegister Simulator::fmla(VectorFormat vform,
5226 LogicVRegister dst,
5227 const LogicVRegister& srca,
5228 const LogicVRegister& src1,
5229 const LogicVRegister& src2) {
5230 dst.ClearForWrite(vform);
5231 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5232 T op1 = src1.Float<T>(i);
5233 T op2 = src2.Float<T>(i);
5234 T acc = srca.Float<T>(i);
5235 T result = FPMulAdd(acc, op1, op2);
5236 dst.SetFloat(vform, i, result);
5237 }
5238 return dst;
5239 }
5240
5241
5242 LogicVRegister Simulator::fmla(VectorFormat vform,
5243 LogicVRegister dst,
5244 const LogicVRegister& srca,
5245 const LogicVRegister& src1,
5246 const LogicVRegister& src2) {
5247 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5248 fmla<SimFloat16>(vform, dst, srca, src1, src2);
5249 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5250 fmla<float>(vform, dst, srca, src1, src2);
5251 } else {
5252 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5253 fmla<double>(vform, dst, srca, src1, src2);
5254 }
5255 return dst;
5256 }
5257
5258
5259 template <typename T>
5260 LogicVRegister Simulator::fmls(VectorFormat vform,
5261 LogicVRegister dst,
5262 const LogicVRegister& srca,
5263 const LogicVRegister& src1,
5264 const LogicVRegister& src2) {
5265 dst.ClearForWrite(vform);
5266 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5267 T op1 = -src1.Float<T>(i);
5268 T op2 = src2.Float<T>(i);
5269 T acc = srca.Float<T>(i);
5270 T result = FPMulAdd(acc, op1, op2);
5271 dst.SetFloat(i, result);
5272 }
5273 return dst;
5274 }
5275
5276
5277 LogicVRegister Simulator::fmls(VectorFormat vform,
5278 LogicVRegister dst,
5279 const LogicVRegister& srca,
5280 const LogicVRegister& src1,
5281 const LogicVRegister& src2) {
5282 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5283 fmls<SimFloat16>(vform, dst, srca, src1, src2);
5284 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5285 fmls<float>(vform, dst, srca, src1, src2);
5286 } else {
5287 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5288 fmls<double>(vform, dst, srca, src1, src2);
5289 }
5290 return dst;
5291 }
5292
5293
5294 LogicVRegister Simulator::fmlal(VectorFormat vform,
5295 LogicVRegister dst,
5296 const LogicVRegister& src1,
5297 const LogicVRegister& src2) {
5298 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5299 dst.ClearForWrite(vform);
5300 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5301 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5302 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5303 float acc = dst.Float<float>(i);
5304 float result = FPMulAdd(acc, op1, op2);
5305 dst.SetFloat(i, result);
5306 }
5307 return dst;
5308 }
5309
5310
5311 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5312 LogicVRegister dst,
5313 const LogicVRegister& src1,
5314 const LogicVRegister& src2) {
5315 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5316 dst.ClearForWrite(vform);
5317 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5318 int src = i + LaneCountFromFormat(vform);
5319 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5320 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5321 float acc = dst.Float<float>(i);
5322 float result = FPMulAdd(acc, op1, op2);
5323 dst.SetFloat(i, result);
5324 }
5325 return dst;
5326 }
5327
5328
5329 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5330 LogicVRegister dst,
5331 const LogicVRegister& src1,
5332 const LogicVRegister& src2) {
5333 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5334 dst.ClearForWrite(vform);
5335 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5336 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5337 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5338 float acc = dst.Float<float>(i);
5339 float result = FPMulAdd(acc, op1, op2);
5340 dst.SetFloat(i, result);
5341 }
5342 return dst;
5343 }
5344
5345
5346 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5347 LogicVRegister dst,
5348 const LogicVRegister& src1,
5349 const LogicVRegister& src2) {
5350 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5351 dst.ClearForWrite(vform);
5352 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5353 int src = i + LaneCountFromFormat(vform);
5354 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5355 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5356 float acc = dst.Float<float>(i);
5357 float result = FPMulAdd(acc, op1, op2);
5358 dst.SetFloat(i, result);
5359 }
5360 return dst;
5361 }
5362
5363
5364 LogicVRegister Simulator::fmlal(VectorFormat vform,
5365 LogicVRegister dst,
5366 const LogicVRegister& src1,
5367 const LogicVRegister& src2,
5368 int index) {
5369 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5370 dst.ClearForWrite(vform);
5371 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5372 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5373 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5374 float acc = dst.Float<float>(i);
5375 float result = FPMulAdd(acc, op1, op2);
5376 dst.SetFloat(i, result);
5377 }
5378 return dst;
5379 }
5380
5381
5382 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5383 LogicVRegister dst,
5384 const LogicVRegister& src1,
5385 const LogicVRegister& src2,
5386 int index) {
5387 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5388 dst.ClearForWrite(vform);
5389 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5390 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5391 int src = i + LaneCountFromFormat(vform);
5392 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5393 float acc = dst.Float<float>(i);
5394 float result = FPMulAdd(acc, op1, op2);
5395 dst.SetFloat(i, result);
5396 }
5397 return dst;
5398 }
5399
5400
5401 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5402 LogicVRegister dst,
5403 const LogicVRegister& src1,
5404 const LogicVRegister& src2,
5405 int index) {
5406 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5407 dst.ClearForWrite(vform);
5408 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5409 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5410 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5411 float acc = dst.Float<float>(i);
5412 float result = FPMulAdd(acc, op1, op2);
5413 dst.SetFloat(i, result);
5414 }
5415 return dst;
5416 }
5417
5418
5419 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5420 LogicVRegister dst,
5421 const LogicVRegister& src1,
5422 const LogicVRegister& src2,
5423 int index) {
5424 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5425 dst.ClearForWrite(vform);
5426 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5427 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5428 int src = i + LaneCountFromFormat(vform);
5429 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5430 float acc = dst.Float<float>(i);
5431 float result = FPMulAdd(acc, op1, op2);
5432 dst.SetFloat(i, result);
5433 }
5434 return dst;
5435 }
5436
5437
5438 template <typename T>
5439 LogicVRegister Simulator::fneg(VectorFormat vform,
5440 LogicVRegister dst,
5441 const LogicVRegister& src) {
5442 dst.ClearForWrite(vform);
5443 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5444 T op = src.Float<T>(i);
5445 op = -op;
5446 dst.SetFloat(i, op);
5447 }
5448 return dst;
5449 }
5450
5451
5452 LogicVRegister Simulator::fneg(VectorFormat vform,
5453 LogicVRegister dst,
5454 const LogicVRegister& src) {
5455 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5456 fneg<SimFloat16>(vform, dst, src);
5457 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5458 fneg<float>(vform, dst, src);
5459 } else {
5460 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5461 fneg<double>(vform, dst, src);
5462 }
5463 return dst;
5464 }
5465
5466
5467 template <typename T>
5468 LogicVRegister Simulator::fabs_(VectorFormat vform,
5469 LogicVRegister dst,
5470 const LogicVRegister& src) {
5471 dst.ClearForWrite(vform);
5472 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5473 T op = src.Float<T>(i);
5474 if (copysign(1.0, op) < 0.0) {
5475 op = -op;
5476 }
5477 dst.SetFloat(i, op);
5478 }
5479 return dst;
5480 }
5481
5482
5483 LogicVRegister Simulator::fabs_(VectorFormat vform,
5484 LogicVRegister dst,
5485 const LogicVRegister& src) {
5486 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5487 fabs_<SimFloat16>(vform, dst, src);
5488 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5489 fabs_<float>(vform, dst, src);
5490 } else {
5491 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5492 fabs_<double>(vform, dst, src);
5493 }
5494 return dst;
5495 }
5496
5497
5498 LogicVRegister Simulator::fabd(VectorFormat vform,
5499 LogicVRegister dst,
5500 const LogicVRegister& src1,
5501 const LogicVRegister& src2) {
5502 SimVRegister temp;
5503 fsub(vform, temp, src1, src2);
5504 fabs_(vform, dst, temp);
5505 return dst;
5506 }
5507
5508
5509 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5510 LogicVRegister dst,
5511 const LogicVRegister& src) {
5512 dst.ClearForWrite(vform);
5513 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5514 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5515 SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5516 dst.SetFloat(i, result);
5517 }
5518 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5519 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5520 float result = FPSqrt(src.Float<float>(i));
5521 dst.SetFloat(i, result);
5522 }
5523 } else {
5524 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5525 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5526 double result = FPSqrt(src.Float<double>(i));
5527 dst.SetFloat(i, result);
5528 }
5529 }
5530 return dst;
5531 }
5532
5533
5534 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
5535 LogicVRegister Simulator::FNP(VectorFormat vform, \
5536 LogicVRegister dst, \
5537 const LogicVRegister& src1, \
5538 const LogicVRegister& src2) { \
5539 SimVRegister temp1, temp2; \
5540 uzp1(vform, temp1, src1, src2); \
5541 uzp2(vform, temp2, src1, src2); \
5542 FN(vform, dst, temp1, temp2); \
5543 if (IsSVEFormat(vform)) { \
5544 interleave_top_bottom(vform, dst, dst); \
5545 } \
5546 return dst; \
5547 } \
5548 \
5549 LogicVRegister Simulator::FNP(VectorFormat vform, \
5550 LogicVRegister dst, \
5551 const LogicVRegister& src) { \
5552 if (vform == kFormatH) { \
5553 SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))), \
5554 SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \
5555 dst.SetUint(vform, 0, Float16ToRawbits(result)); \
5556 } else if (vform == kFormatS) { \
5557 float result = OP(src.Float<float>(0), src.Float<float>(1)); \
5558 dst.SetFloat(0, result); \
5559 } else { \
5560 VIXL_ASSERT(vform == kFormatD); \
5561 double result = OP(src.Float<double>(0), src.Float<double>(1)); \
5562 dst.SetFloat(0, result); \
5563 } \
5564 dst.ClearForWrite(vform); \
5565 return dst; \
5566 }
5567 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
5568 #undef DEFINE_NEON_FP_PAIR_OP
5569
5570 template <typename T>
5571 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5572 LogicVRegister dst,
5573 const LogicVRegister& src,
5574 typename TFPPairOp<T>::type fn,
5575 uint64_t inactive_value) {
5576 int lane_count = LaneCountFromFormat(vform);
5577 T result[kZRegMaxSizeInBytes / sizeof(T)];
5578 // Copy the source vector into a working array. Initialise the unused elements
5579 // at the end of the array to the same value that a false predicate would set.
5580 for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5581 result[i] = (i < lane_count)
5582 ? src.Float<T>(i)
5583 : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5584 }
5585
5586 // Pairwise reduce the elements to a single value, using the pair op function
5587 // argument.
5588 for (int step = 1; step < lane_count; step *= 2) {
5589 for (int i = 0; i < lane_count; i += step * 2) {
5590 result[i] = (this->*fn)(result[i], result[i + step]);
5591 }
5592 }
5593 dst.ClearForWrite(ScalarFormatFromFormat(vform));
5594 dst.SetFloat<T>(0, result[0]);
5595 return dst;
5596 }
5597
5598 LogicVRegister Simulator::FPPairedAcrossHelper(
5599 VectorFormat vform,
5600 LogicVRegister dst,
5601 const LogicVRegister& src,
5602 typename TFPPairOp<SimFloat16>::type fn16,
5603 typename TFPPairOp<float>::type fn32,
5604 typename TFPPairOp<double>::type fn64,
5605 uint64_t inactive_value) {
5606 switch (LaneSizeInBitsFromFormat(vform)) {
5607 case kHRegSize:
5608 return FPPairedAcrossHelper<SimFloat16>(vform,
5609 dst,
5610 src,
5611 fn16,
5612 inactive_value);
5613 case kSRegSize:
5614 return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5615 default:
5616 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5617 return FPPairedAcrossHelper<double>(vform,
5618 dst,
5619 src,
5620 fn64,
5621 inactive_value);
5622 }
5623 }
5624
5625 LogicVRegister Simulator::faddv(VectorFormat vform,
5626 LogicVRegister dst,
5627 const LogicVRegister& src) {
5628 return FPPairedAcrossHelper(vform,
5629 dst,
5630 src,
5631 &Simulator::FPAdd<SimFloat16>,
5632 &Simulator::FPAdd<float>,
5633 &Simulator::FPAdd<double>,
5634 0);
5635 }
5636
5637 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5638 LogicVRegister dst,
5639 const LogicVRegister& src) {
5640 int lane_size = LaneSizeInBitsFromFormat(vform);
5641 uint64_t inactive_value =
5642 FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5643 return FPPairedAcrossHelper(vform,
5644 dst,
5645 src,
5646 &Simulator::FPMax<SimFloat16>,
5647 &Simulator::FPMax<float>,
5648 &Simulator::FPMax<double>,
5649 inactive_value);
5650 }
5651
5652
5653 LogicVRegister Simulator::fminv(VectorFormat vform,
5654 LogicVRegister dst,
5655 const LogicVRegister& src) {
5656 int lane_size = LaneSizeInBitsFromFormat(vform);
5657 uint64_t inactive_value =
5658 FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5659 return FPPairedAcrossHelper(vform,
5660 dst,
5661 src,
5662 &Simulator::FPMin<SimFloat16>,
5663 &Simulator::FPMin<float>,
5664 &Simulator::FPMin<double>,
5665 inactive_value);
5666 }
5667
5668
5669 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5670 LogicVRegister dst,
5671 const LogicVRegister& src) {
5672 int lane_size = LaneSizeInBitsFromFormat(vform);
5673 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5674 return FPPairedAcrossHelper(vform,
5675 dst,
5676 src,
5677 &Simulator::FPMaxNM<SimFloat16>,
5678 &Simulator::FPMaxNM<float>,
5679 &Simulator::FPMaxNM<double>,
5680 inactive_value);
5681 }
5682
5683
5684 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5685 LogicVRegister dst,
5686 const LogicVRegister& src) {
5687 int lane_size = LaneSizeInBitsFromFormat(vform);
5688 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5689 return FPPairedAcrossHelper(vform,
5690 dst,
5691 src,
5692 &Simulator::FPMinNM<SimFloat16>,
5693 &Simulator::FPMinNM<float>,
5694 &Simulator::FPMinNM<double>,
5695 inactive_value);
5696 }
5697
5698
5699 LogicVRegister Simulator::fmul(VectorFormat vform,
5700 LogicVRegister dst,
5701 const LogicVRegister& src1,
5702 const LogicVRegister& src2,
5703 int index) {
5704 dst.ClearForWrite(vform);
5705 SimVRegister temp;
5706 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5707 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5708 fmul<SimFloat16>(vform, dst, src1, index_reg);
5709 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5710 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5711 fmul<float>(vform, dst, src1, index_reg);
5712 } else {
5713 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5714 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5715 fmul<double>(vform, dst, src1, index_reg);
5716 }
5717 return dst;
5718 }
5719
5720
5721 LogicVRegister Simulator::fmla(VectorFormat vform,
5722 LogicVRegister dst,
5723 const LogicVRegister& src1,
5724 const LogicVRegister& src2,
5725 int index) {
5726 dst.ClearForWrite(vform);
5727 SimVRegister temp;
5728 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5729 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5730 fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5731 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5732 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5733 fmla<float>(vform, dst, dst, src1, index_reg);
5734 } else {
5735 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5736 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5737 fmla<double>(vform, dst, dst, src1, index_reg);
5738 }
5739 return dst;
5740 }
5741
5742
5743 LogicVRegister Simulator::fmls(VectorFormat vform,
5744 LogicVRegister dst,
5745 const LogicVRegister& src1,
5746 const LogicVRegister& src2,
5747 int index) {
5748 dst.ClearForWrite(vform);
5749 SimVRegister temp;
5750 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5751 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5752 fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5753 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5754 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5755 fmls<float>(vform, dst, dst, src1, index_reg);
5756 } else {
5757 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5758 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5759 fmls<double>(vform, dst, dst, src1, index_reg);
5760 }
5761 return dst;
5762 }
5763
5764
5765 LogicVRegister Simulator::fmulx(VectorFormat vform,
5766 LogicVRegister dst,
5767 const LogicVRegister& src1,
5768 const LogicVRegister& src2,
5769 int index) {
5770 dst.ClearForWrite(vform);
5771 SimVRegister temp;
5772 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5773 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5774 fmulx<SimFloat16>(vform, dst, src1, index_reg);
5775 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5776 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5777 fmulx<float>(vform, dst, src1, index_reg);
5778 } else {
5779 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5780 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5781 fmulx<double>(vform, dst, src1, index_reg);
5782 }
5783 return dst;
5784 }
5785
5786
5787 LogicVRegister Simulator::frint(VectorFormat vform,
5788 LogicVRegister dst,
5789 const LogicVRegister& src,
5790 FPRounding rounding_mode,
5791 bool inexact_exception,
5792 FrintMode frint_mode) {
5793 dst.ClearForWrite(vform);
5794 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5795 VIXL_ASSERT(frint_mode == kFrintToInteger);
5796 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5797 SimFloat16 input = src.Float<SimFloat16>(i);
5798 SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5799 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5800 FPProcessException();
5801 }
5802 dst.SetFloat<SimFloat16>(i, rounded);
5803 }
5804 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5805 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5806 float input = src.Float<float>(i);
5807 float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5808
5809 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5810 FPProcessException();
5811 }
5812 dst.SetFloat<float>(i, rounded);
5813 }
5814 } else {
5815 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5816 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5817 double input = src.Float<double>(i);
5818 double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5819 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5820 FPProcessException();
5821 }
5822 dst.SetFloat<double>(i, rounded);
5823 }
5824 }
5825 return dst;
5826 }
5827
5828 LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
5829 VectorFormat src_vform,
5830 LogicVRegister dst,
5831 const LogicPRegister& pg,
5832 const LogicVRegister& src) {
5833 unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
5834 unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
5835 VectorFormat vform = SVEFormatFromLaneSizeInBits(
5836 std::max(dst_data_size_in_bits, src_data_size_in_bits));
5837
5838 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5839 if (!pg.IsActive(vform, i)) continue;
5840
5841 uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5842 0,
5843 src.Uint(vform, i));
5844 double dst_value =
5845 RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5846
5847 uint64_t dst_raw_bits =
5848 FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5849
5850 dst.SetUint(vform, i, dst_raw_bits);
5851 }
5852
5853 return dst;
5854 }
5855
5856 LogicVRegister Simulator::fcvts(VectorFormat vform,
5857 unsigned dst_data_size_in_bits,
5858 unsigned src_data_size_in_bits,
5859 LogicVRegister dst,
5860 const LogicPRegister& pg,
5861 const LogicVRegister& src,
5862 FPRounding round,
5863 int fbits) {
5864 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5865 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5866
5867 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5868 if (!pg.IsActive(vform, i)) continue;
5869
5870 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5871 0,
5872 src.Uint(vform, i));
5873 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5874 std::pow(2.0, fbits);
5875
5876 switch (dst_data_size_in_bits) {
5877 case kHRegSize:
5878 dst.SetInt(vform, i, FPToInt16(result, round));
5879 break;
5880 case kSRegSize:
5881 dst.SetInt(vform, i, FPToInt32(result, round));
5882 break;
5883 case kDRegSize:
5884 dst.SetInt(vform, i, FPToInt64(result, round));
5885 break;
5886 default:
5887 VIXL_UNIMPLEMENTED();
5888 break;
5889 }
5890 }
5891
5892 return dst;
5893 }
5894
5895 LogicVRegister Simulator::fcvts(VectorFormat vform,
5896 LogicVRegister dst,
5897 const LogicVRegister& src,
5898 FPRounding round,
5899 int fbits) {
5900 dst.ClearForWrite(vform);
5901 return fcvts(vform,
5902 LaneSizeInBitsFromFormat(vform),
5903 LaneSizeInBitsFromFormat(vform),
5904 dst,
5905 GetPTrue(),
5906 src,
5907 round,
5908 fbits);
5909 }
5910
5911 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5912 unsigned dst_data_size_in_bits,
5913 unsigned src_data_size_in_bits,
5914 LogicVRegister dst,
5915 const LogicPRegister& pg,
5916 const LogicVRegister& src,
5917 FPRounding round,
5918 int fbits) {
5919 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5920 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5921
5922 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5923 if (!pg.IsActive(vform, i)) continue;
5924
5925 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5926 0,
5927 src.Uint(vform, i));
5928 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5929 std::pow(2.0, fbits);
5930
5931 switch (dst_data_size_in_bits) {
5932 case kHRegSize:
5933 dst.SetUint(vform, i, FPToUInt16(result, round));
5934 break;
5935 case kSRegSize:
5936 dst.SetUint(vform, i, FPToUInt32(result, round));
5937 break;
5938 case kDRegSize:
5939 dst.SetUint(vform, i, FPToUInt64(result, round));
5940 break;
5941 default:
5942 VIXL_UNIMPLEMENTED();
5943 break;
5944 }
5945 }
5946
5947 return dst;
5948 }
5949
5950 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5951 LogicVRegister dst,
5952 const LogicVRegister& src,
5953 FPRounding round,
5954 int fbits) {
5955 dst.ClearForWrite(vform);
5956 return fcvtu(vform,
5957 LaneSizeInBitsFromFormat(vform),
5958 LaneSizeInBitsFromFormat(vform),
5959 dst,
5960 GetPTrue(),
5961 src,
5962 round,
5963 fbits);
5964 }
5965
5966 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5967 LogicVRegister dst,
5968 const LogicVRegister& src) {
5969 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5970 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5971 // TODO: Full support for SimFloat16 in SimRegister(s).
5972 dst.SetFloat(i,
5973 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5974 ReadDN()));
5975 }
5976 } else {
5977 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5978 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5979 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
5980 }
5981 }
5982 return dst;
5983 }
5984
5985
5986 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5987 LogicVRegister dst,
5988 const LogicVRegister& src) {
5989 int lane_count = LaneCountFromFormat(vform);
5990 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5991 for (int i = 0; i < lane_count; i++) {
5992 // TODO: Full support for SimFloat16 in SimRegister(s).
5993 dst.SetFloat(i,
5994 FPToFloat(RawbitsToFloat16(
5995 src.Float<uint16_t>(i + lane_count)),
5996 ReadDN()));
5997 }
5998 } else {
5999 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6000 for (int i = 0; i < lane_count; i++) {
6001 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
6002 }
6003 }
6004 return dst;
6005 }
6006
6007
6008 LogicVRegister Simulator::fcvtn(VectorFormat vform,
6009 LogicVRegister dst,
6010 const LogicVRegister& src) {
6011 SimVRegister tmp;
6012 LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6013 dst.ClearForWrite(vform);
6014 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6015 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6016 dst.SetFloat(i,
6017 Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
6018 FPTieEven,
6019 ReadDN())));
6020 }
6021 } else {
6022 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6023 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6024 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
6025 }
6026 }
6027 return dst;
6028 }
6029
6030
6031 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
6032 LogicVRegister dst,
6033 const LogicVRegister& src) {
6034 int lane_count = LaneCountFromFormat(vform) / 2;
6035 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6036 for (int i = lane_count - 1; i >= 0; i--) {
6037 dst.SetFloat(i + lane_count,
6038 Float16ToRawbits(
6039 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
6040 }
6041 } else {
6042 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6043 for (int i = lane_count - 1; i >= 0; i--) {
6044 dst.SetFloat(i + lane_count,
6045 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
6046 }
6047 }
6048 return dst;
6049 }
6050
6051
6052 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
6053 LogicVRegister dst,
6054 const LogicVRegister& src) {
6055 SimVRegister tmp;
6056 LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6057 int input_lane_count = LaneCountFromFormat(vform);
6058 if (IsSVEFormat(vform)) {
6059 mov(kFormatVnB, tmp, src);
6060 input_lane_count /= 2;
6061 }
6062
6063 dst.ClearForWrite(vform);
6064 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6065
6066 for (int i = 0; i < input_lane_count; i++) {
6067 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
6068 }
6069 return dst;
6070 }
6071
6072
6073 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
6074 LogicVRegister dst,
6075 const LogicVRegister& src) {
6076 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6077 int lane_count = LaneCountFromFormat(vform) / 2;
6078 for (int i = lane_count - 1; i >= 0; i--) {
6079 dst.SetFloat(i + lane_count,
6080 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
6081 }
6082 return dst;
6083 }
6084
6085
6086 // Based on reference C function recip_sqrt_estimate from ARM ARM.
6087 double Simulator::recip_sqrt_estimate(double a) {
6088 int quot0, quot1, s;
6089 double r;
6090 if (a < 0.5) {
6091 quot0 = static_cast<int>(a * 512.0);
6092 r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6093 } else {
6094 quot1 = static_cast<int>(a * 256.0);
6095 r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6096 }
6097 s = static_cast<int>(256.0 * r + 0.5);
6098 return static_cast<double>(s) / 256.0;
6099 }
6100
6101
// Shorthand for ExtractUnsignedBitfield64 that takes the value first. Callers
// in this file pass the higher bit position as `start_bit` and the lower one
// as `end_bit` (for example `Bits(fraction, 51, 44)`).
static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
  return ExtractUnsignedBitfield64(start_bit, end_bit, val);
}
6105
6106
// Compute a reciprocal square root estimate of `op`, where T is SimFloat16,
// float or double.
//
// Special cases handled before the estimate itself:
//   - NaN inputs are passed to FPProcessNaN.
//   - Zero returns an infinity with the same sign as the input.
//   - Other negative inputs call FPProcessException and return the default
//     NaN.
//   - Positive infinity returns zero.
// Otherwise the operand is rescaled into a double, passed to
// recip_sqrt_estimate, and the result is repacked in the format of T.
template <typename T>
T Simulator::FPRecipSqrtEstimate(T op) {
  if (IsNaN(op)) {
    return FPProcessNaN(op);
  } else if (op == 0.0) {
    // copysign distinguishes -0.0 from +0.0.
    if (copysign(1.0, op) < 0.0) {
      return kFP64NegativeInfinity;
    } else {
      return kFP64PositiveInfinity;
    }
  } else if (copysign(1.0, op) < 0.0) {
    FPProcessException();
    return FPDefaultNaN<T>();
  } else if (IsInf(op)) {
    return 0.0;
  } else {
    uint64_t fraction;
    int exp, result_exp;

    // Extract the exponent and fraction, left-aligning the fraction to the
    // 52-bit width of a double's fraction (10 + 42 and 23 + 29 bits).
    if (IsFloat16<T>()) {
      exp = Float16Exp(op);
      fraction = Float16Mantissa(op);
      fraction <<= 42;
    } else if (IsFloat32<T>()) {
      exp = FloatExp(op);
      fraction = FloatMantissa(op);
      fraction <<= 29;
    } else {
      VIXL_ASSERT(IsFloat64<T>());
      exp = DoubleExp(op);
      fraction = DoubleMantissa(op);
    }

    // A zero exponent field: shift the fraction left until its top bit (bit
    // 51) is set, decrementing the exponent for each step, then shift that
    // bit out.
    if (exp == 0) {
      while (Bits(fraction, 51, 51) == 0) {
        fraction = Bits(fraction, 50, 0) << 1;
        exp -= 1;
      }
      fraction = Bits(fraction, 50, 0) << 1;
    }

    // Build the scaled operand from the top eight fraction bits. The exponent
    // used (1022 or 1021) depends on the parity of the operand's exponent.
    double scaled;
    if (Bits(exp, 0, 0) == 0) {
      scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
    } else {
      scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
    }

    // Derive the result exponent using a per-format constant.
    if (IsFloat16<T>()) {
      result_exp = (44 - exp) / 2;
    } else if (IsFloat32<T>()) {
      result_exp = (380 - exp) / 2;
    } else {
      VIXL_ASSERT(IsFloat64<T>());
      result_exp = (3068 - exp) / 2;
    }

    uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));

    // Repack using the top fraction bits of the estimate that fit the result
    // format. The result sign is always positive.
    if (IsFloat16<T>()) {
      uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
      uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
      return Float16Pack(0, exp_bits, est_bits);
    } else if (IsFloat32<T>()) {
      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
      return FloatPack(0, exp_bits, est_bits);
    } else {
      VIXL_ASSERT(IsFloat64<T>());
      return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
    }
  }
}
6180
6181
6182 LogicVRegister Simulator::frsqrte(VectorFormat vform,
6183 LogicVRegister dst,
6184 const LogicVRegister& src) {
6185 dst.ClearForWrite(vform);
6186 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6187 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6188 SimFloat16 input = src.Float<SimFloat16>(i);
6189 dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
6190 }
6191 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6192 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6193 float input = src.Float<float>(i);
6194 dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
6195 }
6196 } else {
6197 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6198 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6199 double input = src.Float<double>(i);
6200 dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
6201 }
6202 }
6203 return dst;
6204 }
6205
6206 template <typename T>
6207 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6208 uint32_t sign;
6209
6210 if (IsFloat16<T>()) {
6211 sign = Float16Sign(op);
6212 } else if (IsFloat32<T>()) {
6213 sign = FloatSign(op);
6214 } else {
6215 VIXL_ASSERT(IsFloat64<T>());
6216 sign = DoubleSign(op);
6217 }
6218
6219 if (IsNaN(op)) {
6220 return FPProcessNaN(op);
6221 } else if (IsInf(op)) {
6222 return (sign == 1) ? -0.0 : 0.0;
6223 } else if (op == 0.0) {
6224 FPProcessException(); // FPExc_DivideByZero exception.
6225 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6226 } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6227 (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6228 (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6229 bool overflow_to_inf = false;
6230 switch (rounding) {
6231 case FPTieEven:
6232 overflow_to_inf = true;
6233 break;
6234 case FPPositiveInfinity:
6235 overflow_to_inf = (sign == 0);
6236 break;
6237 case FPNegativeInfinity:
6238 overflow_to_inf = (sign == 1);
6239 break;
6240 case FPZero:
6241 overflow_to_inf = false;
6242 break;
6243 default:
6244 break;
6245 }
6246 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
6247 if (overflow_to_inf) {
6248 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6249 } else {
6250 // Return FPMaxNormal(sign).
6251 if (IsFloat16<T>()) {
6252 return Float16Pack(sign, 0x1f, 0x3ff);
6253 } else if (IsFloat32<T>()) {
6254 return FloatPack(sign, 0xfe, 0x07fffff);
6255 } else {
6256 VIXL_ASSERT(IsFloat64<T>());
6257 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6258 }
6259 }
6260 } else {
6261 uint64_t fraction;
6262 int exp, result_exp;
6263
6264 if (IsFloat16<T>()) {
6265 sign = Float16Sign(op);
6266 exp = Float16Exp(op);
6267 fraction = Float16Mantissa(op);
6268 fraction <<= 42;
6269 } else if (IsFloat32<T>()) {
6270 sign = FloatSign(op);
6271 exp = FloatExp(op);
6272 fraction = FloatMantissa(op);
6273 fraction <<= 29;
6274 } else {
6275 VIXL_ASSERT(IsFloat64<T>());
6276 sign = DoubleSign(op);
6277 exp = DoubleExp(op);
6278 fraction = DoubleMantissa(op);
6279 }
6280
6281 if (exp == 0) {
6282 if (Bits(fraction, 51, 51) == 0) {
6283 exp -= 1;
6284 fraction = Bits(fraction, 49, 0) << 2;
6285 } else {
6286 fraction = Bits(fraction, 50, 0) << 1;
6287 }
6288 }
6289
6290 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6291
6292 if (IsFloat16<T>()) {
6293 result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
6294 } else if (IsFloat32<T>()) {
6295 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
6296 } else {
6297 VIXL_ASSERT(IsFloat64<T>());
6298 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
6299 }
6300
6301 double estimate = recip_estimate(scaled);
6302
6303 fraction = DoubleMantissa(estimate);
6304 if (result_exp == 0) {
6305 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6306 } else if (result_exp == -1) {
6307 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6308 result_exp = 0;
6309 }
6310 if (IsFloat16<T>()) {
6311 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6312 uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6313 return Float16Pack(sign, exp_bits, frac_bits);
6314 } else if (IsFloat32<T>()) {
6315 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6316 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6317 return FloatPack(sign, exp_bits, frac_bits);
6318 } else {
6319 VIXL_ASSERT(IsFloat64<T>());
6320 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6321 }
6322 }
6323 }
6324
6325
6326 LogicVRegister Simulator::frecpe(VectorFormat vform,
6327 LogicVRegister dst,
6328 const LogicVRegister& src,
6329 FPRounding round) {
6330 dst.ClearForWrite(vform);
6331 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6332 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6333 SimFloat16 input = src.Float<SimFloat16>(i);
6334 dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6335 }
6336 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6337 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6338 float input = src.Float<float>(i);
6339 dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6340 }
6341 } else {
6342 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6343 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6344 double input = src.Float<double>(i);
6345 dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6346 }
6347 }
6348 return dst;
6349 }
6350
6351
6352 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6353 LogicVRegister dst,
6354 const LogicVRegister& src) {
6355 dst.ClearForWrite(vform);
6356 uint64_t operand;
6357 uint32_t result;
6358 double dp_operand, dp_result;
6359 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6360 operand = src.Uint(vform, i);
6361 if (operand <= 0x3FFFFFFF) {
6362 result = 0xFFFFFFFF;
6363 } else {
6364 dp_operand = operand * std::pow(2.0, -32);
6365 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6366 result = static_cast<uint32_t>(dp_result);
6367 }
6368 dst.SetUint(vform, i, result);
6369 }
6370 return dst;
6371 }
6372
6373
6374 // Based on reference C function recip_estimate from ARM ARM.
6375 double Simulator::recip_estimate(double a) {
6376 int q, s;
6377 double r;
6378 q = static_cast<int>(a * 512.0);
6379 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6380 s = static_cast<int>(256.0 * r + 0.5);
6381 return static_cast<double>(s) / 256.0;
6382 }
6383
6384
6385 LogicVRegister Simulator::urecpe(VectorFormat vform,
6386 LogicVRegister dst,
6387 const LogicVRegister& src) {
6388 dst.ClearForWrite(vform);
6389 uint64_t operand;
6390 uint32_t result;
6391 double dp_operand, dp_result;
6392 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6393 operand = src.Uint(vform, i);
6394 if (operand <= 0x7FFFFFFF) {
6395 result = 0xFFFFFFFF;
6396 } else {
6397 dp_operand = operand * std::pow(2.0, -32);
6398 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6399 result = static_cast<uint32_t>(dp_result);
6400 }
6401 dst.SetUint(vform, i, result);
6402 }
6403 return dst;
6404 }
6405
// Clear the predicate register `dst` and return it.
LogicPRegister Simulator::pfalse(LogicPRegister dst) {
  dst.Clear();
  return dst;
}
6410
6411 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6412 const LogicPRegister& pg,
6413 const LogicPRegister& src) {
6414 int first_pg = GetFirstActive(kFormatVnB, pg);
6415 VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6416 mov(dst, src);
6417 if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6418 return dst;
6419 }
6420
6421 LogicPRegister Simulator::ptrue(VectorFormat vform,
6422 LogicPRegister dst,
6423 int pattern) {
6424 int count = GetPredicateConstraintLaneCount(vform, pattern);
6425 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6426 dst.SetActive(vform, i, i < count);
6427 }
6428 return dst;
6429 }
6430
6431 LogicPRegister Simulator::pnext(VectorFormat vform,
6432 LogicPRegister dst,
6433 const LogicPRegister& pg,
6434 const LogicPRegister& src) {
6435 int next = GetLastActive(vform, src) + 1;
6436 while (next < LaneCountFromFormat(vform)) {
6437 if (pg.IsActive(vform, next)) break;
6438 next++;
6439 }
6440
6441 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6442 dst.SetActive(vform, i, (i == next));
6443 }
6444 return dst;
6445 }
6446
6447 template <typename T>
6448 LogicVRegister Simulator::frecpx(VectorFormat vform,
6449 LogicVRegister dst,
6450 const LogicVRegister& src) {
6451 dst.ClearForWrite(vform);
6452 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6453 T op = src.Float<T>(i);
6454 T result;
6455 if (IsNaN(op)) {
6456 result = FPProcessNaN(op);
6457 } else {
6458 int exp;
6459 uint32_t sign;
6460 if (IsFloat16<T>()) {
6461 sign = Float16Sign(op);
6462 exp = Float16Exp(op);
6463 exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6464 result = Float16Pack(sign, exp, 0);
6465 } else if (IsFloat32<T>()) {
6466 sign = FloatSign(op);
6467 exp = FloatExp(op);
6468 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6469 result = FloatPack(sign, exp, 0);
6470 } else {
6471 VIXL_ASSERT(IsFloat64<T>());
6472 sign = DoubleSign(op);
6473 exp = DoubleExp(op);
6474 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6475 result = DoublePack(sign, exp, 0);
6476 }
6477 }
6478 dst.SetFloat(i, result);
6479 }
6480 return dst;
6481 }
6482
6483
6484 LogicVRegister Simulator::frecpx(VectorFormat vform,
6485 LogicVRegister dst,
6486 const LogicVRegister& src) {
6487 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6488 frecpx<SimFloat16>(vform, dst, src);
6489 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6490 frecpx<float>(vform, dst, src);
6491 } else {
6492 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6493 frecpx<double>(vform, dst, src);
6494 }
6495 return dst;
6496 }
6497
6498 LogicVRegister Simulator::flogb(VectorFormat vform,
6499 LogicVRegister dst,
6500 const LogicVRegister& src) {
6501 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6502 double op = 0.0;
6503 switch (vform) {
6504 case kFormatVnH:
6505 op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
6506 break;
6507 case kFormatVnS:
6508 op = src.Float<float>(i);
6509 break;
6510 case kFormatVnD:
6511 op = src.Float<double>(i);
6512 break;
6513 default:
6514 VIXL_UNREACHABLE();
6515 }
6516
6517 switch (std::fpclassify(op)) {
6518 case FP_INFINITE:
6519 dst.SetInt(vform, i, MaxIntFromFormat(vform));
6520 break;
6521 case FP_NAN:
6522 case FP_ZERO:
6523 dst.SetInt(vform, i, MinIntFromFormat(vform));
6524 break;
6525 case FP_SUBNORMAL: {
6526 // DoubleMantissa returns the mantissa of its input, leaving 12 zero
6527 // bits where the sign and exponent would be. We subtract 12 to
6528 // find the number of leading zero bits in the mantissa itself.
6529 int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
6530 // Log2 of a subnormal is the lowest exponent a normal number can
6531 // represent, together with the zeros in the mantissa.
6532 dst.SetInt(vform, i, -1023 - mant_zero_count);
6533 break;
6534 }
6535 case FP_NORMAL:
6536 // Log2 of a normal number is the exponent minus the bias.
6537 dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
6538 break;
6539 }
6540 }
6541 return dst;
6542 }
6543
6544 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6545 LogicVRegister dst,
6546 const LogicVRegister& src1,
6547 const LogicVRegister& src2) {
6548 SimVRegister maybe_neg_src1;
6549
6550 // The bottom bit of src2 controls the sign of the result. Use it to
6551 // conditionally invert the sign of one `fmul` operand.
6552 shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6553 eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6554
6555 // Multiply src1 by the modified neg_src1, which is potentially its negation.
6556 // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6557 // rather than neg_src1, must be the first source argument.
6558 fmul(vform, dst, src1, maybe_neg_src1);
6559
6560 return dst;
6561 }
6562
6563 LogicVRegister Simulator::ftssel(VectorFormat vform,
6564 LogicVRegister dst,
6565 const LogicVRegister& src1,
6566 const LogicVRegister& src2) {
6567 unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6568 uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6569 uint64_t one;
6570
6571 if (lane_bits == kHRegSize) {
6572 one = Float16ToRawbits(Float16(1.0));
6573 } else if (lane_bits == kSRegSize) {
6574 one = FloatToRawbits(1.0);
6575 } else {
6576 VIXL_ASSERT(lane_bits == kDRegSize);
6577 one = DoubleToRawbits(1.0);
6578 }
6579
6580 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6581 // Use integer accessors for this operation, as this is a data manipulation
6582 // task requiring no calculation.
6583 uint64_t op = src1.Uint(vform, i);
6584
6585 // Only the bottom two bits of the src2 register are significant, indicating
6586 // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6587 // determines the sign of the value written to dst.
6588 uint64_t q = src2.Uint(vform, i);
6589 if ((q & 1) == 1) op = one;
6590 if ((q & 2) == 2) op ^= sign_bit;
6591
6592 dst.SetUint(vform, i, op);
6593 }
6594
6595 return dst;
6596 }
6597
6598 template <typename T>
6599 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6600 LogicVRegister dst,
6601 const LogicVRegister& src1,
6602 const LogicVRegister& src2,
6603 uint64_t coeff_pos,
6604 uint64_t coeff_neg) {
6605 SimVRegister zero;
6606 dup_immediate(kFormatVnB, zero, 0);
6607
6608 SimVRegister cf;
6609 SimVRegister cfn;
6610 dup_immediate(vform, cf, coeff_pos);
6611 dup_immediate(vform, cfn, coeff_neg);
6612
6613 // The specification requires testing the top bit of the raw value, rather
6614 // than the sign of the floating point number, so use an integer comparison
6615 // here.
6616 SimPRegister is_neg;
6617 SVEIntCompareVectorsHelper(lt,
6618 vform,
6619 is_neg,
6620 GetPTrue(),
6621 src2,
6622 zero,
6623 false,
6624 LeaveFlags);
6625 mov_merging(vform, cf, is_neg, cfn);
6626
6627 SimVRegister temp;
6628 fabs_<T>(vform, temp, src2);
6629 fmla<T>(vform, cf, cf, src1, temp);
6630 mov(vform, dst, cf);
6631 return dst;
6632 }
6633
6634
6635 LogicVRegister Simulator::ftmad(VectorFormat vform,
6636 LogicVRegister dst,
6637 const LogicVRegister& src1,
6638 const LogicVRegister& src2,
6639 unsigned index) {
6640 static const uint64_t ftmad_coeff16[] = {0x3c00,
6641 0xb155,
6642 0x2030,
6643 0x0000,
6644 0x0000,
6645 0x0000,
6646 0x0000,
6647 0x0000,
6648 0x3c00,
6649 0xb800,
6650 0x293a,
6651 0x0000,
6652 0x0000,
6653 0x0000,
6654 0x0000,
6655 0x0000};
6656
6657 static const uint64_t ftmad_coeff32[] = {0x3f800000,
6658 0xbe2aaaab,
6659 0x3c088886,
6660 0xb95008b9,
6661 0x36369d6d,
6662 0x00000000,
6663 0x00000000,
6664 0x00000000,
6665 0x3f800000,
6666 0xbf000000,
6667 0x3d2aaaa6,
6668 0xbab60705,
6669 0x37cd37cc,
6670 0x00000000,
6671 0x00000000,
6672 0x00000000};
6673
6674 static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6675 0xbfc5555555555543,
6676 0x3f8111111110f30c,
6677 0xbf2a01a019b92fc6,
6678 0x3ec71de351f3d22b,
6679 0xbe5ae5e2b60f7b91,
6680 0x3de5d8408868552f,
6681 0x0000000000000000,
6682 0x3ff0000000000000,
6683 0xbfe0000000000000,
6684 0x3fa5555555555536,
6685 0xbf56c16c16c13a0b,
6686 0x3efa01a019b1e8d8,
6687 0xbe927e4f7282f468,
6688 0x3e21ee96d2641b13,
6689 0xbda8f76380fbb401};
6690 VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6691 VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6692 VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6693
6694 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6695 FTMaddHelper<SimFloat16>(vform,
6696 dst,
6697 src1,
6698 src2,
6699 ftmad_coeff16[index],
6700 ftmad_coeff16[index + 8]);
6701 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6702 FTMaddHelper<float>(vform,
6703 dst,
6704 src1,
6705 src2,
6706 ftmad_coeff32[index],
6707 ftmad_coeff32[index + 8]);
6708 } else {
6709 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6710 FTMaddHelper<double>(vform,
6711 dst,
6712 src1,
6713 src2,
6714 ftmad_coeff64[index],
6715 ftmad_coeff64[index + 8]);
6716 }
6717 return dst;
6718 }
6719
6720 LogicVRegister Simulator::fexpa(VectorFormat vform,
6721 LogicVRegister dst,
6722 const LogicVRegister& src) {
6723 static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6724 0x005d, 0x0075, 0x008e, 0x00a8,
6725 0x00c2, 0x00dc, 0x00f8, 0x0114,
6726 0x0130, 0x014d, 0x016b, 0x0189,
6727 0x01a8, 0x01c8, 0x01e8, 0x0209,
6728 0x022b, 0x024e, 0x0271, 0x0295,
6729 0x02ba, 0x02e0, 0x0306, 0x032e,
6730 0x0356, 0x037f, 0x03a9, 0x03d4};
6731
6732 static const uint64_t fexpa_coeff32[] =
6733 {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6734 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6735 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6736 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6737 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6738 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6739 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6740 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6741 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6742 0x7d3e0c};
6743
6744 static const uint64_t fexpa_coeff64[] =
6745 {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6746 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6747 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6748 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6749 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6750 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6751 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6752 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6753 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6754 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6755 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6756 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6757 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6758 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6759 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6760 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6761
6762 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6763 int index_highbit = 5;
6764 int op_highbit, op_shift;
6765 const uint64_t* fexpa_coeff;
6766
6767 if (lane_size == kHRegSize) {
6768 index_highbit = 4;
6769 VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6770 fexpa_coeff = fexpa_coeff16;
6771 op_highbit = 9;
6772 op_shift = 10;
6773 } else if (lane_size == kSRegSize) {
6774 VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6775 fexpa_coeff = fexpa_coeff32;
6776 op_highbit = 13;
6777 op_shift = 23;
6778 } else {
6779 VIXL_ASSERT(lane_size == kDRegSize);
6780 VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6781 fexpa_coeff = fexpa_coeff64;
6782 op_highbit = 16;
6783 op_shift = 52;
6784 }
6785
6786 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6787 uint64_t op = src.Uint(vform, i);
6788 uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6789 result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6790 dst.SetUint(vform, i, result);
6791 }
6792 return dst;
6793 }
6794
6795 template <typename T>
6796 LogicVRegister Simulator::fscale(VectorFormat vform,
6797 LogicVRegister dst,
6798 const LogicVRegister& src1,
6799 const LogicVRegister& src2) {
6800 T two = T(2.0);
6801 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6802 T src1_val = src1.Float<T>(i);
6803 if (!IsNaN(src1_val)) {
6804 int64_t scale = src2.Int(vform, i);
6805 // TODO: this is a low-performance implementation, but it's simple and
6806 // less likely to be buggy. Consider replacing it with something faster.
6807
6808 // Scales outside of these bounds become infinity or zero, so there's no
6809 // point iterating further.
6810 scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6811
6812 // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6813 // decrement scale until it's zero.
6814 while (scale-- > 0) {
6815 src1_val = FPMul(src1_val, two);
6816 }
6817
6818 // If scale is negative, divide by two and increment scale until it's
6819 // zero. Initially, scale is (src2 - 1), so we pre-increment.
6820 while (++scale < 0) {
6821 src1_val = FPDiv(src1_val, two);
6822 }
6823 }
6824 dst.SetFloat<T>(i, src1_val);
6825 }
6826 return dst;
6827 }
6828
6829 LogicVRegister Simulator::fscale(VectorFormat vform,
6830 LogicVRegister dst,
6831 const LogicVRegister& src1,
6832 const LogicVRegister& src2) {
6833 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6834 fscale<SimFloat16>(vform, dst, src1, src2);
6835 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6836 fscale<float>(vform, dst, src1, src2);
6837 } else {
6838 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6839 fscale<double>(vform, dst, src1, src2);
6840 }
6841 return dst;
6842 }
6843
6844 LogicVRegister Simulator::scvtf(VectorFormat vform,
6845 unsigned dst_data_size_in_bits,
6846 unsigned src_data_size_in_bits,
6847 LogicVRegister dst,
6848 const LogicPRegister& pg,
6849 const LogicVRegister& src,
6850 FPRounding round,
6851 int fbits) {
6852 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6853 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6854 dst.ClearForWrite(vform);
6855
6856 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6857 if (!pg.IsActive(vform, i)) continue;
6858
6859 int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6860 0,
6861 src.Uint(vform, i));
6862
6863 switch (dst_data_size_in_bits) {
6864 case kHRegSize: {
6865 SimFloat16 result = FixedToFloat16(value, fbits, round);
6866 dst.SetUint(vform, i, Float16ToRawbits(result));
6867 break;
6868 }
6869 case kSRegSize: {
6870 float result = FixedToFloat(value, fbits, round);
6871 dst.SetUint(vform, i, FloatToRawbits(result));
6872 break;
6873 }
6874 case kDRegSize: {
6875 double result = FixedToDouble(value, fbits, round);
6876 dst.SetUint(vform, i, DoubleToRawbits(result));
6877 break;
6878 }
6879 default:
6880 VIXL_UNIMPLEMENTED();
6881 break;
6882 }
6883 }
6884
6885 return dst;
6886 }
6887
6888 LogicVRegister Simulator::scvtf(VectorFormat vform,
6889 LogicVRegister dst,
6890 const LogicVRegister& src,
6891 int fbits,
6892 FPRounding round) {
6893 return scvtf(vform,
6894 LaneSizeInBitsFromFormat(vform),
6895 LaneSizeInBitsFromFormat(vform),
6896 dst,
6897 GetPTrue(),
6898 src,
6899 round,
6900 fbits);
6901 }
6902
6903 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6904 unsigned dst_data_size_in_bits,
6905 unsigned src_data_size_in_bits,
6906 LogicVRegister dst,
6907 const LogicPRegister& pg,
6908 const LogicVRegister& src,
6909 FPRounding round,
6910 int fbits) {
6911 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6912 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6913 dst.ClearForWrite(vform);
6914
6915 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6916 if (!pg.IsActive(vform, i)) continue;
6917
6918 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6919 0,
6920 src.Uint(vform, i));
6921
6922 switch (dst_data_size_in_bits) {
6923 case kHRegSize: {
6924 SimFloat16 result = UFixedToFloat16(value, fbits, round);
6925 dst.SetUint(vform, i, Float16ToRawbits(result));
6926 break;
6927 }
6928 case kSRegSize: {
6929 float result = UFixedToFloat(value, fbits, round);
6930 dst.SetUint(vform, i, FloatToRawbits(result));
6931 break;
6932 }
6933 case kDRegSize: {
6934 double result = UFixedToDouble(value, fbits, round);
6935 dst.SetUint(vform, i, DoubleToRawbits(result));
6936 break;
6937 }
6938 default:
6939 VIXL_UNIMPLEMENTED();
6940 break;
6941 }
6942 }
6943
6944 return dst;
6945 }
6946
6947 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6948 LogicVRegister dst,
6949 const LogicVRegister& src,
6950 int fbits,
6951 FPRounding round) {
6952 return ucvtf(vform,
6953 LaneSizeInBitsFromFormat(vform),
6954 LaneSizeInBitsFromFormat(vform),
6955 dst,
6956 GetPTrue(),
6957 src,
6958 round,
6959 fbits);
6960 }
6961
6962 LogicVRegister Simulator::unpk(VectorFormat vform,
6963 LogicVRegister dst,
6964 const LogicVRegister& src,
6965 UnpackType unpack_type,
6966 ExtendType extend_type) {
6967 VectorFormat vform_half = VectorFormatHalfWidth(vform);
6968 const int lane_count = LaneCountFromFormat(vform);
6969 const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6970
6971 switch (extend_type) {
6972 case kSignedExtend: {
6973 int64_t result[kZRegMaxSizeInBytes];
6974 for (int i = 0; i < lane_count; ++i) {
6975 result[i] = src.Int(vform_half, i + src_start_lane);
6976 }
6977 for (int i = 0; i < lane_count; ++i) {
6978 dst.SetInt(vform, i, result[i]);
6979 }
6980 break;
6981 }
6982 case kUnsignedExtend: {
6983 uint64_t result[kZRegMaxSizeInBytes];
6984 for (int i = 0; i < lane_count; ++i) {
6985 result[i] = src.Uint(vform_half, i + src_start_lane);
6986 }
6987 for (int i = 0; i < lane_count; ++i) {
6988 dst.SetUint(vform, i, result[i]);
6989 }
6990 break;
6991 }
6992 default:
6993 VIXL_UNREACHABLE();
6994 }
6995 return dst;
6996 }
6997
6998 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
6999 VectorFormat vform,
7000 LogicPRegister dst,
7001 const LogicPRegister& mask,
7002 const LogicVRegister& src1,
7003 const LogicVRegister& src2,
7004 bool is_wide_elements,
7005 FlagsUpdate flags) {
7006 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7007 bool result = false;
7008 if (mask.IsActive(vform, lane)) {
7009 int64_t op1 = 0xbadbeef;
7010 int64_t op2 = 0xbadbeef;
7011 int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
7012 switch (cond) {
7013 case eq:
7014 case ge:
7015 case gt:
7016 case lt:
7017 case le:
7018 case ne:
7019 op1 = src1.Int(vform, lane);
7020 op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
7021 : src2.Int(vform, lane);
7022 break;
7023 case hi:
7024 case hs:
7025 case ls:
7026 case lo:
7027 op1 = src1.Uint(vform, lane);
7028 op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
7029 : src2.Uint(vform, lane);
7030 break;
7031 default:
7032 VIXL_UNREACHABLE();
7033 }
7034
7035 switch (cond) {
7036 case eq:
7037 result = (op1 == op2);
7038 break;
7039 case ne:
7040 result = (op1 != op2);
7041 break;
7042 case ge:
7043 result = (op1 >= op2);
7044 break;
7045 case gt:
7046 result = (op1 > op2);
7047 break;
7048 case le:
7049 result = (op1 <= op2);
7050 break;
7051 case lt:
7052 result = (op1 < op2);
7053 break;
7054 case hs:
7055 result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
7056 break;
7057 case hi:
7058 result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
7059 break;
7060 case ls:
7061 result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
7062 break;
7063 case lo:
7064 result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
7065 break;
7066 default:
7067 VIXL_UNREACHABLE();
7068 }
7069 }
7070 dst.SetActive(vform, lane, result);
7071 }
7072
7073 if (flags == SetFlags) PredTest(vform, mask, dst);
7074
7075 return dst;
7076 }
7077
7078 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
7079 VectorFormat vform,
7080 LogicVRegister dst,
7081 const LogicVRegister& src1,
7082 const LogicVRegister& src2,
7083 bool is_wide_elements) {
7084 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
7085 VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
7086
7087 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7088 int shift_src_lane = lane;
7089 if (is_wide_elements) {
7090 // If the shift amount comes from wide elements, select the D-sized lane
7091 // which occupies the corresponding lanes of the value to be shifted.
7092 shift_src_lane = (lane * lane_size) / kDRegSize;
7093 }
7094 uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
7095
7096 // Saturate shift_amount to the size of the lane that will be shifted.
7097 if (shift_amount > lane_size) shift_amount = lane_size;
7098
7099 uint64_t value = src1.Uint(vform, lane);
7100 int64_t result = ShiftOperand(lane_size,
7101 value,
7102 shift_op,
7103 static_cast<unsigned>(shift_amount));
7104 dst.SetUint(vform, lane, result);
7105 }
7106
7107 return dst;
7108 }
7109
7110 LogicVRegister Simulator::asrd(VectorFormat vform,
7111 LogicVRegister dst,
7112 const LogicVRegister& src1,
7113 int shift) {
7114 VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
7115 LaneSizeInBitsFromFormat(vform)));
7116
7117 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7118 int64_t value = src1.Int(vform, i);
7119 if (shift <= 63) {
7120 if (value < 0) {
7121 // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
7122 // cast to int64_t, and cannot cause signed overflow in the result.
7123 value = value + GetUintMask(shift);
7124 }
7125 value = ShiftOperand(kDRegSize, value, ASR, shift);
7126 } else {
7127 value = 0;
7128 }
7129 dst.SetInt(vform, i, value);
7130 }
7131 return dst;
7132 }
7133
7134 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
7135 LogicalOp logical_op,
7136 VectorFormat vform,
7137 LogicVRegister zd,
7138 const LogicVRegister& zn,
7139 const LogicVRegister& zm) {
7140 VIXL_ASSERT(IsSVEFormat(vform));
7141 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7142 uint64_t op1 = zn.Uint(vform, i);
7143 uint64_t op2 = zm.Uint(vform, i);
7144 uint64_t result = 0;
7145 switch (logical_op) {
7146 case AND:
7147 result = op1 & op2;
7148 break;
7149 case BIC:
7150 result = op1 & ~op2;
7151 break;
7152 case EOR:
7153 result = op1 ^ op2;
7154 break;
7155 case ORR:
7156 result = op1 | op2;
7157 break;
7158 default:
7159 VIXL_UNIMPLEMENTED();
7160 }
7161 zd.SetUint(vform, i, result);
7162 }
7163
7164 return zd;
7165 }
7166
7167 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
7168 LogicPRegister pd,
7169 const LogicPRegister& pn,
7170 const LogicPRegister& pm) {
7171 for (int i = 0; i < pn.GetChunkCount(); i++) {
7172 LogicPRegister::ChunkType op1 = pn.GetChunk(i);
7173 LogicPRegister::ChunkType op2 = pm.GetChunk(i);
7174 LogicPRegister::ChunkType result = 0;
7175 switch (op) {
7176 case ANDS_p_p_pp_z:
7177 case AND_p_p_pp_z:
7178 result = op1 & op2;
7179 break;
7180 case BICS_p_p_pp_z:
7181 case BIC_p_p_pp_z:
7182 result = op1 & ~op2;
7183 break;
7184 case EORS_p_p_pp_z:
7185 case EOR_p_p_pp_z:
7186 result = op1 ^ op2;
7187 break;
7188 case NANDS_p_p_pp_z:
7189 case NAND_p_p_pp_z:
7190 result = ~(op1 & op2);
7191 break;
7192 case NORS_p_p_pp_z:
7193 case NOR_p_p_pp_z:
7194 result = ~(op1 | op2);
7195 break;
7196 case ORNS_p_p_pp_z:
7197 case ORN_p_p_pp_z:
7198 result = op1 | ~op2;
7199 break;
7200 case ORRS_p_p_pp_z:
7201 case ORR_p_p_pp_z:
7202 result = op1 | op2;
7203 break;
7204 default:
7205 VIXL_UNIMPLEMENTED();
7206 }
7207 pd.SetChunk(i, result);
7208 }
7209 return pd;
7210 }
7211
7212 LogicVRegister Simulator::SVEBitwiseImmHelper(
7213 SVEBitwiseLogicalWithImm_UnpredicatedOp op,
7214 VectorFormat vform,
7215 LogicVRegister zd,
7216 uint64_t imm) {
7217 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7218 uint64_t op1 = zd.Uint(vform, i);
7219 uint64_t result = 0;
7220 switch (op) {
7221 case AND_z_zi:
7222 result = op1 & imm;
7223 break;
7224 case EOR_z_zi:
7225 result = op1 ^ imm;
7226 break;
7227 case ORR_z_zi:
7228 result = op1 | imm;
7229 break;
7230 default:
7231 VIXL_UNIMPLEMENTED();
7232 }
7233 zd.SetUint(vform, i, result);
7234 }
7235
7236 return zd;
7237 }
7238
7239 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7240 const LogicPRegister& pg,
7241 unsigned zt_code,
7242 const LogicSVEAddressVector& addr) {
7243 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7244
7245 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7246 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7247 int msize_in_bytes = addr.GetMsizeInBytes();
7248 int reg_count = addr.GetRegCount();
7249
7250 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7251 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7252
7253 unsigned zt_codes[4] = {zt_code,
7254 (zt_code + 1) % kNumberOfZRegisters,
7255 (zt_code + 2) % kNumberOfZRegisters,
7256 (zt_code + 3) % kNumberOfZRegisters};
7257
7258 LogicVRegister zt[4] = {
7259 ReadVRegister(zt_codes[0]),
7260 ReadVRegister(zt_codes[1]),
7261 ReadVRegister(zt_codes[2]),
7262 ReadVRegister(zt_codes[3]),
7263 };
7264
7265 // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7266 // are ignored, so read the source register using the VectorFormat that
7267 // corresponds with the storage format, and multiply the index accordingly.
7268 VectorFormat unpack_vform =
7269 SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7270 int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7271
7272 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7273 if (!pg.IsActive(vform, i)) continue;
7274
7275 for (int r = 0; r < reg_count; r++) {
7276 uint64_t element_address = addr.GetElementAddress(i, r);
7277 StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
7278 }
7279 }
7280
7281 if (ShouldTraceWrites()) {
7282 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7283 if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7284 // Use an FP format where it's likely that we're accessing FP data.
7285 format = GetPrintRegisterFormatTryFP(format);
7286 }
7287 // Stores don't represent a change to the source register's value, so only
7288 // print the relevant part of the value.
7289 format = GetPrintRegPartial(format);
7290
7291 PrintZStructAccess(zt_code,
7292 reg_count,
7293 pg,
7294 format,
7295 msize_in_bytes,
7296 "->",
7297 addr);
7298 }
7299 }
7300
7301 void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7302 const LogicPRegister& pg,
7303 unsigned zt_code,
7304 const LogicSVEAddressVector& addr,
7305 bool is_signed) {
7306 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7307 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7308 int msize_in_bytes = addr.GetMsizeInBytes();
7309 int reg_count = addr.GetRegCount();
7310
7311 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7312 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7313 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7314
7315 unsigned zt_codes[4] = {zt_code,
7316 (zt_code + 1) % kNumberOfZRegisters,
7317 (zt_code + 2) % kNumberOfZRegisters,
7318 (zt_code + 3) % kNumberOfZRegisters};
7319 LogicVRegister zt[4] = {
7320 ReadVRegister(zt_codes[0]),
7321 ReadVRegister(zt_codes[1]),
7322 ReadVRegister(zt_codes[2]),
7323 ReadVRegister(zt_codes[3]),
7324 };
7325
7326 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7327 for (int r = 0; r < reg_count; r++) {
7328 uint64_t element_address = addr.GetElementAddress(i, r);
7329
7330 if (!pg.IsActive(vform, i)) {
7331 zt[r].SetUint(vform, i, 0);
7332 continue;
7333 }
7334
7335 if (is_signed) {
7336 LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
7337 } else {
7338 LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
7339 }
7340 }
7341 }
7342
7343 if (ShouldTraceVRegs()) {
7344 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7345 if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7346 // Use an FP format where it's likely that we're accessing FP data.
7347 format = GetPrintRegisterFormatTryFP(format);
7348 }
7349 PrintZStructAccess(zt_code,
7350 reg_count,
7351 pg,
7352 format,
7353 msize_in_bytes,
7354 "<-",
7355 addr);
7356 }
7357 }
7358
7359 LogicPRegister Simulator::brka(LogicPRegister pd,
7360 const LogicPRegister& pg,
7361 const LogicPRegister& pn) {
7362 bool break_ = false;
7363 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7364 if (pg.IsActive(kFormatVnB, i)) {
7365 pd.SetActive(kFormatVnB, i, !break_);
7366 break_ |= pn.IsActive(kFormatVnB, i);
7367 }
7368 }
7369
7370 return pd;
7371 }
7372
7373 LogicPRegister Simulator::brkb(LogicPRegister pd,
7374 const LogicPRegister& pg,
7375 const LogicPRegister& pn) {
7376 bool break_ = false;
7377 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7378 if (pg.IsActive(kFormatVnB, i)) {
7379 break_ |= pn.IsActive(kFormatVnB, i);
7380 pd.SetActive(kFormatVnB, i, !break_);
7381 }
7382 }
7383
7384 return pd;
7385 }
7386
7387 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7388 const LogicPRegister& pg,
7389 const LogicPRegister& pn) {
7390 if (!IsLastActive(kFormatVnB, pg, pn)) {
7391 pfalse(pdm);
7392 }
7393 return pdm;
7394 }
7395
7396 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7397 const LogicPRegister& pg,
7398 const LogicPRegister& pn,
7399 const LogicPRegister& pm) {
7400 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7401
7402 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7403 bool active = false;
7404 if (pg.IsActive(kFormatVnB, i)) {
7405 active = last_active;
7406 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7407 }
7408 pd.SetActive(kFormatVnB, i, active);
7409 }
7410
7411 return pd;
7412 }
7413
7414 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7415 const LogicPRegister& pg,
7416 const LogicPRegister& pn,
7417 const LogicPRegister& pm) {
7418 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7419
7420 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7421 bool active = false;
7422 if (pg.IsActive(kFormatVnB, i)) {
7423 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7424 active = last_active;
7425 }
7426 pd.SetActive(kFormatVnB, i, active);
7427 }
7428
7429 return pd;
7430 }
7431
7432 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7433 const LogicPRegister& pg,
7434 unsigned zt_code,
7435 const LogicSVEAddressVector& addr,
7436 SVEFaultTolerantLoadType type,
7437 bool is_signed) {
7438 int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7439 int msize_in_bits = addr.GetMsizeInBits();
7440 int msize_in_bytes = addr.GetMsizeInBytes();
7441
7442 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7443 VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7444 VIXL_ASSERT(addr.GetRegCount() == 1);
7445
7446 LogicVRegister zt = ReadVRegister(zt_code);
7447 LogicPRegister ffr = ReadFFR();
7448
7449 // Non-faulting loads are allowed to fail arbitrarily. To stress user
7450 // code, fail a random element in roughly one in eight full-vector loads.
7451 uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7452 int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7453
7454 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7455 uint64_t value = 0;
7456
7457 if (pg.IsActive(vform, i)) {
7458 uint64_t element_address = addr.GetElementAddress(i, 0);
7459
7460 if (type == kSVEFirstFaultLoad) {
7461 // First-faulting loads always load the first active element, regardless
7462 // of FFR. The result will be discarded if its FFR lane is inactive, but
7463 // it could still generate a fault.
7464 value = MemReadUint(msize_in_bytes, element_address);
7465 // All subsequent elements have non-fault semantics.
7466 type = kSVENonFaultLoad;
7467
7468 } else if (ffr.IsActive(vform, i)) {
7469 // Simulation of fault-tolerant loads relies on system calls, and is
7470 // likely to be relatively slow, so we only actually perform the load if
7471 // its FFR lane is active.
7472
7473 bool can_read = (i < fake_fault_at_lane) &&
7474 CanReadMemory(element_address, msize_in_bytes);
7475 if (can_read) {
7476 value = MemReadUint(msize_in_bytes, element_address);
7477 } else {
7478 // Propagate the fault to the end of FFR.
7479 for (int j = i; j < LaneCountFromFormat(vform); j++) {
7480 ffr.SetActive(vform, j, false);
7481 }
7482 }
7483 }
7484 }
7485
7486 // The architecture permits a few possible results for inactive FFR lanes
7487 // (including those caused by a fault in this instruction). We choose to
7488 // leave the register value unchanged (like merging predication) because
7489 // no other input to this instruction can have the same behaviour.
7490 //
7491 // Note that this behaviour takes precedence over pg's zeroing predication.
7492
7493 if (ffr.IsActive(vform, i)) {
7494 int msb = msize_in_bits - 1;
7495 if (is_signed) {
7496 zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7497 } else {
7498 zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7499 }
7500 }
7501 }
7502
7503 if (ShouldTraceVRegs()) {
7504 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7505 if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7506 // Use an FP format where it's likely that we're accessing FP data.
7507 format = GetPrintRegisterFormatTryFP(format);
7508 }
7509 // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7510 // expects a single mask, so combine the two predicates.
7511 SimPRegister mask;
7512 SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7513 PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7514 }
7515 }
7516
7517 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7518 VectorFormat vform,
7519 SVEOffsetModifier mod) {
7520 bool is_signed = instr->ExtractBit(14) == 0;
7521 bool is_ff = instr->ExtractBit(13) == 1;
7522 // Note that these instructions don't use the Dtype encoding.
7523 int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7524 int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7525 uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
7526 LogicSVEAddressVector addr(base,
7527 &ReadVRegister(instr->GetRm()),
7528 vform,
7529 mod,
7530 scale);
7531 addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7532 if (is_ff) {
7533 SVEFaultTolerantLoadHelper(vform,
7534 ReadPRegister(instr->GetPgLow8()),
7535 instr->GetRt(),
7536 addr,
7537 kSVEFirstFaultLoad,
7538 is_signed);
7539 } else {
7540 SVEStructuredLoadHelper(vform,
7541 ReadPRegister(instr->GetPgLow8()),
7542 instr->GetRt(),
7543 addr,
7544 is_signed);
7545 }
7546 }
7547
7548 int Simulator::GetFirstActive(VectorFormat vform,
7549 const LogicPRegister& pg) const {
7550 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7551 if (pg.IsActive(vform, i)) return i;
7552 }
7553 return -1;
7554 }
7555
7556 int Simulator::GetLastActive(VectorFormat vform,
7557 const LogicPRegister& pg) const {
7558 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7559 if (pg.IsActive(vform, i)) return i;
7560 }
7561 return -1;
7562 }
7563
7564 int Simulator::CountActiveLanes(VectorFormat vform,
7565 const LogicPRegister& pg) const {
7566 int count = 0;
7567 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7568 count += pg.IsActive(vform, i) ? 1 : 0;
7569 }
7570 return count;
7571 }
7572
7573 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7574 const LogicPRegister& pg,
7575 const LogicPRegister& pn) const {
7576 int count = 0;
7577 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7578 count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7579 }
7580 return count;
7581 }
7582
7583 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7584 int pattern) const {
7585 VIXL_ASSERT(IsSVEFormat(vform));
7586 int all = LaneCountFromFormat(vform);
7587 VIXL_ASSERT(all > 0);
7588
7589 switch (pattern) {
7590 case SVE_VL1:
7591 case SVE_VL2:
7592 case SVE_VL3:
7593 case SVE_VL4:
7594 case SVE_VL5:
7595 case SVE_VL6:
7596 case SVE_VL7:
7597 case SVE_VL8:
7598 // VL1-VL8 are encoded directly.
7599 VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7600 VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7601 return (pattern <= all) ? pattern : 0;
7602 case SVE_VL16:
7603 case SVE_VL32:
7604 case SVE_VL64:
7605 case SVE_VL128:
7606 case SVE_VL256: {
7607 // VL16-VL256 are encoded as log2(N) + c.
7608 int min = 16 << (pattern - SVE_VL16);
7609 return (min <= all) ? min : 0;
7610 }
7611 // Special cases.
7612 case SVE_POW2:
7613 return 1 << HighestSetBitPosition(all);
7614 case SVE_MUL4:
7615 return all - (all % 4);
7616 case SVE_MUL3:
7617 return all - (all % 3);
7618 case SVE_ALL:
7619 return all;
7620 }
7621 // Unnamed cases architecturally return 0.
7622 return 0;
7623 }
7624
7625 LogicPRegister Simulator::match(VectorFormat vform,
7626 LogicPRegister dst,
7627 const LogicVRegister& haystack,
7628 const LogicVRegister& needles,
7629 bool negate_match) {
7630 SimVRegister ztemp;
7631 SimPRegister ptemp;
7632
7633 pfalse(dst);
7634 int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
7635 for (int i = 0; i < lanes_per_segment; i++) {
7636 dup_elements_to_segments(vform, ztemp, needles, i);
7637 SVEIntCompareVectorsHelper(eq,
7638 vform,
7639 ptemp,
7640 GetPTrue(),
7641 haystack,
7642 ztemp,
7643 false,
7644 LeaveFlags);
7645 SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
7646 }
7647 if (negate_match) {
7648 ptrue(vform, ptemp, SVE_ALL);
7649 SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
7650 }
7651 return dst;
7652 }
7653
7654 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7655 if (IsContiguous()) {
7656 return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7657 }
7658
7659 VIXL_ASSERT(IsScatterGather());
7660 VIXL_ASSERT(vector_ != NULL);
7661
7662 // For scatter-gather accesses, we need to extract the offset from vector_,
7663 // and apply modifiers.
7664
7665 uint64_t offset = 0;
7666 switch (vector_form_) {
7667 case kFormatVnS:
7668 offset = vector_->GetLane<uint32_t>(lane);
7669 break;
7670 case kFormatVnD:
7671 offset = vector_->GetLane<uint64_t>(lane);
7672 break;
7673 default:
7674 VIXL_UNIMPLEMENTED();
7675 break;
7676 }
7677
7678 switch (vector_mod_) {
7679 case SVE_MUL_VL:
7680 VIXL_UNIMPLEMENTED();
7681 break;
7682 case SVE_LSL:
7683 // We apply the shift below. There's nothing to do here.
7684 break;
7685 case NO_SVE_OFFSET_MODIFIER:
7686 VIXL_ASSERT(vector_shift_ == 0);
7687 break;
7688 case SVE_UXTW:
7689 offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7690 break;
7691 case SVE_SXTW:
7692 offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7693 break;
7694 }
7695
7696 return base_ + (offset << vector_shift_);
7697 }
7698
7699 LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
7700 LogicVRegister dst,
7701 const LogicVRegister& src) {
7702 SimVRegister zero;
7703 zero.Clear();
7704 return uzp2(vform, dst, src, zero);
7705 }
7706
7707 LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
7708 LogicVRegister dst,
7709 const LogicVRegister& src) {
7710 SimVRegister zero;
7711 zero.Clear();
7712 return uzp1(vform, dst, src, zero);
7713 }
7714
7715 LogicVRegister Simulator::adcl(VectorFormat vform,
7716 LogicVRegister dst,
7717 const LogicVRegister& src1,
7718 const LogicVRegister& src2,
7719 bool top) {
7720 unsigned reg_size = LaneSizeInBitsFromFormat(vform);
7721 VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
7722
7723 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
7724 uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
7725 uint64_t right = dst.Uint(vform, i);
7726 unsigned carry_in = src2.Uint(vform, i + 1) & 1;
7727 std::pair<uint64_t, uint8_t> val_and_flags =
7728 AddWithCarry(reg_size, left, right, carry_in);
7729
7730 // Set even lanes to the result of the addition.
7731 dst.SetUint(vform, i, val_and_flags.first);
7732
7733 // Set odd lanes to the carry flag from the addition.
7734 uint64_t carry_out = (val_and_flags.second >> 1) & 1;
7735 dst.SetUint(vform, i + 1, carry_out);
7736 }
7737 return dst;
7738 }
7739
7740 // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7741 // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7742 //
7743 // Matrices of the form:
7744 //
7745 // src1 = ( a b c d e f g h ) src2 = ( A B )
7746 // ( i j k l m n o p ) ( C D )
7747 // ( E F )
7748 // ( G H )
7749 // ( I J )
7750 // ( K L )
7751 // ( M N )
7752 // ( O P )
7753 //
7754 // Are stored in the input vector registers as:
7755 //
7756 // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
7757 // src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7758 // src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7759 //
7760 LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7761 LogicVRegister srcdst,
7762 const LogicVRegister& src1,
7763 const LogicVRegister& src2,
7764 bool src1_signed,
7765 bool src2_signed) {
7766 // Two destination forms are supported: Q register containing four S-sized
7767 // elements (4S) and Z register containing n S-sized elements (VnS).
7768 VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7769 VectorFormat vform_src = kFormatVnB;
7770 int b_per_segment = kQRegSize / kBRegSize;
7771 int s_per_segment = kQRegSize / kSRegSize;
7772 int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7773 int segment_count = LaneCountFromFormat(vform_dst) / 4;
7774 for (int seg = 0; seg < segment_count; seg++) {
7775 for (int i = 0; i < 2; i++) {
7776 for (int j = 0; j < 2; j++) {
7777 int dstidx = (2 * i) + j + (seg * s_per_segment);
7778 int64_t sum = srcdst.Int(vform_dst, dstidx);
7779 for (int k = 0; k < 8; k++) {
7780 int idx1 = (8 * i) + k + (seg * b_per_segment);
7781 int idx2 = (8 * j) + k + (seg * b_per_segment);
7782 int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7783 : src1.Uint(vform_src, idx1);
7784 int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7785 : src2.Uint(vform_src, idx2);
7786 sum += e1 * e2;
7787 }
7788 result[dstidx] = sum;
7789 }
7790 }
7791 }
7792 srcdst.SetIntArray(vform_dst, result);
7793 return srcdst;
7794 }
7795
7796 // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7797 // result to the matrix in srcdst, and write back to srcdst.
7798 //
7799 // Matrices of the form:
7800 //
7801 // src1 = ( a b ) src2 = ( A B )
7802 // ( c d ) ( C D )
7803 //
7804 // Are stored in the input vector registers as:
7805 //
7806 // 3 2 1 0
7807 // src1 = [ d | c | b | a ]
7808 // src2 = [ D | B | C | A ]
7809 //
7810 template <typename T>
7811 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7812 LogicVRegister srcdst,
7813 const LogicVRegister& src1,
7814 const LogicVRegister& src2) {
7815 T result[kZRegMaxSizeInBytes / sizeof(T)];
7816 int T_per_segment = 4;
7817 int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7818 for (int seg = 0; seg < segment_count; seg++) {
7819 int segoff = seg * T_per_segment;
7820 for (int i = 0; i < 2; i++) {
7821 for (int j = 0; j < 2; j++) {
7822 T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7823 src2.Float<T>(2 * j + 0 + segoff));
7824 T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7825 src2.Float<T>(2 * j + 1 + segoff));
7826 T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7827 result[2 * i + j + segoff] = FPAdd(sum, prod1);
7828 }
7829 }
7830 }
7831 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7832 // Elements outside a multiple of 4T are set to zero. This happens only
7833 // for double precision operations, when the VL is a multiple of 128 bits,
7834 // but not a multiple of 256 bits.
7835 T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7836 srcdst.SetFloat<T>(vform, i, value);
7837 }
7838 return srcdst;
7839 }
7840
7841 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7842 LogicVRegister dst,
7843 const LogicVRegister& src1,
7844 const LogicVRegister& src2) {
7845 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
7846 fmatmul<float>(vform, dst, src1, src2);
7847 } else {
7848 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
7849 fmatmul<double>(vform, dst, src1, src2);
7850 }
7851 return dst;
7852 }
7853
7854 } // namespace aarch64
7855 } // namespace vixl
7856
7857 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
7858