// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

#include "simulator-aarch64.h"

#include <cmath>
#include <cstring>
#include <errno.h>
#include <limits>
#include <sys/mman.h>
#include <unistd.h>

namespace vixl {
namespace aarch64 {

using vixl::internal::SimFloat16;

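// Reaching this address terminates the simulation; it is installed as the
// initial link register value, so returning from the outermost simulated
// frame stops the run.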
const Instruction* Simulator::kEndOfSimAddress = NULL;

bool MetaDataDepot::MetaDataMTE::is_active = false;

void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
  int width = msb - lsb + 1;
  VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits));

  bits <<= lsb;
  uint32_t mask = ((1 << width) - 1) << lsb;
  VIXL_ASSERT((mask & write_ignore_mask_) == 0);

  value_ = (value_ & ~mask) | (bits & mask);
}


SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
  switch (id) {
    case NZCV:
      return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask);
    case FPCR:
      return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask);
    default:
      VIXL_UNREACHABLE();
      return SimSystemRegister();
  }
}

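// Map instruction form hashes (the "_h" string literals) to the handlers that
// simulate them; the macro-generated default and audit visitor maps supply
// handlers for all remaining forms.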
const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
  static const FormToVisitorFnMap form_to_visitor = {
      DEFAULT_FORM_TO_VISITOR_MAP(Simulator),
      SIM_AUD_VISITOR_MAP(Simulator),
      {"smlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
      {"smlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
      {"smull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
      {"sqdmlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
      {"sqdmlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
      {"sqdmull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
      {"umlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
      {"umlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
      {"umull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
      {"fcmla_asimdelem_c_h"_h, &Simulator::SimulateNEONComplexMulByElement},
      {"fcmla_asimdelem_c_s"_h, &Simulator::SimulateNEONComplexMulByElement},
      {"fmlal2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
      {"fmlal_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
      {"fmlsl2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
      {"fmlsl_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
      {"fmla_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
      {"fmls_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
      {"fmulx_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
      {"fmul_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
      {"fmla_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
      {"fmls_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
      {"fmulx_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
      {"fmul_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
      {"sdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
      {"udot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
      {"adclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
      {"adclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
      {"addhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
      {"addhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
      {"addp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
      {"bcax_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
      {"bdep_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"bext_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"bgrp_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"bsl1n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
      {"bsl2n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
      {"bsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
      {"cadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
      {"cdot_z_zzz"_h, &Simulator::SimulateSVEComplexDotProduct},
      {"cdot_z_zzzi_d"_h, &Simulator::SimulateSVEComplexDotProduct},
      {"cdot_z_zzzi_s"_h, &Simulator::SimulateSVEComplexDotProduct},
      {"cmla_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
      {"cmla_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
      {"cmla_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
      {"eor3_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
      {"eorbt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"eortb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"ext_z_zi_con"_h, &Simulator::Simulate_ZdB_Zn1B_Zn2B_imm},
      {"faddp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
      {"fcvtlt_z_p_z_h2s"_h, &Simulator::SimulateSVEFPConvertLong},
      {"fcvtlt_z_p_z_s2d"_h, &Simulator::SimulateSVEFPConvertLong},
      {"fcvtnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
      {"fcvtnt_z_p_z_s2h"_h, &Simulator::Simulate_ZdH_PgM_ZnS},
      {"fcvtx_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
      {"fcvtxnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
      {"flogb_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
      {"fmaxnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
      {"fmaxp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
      {"fminnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
      {"fminp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
      {"fmlalb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
      {"fmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
      {"fmlalt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
      {"fmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
      {"fmlslb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
      {"fmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
      {"fmlslt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
      {"fmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
      {"histcnt_z_p_zz"_h, &Simulator::Simulate_ZdT_PgZ_ZnT_ZmT},
      {"histseg_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
      {"ldnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
      {"ldnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
      {"ldnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
      {"ldnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
      {"ldnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
      {"ldnt1sb_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
      {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
      {"ldnt1sh_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
      {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
      {"ldnt1sw_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
      {"ldnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
      {"ldnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
      {"match_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
      {"mla_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
      {"mla_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
      {"mla_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
      {"mls_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
      {"mls_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
      {"mls_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
      {"mul_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"mul_z_zzi_d"_h, &Simulator::SimulateSVEMulIndex},
      {"mul_z_zzi_h"_h, &Simulator::SimulateSVEMulIndex},
      {"mul_z_zzi_s"_h, &Simulator::SimulateSVEMulIndex},
      {"nbsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
      {"nmatch_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
      {"pmul_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
      {"pmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
      {"pmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
      {"raddhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
      {"raddhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
      {"rshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"rshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"rsubhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
      {"rsubhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
      {"saba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
      {"sabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"sabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"sabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"sabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"sadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
      {"saddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"saddlbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"saddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"saddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
      {"saddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
      {"sbclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
      {"sbclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
      {"shadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
      {"shrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"shrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"shsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
      {"shsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
      {"sli_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
      {"smaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
      {"sminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
      {"smlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"smlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"smlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"smlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"smlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"smullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
      {"smullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
      {"smullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"smullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"splice_z_p_zz_con"_h, &Simulator::VisitSVEVectorSplice},
      {"sqabs_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
      {"sqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
      {"sqcadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
      {"sqdmlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"sqdmlalb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
      {"sqdmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
      {"sqdmlalbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"sqdmlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"sqdmlalt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
      {"sqdmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
      {"sqdmlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"sqdmlslb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
      {"sqdmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
      {"sqdmlslbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"sqdmlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"sqdmlslt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
      {"sqdmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
      {"sqdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"sqdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
      {"sqdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
      {"sqdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
      {"sqdmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
      {"sqdmullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"sqdmullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"sqdmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
      {"sqdmullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"sqdmullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"sqneg_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
      {"sqrdcmlah_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
      {"sqrdcmlah_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
      {"sqrdcmlah_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
      {"sqrdmlah_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
      {"sqrdmlah_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
      {"sqrdmlah_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
      {"sqrdmlah_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
      {"sqrdmlsh_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
      {"sqrdmlsh_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
      {"sqrdmlsh_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
      {"sqrdmlsh_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
      {"sqrdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"sqrdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
      {"sqrdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
      {"sqrdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
      {"sqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"sqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"sqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"sqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"sqrshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"sqrshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"sqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
      {"sqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"sqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"sqshlu_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
      {"sqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"sqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"sqshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"sqshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"sqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
      {"sqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
      {"sqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
      {"sqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
      {"sqxtunb_z_zz"_h, &Simulator::SimulateSVENarrow},
      {"sqxtunt_z_zz"_h, &Simulator::SimulateSVENarrow},
      {"srhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
      {"sri_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
      {"srshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"srshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"srshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
      {"srsra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
      {"sshllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
      {"sshllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
      {"ssra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
      {"ssublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"ssublbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"ssublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"ssubltb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"ssubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
      {"ssubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
      {"stnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
      {"stnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
      {"stnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
      {"stnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
      {"stnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
      {"stnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
      {"stnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
      {"subhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
      {"subhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
      {"suqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
      {"tbl_z_zz_2"_h, &Simulator::VisitSVETableLookup},
      {"tbx_z_zz"_h, &Simulator::VisitSVETableLookup},
      {"uaba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
      {"uabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"uabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"uabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"uabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"uadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
      {"uaddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"uaddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"uaddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
      {"uaddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
      {"uhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
      {"uhsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
      {"uhsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
      {"umaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
      {"uminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
      {"umlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"umlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"umlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"umlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
      {"umlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
      {"umullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
      {"umullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
      {"umullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"umullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
      {"uqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
      {"uqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"uqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"uqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"uqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"uqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
      {"uqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"uqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"uqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"uqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
      {"uqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
      {"uqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
      {"uqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
      {"uqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
      {"urecpe_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
      {"urhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
      {"urshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"urshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
      {"urshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
      {"ursqrte_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
      {"ursra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
      {"ushllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
      {"ushllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
      {"usqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
      {"usra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
      {"usublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"usublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
      {"usubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
      {"usubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
      {"whilege_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
      {"whilegt_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
      {"whilehi_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
      {"whilehs_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
      {"whilerw_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
      {"whilewr_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
      {"xar_z_zzi"_h, &Simulator::SimulateSVEExclusiveOrRotate},
      {"smmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
      {"ummla_z_zzz"_h, &Simulator::SimulateMatrixMul},
      {"usmmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
      {"smmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
      {"ummla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
      {"usmmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
      {"fmmla_z_zzz_s"_h, &Simulator::SimulateSVEFPMatrixMul},
      {"fmmla_z_zzz_d"_h, &Simulator::SimulateSVEFPMatrixMul},
      {"ld1row_z_p_bi_u32"_h,
       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
      {"ld1row_z_p_br_contiguous"_h,
       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
      {"ld1rod_z_p_bi_u64"_h,
       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
      {"ld1rod_z_p_br_contiguous"_h,
       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
      {"ld1rob_z_p_bi_u8"_h,
       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
      {"ld1rob_z_p_br_contiguous"_h,
       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
      {"ld1roh_z_p_bi_u16"_h,
       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
      {"ld1roh_z_p_br_contiguous"_h,
       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
      {"usdot_z_zzz_s"_h, &Simulator::VisitSVEIntMulAddUnpredicated},
      {"sudot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
      {"usdot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
      {"usdot_asimdsame2_d"_h, &Simulator::VisitNEON3SameExtra},
      {"sudot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
      {"usdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
      {"addg_64_addsub_immtags"_h, &Simulator::SimulateMTEAddSubTag},
      {"gmi_64g_dp_2src"_h, &Simulator::SimulateMTETagMaskInsert},
      {"irg_64i_dp_2src"_h, &Simulator::Simulate_XdSP_XnSP_Xm},
      {"ldg_64loffset_ldsttags"_h, &Simulator::SimulateMTELoadTag},
      {"st2g_64soffset_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"st2g_64spost_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"st2g_64spre_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"stgp_64_ldstpair_off"_h, &Simulator::SimulateMTEStoreTagPair},
      {"stgp_64_ldstpair_post"_h, &Simulator::SimulateMTEStoreTagPair},
      {"stgp_64_ldstpair_pre"_h, &Simulator::SimulateMTEStoreTagPair},
      {"stg_64soffset_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"stg_64spost_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"stg_64spre_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"stz2g_64soffset_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"stz2g_64spost_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"stz2g_64spre_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"stzg_64soffset_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"stzg_64spost_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"stzg_64spre_ldsttags"_h, &Simulator::SimulateMTEStoreTag},
      {"subg_64_addsub_immtags"_h, &Simulator::SimulateMTEAddSubTag},
      {"subps_64s_dp_2src"_h, &Simulator::SimulateMTESubPointer},
      {"subp_64s_dp_2src"_h, &Simulator::SimulateMTESubPointer},
      {"cpyen_cpy_memcms"_h, &Simulator::SimulateCpyE},
      {"cpyern_cpy_memcms"_h, &Simulator::SimulateCpyE},
      {"cpyewn_cpy_memcms"_h, &Simulator::SimulateCpyE},
      {"cpye_cpy_memcms"_h, &Simulator::SimulateCpyE},
      {"cpyfen_cpy_memcms"_h, &Simulator::SimulateCpyE},
      {"cpyfern_cpy_memcms"_h, &Simulator::SimulateCpyE},
      {"cpyfewn_cpy_memcms"_h, &Simulator::SimulateCpyE},
      {"cpyfe_cpy_memcms"_h, &Simulator::SimulateCpyE},
      {"cpyfmn_cpy_memcms"_h, &Simulator::SimulateCpyM},
      {"cpyfmrn_cpy_memcms"_h, &Simulator::SimulateCpyM},
      {"cpyfmwn_cpy_memcms"_h, &Simulator::SimulateCpyM},
      {"cpyfm_cpy_memcms"_h, &Simulator::SimulateCpyM},
      {"cpyfpn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
      {"cpyfprn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
      {"cpyfpwn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
      {"cpyfp_cpy_memcms"_h, &Simulator::SimulateCpyFP},
      {"cpymn_cpy_memcms"_h, &Simulator::SimulateCpyM},
      {"cpymrn_cpy_memcms"_h, &Simulator::SimulateCpyM},
      {"cpymwn_cpy_memcms"_h, &Simulator::SimulateCpyM},
      {"cpym_cpy_memcms"_h, &Simulator::SimulateCpyM},
      {"cpypn_cpy_memcms"_h, &Simulator::SimulateCpyP},
      {"cpyprn_cpy_memcms"_h, &Simulator::SimulateCpyP},
      {"cpypwn_cpy_memcms"_h, &Simulator::SimulateCpyP},
      {"cpyp_cpy_memcms"_h, &Simulator::SimulateCpyP},
      {"setp_set_memcms"_h, &Simulator::SimulateSetP},
      {"setpn_set_memcms"_h, &Simulator::SimulateSetP},
      {"setgp_set_memcms"_h, &Simulator::SimulateSetGP},
      {"setgpn_set_memcms"_h, &Simulator::SimulateSetGP},
      {"setm_set_memcms"_h, &Simulator::SimulateSetM},
      {"setmn_set_memcms"_h, &Simulator::SimulateSetM},
      {"setgm_set_memcms"_h, &Simulator::SimulateSetGM},
      {"setgmn_set_memcms"_h, &Simulator::SimulateSetGM},
      {"sete_set_memcms"_h, &Simulator::SimulateSetE},
      {"seten_set_memcms"_h, &Simulator::SimulateSetE},
      {"setge_set_memcms"_h, &Simulator::SimulateSetE},
      {"setgen_set_memcms"_h, &Simulator::SimulateSetE},
      {"abs_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
      {"abs_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
      {"cnt_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
      {"cnt_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
      {"ctz_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
      {"ctz_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
      {"smax_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
      {"smax_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
      {"smin_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
      {"smin_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
      {"smax_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
      {"smax_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
      {"smin_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
      {"smin_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
      {"umax_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
      {"umax_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
      {"umin_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
      {"umin_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
      {"umax_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
      {"umax_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
      {"umin_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
      {"umin_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
  };
  return &form_to_visitor;
}

#ifndef PANDA_BUILD
Simulator::Simulator(Decoder* decoder, FILE* stream, SimStack::Allocated stack)
    : memory_(std::move(stack)),
      last_instr_(NULL),
      cpu_features_auditor_(decoder, CPUFeatures::All()) {
#else
Simulator::Simulator(PandaAllocator* allocator, Decoder* decoder, SimStack::Allocated stack, FILE* stream)
    : memory_(std::move(stack)),
      last_instr_(NULL),
      allocator_(allocator),
      cpu_features_auditor_(decoder, CPUFeatures::All()),
      saved_cpu_features_(allocator_.Adapter()) {
#endif
  // Ensure that shift operations act as the simulator expects.
  VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
  VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff);

  // Set up a placeholder pipe for CanReadMemory.
  VIXL_CHECK(pipe(placeholder_pipe_fd_) == 0);

  // Set up the decoder.
  decoder_ = decoder;
  decoder_->AppendVisitor(this);

  stream_ = stream;

#ifndef PANDA_BUILD
  print_disasm_ = new PrintDisassembler(stream_);
#else
  print_disasm_ = allocator_.New<PrintDisassembler>(allocator, stream_);
#endif

  memory_.AppendMetaData(&meta_data_);

  // The Simulator and Disassembler share the same available list, held by the
  // auditor. The Disassembler only annotates instructions with features that
  // are _not_ available, so registering the auditor should have no effect
  // unless the simulator is about to abort (due to missing features). In
  // practice, this means that with trace enabled, the simulator will crash just
  // after the disassembler prints the instruction, with the missing features
  // enumerated.
  print_disasm_->RegisterCPUFeaturesAuditor(&cpu_features_auditor_);

  SetColouredTrace(false);
  trace_parameters_ = LOG_NONE;

  // We have to configure the SVE vector register length before calling
  // ResetState().
  SetVectorLengthInBits(kZRegMinSize);

  ResetState();

  // Print a warning about exclusive-access instructions, but only the first
  // time they are encountered. This warning can be silenced using
  // SilenceExclusiveAccessWarning().
  print_exclusive_access_warning_ = true;

  guard_pages_ = false;

  // Initialize the common state of RNDR and RNDRRS.
  uint16_t seed[3] = {11, 22, 33};
  VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_));
  memcpy(rand_state_, seed, sizeof(rand_state_));

  // Initialize all bits of the 'all true' pseudo predicate register.
  LogicPRegister ones(pregister_all_true_);
  ones.SetAllBits();

  // Initialize the debugger but disable it by default.
  SetDebuggerEnabled(false);
#ifndef PANDA_BUILD
  debugger_ = std::make_unique<Debugger>(this);
#else
  debugger_ = allocator_.New<Debugger>(this);
#endif
}

void Simulator::ResetSystemRegisters() {
  // Reset the system registers.
  nzcv_ = SimSystemRegister::DefaultValueFor(NZCV);
  fpcr_ = SimSystemRegister::DefaultValueFor(FPCR);
  ResetFFR();
}

void Simulator::ResetRegisters() {
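  // Fill the X registers with an easily recognisable junk value (0xbadbeef) so
  // that reads of registers that were never written stand out in traces.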
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    WriteXRegister(i, 0xbadbeef);
  }
  // Returning to address 0 exits the Simulator.
  WriteLr(kEndOfSimAddress);
}

void Simulator::ResetVRegisters() {
  // Set SVE/FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
  VIXL_ASSERT((GetVectorLengthInBytes() % kDRegSizeInBytes) == 0);
  int lane_count = GetVectorLengthInBytes() / kDRegSizeInBytes;
  for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    VIXL_ASSERT(vregisters_[i].GetSizeInBytes() == GetVectorLengthInBytes());
    vregisters_[i].NotifyAccessAsZ();
    for (int lane = 0; lane < lane_count; lane++) {
      // Encode the register number and (D-sized) lane into each NaN, to
      // make them easier to trace.
      uint64_t nan_bits = 0x7ff0f0007f80f000 | (0x0000000100000000 * i) |
                          (0x0000000000000001 * lane);
      VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask)));
      VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask)));
      vregisters_[i].Insert(lane, nan_bits);
    }
  }
}

void Simulator::ResetPRegisters() {
  VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
  int lane_count = GetPredicateLengthInBytes() / kHRegSizeInBytes;
  // Ensure the register configuration fits in this bit encoding.
  VIXL_STATIC_ASSERT(kNumberOfPRegisters <= UINT8_MAX);
  VIXL_ASSERT(lane_count <= UINT8_MAX);
  for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    VIXL_ASSERT(pregisters_[i].GetSizeInBytes() == GetPredicateLengthInBytes());
    for (int lane = 0; lane < lane_count; lane++) {
      // Encode the register number and (H-sized) lane into each lane slot.
      uint16_t bits = (0x0100 * lane) | i;
      pregisters_[i].Insert(lane, bits);
    }
  }
}

void Simulator::ResetFFR() {
  VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
  int default_active_lanes = GetPredicateLengthInBytes() / kHRegSizeInBytes;
  ffr_register_.Write(static_cast<uint16_t>(GetUintMask(default_active_lanes)));
}

void Simulator::ResetState() {
  ResetSystemRegisters();
  ResetRegisters();
  ResetVRegisters();
  ResetPRegisters();

  WriteSp(memory_.GetStack().GetBase());

  pc_ = NULL;
  pc_modified_ = false;

  // BTI state.
  btype_ = DefaultBType;
  next_btype_ = DefaultBType;

  meta_data_.ResetState();
}

void Simulator::SetVectorLengthInBits(unsigned vector_length) {
  VIXL_ASSERT((vector_length >= kZRegMinSize) &&
              (vector_length <= kZRegMaxSize));
  VIXL_ASSERT((vector_length % kZRegMinSize) == 0);
  vector_length_ = vector_length;

  for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    vregisters_[i].SetSizeInBytes(GetVectorLengthInBytes());
  }
  for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    pregisters_[i].SetSizeInBytes(GetPredicateLengthInBytes());
  }

  ffr_register_.SetSizeInBytes(GetPredicateLengthInBytes());

  ResetVRegisters();
  ResetPRegisters();
  ResetFFR();
}

Simulator::~Simulator() {
  // The decoder may outlive the simulator.
  decoder_->RemoveVisitor(print_disasm_);
#ifdef PANDA_BUILD
  allocator_.DeleteObject(print_disasm_);
  allocator_.DeleteObject(debugger_);
#else
  // The disassembler was allocated with plain new in the constructor; the
  // debugger is owned by a std::unique_ptr and needs no explicit delete.
  delete print_disasm_;
#endif
  close(placeholder_pipe_fd_[0]);
  close(placeholder_pipe_fd_[1]);
}


void Simulator::Run() {
  // Flush any written registers before executing anything, so that
  // manually-set registers are logged _before_ the first instruction.
  LogAllWrittenRegisters();

  if (debugger_enabled_) {
    // Slow path to check for breakpoints only if the debugger is enabled.
    Debugger* debugger = GetDebugger();
    while (!IsSimulationFinished()) {
      if (debugger->IsAtBreakpoint()) {
        fprintf(stream_, "Debugger hit breakpoint, breaking...\n");
        debugger->Debug();
      } else {
        ExecuteInstruction();
      }
    }
  } else {
    while (!IsSimulationFinished()) {
      ExecuteInstruction();
    }
  }
}


void Simulator::RunFrom(const Instruction* first) {
  WritePc(first, NoBranchLog);
  Run();
}
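// A minimal usage sketch, assuming the SimStack helper and the usual
// MacroAssembler/Decoder setup from the VIXL headers (see the bundled
// examples for the canonical version):
//
//   Decoder decoder;
//   Simulator simulator(&decoder, stdout, SimStack().Allocate());
//   simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>());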


// clang-format off
const char* Simulator::xreg_names[] = {"x0",  "x1",  "x2",  "x3",  "x4",  "x5",
                                       "x6",  "x7",  "x8",  "x9",  "x10", "x11",
                                       "x12", "x13", "x14", "x15", "x16", "x17",
                                       "x18", "x19", "x20", "x21", "x22", "x23",
                                       "x24", "x25", "x26", "x27", "x28", "x29",
                                       "lr",  "xzr", "sp"};

const char* Simulator::wreg_names[] = {"w0",  "w1",  "w2",  "w3",  "w4",  "w5",
                                       "w6",  "w7",  "w8",  "w9",  "w10", "w11",
                                       "w12", "w13", "w14", "w15", "w16", "w17",
                                       "w18", "w19", "w20", "w21", "w22", "w23",
                                       "w24", "w25", "w26", "w27", "w28", "w29",
                                       "w30", "wzr", "wsp"};

const char* Simulator::breg_names[] = {"b0",  "b1",  "b2",  "b3",  "b4",  "b5",
                                       "b6",  "b7",  "b8",  "b9",  "b10", "b11",
                                       "b12", "b13", "b14", "b15", "b16", "b17",
                                       "b18", "b19", "b20", "b21", "b22", "b23",
                                       "b24", "b25", "b26", "b27", "b28", "b29",
                                       "b30", "b31"};

const char* Simulator::hreg_names[] = {"h0",  "h1",  "h2",  "h3",  "h4",  "h5",
                                       "h6",  "h7",  "h8",  "h9",  "h10", "h11",
                                       "h12", "h13", "h14", "h15", "h16", "h17",
                                       "h18", "h19", "h20", "h21", "h22", "h23",
                                       "h24", "h25", "h26", "h27", "h28", "h29",
                                       "h30", "h31"};

const char* Simulator::sreg_names[] = {"s0",  "s1",  "s2",  "s3",  "s4",  "s5",
                                       "s6",  "s7",  "s8",  "s9",  "s10", "s11",
                                       "s12", "s13", "s14", "s15", "s16", "s17",
                                       "s18", "s19", "s20", "s21", "s22", "s23",
                                       "s24", "s25", "s26", "s27", "s28", "s29",
                                       "s30", "s31"};

const char* Simulator::dreg_names[] = {"d0",  "d1",  "d2",  "d3",  "d4",  "d5",
                                       "d6",  "d7",  "d8",  "d9",  "d10", "d11",
                                       "d12", "d13", "d14", "d15", "d16", "d17",
                                       "d18", "d19", "d20", "d21", "d22", "d23",
                                       "d24", "d25", "d26", "d27", "d28", "d29",
                                       "d30", "d31"};

const char* Simulator::vreg_names[] = {"v0",  "v1",  "v2",  "v3",  "v4",  "v5",
                                       "v6",  "v7",  "v8",  "v9",  "v10", "v11",
                                       "v12", "v13", "v14", "v15", "v16", "v17",
                                       "v18", "v19", "v20", "v21", "v22", "v23",
                                       "v24", "v25", "v26", "v27", "v28", "v29",
                                       "v30", "v31"};

const char* Simulator::zreg_names[] = {"z0",  "z1",  "z2",  "z3",  "z4",  "z5",
                                       "z6",  "z7",  "z8",  "z9",  "z10", "z11",
                                       "z12", "z13", "z14", "z15", "z16", "z17",
                                       "z18", "z19", "z20", "z21", "z22", "z23",
                                       "z24", "z25", "z26", "z27", "z28", "z29",
                                       "z30", "z31"};

const char* Simulator::preg_names[] = {"p0",  "p1",  "p2",  "p3",  "p4",  "p5",
                                       "p6",  "p7",  "p8",  "p9",  "p10", "p11",
                                       "p12", "p13", "p14", "p15"};
// clang-format on


const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
  // If the code represents the stack pointer, index the name after zr.
  if ((code == kSPRegInternalCode) ||
      ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
    code = kZeroRegCode + 1;
  }
  VIXL_ASSERT(code < ArrayLength(wreg_names));
  return wreg_names[code];
}


const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
  // If the code represents the stack pointer, index the name after zr.
  if ((code == kSPRegInternalCode) ||
      ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
    code = kZeroRegCode + 1;
  }
  VIXL_ASSERT(code < ArrayLength(xreg_names));
  return xreg_names[code];
}


const char* Simulator::BRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return breg_names[code];
}


const char* Simulator::HRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return hreg_names[code];
}


const char* Simulator::SRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return sreg_names[code];
}


const char* Simulator::DRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return dreg_names[code];
}


const char* Simulator::VRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return vreg_names[code];
}


const char* Simulator::ZRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfZRegisters);
  return zreg_names[code];
}


const char* Simulator::PRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfPRegisters);
  return preg_names[code];
}

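// Materialise a predicate as a vector: active lanes of pg become 0xff and
// inactive lanes become zero.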
SimVRegister Simulator::ExpandToSimVRegister(const SimPRegister& pg) {
  SimVRegister ones, result;
  dup_immediate(kFormatVnB, ones, 0xff);
  mov_zeroing(kFormatVnB, result, pg, ones);
  return result;
}

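// The inverse operation: set pd wherever the corresponding lane of vreg is
// non-zero.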
void Simulator::ExtractFromSimVRegister(VectorFormat vform,
                                        SimPRegister& pd,
                                        SimVRegister vreg) {
  SimVRegister zero;
  dup_immediate(kFormatVnB, zero, 0);
  SVEIntCompareVectorsHelper(ne,
                             vform,
                             pd,
                             GetPTrue(),
                             vreg,
                             zero,
                             false,
                             LeaveFlags);
}

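// ANSI escape sequences used to colour the trace output.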
#define COLOUR(colour_code) "\033[0;" colour_code "m"
#define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m"
#define COLOUR_HIGHLIGHT "\033[43m"
#define NORMAL ""
#define GREY "30"
#define RED "31"
#define GREEN "32"
#define YELLOW "33"
#define BLUE "34"
#define MAGENTA "35"
#define CYAN "36"
#define WHITE "37"
void Simulator::SetColouredTrace(bool value) {
  coloured_trace_ = value;

  clr_normal = value ? COLOUR(NORMAL) : "";
  clr_flag_name = value ? COLOUR_BOLD(WHITE) : "";
  clr_flag_value = value ? COLOUR(NORMAL) : "";
  clr_reg_name = value ? COLOUR_BOLD(CYAN) : "";
  clr_reg_value = value ? COLOUR(CYAN) : "";
  clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : "";
  clr_vreg_value = value ? COLOUR(MAGENTA) : "";
  clr_preg_name = value ? COLOUR_BOLD(GREEN) : "";
  clr_preg_value = value ? COLOUR(GREEN) : "";
  clr_memory_address = value ? COLOUR_BOLD(BLUE) : "";
  clr_warning = value ? COLOUR_BOLD(YELLOW) : "";
  clr_warning_message = value ? COLOUR(YELLOW) : "";
  clr_printf = value ? COLOUR(GREEN) : "";
  clr_branch_marker = value ? COLOUR(GREY) COLOUR_HIGHLIGHT : "";

  if (value) {
    print_disasm_->SetCPUFeaturesPrefix("// Needs: " COLOUR_BOLD(RED));
    print_disasm_->SetCPUFeaturesSuffix(COLOUR(NORMAL));
  } else {
    print_disasm_->SetCPUFeaturesPrefix("// Needs: ");
    print_disasm_->SetCPUFeaturesSuffix("");
  }
}


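// Attach or detach the disassembler visitor when the LOG_DISASM bit changes,
// so that each instruction is disassembled just before it is simulated.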
void Simulator::SetTraceParameters(int parameters) {
  bool disasm_before = trace_parameters_ & LOG_DISASM;
  trace_parameters_ = parameters;
  bool disasm_after = trace_parameters_ & LOG_DISASM;

  if (disasm_before != disasm_after) {
    if (disasm_after) {
      decoder_->InsertVisitorBefore(print_disasm_, this);
    } else {
      decoder_->RemoveVisitor(print_disasm_);
    }
  }
}

// Helpers ---------------------------------------------------------------------
uint64_t Simulator::AddWithCarry(unsigned reg_size,
                                 bool set_flags,
                                 uint64_t left,
                                 uint64_t right,
                                 int carry_in) {
  std::pair<uint64_t, uint8_t> result_and_flags =
      AddWithCarry(reg_size, left, right, carry_in);
  if (set_flags) {
    uint8_t flags = result_and_flags.second;
    ReadNzcv().SetN((flags >> 3) & 1);
    ReadNzcv().SetZ((flags >> 2) & 1);
    ReadNzcv().SetC((flags >> 1) & 1);
    ReadNzcv().SetV((flags >> 0) & 1);
    LogSystemRegister(NZCV);
  }
  return result_and_flags.first;
}

std::pair<uint64_t, uint8_t> Simulator::AddWithCarry(unsigned reg_size,
                                                     uint64_t left,
                                                     uint64_t right,
                                                     int carry_in) {
  VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
  VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));

  uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt;
  uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask;
  uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask;

  left &= reg_mask;
  right &= reg_mask;
  uint64_t result = (left + right + carry_in) & reg_mask;

  // NZCV bits, ordered N in bit 3 to V in bit 0.
  uint8_t nzcv = CalcNFlag(result, reg_size) ? 8 : 0;
  nzcv |= CalcZFlag(result) ? 4 : 0;

  // Compute the C flag by comparing the result to the max unsigned integer.
  uint64_t max_uint_2op = max_uint - carry_in;
  bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
  nzcv |= C ? 2 : 0;

  // Overflow iff the sign bit is the same for the two inputs and different
  // for the result.
  uint64_t left_sign = left & sign_mask;
  uint64_t right_sign = right & sign_mask;
  uint64_t result_sign = result & sign_mask;
  bool V = (left_sign == right_sign) && (left_sign != result_sign);
  nzcv |= V ? 1 : 0;

  return std::make_pair(result, nzcv);
}

using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
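// The pair holds the {high, low} 64-bit halves of a 128-bit value.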

vixl_uint128_t Simulator::Add128(vixl_uint128_t x, vixl_uint128_t y) {
  std::pair<uint64_t, uint8_t> sum_lo =
      AddWithCarry(kXRegSize, x.second, y.second, 0);
  int carry_in = (sum_lo.second & 0x2) >> 1;  // C flag in NZCV result.
  std::pair<uint64_t, uint8_t> sum_hi =
      AddWithCarry(kXRegSize, x.first, y.first, carry_in);
  return std::make_pair(sum_hi.first, sum_lo.first);
}

vixl_uint128_t Simulator::Neg128(vixl_uint128_t x) {
  // Negate the 128-bit value (two's complement). INT128_MIN has no
  // representable negation, so assert that it is not passed in.
  VIXL_ASSERT((x.first != GetSignMask(64)) || (x.second != 0));
  x.first = ~x.first;
  x.second = ~x.second;
  return Add128(x, {0, 1});
}

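// Multiply two signed 64-bit values into a full 128-bit product, built from
// four 32x32->64 partial products:
//   |x| * |y| = (x_hi * y_hi << 64) + (x_hi * y_lo << 32) +
//               (x_lo * y_hi << 32) + (x_lo * y_lo)
// with the sign of the result applied at the end.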
vixl_uint128_t Simulator::Mul64(uint64_t x, uint64_t y) {
  bool neg_result = false;
  if ((x >> 63) == 1) {
    x = -x;
    neg_result = !neg_result;
  }
  if ((y >> 63) == 1) {
    y = -y;
    neg_result = !neg_result;
  }

  uint64_t x_lo = x & 0xffffffff;
  uint64_t x_hi = x >> 32;
  uint64_t y_lo = y & 0xffffffff;
  uint64_t y_hi = y >> 32;

  uint64_t t1 = x_lo * y_hi;
  uint64_t t2 = x_hi * y_lo;
  vixl_uint128_t a = std::make_pair(0, x_lo * y_lo);
  vixl_uint128_t b = std::make_pair(t1 >> 32, t1 << 32);
  vixl_uint128_t c = std::make_pair(t2 >> 32, t2 << 32);
  vixl_uint128_t d = std::make_pair(x_hi * y_hi, 0);

  vixl_uint128_t result = Add128(a, b);
  result = Add128(result, c);
  result = Add128(result, d);
  // Negate via Neg128 so that the borrow propagates correctly into the high
  // half when the low 64 bits of the product are zero.
  return neg_result ? Neg128(result) : result;
}

int64_t Simulator::ShiftOperand(unsigned reg_size,
                                uint64_t uvalue,
                                Shift shift_type,
                                unsigned amount) const {
  VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
              (reg_size == kSRegSize) || (reg_size == kDRegSize));
  if (amount > 0) {
    uint64_t mask = GetUintMask(reg_size);
    bool is_negative = (uvalue & GetSignMask(reg_size)) != 0;
    // The behavior is undefined in C++ if the shift amount is greater than or
    // equal to the register lane size. Work out the shifted result based on
    // architectural behavior before performing the C++ shift operations.
    switch (shift_type) {
      case LSL:
        if (amount >= reg_size) {
          return UINT64_C(0);
        }
        uvalue <<= amount;
        break;
      case LSR:
        if (amount >= reg_size) {
          return UINT64_C(0);
        }
        uvalue >>= amount;
        break;
      case ASR:
        if (amount >= reg_size) {
          return is_negative ? ~UINT64_C(0) : UINT64_C(0);
        }
        uvalue >>= amount;
        if (is_negative) {
          // Simulate sign-extension to 64 bits.
          uvalue |= ~UINT64_C(0) << (reg_size - amount);
        }
        break;
      case ROR: {
        uvalue = RotateRight(uvalue, amount, reg_size);
        break;
      }
      default:
        VIXL_UNIMPLEMENTED();
        return 0;
    }
    uvalue &= mask;
  }

  int64_t result;
  memcpy(&result, &uvalue, sizeof(result));
  return result;
}


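// Sign- or zero-extend the low bits of the value as requested, then apply the
// left shift (the extend-then-shift order used by extended register operands).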
1065 int64_t Simulator::ExtendValue(unsigned reg_size,
1066                                int64_t value,
1067                                Extend extend_type,
1068                                unsigned left_shift) const {
1069   switch (extend_type) {
1070     case UXTB:
1071       value &= kByteMask;
1072       break;
1073     case UXTH:
1074       value &= kHalfWordMask;
1075       break;
1076     case UXTW:
1077       value &= kWordMask;
1078       break;
1079     case SXTB:
1080       value &= kByteMask;
1081       if ((value & 0x80) != 0) {
1082         value |= ~UINT64_C(0) << 8;
1083       }
1084       break;
1085     case SXTH:
1086       value &= kHalfWordMask;
1087       if ((value & 0x8000) != 0) {
1088         value |= ~UINT64_C(0) << 16;
1089       }
1090       break;
1091     case SXTW:
1092       value &= kWordMask;
1093       if ((value & 0x80000000) != 0) {
1094         value |= ~UINT64_C(0) << 32;
1095       }
1096       break;
1097     case UXTX:
1098     case SXTX:
1099       break;
1100     default:
1101       VIXL_UNREACHABLE();
1102   }
1103   return ShiftOperand(reg_size, value, LSL, left_shift);
1104 }
1105 
1106 
1107 void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
1108   AssertSupportedFPCR();
1109 
1110   // TODO: This assumes that the C++ implementation handles comparisons in the
1111   // way that we expect (as per AssertSupportedFPCR()).
1112   bool process_exception = false;
1113   if ((IsNaN(val0) != 0) || (IsNaN(val1) != 0)) {
1114     ReadNzcv().SetRawValue(FPUnorderedFlag);
1115     if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
1116         (trap == EnableTrap)) {
1117       process_exception = true;
1118     }
1119   } else if (val0 < val1) {
1120     ReadNzcv().SetRawValue(FPLessThanFlag);
1121   } else if (val0 > val1) {
1122     ReadNzcv().SetRawValue(FPGreaterThanFlag);
1123   } else if (val0 == val1) {
1124     ReadNzcv().SetRawValue(FPEqualFlag);
1125   } else {
1126     VIXL_UNREACHABLE();
1127   }
1128   LogSystemRegister(NZCV);
1129   if (process_exception) FPProcessException();
1130 }
1131 
1132 
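// Compute the effective address for `mem_op`: the base register plus either an
// immediate offset or a register offset with any shift or extend applied.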
1133 uint64_t Simulator::ComputeMemOperandAddress(const MemOperand& mem_op) const {
1134   VIXL_ASSERT(mem_op.IsValid());
1135   int64_t base = ReadRegister<int64_t>(mem_op.GetBaseRegister());
1136   if (mem_op.IsImmediateOffset()) {
1137     return base + mem_op.GetOffset();
1138   } else {
1139     VIXL_ASSERT(mem_op.GetRegisterOffset().IsValid());
1140     int64_t offset = ReadRegister<int64_t>(mem_op.GetRegisterOffset());
1141     unsigned shift_amount = mem_op.GetShiftAmount();
1142     if (mem_op.GetShift() != NO_SHIFT) {
1143       offset = ShiftOperand(kXRegSize, offset, mem_op.GetShift(), shift_amount);
1144     }
1145     if (mem_op.GetExtend() != NO_EXTEND) {
1146       offset = ExtendValue(kXRegSize, offset, mem_op.GetExtend(), shift_amount);
1147     }
1148     return static_cast<uint64_t>(base + offset);
1149   }
1150 }
1151 
1152 
1153 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
1154     unsigned reg_size, unsigned lane_size) {
1155   VIXL_ASSERT(reg_size >= lane_size);
1156 
1157   uint32_t format = 0;
1158   if (reg_size != lane_size) {
1159     switch (reg_size) {
1160       default:
1161         VIXL_UNREACHABLE();
1162         break;
1163       case kQRegSizeInBytes:
1164         format = kPrintRegAsQVector;
1165         break;
1166       case kDRegSizeInBytes:
1167         format = kPrintRegAsDVector;
1168         break;
1169     }
1170   }
1171 
1172   switch (lane_size) {
1173     default:
1174       VIXL_UNREACHABLE();
1175       break;
1176     case kQRegSizeInBytes:
1177       format |= kPrintReg1Q;
1178       break;
1179     case kDRegSizeInBytes:
1180       format |= kPrintReg1D;
1181       break;
1182     case kSRegSizeInBytes:
1183       format |= kPrintReg1S;
1184       break;
1185     case kHRegSizeInBytes:
1186       format |= kPrintReg1H;
1187       break;
1188     case kBRegSizeInBytes:
1189       format |= kPrintReg1B;
1190       break;
1191   }
1192   // These sizes would be duplicate case labels.
1193   VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
1194   VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
1195   VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D);
1196   VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S);
1197 
1198   return static_cast<PrintRegisterFormat>(format);
1199 }
1200 
1201 
1202 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
1203     VectorFormat vform) {
1204   switch (vform) {
1205     default:
1206       VIXL_UNREACHABLE();
1207       return kPrintReg16B;
1208     case kFormat16B:
1209       return kPrintReg16B;
1210     case kFormat8B:
1211       return kPrintReg8B;
1212     case kFormat8H:
1213       return kPrintReg8H;
1214     case kFormat4H:
1215       return kPrintReg4H;
1216     case kFormat4S:
1217       return kPrintReg4S;
1218     case kFormat2S:
1219       return kPrintReg2S;
1220     case kFormat2D:
1221       return kPrintReg2D;
1222     case kFormat1D:
1223       return kPrintReg1D;
1224 
1225     case kFormatB:
1226       return kPrintReg1B;
1227     case kFormatH:
1228       return kPrintReg1H;
1229     case kFormatS:
1230       return kPrintReg1S;
1231     case kFormatD:
1232       return kPrintReg1D;
1233 
1234     case kFormatVnB:
1235       return kPrintRegVnB;
1236     case kFormatVnH:
1237       return kPrintRegVnH;
1238     case kFormatVnS:
1239       return kPrintRegVnS;
1240     case kFormatVnD:
1241       return kPrintRegVnD;
1242   }
1243 }
1244 
1245 
1246 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP(
1247     VectorFormat vform) {
1248   switch (vform) {
1249     default:
1250       VIXL_UNREACHABLE();
1251       return kPrintReg16B;
1252     case kFormat8H:
1253       return kPrintReg8HFP;
1254     case kFormat4H:
1255       return kPrintReg4HFP;
1256     case kFormat4S:
1257       return kPrintReg4SFP;
1258     case kFormat2S:
1259       return kPrintReg2SFP;
1260     case kFormat2D:
1261       return kPrintReg2DFP;
1262     case kFormat1D:
1263       return kPrintReg1DFP;
1264     case kFormatH:
1265       return kPrintReg1HFP;
1266     case kFormatS:
1267       return kPrintReg1SFP;
1268     case kFormatD:
1269       return kPrintReg1DFP;
1270   }
1271 }
1272 
1273 void Simulator::PrintRegisters() {
1274   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
1275     if (i == kSpRegCode) i = kSPRegInternalCode;
1276     PrintRegister(i);
1277   }
1278 }
1279 
1280 void Simulator::PrintVRegisters() {
1281   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
1282     PrintVRegister(i);
1283   }
1284 }
1285 
1286 void Simulator::PrintZRegisters() {
1287   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
1288     PrintZRegister(i);
1289   }
1290 }
1291 
1292 void Simulator::PrintWrittenRegisters() {
1293   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
1294     if (registers_[i].WrittenSinceLastLog()) {
1295       if (i == kSpRegCode) i = kSPRegInternalCode;
1296       PrintRegister(i);
1297     }
1298   }
1299 }
1300 
1301 void Simulator::PrintWrittenVRegisters() {
1302   bool has_sve = GetCPUFeatures()->Has(CPUFeatures::kSVE);
1303   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
1304     if (vregisters_[i].WrittenSinceLastLog()) {
1305       // Z registers are initialised in the constructor before the user can
1306       // configure the CPU features, so we must also check for SVE here.
1307       if (vregisters_[i].AccessedAsZSinceLastLog() && has_sve) {
1308         PrintZRegister(i);
1309       } else {
1310         PrintVRegister(i);
1311       }
1312     }
1313   }
1314 }
1315 
1316 void Simulator::PrintWrittenPRegisters() {
1317   // P registers are initialised in the constructor before the user can
1318   // configure the CPU features, so we must check for SVE here.
1319   if (!GetCPUFeatures()->Has(CPUFeatures::kSVE)) return;
1320   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
1321     if (pregisters_[i].WrittenSinceLastLog()) {
1322       PrintPRegister(i);
1323     }
1324   }
1325   if (ReadFFR().WrittenSinceLastLog()) PrintFFR();
1326 }
1327 
1328 void Simulator::PrintSystemRegisters() {
1329   PrintSystemRegister(NZCV);
1330   PrintSystemRegister(FPCR);
1331 }
1332 
1333 void Simulator::PrintRegisterValue(const uint8_t* value,
1334                                    int value_size,
1335                                    PrintRegisterFormat format) {
1336   int print_width = GetPrintRegSizeInBytes(format);
1337   VIXL_ASSERT(print_width <= value_size);
1338   for (int i = value_size - 1; i >= print_width; i--) {
1339     // Pad with spaces so that values align vertically.
1340     fprintf(stream_, "  ");
1341     // If we aren't explicitly printing a partial value, ensure that the
1342     // unprinted bits are zero.
1343     VIXL_ASSERT(((format & kPrintRegPartial) != 0) || (value[i] == 0));
1344   }
1345   fprintf(stream_, "0x");
1346   for (int i = print_width - 1; i >= 0; i--) {
1347     fprintf(stream_, "%02x", value[i]);
1348   }
1349 }
1350 
1351 void Simulator::PrintRegisterValueFPAnnotations(const uint8_t* value,
1352                                                 uint16_t lane_mask,
1353                                                 PrintRegisterFormat format) {
1354   VIXL_ASSERT((format & kPrintRegAsFP) != 0);
1355   int lane_size = GetPrintRegLaneSizeInBytes(format);
1356   fprintf(stream_, " (");
1357   bool last_inactive = false;
1358   const char* sep = "";
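  // Each bit of lane_mask corresponds to a byte of the register value; a lane
  // is annotated only if the bit for its lowest-numbered byte is set.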
1359   for (int i = GetPrintRegLaneCount(format) - 1; i >= 0; i--, sep = ", ") {
1360     bool access = (lane_mask & (1 << (i * lane_size))) != 0;
1361     if (access) {
1362       // Read the lane as a double, so we can format all FP types in the same
1363       // way. We squash NaNs, and a double can exactly represent any other value
1364       // that the smaller types can represent, so this is lossless.
1365       double element;
1366       switch (lane_size) {
1367         case kHRegSizeInBytes: {
1368           Float16 element_fp16;
1369           VIXL_STATIC_ASSERT(sizeof(element_fp16) == kHRegSizeInBytes);
1370           memcpy(&element_fp16, &value[i * lane_size], sizeof(element_fp16));
1371           element = FPToDouble(element_fp16, kUseDefaultNaN);
1372           break;
1373         }
1374         case kSRegSizeInBytes: {
1375           float element_fp32;
1376           memcpy(&element_fp32, &value[i * lane_size], sizeof(element_fp32));
1377           element = static_cast<double>(element_fp32);
1378           break;
1379         }
1380         case kDRegSizeInBytes: {
1381           memcpy(&element, &value[i * lane_size], sizeof(element));
1382           break;
1383         }
1384         default:
1385           VIXL_UNREACHABLE();
1386           fprintf(stream_, "{UnknownFPValue}");
1387           continue;
1388       }
1389       if (IsNaN(element)) {
1390         // The fprintf behaviour for NaNs is implementation-defined. Always
1391         // print "nan", so that traces are consistent.
1392         fprintf(stream_, "%s%snan%s", sep, clr_vreg_value, clr_normal);
1393       } else {
1394         fprintf(stream_,
1395                 "%s%s%#.4g%s",
1396                 sep,
1397                 clr_vreg_value,
1398                 element,
1399                 clr_normal);
1400       }
1401       last_inactive = false;
1402     } else if (!last_inactive) {
1403       // Replace each contiguous sequence of inactive lanes with "...".
1404       fprintf(stream_, "%s...", sep);
1405       last_inactive = true;
1406     }
1407   }
1408   fprintf(stream_, ")");
1409 }
1410 
1411 void Simulator::PrintRegister(int code,
1412                               PrintRegisterFormat format,
1413                               const char* suffix) {
1414   VIXL_ASSERT((static_cast<unsigned>(code) < kNumberOfRegisters) ||
1415               (static_cast<unsigned>(code) == kSPRegInternalCode));
1416   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsScalar);
1417   VIXL_ASSERT((format & kPrintRegAsFP) == 0);
1418 
1419   SimRegister* reg;
1420   SimRegister zero;
1421   if (code == kZeroRegCode) {
1422     reg = &zero;
1423   } else {
1424     // registers_[31] holds the SP.
1425     VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
1426     reg = &registers_[code % kNumberOfRegisters];
1427   }
1428 
1429   // We trace register writes as whole register values, implying that any
1430   // unprinted bits are all zero:
1431   //   "#       x{code}: 0x{-----value----}"
1432   //   "#       w{code}:         0x{-value}"
1433   // Stores trace partial register values, implying nothing about the unprinted
1434   // bits:
1435   //   "# x{code}<63:0>: 0x{-----value----}"
1436   //   "# x{code}<31:0>:         0x{-value}"
1437   //   "# x{code}<15:0>:             0x{--}"
1438   //   "#  x{code}<7:0>:               0x{}"
1439 
1440   bool is_partial = (format & kPrintRegPartial) != 0;
1441   unsigned print_reg_size = GetPrintRegSizeInBits(format);
1442   std::stringstream name;
1443   if (is_partial) {
1444     name << XRegNameForCode(code) << GetPartialRegSuffix(format);
1445   } else {
1446     // Notify the register that it has been logged, but only if we're printing
1447     // all of it.
1448     reg->NotifyRegisterLogged();
1449     switch (print_reg_size) {
1450       case kWRegSize:
1451         name << WRegNameForCode(code);
1452         break;
1453       case kXRegSize:
1454         name << XRegNameForCode(code);
1455         break;
1456       default:
1457         VIXL_UNREACHABLE();
1458         return;
1459     }
1460   }
1461 
1462   fprintf(stream_,
1463           "# %s%*s: %s",
1464           clr_reg_name,
1465           kPrintRegisterNameFieldWidth,
1466           name.str().c_str(),
1467           clr_reg_value);
1468   PrintRegisterValue(*reg, format);
1469   fprintf(stream_, "%s%s", clr_normal, suffix);
1470 }
1471 
1472 void Simulator::PrintVRegister(int code,
1473                                PrintRegisterFormat format,
1474                                const char* suffix) {
1475   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfVRegisters);
1476   VIXL_ASSERT(((format & kPrintRegAsVectorMask) == kPrintRegAsScalar) ||
1477               ((format & kPrintRegAsVectorMask) == kPrintRegAsDVector) ||
1478               ((format & kPrintRegAsVectorMask) == kPrintRegAsQVector));
1479 
1480   // We trace register writes as whole register values, implying that any
1481   // unprinted bits are all zero:
1482   //   "#        v{code}: 0x{-------------value------------}"
1483   //   "#        d{code}:                 0x{-----value----}"
1484   //   "#        s{code}:                         0x{-value}"
1485   //   "#        h{code}:                             0x{--}"
1486   //   "#        b{code}:                               0x{}"
1487   // Stores trace partial register values, implying nothing about the unprinted
1488   // bits:
1489   //   "# v{code}<127:0>: 0x{-------------value------------}"
1490   //   "#  v{code}<63:0>:                 0x{-----value----}"
1491   //   "#  v{code}<31:0>:                         0x{-value}"
1492   //   "#  v{code}<15:0>:                             0x{--}"
1493   //   "#   v{code}<7:0>:                               0x{}"
1494 
1495   bool is_partial = ((format & kPrintRegPartial) != 0);
1496   std::stringstream name;
1497   unsigned print_reg_size = GetPrintRegSizeInBits(format);
1498   if (is_partial) {
1499     name << VRegNameForCode(code) << GetPartialRegSuffix(format);
1500   } else {
1501     // Notify the register that it has been logged, but only if we're printing
1502     // all of it.
1503     vregisters_[code].NotifyRegisterLogged();
1504     switch (print_reg_size) {
1505       case kBRegSize:
1506         name << BRegNameForCode(code);
1507         break;
1508       case kHRegSize:
1509         name << HRegNameForCode(code);
1510         break;
1511       case kSRegSize:
1512         name << SRegNameForCode(code);
1513         break;
1514       case kDRegSize:
1515         name << DRegNameForCode(code);
1516         break;
1517       case kQRegSize:
1518         name << VRegNameForCode(code);
1519         break;
1520       default:
1521         VIXL_UNREACHABLE();
1522         return;
1523     }
1524   }
1525 
1526   fprintf(stream_,
1527           "# %s%*s: %s",
1528           clr_vreg_name,
1529           kPrintRegisterNameFieldWidth,
1530           name.str().c_str(),
1531           clr_vreg_value);
1532   PrintRegisterValue(vregisters_[code], format);
1533   fprintf(stream_, "%s", clr_normal);
1534   if ((format & kPrintRegAsFP) != 0) {
1535     PrintRegisterValueFPAnnotations(vregisters_[code], format);
1536   }
1537   fprintf(stream_, "%s", suffix);
1538 }
1539 
1540 void Simulator::PrintVRegistersForStructuredAccess(int rt_code,
1541                                                    int reg_count,
1542                                                    uint16_t focus_mask,
1543                                                    PrintRegisterFormat format) {
1544   bool print_fp = (format & kPrintRegAsFP) != 0;
1545   // Suppress FP formatting, so we can specify the lanes we're interested in.
1546   PrintRegisterFormat format_no_fp =
1547       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
1548 
1549   for (int r = 0; r < reg_count; r++) {
1550     int code = (rt_code + r) % kNumberOfVRegisters;
1551     PrintVRegister(code, format_no_fp, "");
1552     if (print_fp) {
1553       PrintRegisterValueFPAnnotations(vregisters_[code], focus_mask, format);
1554     }
1555     fprintf(stream_, "\n");
1556   }
1557 }
1558 
1559 void Simulator::PrintZRegistersForStructuredAccess(int rt_code,
1560                                                    int q_index,
1561                                                    int reg_count,
1562                                                    uint16_t focus_mask,
1563                                                    PrintRegisterFormat format) {
1564   bool print_fp = (format & kPrintRegAsFP) != 0;
1565   // Suppress FP formatting, so we can specify the lanes we're interested in.
1566   PrintRegisterFormat format_no_fp =
1567       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
1568 
1569   PrintRegisterFormat format_q = GetPrintRegAsQChunkOfSVE(format);
1570 
1571   const unsigned size = kQRegSizeInBytes;
1572   unsigned byte_index = q_index * size;
1573   const uint8_t* value = vregisters_[rt_code].GetBytes() + byte_index;
1574   VIXL_ASSERT((byte_index + size) <= vregisters_[rt_code].GetSizeInBytes());
1575 
1576   for (int r = 0; r < reg_count; r++) {
1577     int code = (rt_code + r) % kNumberOfZRegisters;
1578     PrintPartialZRegister(code, q_index, format_no_fp, "");
1579     if (print_fp) {
1580       PrintRegisterValueFPAnnotations(value, focus_mask, format_q);
1581     }
1582     fprintf(stream_, "\n");
1583   }
1584 }
1585 
1586 void Simulator::PrintZRegister(int code, PrintRegisterFormat format) {
1587   // We're going to print the register in parts, so force a partial format.
1588   format = GetPrintRegPartial(format);
1589   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1590   int vl = GetVectorLengthInBits();
1591   VIXL_ASSERT((vl % kQRegSize) == 0);
1592   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1593     PrintPartialZRegister(code, i, format);
1594   }
1595   vregisters_[code].NotifyRegisterLogged();
1596 }
1597 
1598 void Simulator::PrintPRegister(int code, PrintRegisterFormat format) {
1599   // We're going to print the register in parts, so force a partial format.
1600   format = GetPrintRegPartial(format);
1601   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1602   int vl = GetVectorLengthInBits();
1603   VIXL_ASSERT((vl % kQRegSize) == 0);
1604   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1605     PrintPartialPRegister(code, i, format);
1606   }
1607   pregisters_[code].NotifyRegisterLogged();
1608 }
1609 
1610 void Simulator::PrintFFR(PrintRegisterFormat format) {
1611   // We're going to print the register in parts, so force a partial format.
1612   format = GetPrintRegPartial(format);
1613   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1614   int vl = GetVectorLengthInBits();
1615   VIXL_ASSERT((vl % kQRegSize) == 0);
1616   SimPRegister& ffr = ReadFFR();
1617   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1618     PrintPartialPRegister("FFR", ffr, i, format);
1619   }
1620   ffr.NotifyRegisterLogged();
1621 }
1622 
1623 void Simulator::PrintPartialZRegister(int code,
1624                                       int q_index,
1625                                       PrintRegisterFormat format,
1626                                       const char* suffix) {
1627   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfZRegisters);
1628   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1629   VIXL_ASSERT((format & kPrintRegPartial) != 0);
1630   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
1631 
1632   // We _only_ trace partial Z register values in Q-sized chunks, because
1633   // they're often too large to reasonably fit on a single line. Each line
1634   // implies nothing about the unprinted bits.
1635   //   "# z{code}<127:0>: 0x{-------------value------------}"
1636 
1637   format = GetPrintRegAsQChunkOfSVE(format);
1638 
1639   const unsigned size = kQRegSizeInBytes;
1640   unsigned byte_index = q_index * size;
1641   const uint8_t* value = vregisters_[code].GetBytes() + byte_index;
1642   VIXL_ASSERT((byte_index + size) <= vregisters_[code].GetSizeInBytes());
1643 
1644   int lsb = q_index * kQRegSize;
1645   int msb = lsb + kQRegSize - 1;
1646   std::stringstream name;
1647   name << ZRegNameForCode(code) << '<' << msb << ':' << lsb << '>';
1648 
1649   fprintf(stream_,
1650           "# %s%*s: %s",
1651           clr_vreg_name,
1652           kPrintRegisterNameFieldWidth,
1653           name.str().c_str(),
1654           clr_vreg_value);
1655   PrintRegisterValue(value, size, format);
1656   fprintf(stream_, "%s", clr_normal);
1657   if ((format & kPrintRegAsFP) != 0) {
1658     PrintRegisterValueFPAnnotations(value, GetPrintRegLaneMask(format), format);
1659   }
1660   fprintf(stream_, "%s", suffix);
1661 }
1662 
1663 void Simulator::PrintPartialPRegister(const char* name,
1664                                       const SimPRegister& reg,
1665                                       int q_index,
1666                                       PrintRegisterFormat format,
1667                                       const char* suffix) {
1668   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1669   VIXL_ASSERT((format & kPrintRegPartial) != 0);
1670   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
1671 
1672   // We don't currently use the format for anything here.
1673   USE(format);
1674 
1675   // We _only_ trace partial P register values, because they're often too large
1676   // to reasonably fit on a single line. Each line implies nothing about the
1677   // unprinted bits.
1678   //
1679   // We print values in binary, with spaces between each bit, in order for the
1680   // bits to align with the Z register bytes that they predicate.
1681   //   "# {name}<15:0>: 0b{-------------value------------}"
1682 
1683   int print_size_in_bits = kQRegSize / kZRegBitsPerPRegBit;
1684   int lsb = q_index * print_size_in_bits;
1685   int msb = lsb + print_size_in_bits - 1;
1686   std::stringstream prefix;
1687   prefix << name << '<' << msb << ':' << lsb << '>';
1688 
1689   fprintf(stream_,
1690           "# %s%*s: %s0b",
1691           clr_preg_name,
1692           kPrintRegisterNameFieldWidth,
1693           prefix.str().c_str(),
1694           clr_preg_value);
1695   for (int i = msb; i >= lsb; i--) {
1696     fprintf(stream_, " %c", reg.GetBit(i) ? '1' : '0');
1697   }
1698   fprintf(stream_, "%s%s", clr_normal, suffix);
1699 }
1700 
1701 void Simulator::PrintPartialPRegister(int code,
1702                                       int q_index,
1703                                       PrintRegisterFormat format,
1704                                       const char* suffix) {
1705   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfPRegisters);
1706   PrintPartialPRegister(PRegNameForCode(code),
1707                         pregisters_[code],
1708                         q_index,
1709                         format,
1710                         suffix);
1711 }
1712 
1713 void Simulator::PrintSystemRegister(SystemRegister id) {
1714   switch (id) {
1715     case NZCV:
1716       fprintf(stream_,
1717               "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
1718               clr_flag_name,
1719               clr_flag_value,
1720               ReadNzcv().GetN(),
1721               ReadNzcv().GetZ(),
1722               ReadNzcv().GetC(),
1723               ReadNzcv().GetV(),
1724               clr_normal);
1725       break;
1726     case FPCR: {
1727       static const char* rmode[] = {"0b00 (Round to Nearest)",
1728                                     "0b01 (Round towards Plus Infinity)",
1729                                     "0b10 (Round towards Minus Infinity)",
1730                                     "0b11 (Round towards Zero)"};
1731       VIXL_ASSERT(ReadFpcr().GetRMode() < ArrayLength(rmode));
1732       fprintf(stream_,
1733               "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
1734               clr_flag_name,
1735               clr_flag_value,
1736               ReadFpcr().GetAHP(),
1737               ReadFpcr().GetDN(),
1738               ReadFpcr().GetFZ(),
1739               rmode[ReadFpcr().GetRMode()],
1740               clr_normal);
1741       break;
1742     }
1743     default:
1744       VIXL_UNREACHABLE();
1745   }
1746 }
1747 
1748 uint16_t Simulator::PrintPartialAccess(uint16_t access_mask,
1749                                        uint16_t future_access_mask,
1750                                        int struct_element_count,
1751                                        int lane_size_in_bytes,
1752                                        const char* op,
1753                                        uintptr_t address,
1754                                        int reg_size_in_bytes) {
1755   // We assume that at least one lane will be accessed.
1756   VIXL_ASSERT(access_mask != 0);
1757   VIXL_ASSERT((reg_size_in_bytes == kXRegSizeInBytes) ||
1758               (reg_size_in_bytes == kQRegSizeInBytes));
1759   bool started_annotation = false;
1760   // Indent to match the register field, the fixed formatting, and the value
1761   // prefix ("0x"): "# {name}: 0x"
1762   fprintf(stream_, "# %*s    ", kPrintRegisterNameFieldWidth, "");
1763   // First, annotate the lanes (byte by byte).
1764   for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) {
1765     bool access = (access_mask & (1 << lane)) != 0;
1766     bool future = (future_access_mask & (1 << lane)) != 0;
1767     if (started_annotation) {
1768       // If we've started an annotation, draw a horizontal line in addition to
1769       // any other symbols.
1770       if (access) {
1771         fprintf(stream_, "─╨");
1772       } else if (future) {
1773         fprintf(stream_, "─║");
1774       } else {
1775         fprintf(stream_, "──");
1776       }
1777     } else {
1778       if (access) {
1779         started_annotation = true;
1780         fprintf(stream_, " ╙");
1781       } else if (future) {
1782         fprintf(stream_, " ║");
1783       } else {
1784         fprintf(stream_, "  ");
1785       }
1786     }
1787   }
1788   VIXL_ASSERT(started_annotation);
1789   fprintf(stream_, "─ 0x");
1790   int lane_size_in_nibbles = lane_size_in_bytes * 2;
1791   // Print the most-significant struct element first.
1792   const char* sep = "";
1793   for (int i = struct_element_count - 1; i >= 0; i--) {
1794     int offset = lane_size_in_bytes * i;
1795     uint64_t element = MemReadUint(lane_size_in_bytes, address + offset);
1796     fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, element);
1797     sep = "'";
1798   }
1799   fprintf(stream_,
1800           " %s %s0x%016" PRIxPTR "%s\n",
1801           op,
1802           clr_memory_address,
1803           address,
1804           clr_normal);
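  // Return the lanes that still need to be annotated by later calls.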
1805   return future_access_mask & ~access_mask;
1806 }
1807 
1808 void Simulator::PrintAccess(int code,
1809                             PrintRegisterFormat format,
1810                             const char* op,
1811                             uintptr_t address) {
1812   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1813   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1814   if ((format & kPrintRegPartial) == 0) {
1815     if (code != kZeroRegCode) {
1816       registers_[code].NotifyRegisterLogged();
1817     }
1818   }
1819   // Scalar-format accesses use a simple format:
1820   //   "# {reg}: 0x{value} -> {address}"
1821 
1822   // Suppress the newline, so the access annotation goes on the same line.
1823   PrintRegister(code, format, "");
1824   fprintf(stream_,
1825           " %s %s0x%016" PRIxPTR "%s\n",
1826           op,
1827           clr_memory_address,
1828           address,
1829           clr_normal);
1830 }
1831 
1832 void Simulator::PrintVAccess(int code,
1833                              PrintRegisterFormat format,
1834                              const char* op,
1835                              uintptr_t address) {
1836   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1837 
1838   // Scalar-format accesses use a simple format:
1839   //   "# v{code}: 0x{value} -> {address}"
1840 
1841   // Suppress the newline, so the access annotation goes on the same line.
1842   PrintVRegister(code, format, "");
1843   fprintf(stream_,
1844           " %s %s0x%016" PRIxPTR "%s\n",
1845           op,
1846           clr_memory_address,
1847           address,
1848           clr_normal);
1849 }
1850 
1851 void Simulator::PrintVStructAccess(int rt_code,
1852                                    int reg_count,
1853                                    PrintRegisterFormat format,
1854                                    const char* op,
1855                                    uintptr_t address) {
1856   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1857 
1858   // For example:
1859   //   "# v{code}: 0x{value}"
1860   //   "#     ...: 0x{value}"
1861   //   "#              ║   ╙─ {struct_value} -> {lowest_address}"
1862   //   "#              ╙───── {struct_value} -> {highest_address}"
1863 
1864   uint16_t lane_mask = GetPrintRegLaneMask(format);
1865   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1866 
1867   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
1868   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1869   for (int i = 0; i < reg_size_in_bytes; i += lane_size_in_bytes) {
1870     uint16_t access_mask = 1 << i;
1871     VIXL_ASSERT((lane_mask & access_mask) != 0);
1872     lane_mask = PrintPartialAccess(access_mask,
1873                                    lane_mask,
1874                                    reg_count,
1875                                    lane_size_in_bytes,
1876                                    op,
1877                                    address + (i * reg_count));
1878   }
1879 }
1880 
1881 void Simulator::PrintVSingleStructAccess(int rt_code,
1882                                          int reg_count,
1883                                          int lane,
1884                                          PrintRegisterFormat format,
1885                                          const char* op,
1886                                          uintptr_t address) {
1887   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1888 
1889   // For example:
1890   //   "# v{code}: 0x{value}"
1891   //   "#     ...: 0x{value}"
1892   //   "#              ╙───── {struct_value} -> {address}"
1893 
1894   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1895   uint16_t lane_mask = 1 << (lane * lane_size_in_bytes);
1896   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1897   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
1898 }
1899 
1900 void Simulator::PrintVReplicatingStructAccess(int rt_code,
1901                                               int reg_count,
1902                                               PrintRegisterFormat format,
1903                                               const char* op,
1904                                               uintptr_t address) {
1905   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1906 
1907   // For example:
1908   //   "# v{code}: 0x{value}"
1909   //   "#     ...: 0x{value}"
1910   //   "#            ╙─╨─╨─╨─ {struct_value} -> {address}"
1911 
1912   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1913   uint16_t lane_mask = GetPrintRegLaneMask(format);
1914   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1915   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
1916 }
1917 
1918 void Simulator::PrintZAccess(int rt_code, const char* op, uintptr_t address) {
1919   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1920 
1921   // Scalar-format accesses are split into separate chunks, each of which uses a
1922   // simple format:
1923   //   "#   z{code}<127:0>: 0x{value} -> {address}"
1924   //   "# z{code}<255:128>: 0x{value} -> {address + 16}"
1925   //   "# z{code}<383:256>: 0x{value} -> {address + 32}"
1926   // etc
1927 
1928   int vl = GetVectorLengthInBits();
1929   VIXL_ASSERT((vl % kQRegSize) == 0);
1930   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1931     // Suppress the newline, so the access annotation goes on the same line.
1932     PrintPartialZRegister(rt_code, q_index, kPrintRegVnQPartial, "");
1933     fprintf(stream_,
1934             " %s %s0x%016" PRIxPTR "%s\n",
1935             op,
1936             clr_memory_address,
1937             address,
1938             clr_normal);
1939     address += kQRegSizeInBytes;
1940   }
1941 }
1942 
1943 void Simulator::PrintZStructAccess(int rt_code,
1944                                    int reg_count,
1945                                    const LogicPRegister& pg,
1946                                    PrintRegisterFormat format,
1947                                    int msize_in_bytes,
1948                                    const char* op,
1949                                    const LogicSVEAddressVector& addr) {
1950   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1951 
1952   // For example:
1953   //   "# z{code}<255:128>: 0x{value}"
1954   //   "#     ...<255:128>: 0x{value}"
1955   //   "#                       ║   ╙─ {struct_value} -> {first_address}"
1956   //   "#                       ╙───── {struct_value} -> {last_address}"
1957 
1958   // We're going to print the register in parts, so force a partial format.
1959   bool skip_inactive_chunks = (format & kPrintRegPartial) != 0;
1960   format = GetPrintRegPartial(format);
1961 
1962   int esize_in_bytes = GetPrintRegLaneSizeInBytes(format);
1963   int vl = GetVectorLengthInBits();
1964   VIXL_ASSERT((vl % kQRegSize) == 0);
1965   int lanes_per_q = kQRegSizeInBytes / esize_in_bytes;
1966   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1967     uint16_t pred =
1968         pg.GetActiveMask<uint16_t>(q_index) & GetPrintRegLaneMask(format);
1969     if ((pred == 0) && skip_inactive_chunks) continue;
1970 
1971     PrintZRegistersForStructuredAccess(rt_code,
1972                                        q_index,
1973                                        reg_count,
1974                                        pred,
1975                                        format);
1976     if (pred == 0) {
1977       // This register chunk has no active lanes. The loop below would print
1978       // nothing, so leave a blank line to keep structures grouped together.
1979       fprintf(stream_, "#\n");
1980       continue;
1981     }
1982     for (int i = 0; i < lanes_per_q; i++) {
1983       uint16_t access = 1 << (i * esize_in_bytes);
1984       int lane = (q_index * lanes_per_q) + i;
1985       // Skip inactive lanes.
1986       if ((pred & access) == 0) continue;
1987       pred = PrintPartialAccess(access,
1988                                 pred,
1989                                 reg_count,
1990                                 msize_in_bytes,
1991                                 op,
1992                                 addr.GetStructAddress(lane));
1993     }
1994   }
1995 
1996   // We print the whole register, even for stores.
1997   for (int i = 0; i < reg_count; i++) {
1998     vregisters_[(rt_code + i) % kNumberOfZRegisters].NotifyRegisterLogged();
1999   }
2000 }
2001 
2002 void Simulator::PrintPAccess(int code, const char* op, uintptr_t address) {
2003   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
2004 
2005   // Scalar-format accesses are split into separate chunks, each of which uses a
2006   // simple format:
2007   //   "#  p{code}<15:0>: 0b{value} -> {address}"
2008   //   "# p{code}<31:16>: 0b{value} -> {address + 2}"
2009   //   "# p{code}<47:32>: 0b{value} -> {address + 4}"
2010   // etc
2011 
2012   int vl = GetVectorLengthInBits();
2013   VIXL_ASSERT((vl % kQRegSize) == 0);
2014   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
2015     // Suppress the newline, so the access annotation goes on the same line.
2016     PrintPartialPRegister(code, q_index, kPrintRegVnQPartial, "");
2017     fprintf(stream_,
2018             " %s %s0x%016" PRIxPTR "%s\n",
2019             op,
2020             clr_memory_address,
2021             address,
2022             clr_normal);
2023     address += kQRegSizeInBytes;
2024   }
2025 }
2026 
2027 void Simulator::PrintMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) {
2028   fprintf(stream_,
2029           "#               %s: %s0x%016" PRIxPTR " %s<- %s0x%02x%s",
2030           clr_reg_name,
2031           clr_memory_address,
2032           dst,
2033           clr_normal,
2034           clr_reg_value,
2035           value,
2036           clr_normal);
2037 
2038   fprintf(stream_,
2039           " <- %s0x%016" PRIxPTR "%s\n",
2040           clr_memory_address,
2041           src,
2042           clr_normal);
2043 }
2044 
2045 void Simulator::PrintRead(int rt_code,
2046                           PrintRegisterFormat format,
2047                           uintptr_t address) {
2048   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2049   if (rt_code != kZeroRegCode) {
2050     registers_[rt_code].NotifyRegisterLogged();
2051   }
2052   PrintAccess(rt_code, format, "<-", address);
2053 }
2054 
2055 void Simulator::PrintExtendingRead(int rt_code,
2056                                    PrintRegisterFormat format,
2057                                    int access_size_in_bytes,
2058                                    uintptr_t address) {
2059   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
2060   if (access_size_in_bytes == reg_size_in_bytes) {
2061     // There is no extension here, so print a simple load.
2062     PrintRead(rt_code, format, address);
2063     return;
2064   }
2065   VIXL_ASSERT(access_size_in_bytes < reg_size_in_bytes);
2066 
2067   // For sign- and zero-extension, make it clear that the resulting register
2068   // value is different from what is loaded from memory.
2069   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2070   if (rt_code != kZeroRegCode) {
2071     registers_[rt_code].NotifyRegisterLogged();
2072   }
2073   PrintRegister(rt_code, format);
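  // Show the unextended value that was read from memory, annotated against the
  // low bytes of the register.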
2074   PrintPartialAccess(1,
2075                      0,
2076                      1,
2077                      access_size_in_bytes,
2078                      "<-",
2079                      address,
2080                      kXRegSizeInBytes);
2081 }
2082 
2083 void Simulator::PrintVRead(int rt_code,
2084                            PrintRegisterFormat format,
2085                            uintptr_t address) {
2086   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2087   vregisters_[rt_code].NotifyRegisterLogged();
2088   PrintVAccess(rt_code, format, "<-", address);
2089 }
2090 
2091 void Simulator::PrintWrite(int rt_code,
2092                            PrintRegisterFormat format,
2093                            uintptr_t address) {
2094   // Because this trace doesn't represent a change to the source register's
2095   // value, only print the relevant part of the value.
2096   format = GetPrintRegPartial(format);
2097   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2098   if (rt_code != kZeroRegCode) {
2099     registers_[rt_code].NotifyRegisterLogged();
2100   }
2101   PrintAccess(rt_code, format, "->", address);
2102 }
2103 
2104 void Simulator::PrintVWrite(int rt_code,
2105                             PrintRegisterFormat format,
2106                             uintptr_t address) {
2107   // Because this trace doesn't represent a change to the source register's
2108   // value, only print the relevant part of the value.
2109   format = GetPrintRegPartial(format);
2110   // It only makes sense to write scalar values here. Vectors are handled by
2111   // PrintVStructAccess.
2112   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2113   PrintVAccess(rt_code, format, "->", address);
2114 }
2115 
2116 void Simulator::PrintTakenBranch(const Instruction* target) {
2117   fprintf(stream_,
2118           "# %sBranch%s to 0x%016" PRIx64 ".\n",
2119           clr_branch_marker,
2120           clr_normal,
2121           reinterpret_cast<uint64_t>(target));
2122 }
2123 
2124 // Visitors---------------------------------------------------------------------
2125 
2126 
2127 void Simulator::Visit(Metadata* metadata, const Instruction* instr) {
2128   VIXL_ASSERT(metadata->count("form") > 0);
2129   const std::string& form = (*metadata)["form"];
2130   form_hash_ = Hash(form.c_str());
2131   const FormToVisitorFnMap* fv = Simulator::GetFormToVisitorFnMap();
2132   FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
2133   if (it == fv->end()) {
2134     VisitUnimplemented(instr);
2135   } else {
2136     (it->second)(this, instr);
2137   }
2138 }
2139 
2140 void Simulator::Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr) {
2141   VectorFormat vform = instr->GetSVEVectorFormat();
2142   SimPRegister& pd = ReadPRegister(instr->GetPd());
2143   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2144   SimVRegister& zm = ReadVRegister(instr->GetRm());
2145   SimVRegister& zn = ReadVRegister(instr->GetRn());
2146 
2147   switch (form_hash_) {
2148     case "match_p_p_zz"_h:
2149       match(vform, pd, zn, zm, /* negate_match = */ false);
2150       break;
2151     case "nmatch_p_p_zz"_h:
2152       match(vform, pd, zn, zm, /* negate_match = */ true);
2153       break;
2154     default:
2155       VIXL_UNIMPLEMENTED();
2156   }
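  // Clear result lanes that are inactive in pg, then set the NZCV flags from
  // the final predicate result.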
2157   mov_zeroing(pd, pg, pd);
2158   PredTest(vform, pg, pd);
2159 }
2160 
2161 void Simulator::Simulate_PdT_Xn_Xm(const Instruction* instr) {
2162   VectorFormat vform = instr->GetSVEVectorFormat();
2163   SimPRegister& pd = ReadPRegister(instr->GetPd());
2164   uint64_t src1 = ReadXRegister(instr->GetRn());
2165   uint64_t src2 = ReadXRegister(instr->GetRm());
2166 
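  // Work out how many whole lanes separate the two addresses; elements below
  // this distance can never overlap between the two pointers.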
2167   uint64_t absdiff = (src1 > src2) ? (src1 - src2) : (src2 - src1);
2168   absdiff >>= LaneSizeInBytesLog2FromFormat(vform);
2169 
2170   bool no_conflict = false;
2171   switch (form_hash_) {
2172     case "whilerw_p_rr"_h:
2173       no_conflict = (absdiff == 0);
2174       break;
2175     case "whilewr_p_rr"_h:
2176       no_conflict = (absdiff == 0) || (src2 <= src1);
2177       break;
2178     default:
2179       VIXL_UNIMPLEMENTED();
2180   }
2181 
2182   LogicPRegister dst(pd);
2183   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2184     dst.SetActive(vform,
2185                   i,
2186                   no_conflict || (static_cast<uint64_t>(i) < absdiff));
2187   }
2188 
2189   PredTest(vform, GetPTrue(), pd);
2190 }
2191 
2192 void Simulator::Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr) {
2193   VIXL_ASSERT(form_hash_ == "ext_z_zi_con"_h);
2194 
2195   SimVRegister& zd = ReadVRegister(instr->GetRd());
2196   SimVRegister& zn = ReadVRegister(instr->GetRn());
2197   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
2198 
2199   int index = instr->GetSVEExtractImmediate();
2200   int vl = GetVectorLengthInBytes();
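  // An index greater than or equal to the vector length selects the first
  // source vector unchanged, which ext() gives for an index of zero.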
2201   index = (index >= vl) ? 0 : index;
2202 
2203   ext(kFormatVnB, zd, zn, zn2, index);
2204 }
2205 
2206 void Simulator::Simulate_ZdB_ZnB_ZmB(const Instruction* instr) {
2207   SimVRegister& zd = ReadVRegister(instr->GetRd());
2208   SimVRegister& zm = ReadVRegister(instr->GetRm());
2209   SimVRegister& zn = ReadVRegister(instr->GetRn());
2210 
2211   switch (form_hash_) {
2212     case "histseg_z_zz"_h:
2213       if (instr->GetSVEVectorFormat() == kFormatVnB) {
2214         histogram(kFormatVnB,
2215                   zd,
2216                   GetPTrue(),
2217                   zn,
2218                   zm,
2219                   /* do_segmented = */ true);
2220       } else {
2221         VIXL_UNIMPLEMENTED();
2222       }
2223       break;
2224     case "pmul_z_zz"_h:
2225       pmul(kFormatVnB, zd, zn, zm);
2226       break;
2227     default:
2228       VIXL_UNIMPLEMENTED();
2229   }
2230 }
2231 
2232 void Simulator::SimulateSVEMulIndex(const Instruction* instr) {
2233   VectorFormat vform = instr->GetSVEVectorFormat();
2234   SimVRegister& zd = ReadVRegister(instr->GetRd());
2235   SimVRegister& zn = ReadVRegister(instr->GetRn());
2236 
2237   // The encodings for B and H-sized lanes are redefined to encode the most
2238   // significant bit of the index for H-sized lanes. B-sized lanes are not
2239   // supported.
2240   if (vform == kFormatVnB) vform = kFormatVnH;
2241 
2242   VIXL_ASSERT((form_hash_ == "mul_z_zzi_d"_h) ||
2243               (form_hash_ == "mul_z_zzi_h"_h) ||
2244               (form_hash_ == "mul_z_zzi_s"_h));
2245 
2246   SimVRegister temp;
2247   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2248   mul(vform, zd, zn, temp);
2249 }
2250 
2251 void Simulator::SimulateSVEMlaMlsIndex(const Instruction* instr) {
2252   VectorFormat vform = instr->GetSVEVectorFormat();
2253   SimVRegister& zda = ReadVRegister(instr->GetRd());
2254   SimVRegister& zn = ReadVRegister(instr->GetRn());
2255 
2256   // The encodings for B and H-sized lanes are redefined to encode the most
2257   // significant bit of the index for H-sized lanes. B-sized lanes are not
2258   // supported.
2259   if (vform == kFormatVnB) vform = kFormatVnH;
2260 
2261   VIXL_ASSERT(
2262       (form_hash_ == "mla_z_zzzi_d"_h) || (form_hash_ == "mla_z_zzzi_h"_h) ||
2263       (form_hash_ == "mla_z_zzzi_s"_h) || (form_hash_ == "mls_z_zzzi_d"_h) ||
2264       (form_hash_ == "mls_z_zzzi_h"_h) || (form_hash_ == "mls_z_zzzi_s"_h));
2265 
2266   SimVRegister temp;
2267   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
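  // Bit 10 of the encoding distinguishes MLA (0) from MLS (1).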
2268   if (instr->ExtractBit(10) == 0) {
2269     mla(vform, zda, zda, zn, temp);
2270   } else {
2271     mls(vform, zda, zda, zn, temp);
2272   }
2273 }
2274 
2275 void Simulator::SimulateSVESaturatingMulHighIndex(const Instruction* instr) {
2276   VectorFormat vform = instr->GetSVEVectorFormat();
2277   SimVRegister& zd = ReadVRegister(instr->GetRd());
2278   SimVRegister& zn = ReadVRegister(instr->GetRn());
2279 
2280   // The encodings for B and H-sized lanes are redefined to encode the most
2281   // significant bit of the index for H-sized lanes. B-sized lanes are not
2282   // supported.
2283   if (vform == kFormatVnB) {
2284     vform = kFormatVnH;
2285   }
2286 
2287   SimVRegister temp;
2288   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2289   switch (form_hash_) {
2290     case "sqdmulh_z_zzi_h"_h:
2291     case "sqdmulh_z_zzi_s"_h:
2292     case "sqdmulh_z_zzi_d"_h:
2293       sqdmulh(vform, zd, zn, temp);
2294       break;
2295     case "sqrdmulh_z_zzi_h"_h:
2296     case "sqrdmulh_z_zzi_s"_h:
2297     case "sqrdmulh_z_zzi_d"_h:
2298       sqrdmulh(vform, zd, zn, temp);
2299       break;
2300     default:
2301       VIXL_UNIMPLEMENTED();
2302   }
2303 }
2304 
2305 void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) {
2306   VectorFormat vform = instr->GetSVEVectorFormat();
2307   SimVRegister& zd = ReadVRegister(instr->GetRd());
2308   SimVRegister& zn = ReadVRegister(instr->GetRn());
2309 
2310   SimVRegister temp, zm_idx, zn_b, zn_t;
2311   // Instead of calling the indexed form of the instruction logic, we call the
2312   // vector form, which reuses the existing function logic without modification.
2313   // Select the specified elements based on the index input and then pack them
2314   // into the corresponding positions.
2315   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2316   dup_elements_to_segments(vform_half, temp, instr->GetSVEMulLongZmAndIndex());
2317   pack_even_elements(vform_half, zm_idx, temp);
2318 
2319   pack_even_elements(vform_half, zn_b, zn);
2320   pack_odd_elements(vform_half, zn_t, zn);
2321 
2322   switch (form_hash_) {
2323     case "smullb_z_zzi_s"_h:
2324     case "smullb_z_zzi_d"_h:
2325       smull(vform, zd, zn_b, zm_idx);
2326       break;
2327     case "smullt_z_zzi_s"_h:
2328     case "smullt_z_zzi_d"_h:
2329       smull(vform, zd, zn_t, zm_idx);
2330       break;
2331     case "sqdmullb_z_zzi_d"_h:
2332       sqdmull(vform, zd, zn_b, zm_idx);
2333       break;
2334     case "sqdmullt_z_zzi_d"_h:
2335       sqdmull(vform, zd, zn_t, zm_idx);
2336       break;
2337     case "umullb_z_zzi_s"_h:
2338     case "umullb_z_zzi_d"_h:
2339       umull(vform, zd, zn_b, zm_idx);
2340       break;
2341     case "umullt_z_zzi_s"_h:
2342     case "umullt_z_zzi_d"_h:
2343       umull(vform, zd, zn_t, zm_idx);
2344       break;
2345     case "sqdmullb_z_zzi_s"_h:
2346       sqdmull(vform, zd, zn_b, zm_idx);
2347       break;
2348     case "sqdmullt_z_zzi_s"_h:
2349       sqdmull(vform, zd, zn_t, zm_idx);
2350       break;
2351     case "smlalb_z_zzzi_s"_h:
2352     case "smlalb_z_zzzi_d"_h:
2353       smlal(vform, zd, zn_b, zm_idx);
2354       break;
2355     case "smlalt_z_zzzi_s"_h:
2356     case "smlalt_z_zzzi_d"_h:
2357       smlal(vform, zd, zn_t, zm_idx);
2358       break;
2359     case "smlslb_z_zzzi_s"_h:
2360     case "smlslb_z_zzzi_d"_h:
2361       smlsl(vform, zd, zn_b, zm_idx);
2362       break;
2363     case "smlslt_z_zzzi_s"_h:
2364     case "smlslt_z_zzzi_d"_h:
2365       smlsl(vform, zd, zn_t, zm_idx);
2366       break;
2367     case "umlalb_z_zzzi_s"_h:
2368     case "umlalb_z_zzzi_d"_h:
2369       umlal(vform, zd, zn_b, zm_idx);
2370       break;
2371     case "umlalt_z_zzzi_s"_h:
2372     case "umlalt_z_zzzi_d"_h:
2373       umlal(vform, zd, zn_t, zm_idx);
2374       break;
2375     case "umlslb_z_zzzi_s"_h:
2376     case "umlslb_z_zzzi_d"_h:
2377       umlsl(vform, zd, zn_b, zm_idx);
2378       break;
2379     case "umlslt_z_zzzi_s"_h:
2380     case "umlslt_z_zzzi_d"_h:
2381       umlsl(vform, zd, zn_t, zm_idx);
2382       break;
2383     default:
2384       VIXL_UNIMPLEMENTED();
2385   }
2386 }
2387 
2388 void Simulator::Simulate_ZdH_PgM_ZnS(const Instruction* instr) {
2389   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2390   SimVRegister& zd = ReadVRegister(instr->GetRd());
2391   SimVRegister& zn = ReadVRegister(instr->GetRn());
2392   SimVRegister result, zd_b;
2393 
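  // FCVTNT writes each narrowed result to the top (odd) half-width element of
  // its lane, leaving the bottom (even) elements of zd unchanged, so keep zd's
  // bottom elements for re-interleaving with the converted values.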
2394   pack_even_elements(kFormatVnH, zd_b, zd);
2395 
2396   switch (form_hash_) {
2397     case "fcvtnt_z_p_z_s2h"_h:
2398       fcvt(kFormatVnH, kFormatVnS, result, pg, zn);
2399       pack_even_elements(kFormatVnH, result, result);
2400       zip1(kFormatVnH, result, zd_b, result);
2401       break;
2402     default:
2403       VIXL_UNIMPLEMENTED();
2404   }
2405   mov_merging(kFormatVnS, zd, pg, result);
2406 }
2407 
2408 void Simulator::Simulate_ZdS_PgM_ZnD(const Instruction* instr) {
2409   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2410   SimVRegister& zd = ReadVRegister(instr->GetRd());
2411   SimVRegister& zn = ReadVRegister(instr->GetRn());
2412   SimVRegister result, zero, zd_b;
2413 
2414   zero.Clear();
2415   pack_even_elements(kFormatVnS, zd_b, zd);
2416 
2417   switch (form_hash_) {
2418     case "fcvtnt_z_p_z_d2s"_h:
2419       fcvt(kFormatVnS, kFormatVnD, result, pg, zn);
2420       pack_even_elements(kFormatVnS, result, result);
2421       zip1(kFormatVnS, result, zd_b, result);
2422       break;
2423     case "fcvtx_z_p_z_d2s"_h:
2424       fcvtxn(kFormatVnS, result, zn);
2425       zip1(kFormatVnS, result, result, zero);
2426       break;
2427     case "fcvtxnt_z_p_z_d2s"_h:
2428       fcvtxn(kFormatVnS, result, zn);
2429       zip1(kFormatVnS, result, zd_b, result);
2430       break;
2431     default:
2432       VIXL_UNIMPLEMENTED();
2433   }
2434   mov_merging(kFormatVnD, zd, pg, result);
2435 }
2436 
2437 void Simulator::SimulateSVEFPConvertLong(const Instruction* instr) {
2438   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2439   SimVRegister& zd = ReadVRegister(instr->GetRd());
2440   SimVRegister& zn = ReadVRegister(instr->GetRn());
2441   SimVRegister result;
2442 
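  // FCVTLT converts the odd (top) half-width elements of zn. Rotate the source
  // down by one half-width element so that those elements line up with the
  // positions read by fcvt().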
2443   switch (form_hash_) {
2444     case "fcvtlt_z_p_z_h2s"_h:
2445       ext(kFormatVnB, result, zn, zn, kHRegSizeInBytes);
2446       fcvt(kFormatVnS, kFormatVnH, zd, pg, result);
2447       break;
2448     case "fcvtlt_z_p_z_s2d"_h:
2449       ext(kFormatVnB, result, zn, zn, kSRegSizeInBytes);
2450       fcvt(kFormatVnD, kFormatVnS, zd, pg, result);
2451       break;
2452     default:
2453       VIXL_UNIMPLEMENTED();
2454   }
2455 }
2456 
2457 void Simulator::Simulate_ZdS_PgM_ZnS(const Instruction* instr) {
2458   VectorFormat vform = instr->GetSVEVectorFormat();
2459   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2460   SimVRegister& zd = ReadVRegister(instr->GetRd());
2461   SimVRegister& zn = ReadVRegister(instr->GetRn());
2462   SimVRegister result;
2463 
2464   if (vform != kFormatVnS) {
2465     VIXL_UNIMPLEMENTED();
2466   }
2467 
2468   switch (form_hash_) {
2469     case "urecpe_z_p_z"_h:
2470       urecpe(vform, result, zn);
2471       break;
2472     case "ursqrte_z_p_z"_h:
2473       ursqrte(vform, result, zn);
2474       break;
2475     default:
2476       VIXL_UNIMPLEMENTED();
2477   }
2478   mov_merging(vform, zd, pg, result);
2479 }
2480 
2481 void Simulator::Simulate_ZdT_PgM_ZnT(const Instruction* instr) {
2482   VectorFormat vform = instr->GetSVEVectorFormat();
2483   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2484   SimVRegister& zd = ReadVRegister(instr->GetRd());
2485   SimVRegister& zn = ReadVRegister(instr->GetRn());
2486   SimVRegister result;
2487 
2488   switch (form_hash_) {
2489     case "flogb_z_p_z"_h:
2490       vform = instr->GetSVEVectorFormat(17);
2491       flogb(vform, result, zn);
2492       break;
2493     case "sqabs_z_p_z"_h:
2494       abs(vform, result, zn).SignedSaturate(vform);
2495       break;
2496     case "sqneg_z_p_z"_h:
2497       neg(vform, result, zn).SignedSaturate(vform);
2498       break;
2499     default:
2500       VIXL_UNIMPLEMENTED();
2501   }
2502   mov_merging(vform, zd, pg, result);
2503 }
2504 
2505 void Simulator::Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr) {
2506   VectorFormat vform = instr->GetSVEVectorFormat();
2507   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2508   SimVRegister& zd = ReadVRegister(instr->GetRd());
2509   SimVRegister& zm = ReadVRegister(instr->GetRm());
2510   SimVRegister& zn = ReadVRegister(instr->GetRn());
2511   SimVRegister result;
2512 
2513   VIXL_ASSERT(form_hash_ == "histcnt_z_p_zz"_h);
2514   if ((vform == kFormatVnS) || (vform == kFormatVnD)) {
2515     histogram(vform, result, pg, zn, zm);
2516     mov_zeroing(vform, zd, pg, result);
2517   } else {
2518     VIXL_UNIMPLEMENTED();
2519   }
2520 }
2521 
2522 void Simulator::Simulate_ZdT_ZnT_ZmT(const Instruction* instr) {
2523   VectorFormat vform = instr->GetSVEVectorFormat();
2524   SimVRegister& zd = ReadVRegister(instr->GetRd());
2525   SimVRegister& zm = ReadVRegister(instr->GetRm());
2526   SimVRegister& zn = ReadVRegister(instr->GetRn());
2527   SimVRegister result;
2528   bool do_bext = false;
2529 
2530   switch (form_hash_) {
2531     case "bdep_z_zz"_h:
2532       bdep(vform, zd, zn, zm);
2533       break;
2534     case "bext_z_zz"_h:
2535       do_bext = true;
2536       VIXL_FALLTHROUGH();
2537     case "bgrp_z_zz"_h:
2538       bgrp(vform, zd, zn, zm, do_bext);
2539       break;
2540     case "eorbt_z_zz"_h:
2541       rotate_elements_right(vform, result, zm, 1);
2542       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
2543       mov_alternating(vform, zd, result, 0);
2544       break;
2545     case "eortb_z_zz"_h:
2546       rotate_elements_right(vform, result, zm, -1);
2547       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
2548       mov_alternating(vform, zd, result, 1);
2549       break;
2550     case "mul_z_zz"_h:
2551       mul(vform, zd, zn, zm);
2552       break;
2553     case "smulh_z_zz"_h:
2554       smulh(vform, zd, zn, zm);
2555       break;
2556     case "sqdmulh_z_zz"_h:
2557       sqdmulh(vform, zd, zn, zm);
2558       break;
2559     case "sqrdmulh_z_zz"_h:
2560       sqrdmulh(vform, zd, zn, zm);
2561       break;
2562     case "umulh_z_zz"_h:
2563       umulh(vform, zd, zn, zm);
2564       break;
2565     default:
2566       VIXL_UNIMPLEMENTED();
2567   }
2568 }
2569 
2570 void Simulator::Simulate_ZdT_ZnT_ZmTb(const Instruction* instr) {
2571   VectorFormat vform = instr->GetSVEVectorFormat();
2572   SimVRegister& zd = ReadVRegister(instr->GetRd());
2573   SimVRegister& zm = ReadVRegister(instr->GetRm());
2574   SimVRegister& zn = ReadVRegister(instr->GetRn());
2575 
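  // These instructions use either the even (bottom) or odd (top) half-width
  // elements of zm; extract both sets so the switch below can pick the right
  // one.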
2576   SimVRegister zm_b, zm_t;
2577   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2578   pack_even_elements(vform_half, zm_b, zm);
2579   pack_odd_elements(vform_half, zm_t, zm);
2580 
2581   switch (form_hash_) {
2582     case "saddwb_z_zz"_h:
2583       saddw(vform, zd, zn, zm_b);
2584       break;
2585     case "saddwt_z_zz"_h:
2586       saddw(vform, zd, zn, zm_t);
2587       break;
2588     case "ssubwb_z_zz"_h:
2589       ssubw(vform, zd, zn, zm_b);
2590       break;
2591     case "ssubwt_z_zz"_h:
2592       ssubw(vform, zd, zn, zm_t);
2593       break;
2594     case "uaddwb_z_zz"_h:
2595       uaddw(vform, zd, zn, zm_b);
2596       break;
2597     case "uaddwt_z_zz"_h:
2598       uaddw(vform, zd, zn, zm_t);
2599       break;
2600     case "usubwb_z_zz"_h:
2601       usubw(vform, zd, zn, zm_b);
2602       break;
2603     case "usubwt_z_zz"_h:
2604       usubw(vform, zd, zn, zm_t);
2605       break;
2606     default:
2607       VIXL_UNIMPLEMENTED();
2608   }
2609 }
2610 
2611 void Simulator::Simulate_ZdT_ZnT_const(const Instruction* instr) {
2612   SimVRegister& zd = ReadVRegister(instr->GetRd());
2613   SimVRegister& zn = ReadVRegister(instr->GetRn());
2614 
2615   std::pair<int, int> shift_and_lane_size =
2616       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2617   int lane_size = shift_and_lane_size.second;
2618   VIXL_ASSERT((lane_size >= 0) &&
2619               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
2620   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
2621   int shift_dist = shift_and_lane_size.first;
2622 
2623   switch (form_hash_) {
2624     case "sli_z_zzi"_h:
2625       // Shift distance is computed differently for left shifts. Convert the
2626       // result.
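      // For example, with B-sized lanes (lane_size == 0), a decoded distance
      // of 3 corresponds to a left shift of 8 - 3 = 5.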
2627       shift_dist = (8 << lane_size) - shift_dist;
2628       sli(vform, zd, zn, shift_dist);
2629       break;
2630     case "sri_z_zzi"_h:
2631       sri(vform, zd, zn, shift_dist);
2632       break;
2633     default:
2634       VIXL_UNIMPLEMENTED();
2635   }
2636 }
2637 
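// Simulate SVE2 narrowing instructions (saturating extract narrow and shift
// right narrow). The "b" (bottom) forms write the narrowed results to the
// even-numbered elements of zd and zero the odd-numbered elements; the "t"
// (top) forms write the odd-numbered elements and leave the even-numbered
// elements unchanged.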
2638 void Simulator::SimulateSVENarrow(const Instruction* instr) {
2639   SimVRegister& zd = ReadVRegister(instr->GetRd());
2640   SimVRegister& zn = ReadVRegister(instr->GetRn());
2641   SimVRegister result;
2642 
2643   std::pair<int, int> shift_and_lane_size =
2644       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2645   int lane_size = shift_and_lane_size.second;
2646   VIXL_ASSERT((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) &&
2647               (lane_size <= static_cast<int>(kSRegSizeInBytesLog2)));
2648   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
2649   int right_shift_dist = shift_and_lane_size.first;
2650   bool top = false;
2651 
2652   switch (form_hash_) {
2653     case "sqxtnt_z_zz"_h:
2654       top = true;
2655       VIXL_FALLTHROUGH();
2656     case "sqxtnb_z_zz"_h:
2657       sqxtn(vform, result, zn);
2658       break;
2659     case "sqxtunt_z_zz"_h:
2660       top = true;
2661       VIXL_FALLTHROUGH();
2662     case "sqxtunb_z_zz"_h:
2663       sqxtun(vform, result, zn);
2664       break;
2665     case "uqxtnt_z_zz"_h:
2666       top = true;
2667       VIXL_FALLTHROUGH();
2668     case "uqxtnb_z_zz"_h:
2669       uqxtn(vform, result, zn);
2670       break;
2671     case "rshrnt_z_zi"_h:
2672       top = true;
2673       VIXL_FALLTHROUGH();
2674     case "rshrnb_z_zi"_h:
2675       rshrn(vform, result, zn, right_shift_dist);
2676       break;
2677     case "shrnt_z_zi"_h:
2678       top = true;
2679       VIXL_FALLTHROUGH();
2680     case "shrnb_z_zi"_h:
2681       shrn(vform, result, zn, right_shift_dist);
2682       break;
2683     case "sqrshrnt_z_zi"_h:
2684       top = true;
2685       VIXL_FALLTHROUGH();
2686     case "sqrshrnb_z_zi"_h:
2687       sqrshrn(vform, result, zn, right_shift_dist);
2688       break;
2689     case "sqrshrunt_z_zi"_h:
2690       top = true;
2691       VIXL_FALLTHROUGH();
2692     case "sqrshrunb_z_zi"_h:
2693       sqrshrun(vform, result, zn, right_shift_dist);
2694       break;
2695     case "sqshrnt_z_zi"_h:
2696       top = true;
2697       VIXL_FALLTHROUGH();
2698     case "sqshrnb_z_zi"_h:
2699       sqshrn(vform, result, zn, right_shift_dist);
2700       break;
2701     case "sqshrunt_z_zi"_h:
2702       top = true;
2703       VIXL_FALLTHROUGH();
2704     case "sqshrunb_z_zi"_h:
2705       sqshrun(vform, result, zn, right_shift_dist);
2706       break;
2707     case "uqrshrnt_z_zi"_h:
2708       top = true;
2709       VIXL_FALLTHROUGH();
2710     case "uqrshrnb_z_zi"_h:
2711       uqrshrn(vform, result, zn, right_shift_dist);
2712       break;
2713     case "uqshrnt_z_zi"_h:
2714       top = true;
2715       VIXL_FALLTHROUGH();
2716     case "uqshrnb_z_zi"_h:
2717       uqshrn(vform, result, zn, right_shift_dist);
2718       break;
2719     default:
2720       VIXL_UNIMPLEMENTED();
2721   }
2722 
2723   if (top) {
2724     // Keep even elements, replace odd elements with the results.
2725     xtn(vform, zd, zd);
2726     zip1(vform, zd, zd, result);
2727   } else {
2728     // Zero odd elements, replace even elements with the results.
2729     SimVRegister zero;
2730     zero.Clear();
2731     zip1(vform, zd, result, zero);
2732   }
2733 }
2734 
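// Simulate SVE2 interleaved long arithmetic: absolute difference (with and
// without accumulation), addition and subtraction of the even ("b"), odd
// ("t") or mixed ("bt"/"tb") half-width elements of the sources, producing
// double-width results.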
2735 void Simulator::SimulateSVEInterleavedArithLong(const Instruction* instr) {
2736   VectorFormat vform = instr->GetSVEVectorFormat();
2737   SimVRegister& zd = ReadVRegister(instr->GetRd());
2738   SimVRegister& zm = ReadVRegister(instr->GetRm());
2739   SimVRegister& zn = ReadVRegister(instr->GetRn());
2740   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
2741 
2742   // Construct temporary registers containing the even (bottom) and odd (top)
2743   // elements.
2744   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2745   pack_even_elements(vform_half, zn_b, zn);
2746   pack_even_elements(vform_half, zm_b, zm);
2747   pack_odd_elements(vform_half, zn_t, zn);
2748   pack_odd_elements(vform_half, zm_t, zm);
2749 
2750   switch (form_hash_) {
2751     case "sabdlb_z_zz"_h:
2752       sabdl(vform, zd, zn_b, zm_b);
2753       break;
2754     case "sabdlt_z_zz"_h:
2755       sabdl(vform, zd, zn_t, zm_t);
2756       break;
2757     case "saddlb_z_zz"_h:
2758       saddl(vform, zd, zn_b, zm_b);
2759       break;
2760     case "saddlbt_z_zz"_h:
2761       saddl(vform, zd, zn_b, zm_t);
2762       break;
2763     case "saddlt_z_zz"_h:
2764       saddl(vform, zd, zn_t, zm_t);
2765       break;
2766     case "ssublb_z_zz"_h:
2767       ssubl(vform, zd, zn_b, zm_b);
2768       break;
2769     case "ssublbt_z_zz"_h:
2770       ssubl(vform, zd, zn_b, zm_t);
2771       break;
2772     case "ssublt_z_zz"_h:
2773       ssubl(vform, zd, zn_t, zm_t);
2774       break;
2775     case "ssubltb_z_zz"_h:
2776       ssubl(vform, zd, zn_t, zm_b);
2777       break;
2778     case "uabdlb_z_zz"_h:
2779       uabdl(vform, zd, zn_b, zm_b);
2780       break;
2781     case "uabdlt_z_zz"_h:
2782       uabdl(vform, zd, zn_t, zm_t);
2783       break;
2784     case "uaddlb_z_zz"_h:
2785       uaddl(vform, zd, zn_b, zm_b);
2786       break;
2787     case "uaddlt_z_zz"_h:
2788       uaddl(vform, zd, zn_t, zm_t);
2789       break;
2790     case "usublb_z_zz"_h:
2791       usubl(vform, zd, zn_b, zm_b);
2792       break;
2793     case "usublt_z_zz"_h:
2794       usubl(vform, zd, zn_t, zm_t);
2795       break;
2796     case "sabalb_z_zzz"_h:
2797       sabal(vform, zd, zn_b, zm_b);
2798       break;
2799     case "sabalt_z_zzz"_h:
2800       sabal(vform, zd, zn_t, zm_t);
2801       break;
2802     case "uabalb_z_zzz"_h:
2803       uabal(vform, zd, zn_b, zm_b);
2804       break;
2805     case "uabalt_z_zzz"_h:
2806       uabal(vform, zd, zn_t, zm_t);
2807       break;
2808     default:
2809       VIXL_UNIMPLEMENTED();
2810   }
2811 }
2812 
2813 void Simulator::SimulateSVEIntMulLongVec(const Instruction* instr) {
2814   VectorFormat vform = instr->GetSVEVectorFormat();
2815   SimVRegister& zd = ReadVRegister(instr->GetRd());
2816   SimVRegister& zm = ReadVRegister(instr->GetRm());
2817   SimVRegister& zn = ReadVRegister(instr->GetRn());
2818   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
2819   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2820   pack_even_elements(vform_half, zn_b, zn);
2821   pack_even_elements(vform_half, zm_b, zm);
2822   pack_odd_elements(vform_half, zn_t, zn);
2823   pack_odd_elements(vform_half, zm_t, zm);
2824 
2825   switch (form_hash_) {
2826     case "pmullb_z_zz"_h:
2827       // '00' is reserved for Q-sized lane.
2828       if (vform == kFormatVnB) {
2829         VIXL_UNIMPLEMENTED();
2830       }
2831       pmull(vform, zd, zn_b, zm_b);
2832       break;
2833     case "pmullt_z_zz"_h:
2834       // '00' is reserved for Q-sized lane.
2835       if (vform == kFormatVnB) {
2836         VIXL_UNIMPLEMENTED();
2837       }
2838       pmull(vform, zd, zn_t, zm_t);
2839       break;
2840     case "smullb_z_zz"_h:
2841       smull(vform, zd, zn_b, zm_b);
2842       break;
2843     case "smullt_z_zz"_h:
2844       smull(vform, zd, zn_t, zm_t);
2845       break;
2846     case "sqdmullb_z_zz"_h:
2847       sqdmull(vform, zd, zn_b, zm_b);
2848       break;
2849     case "sqdmullt_z_zz"_h:
2850       sqdmull(vform, zd, zn_t, zm_t);
2851       break;
2852     case "umullb_z_zz"_h:
2853       umull(vform, zd, zn_b, zm_b);
2854       break;
2855     case "umullt_z_zz"_h:
2856       umull(vform, zd, zn_t, zm_t);
2857       break;
2858     default:
2859       VIXL_UNIMPLEMENTED();
2860   }
2861 }
2862 
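// Simulate SVE2 add/subtract returning the high half ((R)ADDHNB/T,
// (R)SUBHNB/T). The "b" forms place the narrowed results in the even-numbered
// elements of zd and zero the odd-numbered elements; the "t" forms place them
// in the odd-numbered elements and preserve the even-numbered elements.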
2863 void Simulator::SimulateSVEAddSubHigh(const Instruction* instr) {
2864   SimVRegister& zd = ReadVRegister(instr->GetRd());
2865   SimVRegister& zm = ReadVRegister(instr->GetRm());
2866   SimVRegister& zn = ReadVRegister(instr->GetRn());
2867   SimVRegister result;
2868   bool top = false;
2869 
2870   VectorFormat vform_src = instr->GetSVEVectorFormat();
2871   if (vform_src == kFormatVnB) {
2872     VIXL_UNIMPLEMENTED();
2873   }
2874   VectorFormat vform = VectorFormatHalfWidth(vform_src);
2875 
2876   switch (form_hash_) {
2877     case "addhnt_z_zz"_h:
2878       top = true;
2879       VIXL_FALLTHROUGH();
2880     case "addhnb_z_zz"_h:
2881       addhn(vform, result, zn, zm);
2882       break;
2883     case "raddhnt_z_zz"_h:
2884       top = true;
2885       VIXL_FALLTHROUGH();
2886     case "raddhnb_z_zz"_h:
2887       raddhn(vform, result, zn, zm);
2888       break;
2889     case "rsubhnt_z_zz"_h:
2890       top = true;
2891       VIXL_FALLTHROUGH();
2892     case "rsubhnb_z_zz"_h:
2893       rsubhn(vform, result, zn, zm);
2894       break;
2895     case "subhnt_z_zz"_h:
2896       top = true;
2897       VIXL_FALLTHROUGH();
2898     case "subhnb_z_zz"_h:
2899       subhn(vform, result, zn, zm);
2900       break;
2901     default:
2902       VIXL_UNIMPLEMENTED();
2903   }
2904 
2905   if (top) {
2906     // Keep even elements, replace odd elements with the results.
2907     xtn(vform, zd, zd);
2908     zip1(vform, zd, zd, result);
2909   } else {
2910     // Zero odd elements, replace even elements with the results.
2911     SimVRegister zero;
2912     zero.Clear();
2913     zip1(vform, zd, result, zero);
2914   }
2915 }
2916 
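// Simulate SVE2 shift-left-long instructions (SSHLLB/T, USHLLB/T). The
// decoded immediate is a right-shift-style distance, so it is converted to
// the equivalent left shift before use.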
2917 void Simulator::SimulateSVEShiftLeftImm(const Instruction* instr) {
2918   SimVRegister& zd = ReadVRegister(instr->GetRd());
2919   SimVRegister& zn = ReadVRegister(instr->GetRn());
2920   SimVRegister zn_b, zn_t;
2921 
2922   std::pair<int, int> shift_and_lane_size =
2923       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2924   int lane_size = shift_and_lane_size.second;
2925   VIXL_ASSERT((lane_size >= 0) &&
2926               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
2927   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size + 1);
2928   int right_shift_dist = shift_and_lane_size.first;
2929   int left_shift_dist = (8 << lane_size) - right_shift_dist;
2930 
2931   // Construct temporary registers containing the even (bottom) and odd (top)
2932   // elements.
2933   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2934   pack_even_elements(vform_half, zn_b, zn);
2935   pack_odd_elements(vform_half, zn_t, zn);
2936 
2937   switch (form_hash_) {
2938     case "sshllb_z_zi"_h:
2939       sshll(vform, zd, zn_b, left_shift_dist);
2940       break;
2941     case "sshllt_z_zi"_h:
2942       sshll(vform, zd, zn_t, left_shift_dist);
2943       break;
2944     case "ushllb_z_zi"_h:
2945       ushll(vform, zd, zn_b, left_shift_dist);
2946       break;
2947     case "ushllt_z_zi"_h:
2948       ushll(vform, zd, zn_t, left_shift_dist);
2949       break;
2950     default:
2951       VIXL_UNIMPLEMENTED();
2952   }
2953 }
2954 
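// Simulate SQRDMLAH and SQRDMLSH, in both their vector and indexed forms. For
// the indexed forms, the element size, index and zm register number are
// extracted from the form-specific fields before the indexed element is
// duplicated within each segment.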
2955 void Simulator::SimulateSVESaturatingMulAddHigh(const Instruction* instr) {
2956   VectorFormat vform = instr->GetSVEVectorFormat();
2957   SimVRegister& zda = ReadVRegister(instr->GetRd());
2958   SimVRegister& zn = ReadVRegister(instr->GetRn());
2959   unsigned zm_code = instr->GetRm();
2960   int index = -1;
2961   bool is_mla = false;
2962 
2963   switch (form_hash_) {
2964     case "sqrdmlah_z_zzz"_h:
2965       is_mla = true;
2966       VIXL_FALLTHROUGH();
2967     case "sqrdmlsh_z_zzz"_h:
2968       // Nothing to do.
2969       break;
2970     case "sqrdmlah_z_zzzi_h"_h:
2971       is_mla = true;
2972       VIXL_FALLTHROUGH();
2973     case "sqrdmlsh_z_zzzi_h"_h:
2974       vform = kFormatVnH;
2975       index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19);
2976       zm_code = instr->ExtractBits(18, 16);
2977       break;
2978     case "sqrdmlah_z_zzzi_s"_h:
2979       is_mla = true;
2980       VIXL_FALLTHROUGH();
2981     case "sqrdmlsh_z_zzzi_s"_h:
2982       vform = kFormatVnS;
2983       index = instr->ExtractBits(20, 19);
2984       zm_code = instr->ExtractBits(18, 16);
2985       break;
2986     case "sqrdmlah_z_zzzi_d"_h:
2987       is_mla = true;
2988       VIXL_FALLTHROUGH();
2989     case "sqrdmlsh_z_zzzi_d"_h:
2990       vform = kFormatVnD;
2991       index = instr->ExtractBit(20);
2992       zm_code = instr->ExtractBits(19, 16);
2993       break;
2994     default:
2995       VIXL_UNIMPLEMENTED();
2996   }
2997 
2998   SimVRegister& zm = ReadVRegister(zm_code);
2999   SimVRegister zm_idx;
3000   if (index >= 0) {
3001     dup_elements_to_segments(vform, zm_idx, zm, index);
3002   }
3003 
3004   if (is_mla) {
3005     sqrdmlah(vform, zda, zn, (index >= 0) ? zm_idx : zm);
3006   } else {
3007     sqrdmlsh(vform, zda, zn, (index >= 0) ? zm_idx : zm);
3008   }
3009 }
3010 
3011 void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) {
3012   SimVRegister& zda = ReadVRegister(instr->GetRd());
3013   SimVRegister& zn = ReadVRegister(instr->GetRn());
3014   SimVRegister& zm = ReadVRegister(instr->ExtractBits(19, 16));
3015 
3016   SimVRegister temp, zm_idx, zn_b, zn_t;
3017   Instr index = (instr->ExtractBit(20) << 1) | instr->ExtractBit(11);
3018   dup_elements_to_segments(kFormatVnS, temp, zm, index);
3019   pack_even_elements(kFormatVnS, zm_idx, temp);
3020   pack_even_elements(kFormatVnS, zn_b, zn);
3021   pack_odd_elements(kFormatVnS, zn_t, zn);
3022 
3023   switch (form_hash_) {
3024     case "sqdmlalb_z_zzzi_d"_h:
3025       sqdmlal(kFormatVnD, zda, zn_b, zm_idx);
3026       break;
3027     case "sqdmlalt_z_zzzi_d"_h:
3028       sqdmlal(kFormatVnD, zda, zn_t, zm_idx);
3029       break;
3030     case "sqdmlslb_z_zzzi_d"_h:
3031       sqdmlsl(kFormatVnD, zda, zn_b, zm_idx);
3032       break;
3033     case "sqdmlslt_z_zzzi_d"_h:
3034       sqdmlsl(kFormatVnD, zda, zn_t, zm_idx);
3035       break;
3036     default:
3037       VIXL_UNIMPLEMENTED();
3038   }
3039 }
3040 
3041 void Simulator::Simulate_ZdaS_ZnH_ZmH(const Instruction* instr) {
3042   SimVRegister& zda = ReadVRegister(instr->GetRd());
3043   SimVRegister& zm = ReadVRegister(instr->GetRm());
3044   SimVRegister& zn = ReadVRegister(instr->GetRn());
3045 
3046   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
3047   pack_even_elements(kFormatVnH, zn_b, zn);
3048   pack_even_elements(kFormatVnH, zm_b, zm);
3049   pack_odd_elements(kFormatVnH, zn_t, zn);
3050   pack_odd_elements(kFormatVnH, zm_t, zm);
3051 
3052   switch (form_hash_) {
3053     case "fmlalb_z_zzz"_h:
3054       fmlal(kFormatVnS, zda, zn_b, zm_b);
3055       break;
3056     case "fmlalt_z_zzz"_h:
3057       fmlal(kFormatVnS, zda, zn_t, zm_t);
3058       break;
3059     case "fmlslb_z_zzz"_h:
3060       fmlsl(kFormatVnS, zda, zn_b, zm_b);
3061       break;
3062     case "fmlslt_z_zzz"_h:
3063       fmlsl(kFormatVnS, zda, zn_t, zm_t);
3064       break;
3065     default:
3066       VIXL_UNIMPLEMENTED();
3067   }
3068 }
3069 
3070 void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) {
3071   SimVRegister& zda = ReadVRegister(instr->GetRd());
3072   SimVRegister& zn = ReadVRegister(instr->GetRn());
3073   SimVRegister& zm = ReadVRegister(instr->ExtractBits(18, 16));
3074 
3075   SimVRegister temp, zm_idx, zn_b, zn_t;
3076   Instr index = (instr->ExtractBits(20, 19) << 1) | instr->ExtractBit(11);
3077   dup_elements_to_segments(kFormatVnH, temp, zm, index);
3078   pack_even_elements(kFormatVnH, zm_idx, temp);
3079   pack_even_elements(kFormatVnH, zn_b, zn);
3080   pack_odd_elements(kFormatVnH, zn_t, zn);
3081 
3082   switch (form_hash_) {
3083     case "fmlalb_z_zzzi_s"_h:
3084       fmlal(kFormatVnS, zda, zn_b, zm_idx);
3085       break;
3086     case "fmlalt_z_zzzi_s"_h:
3087       fmlal(kFormatVnS, zda, zn_t, zm_idx);
3088       break;
3089     case "fmlslb_z_zzzi_s"_h:
3090       fmlsl(kFormatVnS, zda, zn_b, zm_idx);
3091       break;
3092     case "fmlslt_z_zzzi_s"_h:
3093       fmlsl(kFormatVnS, zda, zn_t, zm_idx);
3094       break;
3095     case "sqdmlalb_z_zzzi_s"_h:
3096       sqdmlal(kFormatVnS, zda, zn_b, zm_idx);
3097       break;
3098     case "sqdmlalt_z_zzzi_s"_h:
3099       sqdmlal(kFormatVnS, zda, zn_t, zm_idx);
3100       break;
3101     case "sqdmlslb_z_zzzi_s"_h:
3102       sqdmlsl(kFormatVnS, zda, zn_b, zm_idx);
3103       break;
3104     case "sqdmlslt_z_zzzi_s"_h:
3105       sqdmlsl(kFormatVnS, zda, zn_t, zm_idx);
3106       break;
3107     default:
3108       VIXL_UNIMPLEMENTED();
3109   }
3110 }
3111 
3112 void Simulator::Simulate_ZdaT_PgM_ZnTb(const Instruction* instr) {
3113   VectorFormat vform = instr->GetSVEVectorFormat();
3114   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3115   SimVRegister& zda = ReadVRegister(instr->GetRd());
3116   SimVRegister& zn = ReadVRegister(instr->GetRn());
3117   SimVRegister result;
3118 
3119   switch (form_hash_) {
3120     case "sadalp_z_p_z"_h:
3121       sadalp(vform, result, zn);
3122       break;
3123     case "uadalp_z_p_z"_h:
3124       uadalp(vform, result, zn);
3125       break;
3126     default:
3127       VIXL_UNIMPLEMENTED();
3128   }
3129   mov_merging(vform, zda, pg, result);
3130 }
3131 
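// Simulate ADCLB, ADCLT, SBCLB and SBCLT. The subtract-with-carry forms are
// implemented as an add-with-carry of the bitwise inverse of zn.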
3132 void Simulator::SimulateSVEAddSubCarry(const Instruction* instr) {
3133   VectorFormat vform = (instr->ExtractBit(22) == 0) ? kFormatVnS : kFormatVnD;
3134   SimVRegister& zda = ReadVRegister(instr->GetRd());
3135   SimVRegister& zm = ReadVRegister(instr->GetRm());
3136   SimVRegister& zn = ReadVRegister(instr->GetRn());
3137 
3138   SimVRegister not_zn;
3139   not_(vform, not_zn, zn);
3140 
3141   switch (form_hash_) {
3142     case "adclb_z_zzz"_h:
3143       adcl(vform, zda, zn, zm, /* top = */ false);
3144       break;
3145     case "adclt_z_zzz"_h:
3146       adcl(vform, zda, zn, zm, /* top = */ true);
3147       break;
3148     case "sbclb_z_zzz"_h:
3149       adcl(vform, zda, not_zn, zm, /* top = */ false);
3150       break;
3151     case "sbclt_z_zzz"_h:
3152       adcl(vform, zda, not_zn, zm, /* top = */ true);
3153       break;
3154     default:
3155       VIXL_UNIMPLEMENTED();
3156   }
3157 }
3158 
3159 void Simulator::Simulate_ZdaT_ZnT_ZmT(const Instruction* instr) {
3160   VectorFormat vform = instr->GetSVEVectorFormat();
3161   SimVRegister& zda = ReadVRegister(instr->GetRd());
3162   SimVRegister& zm = ReadVRegister(instr->GetRm());
3163   SimVRegister& zn = ReadVRegister(instr->GetRn());
3164 
3165   switch (form_hash_) {
3166     case "saba_z_zzz"_h:
3167       saba(vform, zda, zn, zm);
3168       break;
3169     case "uaba_z_zzz"_h:
3170       uaba(vform, zda, zn, zm);
3171       break;
3172     default:
3173       VIXL_UNIMPLEMENTED();
3174   }
3175 }
3176 
3177 void Simulator::SimulateSVEComplexIntMulAdd(const Instruction* instr) {
3178   SimVRegister& zda = ReadVRegister(instr->GetRd());
3179   SimVRegister& zn = ReadVRegister(instr->GetRn());
3180   int rot = instr->ExtractBits(11, 10) * 90;
3181   // vform and zm are only valid for the vector form of the instruction.
3182   VectorFormat vform = instr->GetSVEVectorFormat();
3183   SimVRegister& zm = ReadVRegister(instr->GetRm());
3184 
3185   // Inputs for the indexed form of the instruction.
3186   SimVRegister& zm_h = ReadVRegister(instr->ExtractBits(18, 16));
3187   SimVRegister& zm_s = ReadVRegister(instr->ExtractBits(19, 16));
3188   int idx_h = instr->ExtractBits(20, 19);
3189   int idx_s = instr->ExtractBit(20);
3190 
3191   switch (form_hash_) {
3192     case "cmla_z_zzz"_h:
3193       cmla(vform, zda, zda, zn, zm, rot);
3194       break;
3195     case "cmla_z_zzzi_h"_h:
3196       cmla(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
3197       break;
3198     case "cmla_z_zzzi_s"_h:
3199       cmla(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
3200       break;
3201     case "sqrdcmlah_z_zzz"_h:
3202       sqrdcmlah(vform, zda, zda, zn, zm, rot);
3203       break;
3204     case "sqrdcmlah_z_zzzi_h"_h:
3205       sqrdcmlah(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
3206       break;
3207     case "sqrdcmlah_z_zzzi_s"_h:
3208       sqrdcmlah(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
3209       break;
3210     default:
3211       VIXL_UNIMPLEMENTED();
3212   }
3213 }
3214 
3215 void Simulator::Simulate_ZdaT_ZnT_const(const Instruction* instr) {
3216   SimVRegister& zd = ReadVRegister(instr->GetRd());
3217   SimVRegister& zn = ReadVRegister(instr->GetRn());
3218 
3219   std::pair<int, int> shift_and_lane_size =
3220       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
3221   int lane_size = shift_and_lane_size.second;
3222   VIXL_ASSERT((lane_size >= 0) &&
3223               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
3224   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3225   int shift_dist = shift_and_lane_size.first;
3226 
3227   switch (form_hash_) {
3228     case "srsra_z_zi"_h:
3229       srsra(vform, zd, zn, shift_dist);
3230       break;
3231     case "ssra_z_zi"_h:
3232       ssra(vform, zd, zn, shift_dist);
3233       break;
3234     case "ursra_z_zi"_h:
3235       ursra(vform, zd, zn, shift_dist);
3236       break;
3237     case "usra_z_zi"_h:
3238       usra(vform, zd, zn, shift_dist);
3239       break;
3240     default:
3241       VIXL_UNIMPLEMENTED();
3242   }
3243 }
3244 
3245 void Simulator::Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr) {
3246   VectorFormat vform = instr->GetSVEVectorFormat();
3247   SimVRegister& zda = ReadVRegister(instr->GetRd());
3248   SimVRegister& zm = ReadVRegister(instr->GetRm());
3249   SimVRegister& zn = ReadVRegister(instr->GetRn());
3250 
3251   SimVRegister zero, zn_b, zm_b, zn_t, zm_t;
3252   zero.Clear();
3253 
3254   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3255   uzp1(vform_half, zn_b, zn, zero);
3256   uzp1(vform_half, zm_b, zm, zero);
3257   uzp2(vform_half, zn_t, zn, zero);
3258   uzp2(vform_half, zm_t, zm, zero);
3259 
3260   switch (form_hash_) {
3261     case "smlalb_z_zzz"_h:
3262       smlal(vform, zda, zn_b, zm_b);
3263       break;
3264     case "smlalt_z_zzz"_h:
3265       smlal(vform, zda, zn_t, zm_t);
3266       break;
3267     case "smlslb_z_zzz"_h:
3268       smlsl(vform, zda, zn_b, zm_b);
3269       break;
3270     case "smlslt_z_zzz"_h:
3271       smlsl(vform, zda, zn_t, zm_t);
3272       break;
3273     case "sqdmlalb_z_zzz"_h:
3274       sqdmlal(vform, zda, zn_b, zm_b);
3275       break;
3276     case "sqdmlalbt_z_zzz"_h:
3277       sqdmlal(vform, zda, zn_b, zm_t);
3278       break;
3279     case "sqdmlalt_z_zzz"_h:
3280       sqdmlal(vform, zda, zn_t, zm_t);
3281       break;
3282     case "sqdmlslb_z_zzz"_h:
3283       sqdmlsl(vform, zda, zn_b, zm_b);
3284       break;
3285     case "sqdmlslbt_z_zzz"_h:
3286       sqdmlsl(vform, zda, zn_b, zm_t);
3287       break;
3288     case "sqdmlslt_z_zzz"_h:
3289       sqdmlsl(vform, zda, zn_t, zm_t);
3290       break;
3291     case "umlalb_z_zzz"_h:
3292       umlal(vform, zda, zn_b, zm_b);
3293       break;
3294     case "umlalt_z_zzz"_h:
3295       umlal(vform, zda, zn_t, zm_t);
3296       break;
3297     case "umlslb_z_zzz"_h:
3298       umlsl(vform, zda, zn_b, zm_b);
3299       break;
3300     case "umlslt_z_zzz"_h:
3301       umlsl(vform, zda, zn_t, zm_t);
3302       break;
3303     default:
3304       VIXL_UNIMPLEMENTED();
3305   }
3306 }
3307 
3308 void Simulator::SimulateSVEComplexDotProduct(const Instruction* instr) {
3309   VectorFormat vform = instr->GetSVEVectorFormat();
3310   SimVRegister& zda = ReadVRegister(instr->GetRd());
3311   SimVRegister& zn = ReadVRegister(instr->GetRn());
3312   int rot = instr->ExtractBits(11, 10) * 90;
3313   unsigned zm_code = instr->GetRm();
3314   int index = -1;
3315 
3316   switch (form_hash_) {
3317     case "cdot_z_zzz"_h:
3318       // Nothing to do.
3319       break;
3320     case "cdot_z_zzzi_s"_h:
3321       index = zm_code >> 3;
3322       zm_code &= 0x7;
3323       break;
3324     case "cdot_z_zzzi_d"_h:
3325       index = zm_code >> 4;
3326       zm_code &= 0xf;
3327       break;
3328     default:
3329       VIXL_UNIMPLEMENTED();
3330   }
3331 
3332   SimVRegister temp;
3333   SimVRegister& zm = ReadVRegister(zm_code);
3334   if (index >= 0) dup_elements_to_segments(vform, temp, zm, index);
3335   cdot(vform, zda, zda, zn, (index >= 0) ? temp : zm, rot);
3336 }
3337 
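// Simulate the SVE2 three-source bitwise instructions (BCAX, BSL, BSL1N,
// BSL2N, EOR3 and NBSL). These operate bitwise on the whole vector, so a
// fixed D-sized lane format is used.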
3338 void Simulator::SimulateSVEBitwiseTernary(const Instruction* instr) {
3339   VectorFormat vform = kFormatVnD;
3340   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3341   SimVRegister& zm = ReadVRegister(instr->GetRm());
3342   SimVRegister& zk = ReadVRegister(instr->GetRn());
3343   SimVRegister temp;
3344 
3345   switch (form_hash_) {
3346     case "bcax_z_zzz"_h:
3347       bic(vform, temp, zm, zk);
3348       eor(vform, zdn, temp, zdn);
3349       break;
3350     case "bsl1n_z_zzz"_h:
3351       not_(vform, temp, zdn);
3352       bsl(vform, zdn, zk, temp, zm);
3353       break;
3354     case "bsl2n_z_zzz"_h:
3355       not_(vform, temp, zm);
3356       bsl(vform, zdn, zk, zdn, temp);
3357       break;
3358     case "bsl_z_zzz"_h:
3359       bsl(vform, zdn, zk, zdn, zm);
3360       break;
3361     case "eor3_z_zzz"_h:
3362       eor(vform, temp, zdn, zm);
3363       eor(vform, zdn, temp, zk);
3364       break;
3365     case "nbsl_z_zzz"_h:
3366       bsl(vform, zdn, zk, zdn, zm);
3367       not_(vform, zdn, zdn);
3368       break;
3369     default:
3370       VIXL_UNIMPLEMENTED();
3371   }
3372 }
3373 
3374 void Simulator::SimulateSVEHalvingAddSub(const Instruction* instr) {
3375   VectorFormat vform = instr->GetSVEVectorFormat();
3376   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3377   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3378   SimVRegister& zm = ReadVRegister(instr->GetRn());
3379   SimVRegister result;
3380 
3381   switch (form_hash_) {
3382     case "shadd_z_p_zz"_h:
3383       add(vform, result, zdn, zm).Halve(vform);
3384       break;
3385     case "shsub_z_p_zz"_h:
3386       sub(vform, result, zdn, zm).Halve(vform);
3387       break;
3388     case "shsubr_z_p_zz"_h:
3389       sub(vform, result, zm, zdn).Halve(vform);
3390       break;
3391     case "srhadd_z_p_zz"_h:
3392       add(vform, result, zdn, zm).Halve(vform).Round(vform);
3393       break;
3394     case "uhadd_z_p_zz"_h:
3395       add(vform, result, zdn, zm).Uhalve(vform);
3396       break;
3397     case "uhsub_z_p_zz"_h:
3398       sub(vform, result, zdn, zm).Uhalve(vform);
3399       break;
3400     case "uhsubr_z_p_zz"_h:
3401       sub(vform, result, zm, zdn).Uhalve(vform);
3402       break;
3403     case "urhadd_z_p_zz"_h:
3404       add(vform, result, zdn, zm).Uhalve(vform).Round(vform);
3405       break;
3406     default:
3407       VIXL_UNIMPLEMENTED();
3408       break;
3409   }
3410   mov_merging(vform, zdn, pg, result);
3411 }
3412 
3413 void Simulator::SimulateSVESaturatingArithmetic(const Instruction* instr) {
3414   VectorFormat vform = instr->GetSVEVectorFormat();
3415   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3416   SimVRegister& zm = ReadVRegister(instr->GetRn());
3417   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3418   SimVRegister result;
3419 
3420   switch (form_hash_) {
3421     case "sqadd_z_p_zz"_h:
3422       add(vform, result, zdn, zm).SignedSaturate(vform);
3423       break;
3424     case "sqsub_z_p_zz"_h:
3425       sub(vform, result, zdn, zm).SignedSaturate(vform);
3426       break;
3427     case "sqsubr_z_p_zz"_h:
3428       sub(vform, result, zm, zdn).SignedSaturate(vform);
3429       break;
3430     case "suqadd_z_p_zz"_h:
3431       suqadd(vform, result, zdn, zm);
3432       break;
3433     case "uqadd_z_p_zz"_h:
3434       add(vform, result, zdn, zm).UnsignedSaturate(vform);
3435       break;
3436     case "uqsub_z_p_zz"_h:
3437       sub(vform, result, zdn, zm).UnsignedSaturate(vform);
3438       break;
3439     case "uqsubr_z_p_zz"_h:
3440       sub(vform, result, zm, zdn).UnsignedSaturate(vform);
3441       break;
3442     case "usqadd_z_p_zz"_h:
3443       usqadd(vform, result, zdn, zm);
3444       break;
3445     default:
3446       VIXL_UNIMPLEMENTED();
3447       break;
3448   }
3449   mov_merging(vform, zdn, pg, result);
3450 }
3451 
3452 void Simulator::SimulateSVEIntArithPair(const Instruction* instr) {
3453   VectorFormat vform = instr->GetSVEVectorFormat();
3454   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3455   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3456   SimVRegister& zm = ReadVRegister(instr->GetRn());
3457   SimVRegister result;
3458 
3459   switch (form_hash_) {
3460     case "addp_z_p_zz"_h:
3461       addp(vform, result, zdn, zm);
3462       break;
3463     case "smaxp_z_p_zz"_h:
3464       smaxp(vform, result, zdn, zm);
3465       break;
3466     case "sminp_z_p_zz"_h:
3467       sminp(vform, result, zdn, zm);
3468       break;
3469     case "umaxp_z_p_zz"_h:
3470       umaxp(vform, result, zdn, zm);
3471       break;
3472     case "uminp_z_p_zz"_h:
3473       uminp(vform, result, zdn, zm);
3474       break;
3475     default:
3476       VIXL_UNIMPLEMENTED();
3477       break;
3478   }
3479   mov_merging(vform, zdn, pg, result);
3480 }
3481 
3482 void Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr) {
3483   VectorFormat vform = instr->GetSVEVectorFormat();
3484   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3485   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3486   SimVRegister& zm = ReadVRegister(instr->GetRn());
3487   SimVRegister result;
3488 
3489   switch (form_hash_) {
3490     case "faddp_z_p_zz"_h:
3491       faddp(vform, result, zdn, zm);
3492       break;
3493     case "fmaxnmp_z_p_zz"_h:
3494       fmaxnmp(vform, result, zdn, zm);
3495       break;
3496     case "fmaxp_z_p_zz"_h:
3497       fmaxp(vform, result, zdn, zm);
3498       break;
3499     case "fminnmp_z_p_zz"_h:
3500       fminnmp(vform, result, zdn, zm);
3501       break;
3502     case "fminp_z_p_zz"_h:
3503       fminp(vform, result, zdn, zm);
3504       break;
3505     default:
3506       VIXL_UNIMPLEMENTED();
3507   }
3508   mov_merging(vform, zdn, pg, result);
3509 }
3510 
3511 void Simulator::Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr) {
3512   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3513   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3514 
3515   std::pair<int, int> shift_and_lane_size =
3516       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
3517   unsigned lane_size = shift_and_lane_size.second;
3518   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3519   int right_shift_dist = shift_and_lane_size.first;
3520   int left_shift_dist = (8 << lane_size) - right_shift_dist;
3521   SimVRegister result;
3522 
3523   switch (form_hash_) {
3524     case "sqshl_z_p_zi"_h:
3525       sqshl(vform, result, zdn, left_shift_dist);
3526       break;
3527     case "sqshlu_z_p_zi"_h:
3528       sqshlu(vform, result, zdn, left_shift_dist);
3529       break;
3530     case "srshr_z_p_zi"_h:
3531       sshr(vform, result, zdn, right_shift_dist).Round(vform);
3532       break;
3533     case "uqshl_z_p_zi"_h:
3534       uqshl(vform, result, zdn, left_shift_dist);
3535       break;
3536     case "urshr_z_p_zi"_h:
3537       ushr(vform, result, zdn, right_shift_dist).Round(vform);
3538       break;
3539     default:
3540       VIXL_UNIMPLEMENTED();
3541   }
3542   mov_merging(vform, zdn, pg, result);
3543 }
3544 
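// Simulate XAR: exclusive-OR the two sources, then rotate each element right
// by the immediate.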
3545 void Simulator::SimulateSVEExclusiveOrRotate(const Instruction* instr) {
3546   VIXL_ASSERT(form_hash_ == "xar_z_zzi"_h);
3547 
3548   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3549   SimVRegister& zm = ReadVRegister(instr->GetRn());
3550 
3551   std::pair<int, int> shift_and_lane_size =
3552       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
3553   unsigned lane_size = shift_and_lane_size.second;
3554   VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
3555   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3556   int shift_dist = shift_and_lane_size.first;
3557   eor(vform, zdn, zdn, zm);
3558   ror(vform, zdn, zdn, shift_dist);
3559 }
3560 
3561 void Simulator::Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr) {
3562   VectorFormat vform = instr->GetSVEVectorFormat();
3563   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3564   SimVRegister& zm = ReadVRegister(instr->GetRn());
3565   int rot = (instr->ExtractBit(10) == 0) ? 90 : 270;
3566 
3567   switch (form_hash_) {
3568     case "cadd_z_zz"_h:
3569       cadd(vform, zdn, zdn, zm, rot);
3570       break;
3571     case "sqcadd_z_zz"_h:
3572       cadd(vform, zdn, zdn, zm, rot, /* saturate = */ true);
3573       break;
3574     default:
3575       VIXL_UNIMPLEMENTED();
3576   }
3577 }
3578 
3579 void Simulator::Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr) {
3580   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3581   SimVRegister& zn = ReadVRegister(instr->GetRn());
3582   uint64_t xm = ReadXRegister(instr->GetRm());
3583 
3584   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
3585   int msize = -1;
3586   bool is_signed = false;
3587 
3588   switch (form_hash_) {
3589     case "ldnt1b_z_p_ar_d_64_unscaled"_h:
3590       msize = 0;
3591       break;
3592     case "ldnt1d_z_p_ar_d_64_unscaled"_h:
3593       msize = 3;
3594       break;
3595     case "ldnt1h_z_p_ar_d_64_unscaled"_h:
3596       msize = 1;
3597       break;
3598     case "ldnt1sb_z_p_ar_d_64_unscaled"_h:
3599       msize = 0;
3600       is_signed = true;
3601       break;
3602     case "ldnt1sh_z_p_ar_d_64_unscaled"_h:
3603       msize = 1;
3604       is_signed = true;
3605       break;
3606     case "ldnt1sw_z_p_ar_d_64_unscaled"_h:
3607       msize = 2;
3608       is_signed = true;
3609       break;
3610     case "ldnt1w_z_p_ar_d_64_unscaled"_h:
3611       msize = 2;
3612       break;
3613     default:
3614       VIXL_UNIMPLEMENTED();
3615   }
3616   addr.SetMsizeInBytesLog2(msize);
3617   SVEStructuredLoadHelper(kFormatVnD, pg, instr->GetRt(), addr, is_signed);
3618 }
3619 
3620 void Simulator::Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr) {
3621   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3622   SimVRegister& zn = ReadVRegister(instr->GetRn());
3623   uint64_t xm = ReadXRegister(instr->GetRm());
3624 
3625   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
3626   VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_d_64_unscaled"_h) ||
3627               (form_hash_ == "stnt1d_z_p_ar_d_64_unscaled"_h) ||
3628               (form_hash_ == "stnt1h_z_p_ar_d_64_unscaled"_h) ||
3629               (form_hash_ == "stnt1w_z_p_ar_d_64_unscaled"_h));
3630 
3631   addr.SetMsizeInBytesLog2(
3632       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
3633   SVEStructuredStoreHelper(kFormatVnD, pg, instr->GetRt(), addr);
3634 }
3635 
3636 void Simulator::Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr) {
3637   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3638   SimVRegister& zn = ReadVRegister(instr->GetRn());
3639   uint64_t xm = ReadXRegister(instr->GetRm());
3640 
3641   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
3642   int msize = -1;
3643   bool is_signed = false;
3644 
3645   switch (form_hash_) {
3646     case "ldnt1b_z_p_ar_s_x32_unscaled"_h:
3647       msize = 0;
3648       break;
3649     case "ldnt1h_z_p_ar_s_x32_unscaled"_h:
3650       msize = 1;
3651       break;
3652     case "ldnt1sb_z_p_ar_s_x32_unscaled"_h:
3653       msize = 0;
3654       is_signed = true;
3655       break;
3656     case "ldnt1sh_z_p_ar_s_x32_unscaled"_h:
3657       msize = 1;
3658       is_signed = true;
3659       break;
3660     case "ldnt1w_z_p_ar_s_x32_unscaled"_h:
3661       msize = 2;
3662       break;
3663     default:
3664       VIXL_UNIMPLEMENTED();
3665   }
3666   addr.SetMsizeInBytesLog2(msize);
3667   SVEStructuredLoadHelper(kFormatVnS, pg, instr->GetRt(), addr, is_signed);
3668 }
3669 
3670 void Simulator::Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr) {
3671   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3672   SimVRegister& zn = ReadVRegister(instr->GetRn());
3673   uint64_t xm = ReadXRegister(instr->GetRm());
3674 
3675   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
3676   VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_s_x32_unscaled"_h) ||
3677               (form_hash_ == "stnt1h_z_p_ar_s_x32_unscaled"_h) ||
3678               (form_hash_ == "stnt1w_z_p_ar_s_x32_unscaled"_h));
3679 
3680   addr.SetMsizeInBytesLog2(
3681       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
3682   SVEStructuredStoreHelper(kFormatVnS, pg, instr->GetRt(), addr);
3683 }
3684 
3685 void Simulator::VisitReserved(const Instruction* instr) {
3686   // UDF is the only instruction in this group, and the Decoder is precise here.
3687   VIXL_ASSERT(instr->Mask(ReservedMask) == UDF);
3688 
3689   printf("UDF (permanently undefined) instruction at %p: 0x%08" PRIx32 "\n",
3690          reinterpret_cast<const void*>(instr),
3691          instr->GetInstructionBits());
3692   VIXL_ABORT_WITH_MSG("UNDEFINED (UDF)\n");
3693 }
3694 
3695 
3696 void Simulator::VisitUnimplemented(const Instruction* instr) {
3697   printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
3698          reinterpret_cast<const void*>(instr),
3699          instr->GetInstructionBits());
3700   VIXL_UNIMPLEMENTED();
3701 }
3702 
3703 
3704 void Simulator::VisitUnallocated(const Instruction* instr) {
3705   printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
3706          reinterpret_cast<const void*>(instr),
3707          instr->GetInstructionBits());
3708   VIXL_UNIMPLEMENTED();
3709 }
3710 
3711 
3712 void Simulator::VisitPCRelAddressing(const Instruction* instr) {
3713   VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
3714               (instr->Mask(PCRelAddressingMask) == ADRP));
3715 
3716   WriteRegister(instr->GetRd(), instr->GetImmPCOffsetTarget());
3717 }
3718 
3719 
3720 void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
3721   switch (instr->Mask(UnconditionalBranchMask)) {
3722     case BL:
3723       WriteLr(instr->GetNextInstruction());
3724       VIXL_FALLTHROUGH();
3725     case B:
3726       WritePc(instr->GetImmPCOffsetTarget());
3727       break;
3728     default:
3729       VIXL_UNREACHABLE();
3730   }
3731 }
3732 
3733 
3734 void Simulator::VisitConditionalBranch(const Instruction* instr) {
3735   VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
3736   if (ConditionPassed(instr->GetConditionBranch())) {
3737     WritePc(instr->GetImmPCOffsetTarget());
3738   }
3739 }
3740 
3741 BType Simulator::GetBTypeFromInstruction(const Instruction* instr) const {
3742   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
3743     case BLR:
3744     case BLRAA:
3745     case BLRAB:
3746     case BLRAAZ:
3747     case BLRABZ:
3748       return BranchAndLink;
3749     case BR:
3750     case BRAA:
3751     case BRAB:
3752     case BRAAZ:
3753     case BRABZ:
3754       if ((instr->GetRn() == 16) || (instr->GetRn() == 17) ||
3755           !PcIsInGuardedPage()) {
3756         return BranchFromUnguardedOrToIP;
3757       }
3758       return BranchFromGuardedNotToIP;
3759   }
3760   return DefaultBType;
3761 }
3762 
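// Branches to a register, covering BR, BLR and RET as well as their pointer
// authentication variants. Authenticated forms check the PAC before
// branching, and non-return branches are checked against registered branch
// interceptions.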
3763 void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
3764   bool authenticate = false;
3765   bool link = false;
3766   bool ret = false;
3767   uint64_t addr = ReadXRegister(instr->GetRn());
3768   uint64_t context = 0;
3769 
3770   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
3771     case BLR:
3772       link = true;
3773       VIXL_FALLTHROUGH();
3774     case BR:
3775       break;
3776 
3777     case BLRAAZ:
3778     case BLRABZ:
3779       link = true;
3780       VIXL_FALLTHROUGH();
3781     case BRAAZ:
3782     case BRABZ:
3783       authenticate = true;
3784       break;
3785 
3786     case BLRAA:
3787     case BLRAB:
3788       link = true;
3789       VIXL_FALLTHROUGH();
3790     case BRAA:
3791     case BRAB:
3792       authenticate = true;
3793       context = ReadXRegister(instr->GetRd());
3794       break;
3795 
3796     case RETAA:
3797     case RETAB:
3798       authenticate = true;
3799       addr = ReadXRegister(kLinkRegCode);
3800       context = ReadXRegister(31, Reg31IsStackPointer);
3801       VIXL_FALLTHROUGH();
3802     case RET:
3803       ret = true;
3804       break;
3805     default:
3806       VIXL_UNREACHABLE();
3807   }
3808 
3809   if (link) {
3810     WriteLr(instr->GetNextInstruction());
3811   }
3812 
3813   if (authenticate) {
3814     PACKey key = (instr->ExtractBit(10) == 0) ? kPACKeyIA : kPACKeyIB;
3815     addr = AuthPAC(addr, context, key, kInstructionPointer);
3816 
3817     int error_lsb = GetTopPACBit(addr, kInstructionPointer) - 2;
3818     if (((addr >> error_lsb) & 0x3) != 0x0) {
3819       VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
3820     }
3821   }
3822 
3823   if (!ret) {
3824     // Check for interceptions to the target address; if one is found, call it.
3825     MetaDataDepot::BranchInterceptionAbstract* interception =
3826         meta_data_.FindBranchInterception(addr);
3827 
3828     if (interception != nullptr) {
3829       // Instead of writing the address of the function to the PC, call the
3830       // function's interception directly. We change the address that will be
3831       // branched to so that afterwards we continue execution from
3832       // the address in the LR. Note: the interception may modify the LR,
3833       // so read it before calling the interception.
3834       addr = ReadRegister<uint64_t>(kLinkRegCode);
3835       (*interception)(this);
3836     }
3837   }
3838 
3839   WriteNextBType(GetBTypeFromInstruction(instr));
3840   WritePc(Instruction::Cast(addr));
3841 }
3842 
3843 
3844 void Simulator::VisitTestBranch(const Instruction* instr) {
3845   unsigned bit_pos =
3846       (instr->GetImmTestBranchBit5() << 5) | instr->GetImmTestBranchBit40();
3847   bool bit_zero = ((ReadXRegister(instr->GetRt()) >> bit_pos) & 1) == 0;
3848   bool take_branch = false;
3849   switch (instr->Mask(TestBranchMask)) {
3850     case TBZ:
3851       take_branch = bit_zero;
3852       break;
3853     case TBNZ:
3854       take_branch = !bit_zero;
3855       break;
3856     default:
3857       VIXL_UNIMPLEMENTED();
3858   }
3859   if (take_branch) {
3860     WritePc(instr->GetImmPCOffsetTarget());
3861   }
3862 }
3863 
3864 
3865 void Simulator::VisitCompareBranch(const Instruction* instr) {
3866   unsigned rt = instr->GetRt();
3867   bool take_branch = false;
3868   switch (instr->Mask(CompareBranchMask)) {
3869     case CBZ_w:
3870       take_branch = (ReadWRegister(rt) == 0);
3871       break;
3872     case CBZ_x:
3873       take_branch = (ReadXRegister(rt) == 0);
3874       break;
3875     case CBNZ_w:
3876       take_branch = (ReadWRegister(rt) != 0);
3877       break;
3878     case CBNZ_x:
3879       take_branch = (ReadXRegister(rt) != 0);
3880       break;
3881     default:
3882       VIXL_UNIMPLEMENTED();
3883   }
3884   if (take_branch) {
3885     WritePc(instr->GetImmPCOffsetTarget());
3886   }
3887 }
3888 
3889 
3890 void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
3891   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3892   bool set_flags = instr->GetFlagsUpdate();
3893   int64_t new_val = 0;
3894   Instr operation = instr->Mask(AddSubOpMask);
3895 
3896   switch (operation) {
3897     case ADD:
3898     case ADDS: {
3899       new_val = AddWithCarry(reg_size,
3900                              set_flags,
3901                              ReadRegister(reg_size,
3902                                           instr->GetRn(),
3903                                           instr->GetRnMode()),
3904                              op2);
3905       break;
3906     }
3907     case SUB:
3908     case SUBS: {
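      // Subtraction is implemented as an addition of the bitwise-inverted
      // second operand with a carry-in of one (two's complement negation).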
3909       new_val = AddWithCarry(reg_size,
3910                              set_flags,
3911                              ReadRegister(reg_size,
3912                                           instr->GetRn(),
3913                                           instr->GetRnMode()),
3914                              ~op2,
3915                              1);
3916       break;
3917     }
3918     default:
3919       VIXL_UNREACHABLE();
3920   }
3921 
3922   WriteRegister(reg_size,
3923                 instr->GetRd(),
3924                 new_val,
3925                 LogRegWrites,
3926                 instr->GetRdMode());
3927 }
3928 
3929 
3930 void Simulator::VisitAddSubShifted(const Instruction* instr) {
3931   // Add/sub/adds/subs don't allow ROR as a shift mode.
3932   VIXL_ASSERT(instr->GetShiftDP() != ROR);
3933 
3934   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3935   int64_t op2 = ShiftOperand(reg_size,
3936                              ReadRegister(reg_size, instr->GetRm()),
3937                              static_cast<Shift>(instr->GetShiftDP()),
3938                              instr->GetImmDPShift());
3939   AddSubHelper(instr, op2);
3940 }
3941 
3942 
3943 void Simulator::VisitAddSubImmediate(const Instruction* instr) {
3944   int64_t op2 = instr->GetImmAddSub()
3945                 << ((instr->GetImmAddSubShift() == 1) ? 12 : 0);
3946   AddSubHelper(instr, op2);
3947 }
3948 
3949 
3950 void Simulator::VisitAddSubExtended(const Instruction* instr) {
3951   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3952   int64_t op2 = ExtendValue(reg_size,
3953                             ReadRegister(reg_size, instr->GetRm()),
3954                             static_cast<Extend>(instr->GetExtendMode()),
3955                             instr->GetImmExtendShift());
3956   AddSubHelper(instr, op2);
3957 }
3958 
3959 
3960 void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
3961   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3962   int64_t op2 = ReadRegister(reg_size, instr->GetRm());
3963   int64_t new_val;
3964 
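  // SBC and SBCS invert the second operand, so the add-with-carry below
  // computes Rn + ~Rm + C, which is Rn - Rm - 1 + C.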
3965   if ((instr->Mask(AddSubOpMask) == SUB) ||
3966       (instr->Mask(AddSubOpMask) == SUBS)) {
3967     op2 = ~op2;
3968   }
3969 
3970   new_val = AddWithCarry(reg_size,
3971                          instr->GetFlagsUpdate(),
3972                          ReadRegister(reg_size, instr->GetRn()),
3973                          op2,
3974                          ReadC());
3975 
3976   WriteRegister(reg_size, instr->GetRd(), new_val);
3977 }
3978 
3979 
3980 void Simulator::VisitRotateRightIntoFlags(const Instruction* instr) {
3981   switch (instr->Mask(RotateRightIntoFlagsMask)) {
3982     case RMIF: {
3983       uint64_t value = ReadRegister<uint64_t>(instr->GetRn());
3984       unsigned shift = instr->GetImmRMIFRotation();
3985       unsigned mask = instr->GetNzcv();
3986       uint64_t rotated = RotateRight(value, shift, kXRegSize);
3987 
3988       ReadNzcv().SetFlags((rotated & mask) | (ReadNzcv().GetFlags() & ~mask));
3989       break;
3990     }
3991   }
3992 }
3993 
3994 
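// SETF8 and SETF16 set N, Z and V from the low byte or halfword of the source
// register; the C flag is left unchanged.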
3995 void Simulator::VisitEvaluateIntoFlags(const Instruction* instr) {
3996   uint32_t value = ReadRegister<uint32_t>(instr->GetRn());
3997   unsigned msb = (instr->Mask(EvaluateIntoFlagsMask) == SETF16) ? 15 : 7;
3998 
3999   unsigned sign_bit = (value >> msb) & 1;
4000   unsigned overflow_bit = (value >> (msb + 1)) & 1;
4001   ReadNzcv().SetN(sign_bit);
4002   ReadNzcv().SetZ((value << (31 - msb)) == 0);
4003   ReadNzcv().SetV(sign_bit ^ overflow_bit);
4004 }
4005 
4006 
4007 void Simulator::VisitLogicalShifted(const Instruction* instr) {
4008   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
4009   Shift shift_type = static_cast<Shift>(instr->GetShiftDP());
4010   unsigned shift_amount = instr->GetImmDPShift();
4011   int64_t op2 = ShiftOperand(reg_size,
4012                              ReadRegister(reg_size, instr->GetRm()),
4013                              shift_type,
4014                              shift_amount);
4015   if (instr->Mask(NOT) == NOT) {
4016     op2 = ~op2;
4017   }
4018   LogicalHelper(instr, op2);
4019 }
4020 
4021 
4022 void Simulator::VisitLogicalImmediate(const Instruction* instr) {
4023   if (instr->GetImmLogical() == 0) {
4024     VIXL_UNIMPLEMENTED();
4025   } else {
4026     LogicalHelper(instr, instr->GetImmLogical());
4027   }
4028 }
4029 
4030 
4031 void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
4032   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
4033   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
4034   int64_t result = 0;
4035   bool update_flags = false;
4036 
4037   // Switch on the logical operation, stripping out the NOT bit, as it has a
4038   // different meaning for logical immediate instructions.
4039   switch (instr->Mask(LogicalOpMask & ~NOT)) {
4040     case ANDS:
4041       update_flags = true;
4042       VIXL_FALLTHROUGH();
4043     case AND:
4044       result = op1 & op2;
4045       break;
4046     case ORR:
4047       result = op1 | op2;
4048       break;
4049     case EOR:
4050       result = op1 ^ op2;
4051       break;
4052     default:
4053       VIXL_UNIMPLEMENTED();
4054   }
4055 
4056   if (update_flags) {
4057     ReadNzcv().SetN(CalcNFlag(result, reg_size));
4058     ReadNzcv().SetZ(CalcZFlag(result));
4059     ReadNzcv().SetC(0);
4060     ReadNzcv().SetV(0);
4061     LogSystemRegister(NZCV);
4062   }
4063 
4064   WriteRegister(reg_size,
4065                 instr->GetRd(),
4066                 result,
4067                 LogRegWrites,
4068                 instr->GetRdMode());
4069 }
4070 
4071 
4072 void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
4073   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
4074   ConditionalCompareHelper(instr, ReadRegister(reg_size, instr->GetRm()));
4075 }
4076 
4077 
4078 void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
4079   ConditionalCompareHelper(instr, instr->GetImmCondCmp());
4080 }
4081 
4082 
4083 void Simulator::ConditionalCompareHelper(const Instruction* instr,
4084                                          int64_t op2) {
4085   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
4086   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
4087 
4088   if (ConditionPassed(instr->GetCondition())) {
4089     // If the condition passes, set the status flags to the result of comparing
4090     // the operands.
4091     if (instr->Mask(ConditionalCompareMask) == CCMP) {
4092       AddWithCarry(reg_size, true, op1, ~op2, 1);
4093     } else {
4094       VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN);
4095       AddWithCarry(reg_size, true, op1, op2, 0);
4096     }
4097   } else {
4098     // If the condition fails, set the status flags to the nzcv immediate.
4099     ReadNzcv().SetFlags(instr->GetNzcv());
4100     LogSystemRegister(NZCV);
4101   }
4102 }
4103 
4104 
4105 void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
4106   int offset = instr->GetImmLSUnsigned() << instr->GetSizeLS();
4107   LoadStoreHelper(instr, offset, Offset);
4108 }
4109 
4110 
4111 void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
4112   LoadStoreHelper(instr, instr->GetImmLS(), Offset);
4113 }
4114 
4115 
4116 void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
4117   LoadStoreHelper(instr, instr->GetImmLS(), PreIndex);
4118 }
4119 
4120 
4121 void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
4122   LoadStoreHelper(instr, instr->GetImmLS(), PostIndex);
4123 }
4124 
4125 
4126 template <typename T1, typename T2>
4127 void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) {
4128   unsigned rt = instr->GetRt();
4129   unsigned rn = instr->GetRn();
4130 
4131   unsigned element_size = sizeof(T2);
4132   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4133   int offset = instr->GetImmLS();
4134   address += offset;
4135 
4136   // Verify that the address is available to the host.
4137   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
4138 
4139   // Check the alignment of `address`.
4140   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
4141     VIXL_ALIGNMENT_EXCEPTION();
4142   }
4143 
4144   WriteRegister<T1>(rt, static_cast<T1>(MemRead<T2>(address)));
4145 
4146   // Approximate load-acquire by issuing a full barrier after the load.
4147   __sync_synchronize();
4148 
4149   LogRead(rt, GetPrintRegisterFormat(element_size), address);
4150 }
4151 
4152 
4153 template <typename T>
4154 void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) {
4155   unsigned rt = instr->GetRt();
4156   unsigned rn = instr->GetRn();
4157 
4158   unsigned element_size = sizeof(T);
4159   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4160   int offset = instr->GetImmLS();
4161   address += offset;
4162 
4163   // Verify that the address is available to the host.
4164   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
4165 
4166   // Check the alignment of `address`.
4167   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
4168     VIXL_ALIGNMENT_EXCEPTION();
4169   }
4170 
4171   // Approximate store-release by issuing a full barrier before the store.
4172   __sync_synchronize();
4173 
4174   MemWrite<T>(address, ReadRegister<T>(rt));
4175 
4176   LogWrite(rt, GetPrintRegisterFormat(element_size), address);
4177 }
4178 
4179 
4180 void Simulator::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) {
4181   switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
4182     case LDAPURB:
4183       LoadAcquireRCpcUnscaledOffsetHelper<uint8_t, uint8_t>(instr);
4184       break;
4185     case LDAPURH:
4186       LoadAcquireRCpcUnscaledOffsetHelper<uint16_t, uint16_t>(instr);
4187       break;
4188     case LDAPUR_w:
4189       LoadAcquireRCpcUnscaledOffsetHelper<uint32_t, uint32_t>(instr);
4190       break;
4191     case LDAPUR_x:
4192       LoadAcquireRCpcUnscaledOffsetHelper<uint64_t, uint64_t>(instr);
4193       break;
4194     case LDAPURSB_w:
4195       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int8_t>(instr);
4196       break;
4197     case LDAPURSB_x:
4198       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int8_t>(instr);
4199       break;
4200     case LDAPURSH_w:
4201       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int16_t>(instr);
4202       break;
4203     case LDAPURSH_x:
4204       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int16_t>(instr);
4205       break;
4206     case LDAPURSW:
4207       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int32_t>(instr);
4208       break;
4209     case STLURB:
4210       StoreReleaseUnscaledOffsetHelper<uint8_t>(instr);
4211       break;
4212     case STLURH:
4213       StoreReleaseUnscaledOffsetHelper<uint16_t>(instr);
4214       break;
4215     case STLUR_w:
4216       StoreReleaseUnscaledOffsetHelper<uint32_t>(instr);
4217       break;
4218     case STLUR_x:
4219       StoreReleaseUnscaledOffsetHelper<uint64_t>(instr);
4220       break;
4221   }
4222 }
4223 
4224 
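// LDRAA and LDRAB authenticate the base address with the DA or DB key, check
// the authentication result, then perform a 64-bit load, optionally writing
// the computed address back to the base register (pre-index form).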
4225 void Simulator::VisitLoadStorePAC(const Instruction* instr) {
4226   unsigned dst = instr->GetRt();
4227   unsigned addr_reg = instr->GetRn();
4228 
4229   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
4230 
4231   PACKey key = (instr->ExtractBit(23) == 0) ? kPACKeyDA : kPACKeyDB;
4232   address = AuthPAC(address, 0, key, kDataPointer);
4233 
4234   int error_lsb = GetTopPACBit(address, kInstructionPointer) - 2;
4235   if (((address >> error_lsb) & 0x3) != 0x0) {
4236     VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
4237   }
4238 
4239 
4240   if ((addr_reg == 31) && ((address % 16) != 0)) {
4241     // When the base register is SP the stack pointer is required to be
4242     // quadword aligned prior to the address calculation and write-backs.
4243     // Misalignment will cause a stack alignment fault.
4244     VIXL_ALIGNMENT_EXCEPTION();
4245   }
4246 
4247   int64_t offset = instr->GetImmLSPAC();
4248   address += offset;
4249 
4250   if (instr->Mask(LoadStorePACPreBit) == LoadStorePACPreBit) {
4251     // Pre-index mode.
4252     VIXL_ASSERT(offset != 0);
4253     WriteXRegister(addr_reg, address, LogRegWrites, Reg31IsStackPointer);
4254   }
4255 
4256   uintptr_t addr_ptr = static_cast<uintptr_t>(address);
4257 
4258   // Verify that the calculated address is available to the host.
4259   VIXL_ASSERT(address == addr_ptr);
4260 
4261   WriteXRegister(dst, MemRead<uint64_t>(addr_ptr), NoRegLog);
4262   unsigned access_size = 1 << 3;
4263   LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr);
4264 }
4265 
4266 
4267 void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
4268   Extend ext = static_cast<Extend>(instr->GetExtendMode());
4269   VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
4270   unsigned shift_amount = instr->GetImmShiftLS() * instr->GetSizeLS();
4271 
4272   int64_t offset =
4273       ExtendValue(kXRegSize, ReadXRegister(instr->GetRm()), ext, shift_amount);
4274   LoadStoreHelper(instr, offset, Offset);
4275 }
4276 
4277 
4278 void Simulator::LoadStoreHelper(const Instruction* instr,
4279                                 int64_t offset,
4280                                 AddrMode addrmode) {
4281   unsigned srcdst = instr->GetRt();
4282   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
4283 
4284   bool rt_is_vreg = false;
4285   int extend_to_size = 0;
4286   LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
4287   switch (op) {
4288     case LDRB_w:
4289       WriteWRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
4290       extend_to_size = kWRegSizeInBytes;
4291       break;
4292     case LDRH_w:
4293       WriteWRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
4294       extend_to_size = kWRegSizeInBytes;
4295       break;
4296     case LDR_w:
4297       WriteWRegister(srcdst, MemRead<uint32_t>(address), NoRegLog);
4298       extend_to_size = kWRegSizeInBytes;
4299       break;
4300     case LDR_x:
4301       WriteXRegister(srcdst, MemRead<uint64_t>(address), NoRegLog);
4302       extend_to_size = kXRegSizeInBytes;
4303       break;
4304     case LDRSB_w:
4305       WriteWRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
4306       extend_to_size = kWRegSizeInBytes;
4307       break;
4308     case LDRSH_w:
4309       WriteWRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
4310       extend_to_size = kWRegSizeInBytes;
4311       break;
4312     case LDRSB_x:
4313       WriteXRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
4314       extend_to_size = kXRegSizeInBytes;
4315       break;
4316     case LDRSH_x:
4317       WriteXRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
4318       extend_to_size = kXRegSizeInBytes;
4319       break;
4320     case LDRSW_x:
4321       WriteXRegister(srcdst, MemRead<int32_t>(address), NoRegLog);
4322       extend_to_size = kXRegSizeInBytes;
4323       break;
4324     case LDR_b:
4325       WriteBRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
4326       rt_is_vreg = true;
4327       break;
4328     case LDR_h:
4329       WriteHRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
4330       rt_is_vreg = true;
4331       break;
4332     case LDR_s:
4333       WriteSRegister(srcdst, MemRead<float>(address), NoRegLog);
4334       rt_is_vreg = true;
4335       break;
4336     case LDR_d:
4337       WriteDRegister(srcdst, MemRead<double>(address), NoRegLog);
4338       rt_is_vreg = true;
4339       break;
4340     case LDR_q:
4341       WriteQRegister(srcdst, MemRead<qreg_t>(address), NoRegLog);
4342       rt_is_vreg = true;
4343       break;
4344 
4345     case STRB_w:
4346       MemWrite<uint8_t>(address, ReadWRegister(srcdst));
4347       break;
4348     case STRH_w:
4349       MemWrite<uint16_t>(address, ReadWRegister(srcdst));
4350       break;
4351     case STR_w:
4352       MemWrite<uint32_t>(address, ReadWRegister(srcdst));
4353       break;
4354     case STR_x:
4355       MemWrite<uint64_t>(address, ReadXRegister(srcdst));
4356       break;
4357     case STR_b:
4358       MemWrite<uint8_t>(address, ReadBRegister(srcdst));
4359       rt_is_vreg = true;
4360       break;
4361     case STR_h:
4362       MemWrite<uint16_t>(address, ReadHRegisterBits(srcdst));
4363       rt_is_vreg = true;
4364       break;
4365     case STR_s:
4366       MemWrite<float>(address, ReadSRegister(srcdst));
4367       rt_is_vreg = true;
4368       break;
4369     case STR_d:
4370       MemWrite<double>(address, ReadDRegister(srcdst));
4371       rt_is_vreg = true;
4372       break;
4373     case STR_q:
4374       MemWrite<qreg_t>(address, ReadQRegister(srcdst));
4375       rt_is_vreg = true;
4376       break;
4377 
4378     // Ignore prfm hint instructions.
4379     case PRFM:
4380       break;
4381 
4382     default:
4383       VIXL_UNIMPLEMENTED();
4384   }
4385 
4386   // Print a detailed trace (including the memory address).
4387   bool extend = (extend_to_size != 0);
4388   unsigned access_size = 1 << instr->GetSizeLS();
4389   unsigned result_size = extend ? extend_to_size : access_size;
4390   PrintRegisterFormat print_format =
4391       rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
4392                  : GetPrintRegisterFormatForSize(result_size);
4393 
4394   if (instr->IsLoad()) {
4395     if (rt_is_vreg) {
4396       LogVRead(srcdst, print_format, address);
4397     } else {
4398       LogExtendingRead(srcdst, print_format, access_size, address);
4399     }
4400   } else if (instr->IsStore()) {
4401     if (rt_is_vreg) {
4402       LogVWrite(srcdst, print_format, address);
4403     } else {
4404       LogWrite(srcdst, GetPrintRegisterFormatForSize(result_size), address);
4405     }
4406   } else {
4407     VIXL_ASSERT(op == PRFM);
4408   }
4409 
4410   local_monitor_.MaybeClear();
4411 }
4412 
4413 
4414 void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
4415   LoadStorePairHelper(instr, Offset);
4416 }
4417 
4418 
4419 void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
4420   LoadStorePairHelper(instr, PreIndex);
4421 }
4422 
4423 
4424 void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
4425   LoadStorePairHelper(instr, PostIndex);
4426 }
4427 
4428 
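// The non-temporal pair forms (LDNP/STNP) are treated as hints here; the
// simulator handles them exactly like the normal offset-addressed pair forms.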
4429 void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
4430   LoadStorePairHelper(instr, Offset);
4431 }
4432 
4433 
4434 void Simulator::LoadStorePairHelper(const Instruction* instr,
4435                                     AddrMode addrmode) {
4436   unsigned rt = instr->GetRt();
4437   unsigned rt2 = instr->GetRt2();
4438   int element_size = 1 << instr->GetSizeLSPair();
4439   int64_t offset = instr->GetImmLSPair() * element_size;
4440   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
4441   uintptr_t address2 = address + element_size;
4442 
4443   LoadStorePairOp op =
4444       static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));
4445 
4446   // 'rt' and 'rt2' can only be aliased for stores.
4447   VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));
4448 
4449   bool rt_is_vreg = false;
4450   bool sign_extend = false;
4451   switch (op) {
4452     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
4453     // will print a more detailed log.
4454     case LDP_w: {
4455       WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4456       WriteWRegister(rt2, MemRead<uint32_t>(address2), NoRegLog);
4457       break;
4458     }
4459     case LDP_s: {
4460       WriteSRegister(rt, MemRead<float>(address), NoRegLog);
4461       WriteSRegister(rt2, MemRead<float>(address2), NoRegLog);
4462       rt_is_vreg = true;
4463       break;
4464     }
4465     case LDP_x: {
4466       WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4467       WriteXRegister(rt2, MemRead<uint64_t>(address2), NoRegLog);
4468       break;
4469     }
4470     case LDP_d: {
4471       WriteDRegister(rt, MemRead<double>(address), NoRegLog);
4472       WriteDRegister(rt2, MemRead<double>(address2), NoRegLog);
4473       rt_is_vreg = true;
4474       break;
4475     }
4476     case LDP_q: {
4477       WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
4478       WriteQRegister(rt2, MemRead<qreg_t>(address2), NoRegLog);
4479       rt_is_vreg = true;
4480       break;
4481     }
4482     case LDPSW_x: {
4483       WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
4484       WriteXRegister(rt2, MemRead<int32_t>(address2), NoRegLog);
4485       sign_extend = true;
4486       break;
4487     }
4488     case STP_w: {
4489       MemWrite<uint32_t>(address, ReadWRegister(rt));
4490       MemWrite<uint32_t>(address2, ReadWRegister(rt2));
4491       break;
4492     }
4493     case STP_s: {
4494       MemWrite<float>(address, ReadSRegister(rt));
4495       MemWrite<float>(address2, ReadSRegister(rt2));
4496       rt_is_vreg = true;
4497       break;
4498     }
4499     case STP_x: {
4500       MemWrite<uint64_t>(address, ReadXRegister(rt));
4501       MemWrite<uint64_t>(address2, ReadXRegister(rt2));
4502       break;
4503     }
4504     case STP_d: {
4505       MemWrite<double>(address, ReadDRegister(rt));
4506       MemWrite<double>(address2, ReadDRegister(rt2));
4507       rt_is_vreg = true;
4508       break;
4509     }
4510     case STP_q: {
4511       MemWrite<qreg_t>(address, ReadQRegister(rt));
4512       MemWrite<qreg_t>(address2, ReadQRegister(rt2));
4513       rt_is_vreg = true;
4514       break;
4515     }
4516     default:
4517       VIXL_UNREACHABLE();
4518   }
4519 
4520   // Print a detailed trace (including the memory address).
4521   unsigned result_size = sign_extend ? kXRegSizeInBytes : element_size;
4522   PrintRegisterFormat print_format =
4523       rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
4524                  : GetPrintRegisterFormatForSize(result_size);
4525 
4526   if (instr->IsLoad()) {
4527     if (rt_is_vreg) {
4528       LogVRead(rt, print_format, address);
4529       LogVRead(rt2, print_format, address2);
4530     } else if (sign_extend) {
4531       LogExtendingRead(rt, print_format, element_size, address);
4532       LogExtendingRead(rt2, print_format, element_size, address2);
4533     } else {
4534       LogRead(rt, print_format, address);
4535       LogRead(rt2, print_format, address2);
4536     }
4537   } else {
4538     if (rt_is_vreg) {
4539       LogVWrite(rt, print_format, address);
4540       LogVWrite(rt2, print_format, address2);
4541     } else {
4542       LogWrite(rt, print_format, address);
4543       LogWrite(rt2, print_format, address2);
4544     }
4545   }
4546 
4547   local_monitor_.MaybeClear();
4548 }
4549 
4550 
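// CAS{A,L,AL}{B,H,,_w,_x}: compare-and-swap. Rs supplies the comparison value
// and receives the value read from memory; Rt supplies the new value, which
// is stored only if the comparison succeeds. Bit 22 requests acquire
// semantics and bit 15 release semantics, both approximated with full
// barriers around the access. Note that the simulation itself is an ordinary
// read-compare-write sequence; it does not use host atomics.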
4551 template <typename T>
4552 void Simulator::CompareAndSwapHelper(const Instruction* instr) {
4553   unsigned rs = instr->GetRs();
4554   unsigned rt = instr->GetRt();
4555   unsigned rn = instr->GetRn();
4556 
4557   unsigned element_size = sizeof(T);
4558   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4559 
4560   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4561 
4562   bool is_acquire = instr->ExtractBit(22) == 1;
4563   bool is_release = instr->ExtractBit(15) == 1;
4564 
4565   T comparevalue = ReadRegister<T>(rs);
4566   T newvalue = ReadRegister<T>(rt);
4567 
4568   // The architecture permits that the data read clears any exclusive monitors
4569   // associated with that location, even if the compare subsequently fails.
4570   local_monitor_.Clear();
4571 
4572   T data = MemRead<T>(address);
4573   if (is_acquire) {
4574     // Approximate load-acquire by issuing a full barrier after the load.
4575     __sync_synchronize();
4576   }
4577 
4578   if (data == comparevalue) {
4579     if (is_release) {
4580       // Approximate store-release by issuing a full barrier before the store.
4581       __sync_synchronize();
4582     }
4583     MemWrite<T>(address, newvalue);
4584     LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address);
4585   }
4586   WriteRegister<T>(rs, data, NoRegLog);
4587   LogRead(rs, GetPrintRegisterFormatForSize(element_size), address);
4588 }
4589 
4590 
4591 template <typename T>
4592 void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
4593   VIXL_ASSERT((sizeof(T) == 4) || (sizeof(T) == 8));
4594   unsigned rs = instr->GetRs();
4595   unsigned rt = instr->GetRt();
4596   unsigned rn = instr->GetRn();
4597 
4598   VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0));
4599 
4600   unsigned element_size = sizeof(T);
4601   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4602 
4603   CheckIsValidUnalignedAtomicAccess(rn, address, element_size * 2);
4604 
4605   uint64_t address2 = address + element_size;
4606 
4607   bool is_acquire = instr->ExtractBit(22) == 1;
4608   bool is_release = instr->ExtractBit(15) == 1;
4609 
4610   T comparevalue_high = ReadRegister<T>(rs + 1);
4611   T comparevalue_low = ReadRegister<T>(rs);
4612   T newvalue_high = ReadRegister<T>(rt + 1);
4613   T newvalue_low = ReadRegister<T>(rt);
4614 
4615   // The architecture permits that the data read clears any exclusive monitors
4616   // associated with that location, even if the compare subsequently fails.
4617   local_monitor_.Clear();
4618 
4619   T data_low = MemRead<T>(address);
4620   T data_high = MemRead<T>(address2);
4621 
4622   if (is_acquire) {
4623     // Approximate load-acquire by issuing a full barrier after the load.
4624     __sync_synchronize();
4625   }
4626 
4627   bool same =
4628       (data_high == comparevalue_high) && (data_low == comparevalue_low);
4629   if (same) {
4630     if (is_release) {
4631       // Approximate store-release by issuing a full barrier before the store.
4632       __sync_synchronize();
4633     }
4634 
4635     MemWrite<T>(address, newvalue_low);
4636     MemWrite<T>(address2, newvalue_high);
4637   }
4638 
4639   WriteRegister<T>(rs + 1, data_high, NoRegLog);
4640   WriteRegister<T>(rs, data_low, NoRegLog);
4641 
4642   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
4643   LogRead(rs, format, address);
4644   LogRead(rs + 1, format, address2);
4645 
4646   if (same) {
4647     LogWrite(rt, format, address);
4648     LogWrite(rt + 1, format, address2);
4649   }
4650 }
4651 
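// A rough usage sketch, for illustration only (the names below are not real
// simulator variables): an SVE first-fault or non-fault load can probe each
// element's address before reading it, e.g.
//
//   if (!CanReadMemory(element_address, element_size)) {
//     // Suppress this element and stop the (first-)faulting load here.
//   }
//
// The SVE load helpers elsewhere in the simulator decide how the FFR and the
// destination lanes are actually updated; this only illustrates the probe.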
4652 bool Simulator::CanReadMemory(uintptr_t address, size_t size) {
4653   // To simulate fault-tolerant loads, we need to know what host addresses we
4654   // can access without generating a real fault. One way to do that is to
4655   // attempt to `write()` the memory to a placeholder pipe[1]. This is more
4656   // portable and less intrusive than using (global) signal handlers.
4657   //
4658   // [1]: https://stackoverflow.com/questions/7134590
4659 
4660   size_t written = 0;
4661   bool can_read = true;
4662   // `write` will normally return after one invocation, but it is allowed to
4663   // handle only part of the operation, so wrap it in a loop.
4664   while (can_read && (written < size)) {
4665     ssize_t result = write(placeholder_pipe_fd_[1],
4666                            reinterpret_cast<void*>(address + written),
4667                            size - written);
4668     if (result > 0) {
4669       written += result;
4670     } else {
4671       switch (result) {
4672         case -EPERM:
4673         case -EFAULT:
4674           // The address range is not accessible.
4675           // `write` is supposed to return -EFAULT in this case, but in practice
4676           // it seems to return -EPERM, so we accept that too.
4677           can_read = false;
4678           break;
4679         case -EINTR:
4680           // The call was interrupted by a signal. Just try again.
4681           break;
4682         default:
4683           // Any other error is fatal.
4684           VIXL_ABORT();
4685       }
4686     }
4687   }
4688   // Drain the read side of the pipe. If we don't do this, we'll leak memory as
4689   // the placeholder data is buffered. As before, we expect to drain the whole
4690   // write in one invocation, but cannot guarantee that, so we wrap it in a
4691   // loop. This function is primarily intended to implement SVE fault-tolerant
4692   // loads, so the maximum Z register size is a good default buffer size.
4693   char buffer[kZRegMaxSizeInBytes];
4694   while (written > 0) {
4695     ssize_t result = read(placeholder_pipe_fd_[0],
4696                           reinterpret_cast<void*>(buffer),
4697                           sizeof(buffer));
4698     // `read` blocks, and returns 0 only at EOF. We should not hit EOF until
4699     // we've read everything that was written, so treat 0 as an error.
4700     if (result > 0) {
4701       VIXL_ASSERT(static_cast<size_t>(result) <= written);
4702       written -= result;
4703     } else {
4704       // For -EINTR, just try again. We can't handle any other error.
4705       VIXL_CHECK(result == -EINTR);
4706     }
4707   }
4708 
4709   return can_read;
4710 }
4711 
4712 void Simulator::PrintExclusiveAccessWarning() {
4713   if (print_exclusive_access_warning_) {
4714     fprintf(stderr,
4715             "%sWARNING:%s VIXL simulator support for "
4716             "load-/store-/clear-exclusive "
4717             "instructions is limited. Refer to the README for details.%s\n",
4718             clr_warning,
4719             clr_warning_message,
4720             clr_normal);
4721     print_exclusive_access_warning_ = false;
4722   }
4723 }
4724 
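// Exclusive accesses are modelled with a simple local/global monitor pair:
// LDXR-type loads mark [address, address + access_size) as exclusive in the
// local monitor, and STXR-type stores only perform the write (reporting 0 in
// Ws) if both monitors still cover that range; otherwise they report 1 and
// skip the store. This is a deliberately simplified model, hence the warning
// printed by PrintExclusiveAccessWarning() above.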
4725 void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
4726   LoadStoreExclusive op =
4727       static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));
4728 
4729   switch (op) {
4730     case CAS_w:
4731     case CASA_w:
4732     case CASL_w:
4733     case CASAL_w:
4734       CompareAndSwapHelper<uint32_t>(instr);
4735       break;
4736     case CAS_x:
4737     case CASA_x:
4738     case CASL_x:
4739     case CASAL_x:
4740       CompareAndSwapHelper<uint64_t>(instr);
4741       break;
4742     case CASB:
4743     case CASAB:
4744     case CASLB:
4745     case CASALB:
4746       CompareAndSwapHelper<uint8_t>(instr);
4747       break;
4748     case CASH:
4749     case CASAH:
4750     case CASLH:
4751     case CASALH:
4752       CompareAndSwapHelper<uint16_t>(instr);
4753       break;
4754     case CASP_w:
4755     case CASPA_w:
4756     case CASPL_w:
4757     case CASPAL_w:
4758       CompareAndSwapPairHelper<uint32_t>(instr);
4759       break;
4760     case CASP_x:
4761     case CASPA_x:
4762     case CASPL_x:
4763     case CASPAL_x:
4764       CompareAndSwapPairHelper<uint64_t>(instr);
4765       break;
4766     default:
4767       PrintExclusiveAccessWarning();
4768 
4769       unsigned rs = instr->GetRs();
4770       unsigned rt = instr->GetRt();
4771       unsigned rt2 = instr->GetRt2();
4772       unsigned rn = instr->GetRn();
4773 
4774       bool is_exclusive = !instr->GetLdStXNotExclusive();
4775       bool is_acquire_release =
4776           !is_exclusive || instr->GetLdStXAcquireRelease();
4777       bool is_load = instr->GetLdStXLoad();
4778       bool is_pair = instr->GetLdStXPair();
4779 
4780       unsigned element_size = 1 << instr->GetLdStXSizeLog2();
4781       unsigned access_size = is_pair ? element_size * 2 : element_size;
4782       uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4783 
4784       CheckIsValidUnalignedAtomicAccess(rn, address, access_size);
4785 
4786       if (is_load) {
4787         if (is_exclusive) {
4788           local_monitor_.MarkExclusive(address, access_size);
4789         } else {
4790           // Any non-exclusive load can clear the local monitor as a side
4791           // effect. We don't need to do this, but it is useful to stress the
4792           // simulated code.
4793           local_monitor_.Clear();
4794         }
4795 
4796         // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS).
4797         // We will print a more detailed log.
4798         unsigned reg_size = 0;
4799         switch (op) {
4800           case LDXRB_w:
4801           case LDAXRB_w:
4802           case LDARB_w:
4803           case LDLARB:
4804             WriteWRegister(rt, MemRead<uint8_t>(address), NoRegLog);
4805             reg_size = kWRegSizeInBytes;
4806             break;
4807           case LDXRH_w:
4808           case LDAXRH_w:
4809           case LDARH_w:
4810           case LDLARH:
4811             WriteWRegister(rt, MemRead<uint16_t>(address), NoRegLog);
4812             reg_size = kWRegSizeInBytes;
4813             break;
4814           case LDXR_w:
4815           case LDAXR_w:
4816           case LDAR_w:
4817           case LDLAR_w:
4818             WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4819             reg_size = kWRegSizeInBytes;
4820             break;
4821           case LDXR_x:
4822           case LDAXR_x:
4823           case LDAR_x:
4824           case LDLAR_x:
4825             WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4826             reg_size = kXRegSizeInBytes;
4827             break;
4828           case LDXP_w:
4829           case LDAXP_w:
4830             WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4831             WriteWRegister(rt2,
4832                            MemRead<uint32_t>(address + element_size),
4833                            NoRegLog);
4834             reg_size = kWRegSizeInBytes;
4835             break;
4836           case LDXP_x:
4837           case LDAXP_x:
4838             WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4839             WriteXRegister(rt2,
4840                            MemRead<uint64_t>(address + element_size),
4841                            NoRegLog);
4842             reg_size = kXRegSizeInBytes;
4843             break;
4844           default:
4845             VIXL_UNREACHABLE();
4846         }
4847 
4848         if (is_acquire_release) {
4849           // Approximate load-acquire by issuing a full barrier after the load.
4850           __sync_synchronize();
4851         }
4852 
4853         PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size);
4854         LogExtendingRead(rt, format, element_size, address);
4855         if (is_pair) {
4856           LogExtendingRead(rt2, format, element_size, address + element_size);
4857         }
4858       } else {
4859         if (is_acquire_release) {
4860           // Approximate store-release by issuing a full barrier before the
4861           // store.
4862           __sync_synchronize();
4863         }
4864 
4865         bool do_store = true;
4866         if (is_exclusive) {
4867           do_store = local_monitor_.IsExclusive(address, access_size) &&
4868                      global_monitor_.IsExclusive(address, access_size);
4869           WriteWRegister(rs, do_store ? 0 : 1);
4870 
4871           //  - All exclusive stores explicitly clear the local monitor.
4872           local_monitor_.Clear();
4873         } else {
4874           //  - Any other store can clear the local monitor as a side effect.
4875           local_monitor_.MaybeClear();
4876         }
4877 
4878         if (do_store) {
4879           switch (op) {
4880             case STXRB_w:
4881             case STLXRB_w:
4882             case STLRB_w:
4883             case STLLRB:
4884               MemWrite<uint8_t>(address, ReadWRegister(rt));
4885               break;
4886             case STXRH_w:
4887             case STLXRH_w:
4888             case STLRH_w:
4889             case STLLRH:
4890               MemWrite<uint16_t>(address, ReadWRegister(rt));
4891               break;
4892             case STXR_w:
4893             case STLXR_w:
4894             case STLR_w:
4895             case STLLR_w:
4896               MemWrite<uint32_t>(address, ReadWRegister(rt));
4897               break;
4898             case STXR_x:
4899             case STLXR_x:
4900             case STLR_x:
4901             case STLLR_x:
4902               MemWrite<uint64_t>(address, ReadXRegister(rt));
4903               break;
4904             case STXP_w:
4905             case STLXP_w:
4906               MemWrite<uint32_t>(address, ReadWRegister(rt));
4907               MemWrite<uint32_t>(address + element_size, ReadWRegister(rt2));
4908               break;
4909             case STXP_x:
4910             case STLXP_x:
4911               MemWrite<uint64_t>(address, ReadXRegister(rt));
4912               MemWrite<uint64_t>(address + element_size, ReadXRegister(rt2));
4913               break;
4914             default:
4915               VIXL_UNREACHABLE();
4916           }
4917 
4918           PrintRegisterFormat format =
4919               GetPrintRegisterFormatForSize(element_size);
4920           LogWrite(rt, format, address);
4921           if (is_pair) {
4922             LogWrite(rt2, format, address + element_size);
4923           }
4924         }
4925       }
4926   }
4927 }
4928 
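// LD<op>{A,L,AL}{B,H,,_w,_x} (LDADD, LDCLR, LDEOR, LDSET, LDSMAX, LDSMIN,
// LDUMAX, LDUMIN): read the old value, combine it with Rs, write the result
// back, and return the old value in Rt. The acquire barrier is skipped when
// Rt is the zero register, matching the ST<op> aliases that discard the
// loaded value.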
4929 template <typename T>
4930 void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
4931   unsigned rs = instr->GetRs();
4932   unsigned rt = instr->GetRt();
4933   unsigned rn = instr->GetRn();
4934 
4935   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
4936   bool is_release = instr->ExtractBit(22) == 1;
4937 
4938   unsigned element_size = sizeof(T);
4939   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4940 
4941   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4942 
4943   T value = ReadRegister<T>(rs);
4944 
4945   T data = MemRead<T>(address);
4946 
4947   if (is_acquire) {
4948     // Approximate load-acquire by issuing a full barrier after the load.
4949     __sync_synchronize();
4950   }
4951 
4952   T result = 0;
4953   switch (instr->Mask(AtomicMemorySimpleOpMask)) {
4954     case LDADDOp:
4955       result = data + value;
4956       break;
4957     case LDCLROp:
4958       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4959       result = data & ~value;
4960       break;
4961     case LDEOROp:
4962       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4963       result = data ^ value;
4964       break;
4965     case LDSETOp:
4966       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4967       result = data | value;
4968       break;
4969 
4970     // Signed/Unsigned difference is done via the templated type T.
4971     case LDSMAXOp:
4972     case LDUMAXOp:
4973       result = (data > value) ? data : value;
4974       break;
4975     case LDSMINOp:
4976     case LDUMINOp:
4977       result = (data > value) ? value : data;
4978       break;
4979   }
4980 
4981   if (is_release) {
4982     // Approximate store-release by issuing a full barrier before the store.
4983     __sync_synchronize();
4984   }
4985 
4986   WriteRegister<T>(rt, data, NoRegLog);
4987 
4988   unsigned register_size = element_size;
4989   if (element_size < kXRegSizeInBytes) {
4990     register_size = kWRegSizeInBytes;
4991   }
4992   PrintRegisterFormat format = GetPrintRegisterFormatForSize(register_size);
4993   LogExtendingRead(rt, format, element_size, address);
4994 
4995   MemWrite<T>(address, result);
4996   format = GetPrintRegisterFormatForSize(element_size);
4997   LogWrite(rs, format, address);
4998 }
4999 
5000 template <typename T>
5001 void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
5002   unsigned rs = instr->GetRs();
5003   unsigned rt = instr->GetRt();
5004   unsigned rn = instr->GetRn();
5005 
5006   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
5007   bool is_release = instr->ExtractBit(22) == 1;
5008 
5009   unsigned element_size = sizeof(T);
5010   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
5011 
5012   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
5013 
5014   T data = MemRead<T>(address);
5015   if (is_acquire) {
5016     // Approximate load-acquire by issuing a full barrier after the load.
5017     __sync_synchronize();
5018   }
5019 
5020   if (is_release) {
5021     // Approximate store-release by issuing a full barrier before the store.
5022     __sync_synchronize();
5023   }
5024   MemWrite<T>(address, ReadRegister<T>(rs));
5025 
5026   WriteRegister<T>(rt, data);
5027 
5028   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
5029   LogRead(rt, format, address);
5030   LogWrite(rs, format, address);
5031 }
5032 
5033 template <typename T>
5034 void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
5035   unsigned rt = instr->GetRt();
5036   unsigned rn = instr->GetRn();
5037 
5038   unsigned element_size = sizeof(T);
5039   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
5040 
5041   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
5042 
5043   WriteRegister<T>(rt, MemRead<T>(address));
5044 
5045   // Approximate load-acquire by issuing a full barrier after the load.
5046   __sync_synchronize();
5047 
5048   LogRead(rt, GetPrintRegisterFormatForSize(element_size), address);
5049 }
5050 
5051 #define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
5052   V(LDADD)                                \
5053   V(LDCLR)                                \
5054   V(LDEOR)                                \
5055   V(LDSET)                                \
5056   V(LDUMAX)                               \
5057   V(LDUMIN)
5058 
5059 #define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \
5060   V(LDSMAX)                              \
5061   V(LDSMIN)
5062 
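// Each SIM_FUNC_* macro below expands one mnemonic from the lists above into
// its four case labels (plain, acquire, release, acquire-release); for
// example, SIM_FUNC_B(LDADD) yields the LDADDB, LDADDAB, LDADDLB and LDADDALB
// cases, all of which share the uint8_t helper.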
5063 void Simulator::VisitAtomicMemory(const Instruction* instr) {
5064   switch (instr->Mask(AtomicMemoryMask)) {
5065 // clang-format off
5066 #define SIM_FUNC_B(A) \
5067     case A##B:        \
5068     case A##AB:       \
5069     case A##LB:       \
5070     case A##ALB:
5071 #define SIM_FUNC_H(A) \
5072     case A##H:        \
5073     case A##AH:       \
5074     case A##LH:       \
5075     case A##ALH:
5076 #define SIM_FUNC_w(A) \
5077     case A##_w:       \
5078     case A##A_w:      \
5079     case A##L_w:      \
5080     case A##AL_w:
5081 #define SIM_FUNC_x(A) \
5082     case A##_x:       \
5083     case A##A_x:      \
5084     case A##L_x:      \
5085     case A##AL_x:
5086 
5087     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B)
5088       AtomicMemorySimpleHelper<uint8_t>(instr);
5089       break;
5090     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B)
5091       AtomicMemorySimpleHelper<int8_t>(instr);
5092       break;
5093     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H)
5094       AtomicMemorySimpleHelper<uint16_t>(instr);
5095       break;
5096     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H)
5097       AtomicMemorySimpleHelper<int16_t>(instr);
5098       break;
5099     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w)
5100       AtomicMemorySimpleHelper<uint32_t>(instr);
5101       break;
5102     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w)
5103       AtomicMemorySimpleHelper<int32_t>(instr);
5104       break;
5105     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x)
5106       AtomicMemorySimpleHelper<uint64_t>(instr);
5107       break;
5108     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
5109       AtomicMemorySimpleHelper<int64_t>(instr);
5110       break;
5111       // clang-format on
5112 
5113     case SWPB:
5114     case SWPAB:
5115     case SWPLB:
5116     case SWPALB:
5117       AtomicMemorySwapHelper<uint8_t>(instr);
5118       break;
5119     case SWPH:
5120     case SWPAH:
5121     case SWPLH:
5122     case SWPALH:
5123       AtomicMemorySwapHelper<uint16_t>(instr);
5124       break;
5125     case SWP_w:
5126     case SWPA_w:
5127     case SWPL_w:
5128     case SWPAL_w:
5129       AtomicMemorySwapHelper<uint32_t>(instr);
5130       break;
5131     case SWP_x:
5132     case SWPA_x:
5133     case SWPL_x:
5134     case SWPAL_x:
5135       AtomicMemorySwapHelper<uint64_t>(instr);
5136       break;
5137     case LDAPRB:
5138       LoadAcquireRCpcHelper<uint8_t>(instr);
5139       break;
5140     case LDAPRH:
5141       LoadAcquireRCpcHelper<uint16_t>(instr);
5142       break;
5143     case LDAPR_w:
5144       LoadAcquireRCpcHelper<uint32_t>(instr);
5145       break;
5146     case LDAPR_x:
5147       LoadAcquireRCpcHelper<uint64_t>(instr);
5148       break;
5149   }
5150 }
5151 
5152 
5153 void Simulator::VisitLoadLiteral(const Instruction* instr) {
5154   unsigned rt = instr->GetRt();
5155   uint64_t address = instr->GetLiteralAddress<uint64_t>();
5156 
5157   // Verify that the calculated address is available to the host.
5158   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
5159 
5160   switch (instr->Mask(LoadLiteralMask)) {
5161     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
5162     // print a more detailed log.
5163     case LDR_w_lit:
5164       WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
5165       LogRead(rt, kPrintWReg, address);
5166       break;
5167     case LDR_x_lit:
5168       WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
5169       LogRead(rt, kPrintXReg, address);
5170       break;
5171     case LDR_s_lit:
5172       WriteSRegister(rt, MemRead<float>(address), NoRegLog);
5173       LogVRead(rt, kPrintSRegFP, address);
5174       break;
5175     case LDR_d_lit:
5176       WriteDRegister(rt, MemRead<double>(address), NoRegLog);
5177       LogVRead(rt, kPrintDRegFP, address);
5178       break;
5179     case LDR_q_lit:
5180       WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
5181       LogVRead(rt, kPrintReg1Q, address);
5182       break;
5183     case LDRSW_x_lit:
5184       WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
5185       LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address);
5186       break;
5187 
5188     // Ignore prfm hint instructions.
5189     case PRFM_lit:
5190       break;
5191 
5192     default:
5193       VIXL_UNREACHABLE();
5194   }
5195 
5196   local_monitor_.MaybeClear();
5197 }
5198 
5199 
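// Common address computation for the load/store forms above: Offset leaves
// the base register untouched, PreIndex applies the offset before the access
// and writes the new base back immediately, and PostIndex accesses the
// original base but still writes base + offset back (that write-back is
// logged later, once the access itself has been logged).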
5200 uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
5201                                        int64_t offset,
5202                                        AddrMode addrmode) {
5203   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
5204 
5205   if ((addr_reg == 31) && ((address % 16) != 0)) {
5206     // When the base register is SP the stack pointer is required to be
5207     // quadword aligned prior to the address calculation and write-backs.
5208     // Misalignment will cause a stack alignment fault.
5209     VIXL_ALIGNMENT_EXCEPTION();
5210   }
5211 
5212   if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
5213     VIXL_ASSERT(offset != 0);
5214     // Only preindex should log the register update here. For Postindex, the
5215     // update will be printed automatically by LogWrittenRegisters _after_ the
5216     // memory access itself is logged.
5217     RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog;
5218     WriteXRegister(addr_reg, address + offset, log_mode, Reg31IsStackPointer);
5219   }
5220 
5221   if ((addrmode == Offset) || (addrmode == PreIndex)) {
5222     address += offset;
5223   }
5224 
5225   // Verify that the calculated address is available to the host.
5226   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
5227 
5228   return static_cast<uintptr_t>(address);
5229 }
5230 
5231 
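// MOVZ writes the shifted 16-bit immediate and zeroes the other halfwords,
// MOVN writes its bitwise inverse, and MOVK merges the immediate into the
// existing register value. For example (worked by hand from the code below):
//   movz x0, #0xabcd, lsl #16   ->  x0 = 0x00000000abcd0000
//   movk x0, #0x1234            ->  x0 = 0x00000000abcd1234
//   movn w0, #0x0001            ->  w0 = 0xfffffffe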
5232 void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
5233   MoveWideImmediateOp mov_op =
5234       static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
5235   int64_t new_xn_val = 0;
5236 
5237   bool is_64_bits = instr->GetSixtyFourBits() == 1;
5238   // Shift is limited for W operations.
5239   VIXL_ASSERT(is_64_bits || (instr->GetShiftMoveWide() < 2));
5240 
5241   // Get the shifted immediate.
5242   int64_t shift = instr->GetShiftMoveWide() * 16;
5243   int64_t shifted_imm16 = static_cast<int64_t>(instr->GetImmMoveWide())
5244                           << shift;
5245 
5246   // Compute the new value.
5247   switch (mov_op) {
5248     case MOVN_w:
5249     case MOVN_x: {
5250       new_xn_val = ~shifted_imm16;
5251       if (!is_64_bits) new_xn_val &= kWRegMask;
5252       break;
5253     }
5254     case MOVK_w:
5255     case MOVK_x: {
5256       unsigned reg_code = instr->GetRd();
5257       int64_t prev_xn_val =
5258           is_64_bits ? ReadXRegister(reg_code) : ReadWRegister(reg_code);
5259       new_xn_val = (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16;
5260       break;
5261     }
5262     case MOVZ_w:
5263     case MOVZ_x: {
5264       new_xn_val = shifted_imm16;
5265       break;
5266     }
5267     default:
5268       VIXL_UNREACHABLE();
5269   }
5270 
5271   // Update the destination register.
5272   WriteXRegister(instr->GetRd(), new_xn_val);
5273 }
5274 
5275 
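// CSEL and friends: if the condition passes, Rd takes Rn unchanged; if it
// fails, Rd takes Rm, optionally incremented (CSINC), inverted (CSINV) or
// negated (CSNEG). Aliases such as CINC, CSET and CNEG decode to these forms.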
5276 void Simulator::VisitConditionalSelect(const Instruction* instr) {
5277   uint64_t new_val = ReadXRegister(instr->GetRn());
5278 
5279   if (ConditionFailed(static_cast<Condition>(instr->GetCondition()))) {
5280     new_val = ReadXRegister(instr->GetRm());
5281     switch (instr->Mask(ConditionalSelectMask)) {
5282       case CSEL_w:
5283       case CSEL_x:
5284         break;
5285       case CSINC_w:
5286       case CSINC_x:
5287         new_val++;
5288         break;
5289       case CSINV_w:
5290       case CSINV_x:
5291         new_val = ~new_val;
5292         break;
5293       case CSNEG_w:
5294       case CSNEG_x:
5295         new_val = -new_val;
5296         break;
5297       default:
5298         VIXL_UNIMPLEMENTED();
5299     }
5300   }
5301   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5302   WriteRegister(reg_size, instr->GetRd(), new_val);
5303 }
5304 
5305 
5306 #define PAUTH_MODES_REGISTER_CONTEXT(V)   \
5307   V(i, a, kPACKeyIA, kInstructionPointer) \
5308   V(i, b, kPACKeyIB, kInstructionPointer) \
5309   V(d, a, kPACKeyDA, kDataPointer)        \
5310   V(d, b, kPACKeyDB, kDataPointer)
5311 
5312 void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
5313   unsigned dst = instr->GetRd();
5314   unsigned src = instr->GetRn();
5315   Reg31Mode r31_pac = Reg31IsStackPointer;
5316 
5317   switch (form_hash_) {
5318 #define DEFINE_PAUTH_FUNCS(SUF0, SUF1, KEY, D)      \
5319   case "pac" #SUF0 "z" #SUF1 "_64z_dp_1src"_h:      \
5320     VIXL_ASSERT(src == kZeroRegCode);               \
5321     r31_pac = Reg31IsZeroRegister;                  \
5322     VIXL_FALLTHROUGH();                             \
5323   case "pac" #SUF0 #SUF1 "_64p_dp_1src"_h: {        \
5324     uint64_t mod = ReadXRegister(src, r31_pac);     \
5325     uint64_t ptr = ReadXRegister(dst);              \
5326     WriteXRegister(dst, AddPAC(ptr, mod, KEY, D));  \
5327     break;                                          \
5328   }                                                 \
5329   case "aut" #SUF0 "z" #SUF1 "_64z_dp_1src"_h:      \
5330     VIXL_ASSERT(src == kZeroRegCode);               \
5331     r31_pac = Reg31IsZeroRegister;                  \
5332     VIXL_FALLTHROUGH();                             \
5333   case "aut" #SUF0 #SUF1 "_64p_dp_1src"_h: {        \
5334     uint64_t mod = ReadXRegister(src, r31_pac);     \
5335     uint64_t ptr = ReadXRegister(dst);              \
5336     WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \
5337     break;                                          \
5338   }
5339     PAUTH_MODES_REGISTER_CONTEXT(DEFINE_PAUTH_FUNCS)
5340 #undef DEFINE_PAUTH_FUNCS
5341 
5342     case "xpaci_64z_dp_1src"_h:
5343       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer));
5344       break;
5345     case "xpacd_64z_dp_1src"_h:
5346       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer));
5347       break;
5348     case "rbit_32_dp_1src"_h:
5349       WriteWRegister(dst, ReverseBits(ReadWRegister(src)));
5350       break;
5351     case "rbit_64_dp_1src"_h:
5352       WriteXRegister(dst, ReverseBits(ReadXRegister(src)));
5353       break;
5354     case "rev16_32_dp_1src"_h:
5355       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 1));
5356       break;
5357     case "rev16_64_dp_1src"_h:
5358       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 1));
5359       break;
5360     case "rev_32_dp_1src"_h:
5361       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 2));
5362       break;
5363     case "rev32_64_dp_1src"_h:
5364       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 2));
5365       break;
5366     case "rev_64_dp_1src"_h:
5367       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 3));
5368       break;
5369     case "clz_32_dp_1src"_h:
5370       WriteWRegister(dst, CountLeadingZeros(ReadWRegister(src)));
5371       break;
5372     case "clz_64_dp_1src"_h:
5373       WriteXRegister(dst, CountLeadingZeros(ReadXRegister(src)));
5374       break;
5375     case "cls_32_dp_1src"_h:
5376       WriteWRegister(dst, CountLeadingSignBits(ReadWRegister(src)));
5377       break;
5378     case "cls_64_dp_1src"_h:
5379       WriteXRegister(dst, CountLeadingSignBits(ReadXRegister(src)));
5380       break;
5381     case "abs_32_dp_1src"_h:
5382       WriteWRegister(dst, Abs(ReadWRegister(src)));
5383       break;
5384     case "abs_64_dp_1src"_h:
5385       WriteXRegister(dst, Abs(ReadXRegister(src)));
5386       break;
5387     case "cnt_32_dp_1src"_h:
5388       WriteWRegister(dst, CountSetBits(ReadWRegister(src)));
5389       break;
5390     case "cnt_64_dp_1src"_h:
5391       WriteXRegister(dst, CountSetBits(ReadXRegister(src)));
5392       break;
5393     case "ctz_32_dp_1src"_h:
5394       WriteWRegister(dst, CountTrailingZeros(ReadWRegister(src)));
5395       break;
5396     case "ctz_64_dp_1src"_h:
5397       WriteXRegister(dst, CountTrailingZeros(ReadXRegister(src)));
5398       break;
5399   }
5400 }
5401 
5402 uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
5403   VIXL_ASSERT((n > 32) && (n <= 64));
5404   for (unsigned i = (n - 1); i >= 32; i--) {
5405     if (((data >> i) & 1) != 0) {
5406       uint64_t polysh32 = (uint64_t)poly << (i - 32);
5407       uint64_t mask = (UINT64_C(1) << i) - 1;
5408       data = ((data & mask) ^ polysh32);
5409     }
5410   }
5411   return data & 0xffffffff;
5412 }
5413 
5414 
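// The CRC32* instructions are defined on bit-reversed data, so Crc32Checksum
// bit-reverses both the accumulator and the new value, combines them into a
// (32 + size)-bit dividend, and reduces it modulo the generator polynomial
// using Poly32Mod2 (plain long division over GF(2)). The 32-bit remainder is
// bit-reversed again to give the architectural result. CRC32_POLY and
// CRC32C_POLY are the CRC-32 and CRC-32C generator polynomials respectively.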
5415 template <typename T>
5416 uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) {
5417   unsigned size = sizeof(val) * 8;  // Number of bits in type T.
5418   VIXL_ASSERT((size == 8) || (size == 16) || (size == 32));
5419   uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size;
5420   uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32;
5421   return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly));
5422 }
5423 
5424 
5425 uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) {
5426   // Poly32Mod2 cannot handle inputs with more than 32 bits, so compute
5427   // the CRC of each 32-bit word sequentially.
5428   acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly);
5429   return Crc32Checksum(acc, (uint32_t)(val >> 32), poly);
5430 }
5431 
5432 
5433 void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
5434   Shift shift_op = NO_SHIFT;
5435   int64_t result = 0;
5436   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5437 
5438   switch (instr->Mask(DataProcessing2SourceMask)) {
5439     case SDIV_w: {
5440       int32_t rn = ReadWRegister(instr->GetRn());
5441       int32_t rm = ReadWRegister(instr->GetRm());
5442       if ((rn == kWMinInt) && (rm == -1)) {
5443         result = kWMinInt;
5444       } else if (rm == 0) {
5445         // Division by zero can be trapped, but not on A-class processors.
5446         result = 0;
5447       } else {
5448         result = rn / rm;
5449       }
5450       break;
5451     }
5452     case SDIV_x: {
5453       int64_t rn = ReadXRegister(instr->GetRn());
5454       int64_t rm = ReadXRegister(instr->GetRm());
5455       if ((rn == kXMinInt) && (rm == -1)) {
5456         result = kXMinInt;
5457       } else if (rm == 0) {
5458         // Division by zero can be trapped, but not on A-class processors.
5459         result = 0;
5460       } else {
5461         result = rn / rm;
5462       }
5463       break;
5464     }
5465     case UDIV_w: {
5466       uint32_t rn = static_cast<uint32_t>(ReadWRegister(instr->GetRn()));
5467       uint32_t rm = static_cast<uint32_t>(ReadWRegister(instr->GetRm()));
5468       if (rm == 0) {
5469         // Division by zero can be trapped, but not on A-class processors.
5470         result = 0;
5471       } else {
5472         result = rn / rm;
5473       }
5474       break;
5475     }
5476     case UDIV_x: {
5477       uint64_t rn = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
5478       uint64_t rm = static_cast<uint64_t>(ReadXRegister(instr->GetRm()));
5479       if (rm == 0) {
5480         // Division by zero can be trapped, but not on A-class processors.
5481         result = 0;
5482       } else {
5483         result = rn / rm;
5484       }
5485       break;
5486     }
5487     case LSLV_w:
5488     case LSLV_x:
5489       shift_op = LSL;
5490       break;
5491     case LSRV_w:
5492     case LSRV_x:
5493       shift_op = LSR;
5494       break;
5495     case ASRV_w:
5496     case ASRV_x:
5497       shift_op = ASR;
5498       break;
5499     case RORV_w:
5500     case RORV_x:
5501       shift_op = ROR;
5502       break;
5503     case PACGA: {
5504       uint64_t dst = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
5505       uint64_t src = static_cast<uint64_t>(
5506           ReadXRegister(instr->GetRm(), Reg31IsStackPointer));
5507       uint64_t code = ComputePAC(dst, src, kPACKeyGA);
5508       result = code & 0xffffffff00000000;
5509       break;
5510     }
5511     case CRC32B: {
5512       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5513       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
5514       result = Crc32Checksum(acc, val, CRC32_POLY);
5515       break;
5516     }
5517     case CRC32H: {
5518       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5519       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
5520       result = Crc32Checksum(acc, val, CRC32_POLY);
5521       break;
5522     }
5523     case CRC32W: {
5524       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5525       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
5526       result = Crc32Checksum(acc, val, CRC32_POLY);
5527       break;
5528     }
5529     case CRC32X: {
5530       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5531       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
5532       result = Crc32Checksum(acc, val, CRC32_POLY);
5533       reg_size = kWRegSize;
5534       break;
5535     }
5536     case CRC32CB: {
5537       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5538       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
5539       result = Crc32Checksum(acc, val, CRC32C_POLY);
5540       break;
5541     }
5542     case CRC32CH: {
5543       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5544       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
5545       result = Crc32Checksum(acc, val, CRC32C_POLY);
5546       break;
5547     }
5548     case CRC32CW: {
5549       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5550       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
5551       result = Crc32Checksum(acc, val, CRC32C_POLY);
5552       break;
5553     }
5554     case CRC32CX: {
5555       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5556       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
5557       result = Crc32Checksum(acc, val, CRC32C_POLY);
5558       reg_size = kWRegSize;
5559       break;
5560     }
5561     default:
5562       VIXL_UNIMPLEMENTED();
5563   }
5564 
5565   if (shift_op != NO_SHIFT) {
5566     // Shift distance encoded in the least-significant five/six bits of the
5567     // register.
5568     int mask = (instr->GetSixtyFourBits() == 1) ? 0x3f : 0x1f;
5569     unsigned shift = ReadWRegister(instr->GetRm()) & mask;
5570     result = ShiftOperand(reg_size,
5571                           ReadRegister(reg_size, instr->GetRn()),
5572                           shift_op,
5573                           shift);
5574   }
5575   WriteRegister(reg_size, instr->GetRd(), result);
5576 }
5577 
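// Scalar SMAX/SMIN/UMAX/UMIN (from the CSSC extension) come in a register
// form and an immediate form; the immediate forms reuse the register path by
// substituting an 8-bit immediate taken from bits <17:10> (sign-extended for
// the signed variants, zero-extended for the unsigned ones).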
5578 void Simulator::SimulateSignedMinMax(const Instruction* instr) {
5579   int32_t wn = ReadWRegister(instr->GetRn());
5580   int32_t wm = ReadWRegister(instr->GetRm());
5581   int64_t xn = ReadXRegister(instr->GetRn());
5582   int64_t xm = ReadXRegister(instr->GetRm());
5583   int32_t imm = instr->ExtractSignedBits(17, 10);
5584   int dst = instr->GetRd();
5585 
5586   switch (form_hash_) {
5587     case "smax_64_minmax_imm"_h:
5588     case "smin_64_minmax_imm"_h:
5589       xm = imm;
5590       break;
5591     case "smax_32_minmax_imm"_h:
5592     case "smin_32_minmax_imm"_h:
5593       wm = imm;
5594       break;
5595   }
5596 
5597   switch (form_hash_) {
5598     case "smax_32_minmax_imm"_h:
5599     case "smax_32_dp_2src"_h:
5600       WriteWRegister(dst, std::max(wn, wm));
5601       break;
5602     case "smax_64_minmax_imm"_h:
5603     case "smax_64_dp_2src"_h:
5604       WriteXRegister(dst, std::max(xn, xm));
5605       break;
5606     case "smin_32_minmax_imm"_h:
5607     case "smin_32_dp_2src"_h:
5608       WriteWRegister(dst, std::min(wn, wm));
5609       break;
5610     case "smin_64_minmax_imm"_h:
5611     case "smin_64_dp_2src"_h:
5612       WriteXRegister(dst, std::min(xn, xm));
5613       break;
5614   }
5615 }
5616 
5617 void Simulator::SimulateUnsignedMinMax(const Instruction* instr) {
5618   uint64_t xn = ReadXRegister(instr->GetRn());
5619   uint64_t xm = ReadXRegister(instr->GetRm());
5620   uint32_t imm = instr->ExtractBits(17, 10);
5621   int dst = instr->GetRd();
5622 
5623   switch (form_hash_) {
5624     case "umax_64u_minmax_imm"_h:
5625     case "umax_32u_minmax_imm"_h:
5626     case "umin_64u_minmax_imm"_h:
5627     case "umin_32u_minmax_imm"_h:
5628       xm = imm;
5629       break;
5630   }
5631 
5632   switch (form_hash_) {
5633     case "umax_32u_minmax_imm"_h:
5634     case "umax_32_dp_2src"_h:
5635       xn &= 0xffff'ffff;
5636       xm &= 0xffff'ffff;
5637       VIXL_FALLTHROUGH();
5638     case "umax_64u_minmax_imm"_h:
5639     case "umax_64_dp_2src"_h:
5640       WriteXRegister(dst, std::max(xn, xm));
5641       break;
5642     case "umin_32u_minmax_imm"_h:
5643     case "umin_32_dp_2src"_h:
5644       xn &= 0xffff'ffff;
5645       xm &= 0xffff'ffff;
5646       VIXL_FALLTHROUGH();
5647     case "umin_64u_minmax_imm"_h:
5648     case "umin_64_dp_2src"_h:
5649       WriteXRegister(dst, std::min(xn, xm));
5650       break;
5651   }
5652 }
5653 
5654 void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
5655   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5656 
5657   uint64_t result = 0;
5658   // Extract and sign- or zero-extend 32-bit arguments for widening operations.
5659   uint64_t rn_u32 = ReadRegister<uint32_t>(instr->GetRn());
5660   uint64_t rm_u32 = ReadRegister<uint32_t>(instr->GetRm());
5661   int64_t rn_s32 = ReadRegister<int32_t>(instr->GetRn());
5662   int64_t rm_s32 = ReadRegister<int32_t>(instr->GetRm());
5663   uint64_t rn_u64 = ReadXRegister(instr->GetRn());
5664   uint64_t rm_u64 = ReadXRegister(instr->GetRm());
5665   switch (instr->Mask(DataProcessing3SourceMask)) {
5666     case MADD_w:
5667     case MADD_x:
5668       result = ReadXRegister(instr->GetRa()) + (rn_u64 * rm_u64);
5669       break;
5670     case MSUB_w:
5671     case MSUB_x:
5672       result = ReadXRegister(instr->GetRa()) - (rn_u64 * rm_u64);
5673       break;
5674     case SMADDL_x:
5675       result = ReadXRegister(instr->GetRa()) +
5676                static_cast<uint64_t>(rn_s32 * rm_s32);
5677       break;
5678     case SMSUBL_x:
5679       result = ReadXRegister(instr->GetRa()) -
5680                static_cast<uint64_t>(rn_s32 * rm_s32);
5681       break;
5682     case UMADDL_x:
5683       result = ReadXRegister(instr->GetRa()) + (rn_u32 * rm_u32);
5684       break;
5685     case UMSUBL_x:
5686       result = ReadXRegister(instr->GetRa()) - (rn_u32 * rm_u32);
5687       break;
5688     case UMULH_x:
5689       result =
5690           internal::MultiplyHigh<64>(ReadRegister<uint64_t>(instr->GetRn()),
5691                                      ReadRegister<uint64_t>(instr->GetRm()));
5692       break;
5693     case SMULH_x:
5694       result = internal::MultiplyHigh<64>(ReadXRegister(instr->GetRn()),
5695                                           ReadXRegister(instr->GetRm()));
5696       break;
5697     default:
5698       VIXL_UNIMPLEMENTED();
5699   }
5700   WriteRegister(reg_size, instr->GetRd(), result);
5701 }
5702 
5703 
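// BFM, SBFM and UBFM (and their aliases, such as BFI, SBFX, UBFX, ASR and
// the immediate shifts) are all handled here: the source is rotated right by
// R, a field of S - R + 1 bits is selected, and the result is merged into
// either the destination (BFM) or zero (SBFM/UBFM), with SBFM additionally
// replicating the sign bit of the extracted field. For example,
// ubfx w0, w1, #4, #8 is ubfm w0, w1, #4, #11 and extracts bits [11:4] of w1.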
5704 void Simulator::VisitBitfield(const Instruction* instr) {
5705   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5706   int64_t reg_mask = instr->GetSixtyFourBits() ? kXRegMask : kWRegMask;
5707   int R = instr->GetImmR();
5708   int S = instr->GetImmS();
5709 
5710   if (instr->GetSixtyFourBits() != instr->GetBitN()) {
5711     VisitUnallocated(instr);
5712   }
5713 
5714   if ((instr->GetSixtyFourBits() == 0) && ((S > 31) || (R > 31))) {
5715     VisitUnallocated(instr);
5716   }
5717 
5718   int diff = S - R;
5719   uint64_t mask;
5720   if (diff >= 0) {
5721     mask = ~UINT64_C(0) >> (64 - (diff + 1));
5722     mask = (static_cast<unsigned>(diff) < (reg_size - 1)) ? mask : reg_mask;
5723   } else {
5724     mask = ~UINT64_C(0) >> (64 - (S + 1));
5725     mask = RotateRight(mask, R, reg_size);
5726     diff += reg_size;
5727   }
5728 
5729   // inzero indicates if the extracted bitfield is inserted into the
5730   // destination register value or in zero.
5731   // If extend is true, extend the sign of the extracted bitfield.
5732   bool inzero = false;
5733   bool extend = false;
5734   switch (instr->Mask(BitfieldMask)) {
5735     case BFM_x:
5736     case BFM_w:
5737       break;
5738     case SBFM_x:
5739     case SBFM_w:
5740       inzero = true;
5741       extend = true;
5742       break;
5743     case UBFM_x:
5744     case UBFM_w:
5745       inzero = true;
5746       break;
5747     default:
5748       VIXL_UNIMPLEMENTED();
5749   }
5750 
5751   uint64_t dst = inzero ? 0 : ReadRegister(reg_size, instr->GetRd());
5752   uint64_t src = ReadRegister(reg_size, instr->GetRn());
5753   // Rotate source bitfield into place.
5754   uint64_t result = RotateRight(src, R, reg_size);
5755   // Determine the sign extension.
5756   uint64_t topbits = (diff == 63) ? 0 : (~UINT64_C(0) << (diff + 1));
5757   uint64_t signbits = extend && ((src >> S) & 1) ? topbits : 0;
5758 
5759   // Merge sign extension, dest/zero and bitfield.
5760   result = signbits | (result & mask) | (dst & ~mask);
5761 
5762   WriteRegister(reg_size, instr->GetRd(), result);
5763 }
5764 
5765 
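// EXTR Rd, Rn, Rm, #lsb extracts a register-sized field starting at bit
// <lsb> from the concatenation Rn:Rm (Rn supplies the high half). When
// Rn == Rm, this is the ROR-immediate alias.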
5766 void Simulator::VisitExtract(const Instruction* instr) {
5767   unsigned lsb = instr->GetImmS();
5768   unsigned reg_size = (instr->GetSixtyFourBits() == 1) ? kXRegSize : kWRegSize;
5769   uint64_t low_res =
5770       static_cast<uint64_t>(ReadRegister(reg_size, instr->GetRm())) >> lsb;
5771   uint64_t high_res = (lsb == 0)
5772                           ? 0
5773                           : ReadRegister<uint64_t>(reg_size, instr->GetRn())
5774                                 << (reg_size - lsb);
5775   WriteRegister(reg_size, instr->GetRd(), low_res | high_res);
5776 }
5777 
5778 
5779 void Simulator::VisitFPImmediate(const Instruction* instr) {
5780   AssertSupportedFPCR();
5781   unsigned dest = instr->GetRd();
5782   switch (instr->Mask(FPImmediateMask)) {
5783     case FMOV_h_imm:
5784       WriteHRegister(dest, Float16ToRawbits(instr->GetImmFP16()));
5785       break;
5786     case FMOV_s_imm:
5787       WriteSRegister(dest, instr->GetImmFP32());
5788       break;
5789     case FMOV_d_imm:
5790       WriteDRegister(dest, instr->GetImmFP64());
5791       break;
5792     default:
5793       VIXL_UNREACHABLE();
5794   }
5795 }
5796 
5797 
5798 void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
5799   AssertSupportedFPCR();
5800 
5801   unsigned dst = instr->GetRd();
5802   unsigned src = instr->GetRn();
5803 
5804   FPRounding round = ReadRMode();
5805 
5806   switch (instr->Mask(FPIntegerConvertMask)) {
5807     case FCVTAS_wh:
5808       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieAway));
5809       break;
5810     case FCVTAS_xh:
5811       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieAway));
5812       break;
5813     case FCVTAS_ws:
5814       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieAway));
5815       break;
5816     case FCVTAS_xs:
5817       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieAway));
5818       break;
5819     case FCVTAS_wd:
5820       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieAway));
5821       break;
5822     case FCVTAS_xd:
5823       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieAway));
5824       break;
5825     case FCVTAU_wh:
5826       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieAway));
5827       break;
5828     case FCVTAU_xh:
5829       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieAway));
5830       break;
5831     case FCVTAU_ws:
5832       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieAway));
5833       break;
5834     case FCVTAU_xs:
5835       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieAway));
5836       break;
5837     case FCVTAU_wd:
5838       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieAway));
5839       break;
5840     case FCVTAU_xd:
5841       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieAway));
5842       break;
5843     case FCVTMS_wh:
5844       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPNegativeInfinity));
5845       break;
5846     case FCVTMS_xh:
5847       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPNegativeInfinity));
5848       break;
5849     case FCVTMS_ws:
5850       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPNegativeInfinity));
5851       break;
5852     case FCVTMS_xs:
5853       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPNegativeInfinity));
5854       break;
5855     case FCVTMS_wd:
5856       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPNegativeInfinity));
5857       break;
5858     case FCVTMS_xd:
5859       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPNegativeInfinity));
5860       break;
5861     case FCVTMU_wh:
5862       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPNegativeInfinity));
5863       break;
5864     case FCVTMU_xh:
5865       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPNegativeInfinity));
5866       break;
5867     case FCVTMU_ws:
5868       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPNegativeInfinity));
5869       break;
5870     case FCVTMU_xs:
5871       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPNegativeInfinity));
5872       break;
5873     case FCVTMU_wd:
5874       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPNegativeInfinity));
5875       break;
5876     case FCVTMU_xd:
5877       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPNegativeInfinity));
5878       break;
5879     case FCVTPS_wh:
5880       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPPositiveInfinity));
5881       break;
5882     case FCVTPS_xh:
5883       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPPositiveInfinity));
5884       break;
5885     case FCVTPS_ws:
5886       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPPositiveInfinity));
5887       break;
5888     case FCVTPS_xs:
5889       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPPositiveInfinity));
5890       break;
5891     case FCVTPS_wd:
5892       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPPositiveInfinity));
5893       break;
5894     case FCVTPS_xd:
5895       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPPositiveInfinity));
5896       break;
5897     case FCVTPU_wh:
5898       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPPositiveInfinity));
5899       break;
5900     case FCVTPU_xh:
5901       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPPositiveInfinity));
5902       break;
5903     case FCVTPU_ws:
5904       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPPositiveInfinity));
5905       break;
5906     case FCVTPU_xs:
5907       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPPositiveInfinity));
5908       break;
5909     case FCVTPU_wd:
5910       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPPositiveInfinity));
5911       break;
5912     case FCVTPU_xd:
5913       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPPositiveInfinity));
5914       break;
5915     case FCVTNS_wh:
5916       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieEven));
5917       break;
5918     case FCVTNS_xh:
5919       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieEven));
5920       break;
5921     case FCVTNS_ws:
5922       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieEven));
5923       break;
5924     case FCVTNS_xs:
5925       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieEven));
5926       break;
5927     case FCVTNS_wd:
5928       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieEven));
5929       break;
5930     case FCVTNS_xd:
5931       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieEven));
5932       break;
5933     case FCVTNU_wh:
5934       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieEven));
5935       break;
5936     case FCVTNU_xh:
5937       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieEven));
5938       break;
5939     case FCVTNU_ws:
5940       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieEven));
5941       break;
5942     case FCVTNU_xs:
5943       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieEven));
5944       break;
5945     case FCVTNU_wd:
5946       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieEven));
5947       break;
5948     case FCVTNU_xd:
5949       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieEven));
5950       break;
5951     case FCVTZS_wh:
5952       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPZero));
5953       break;
5954     case FCVTZS_xh:
5955       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPZero));
5956       break;
5957     case FCVTZS_ws:
5958       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPZero));
5959       break;
5960     case FCVTZS_xs:
5961       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPZero));
5962       break;
5963     case FCVTZS_wd:
5964       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPZero));
5965       break;
5966     case FCVTZS_xd:
5967       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPZero));
5968       break;
5969     case FCVTZU_wh:
5970       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPZero));
5971       break;
5972     case FCVTZU_xh:
5973       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPZero));
5974       break;
5975     case FCVTZU_ws:
5976       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPZero));
5977       break;
5978     case FCVTZU_xs:
5979       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPZero));
5980       break;
5981     case FCVTZU_wd:
5982       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPZero));
5983       break;
5984     case FCVTZU_xd:
5985       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPZero));
5986       break;
5987     case FJCVTZS:
5988       WriteWRegister(dst, FPToFixedJS(ReadDRegister(src)));
5989       break;
5990     case FMOV_hw:
5991       WriteHRegister(dst, ReadWRegister(src) & kHRegMask);
5992       break;
5993     case FMOV_wh:
5994       WriteWRegister(dst, ReadHRegisterBits(src));
5995       break;
5996     case FMOV_xh:
5997       WriteXRegister(dst, ReadHRegisterBits(src));
5998       break;
5999     case FMOV_hx:
6000       WriteHRegister(dst, ReadXRegister(src) & kHRegMask);
6001       break;
6002     case FMOV_ws:
6003       WriteWRegister(dst, ReadSRegisterBits(src));
6004       break;
6005     case FMOV_xd:
6006       WriteXRegister(dst, ReadDRegisterBits(src));
6007       break;
6008     case FMOV_sw:
6009       WriteSRegisterBits(dst, ReadWRegister(src));
6010       break;
6011     case FMOV_dx:
6012       WriteDRegisterBits(dst, ReadXRegister(src));
6013       break;
6014     case FMOV_d1_x:
6015       LogicVRegister(ReadVRegister(dst))
6016           .SetUint(kFormatD, 1, ReadXRegister(src));
6017       break;
6018     case FMOV_x_d1:
6019       WriteXRegister(dst, LogicVRegister(ReadVRegister(src)).Uint(kFormatD, 1));
6020       break;
6021 
6022     // A 32-bit input can be handled in the same way as a 64-bit input, since
6023     // the sign- or zero-extension will not affect the conversion.
6024     case SCVTF_dx:
6025       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), 0, round));
6026       break;
6027     case SCVTF_dw:
6028       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), 0, round));
6029       break;
6030     case UCVTF_dx:
6031       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), 0, round));
6032       break;
6033     case UCVTF_dw: {
6034       WriteDRegister(dst,
6035                      UFixedToDouble(ReadRegister<uint32_t>(src), 0, round));
6036       break;
6037     }
6038     case SCVTF_sx:
6039       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), 0, round));
6040       break;
6041     case SCVTF_sw:
6042       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), 0, round));
6043       break;
6044     case UCVTF_sx:
6045       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), 0, round));
6046       break;
6047     case UCVTF_sw: {
6048       WriteSRegister(dst, UFixedToFloat(ReadRegister<uint32_t>(src), 0, round));
6049       break;
6050     }
6051     case SCVTF_hx:
6052       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), 0, round));
6053       break;
6054     case SCVTF_hw:
6055       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), 0, round));
6056       break;
6057     case UCVTF_hx:
6058       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), 0, round));
6059       break;
6060     case UCVTF_hw: {
6061       WriteHRegister(dst,
6062                      UFixedToFloat16(ReadRegister<uint32_t>(src), 0, round));
6063       break;
6064     }
6065 
6066     default:
6067       VIXL_UNREACHABLE();
6068   }
6069 }
6070 
6071 
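// Fixed-point conversions: the instruction's scale field encodes the number
// of fractional bits as (64 - scale). SCVTF/UCVTF treat the integer source as
// a fixed-point value with fbits fractional bits, while the FCVTZ*_fixed
// forms multiply the floating-point source by 2^fbits before converting with
// round-to-zero.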
6072 void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
6073   AssertSupportedFPCR();
6074 
6075   unsigned dst = instr->GetRd();
6076   unsigned src = instr->GetRn();
6077   int fbits = 64 - instr->GetFPScale();
6078 
6079   FPRounding round = ReadRMode();
6080 
6081   switch (instr->Mask(FPFixedPointConvertMask)) {
6082     // A 32-bit input can be handled in the same way as a 64-bit input, since
6083     // the sign- or zero-extension will not affect the conversion.
6084     case SCVTF_dx_fixed:
6085       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), fbits, round));
6086       break;
6087     case SCVTF_dw_fixed:
6088       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), fbits, round));
6089       break;
6090     case UCVTF_dx_fixed:
6091       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), fbits, round));
6092       break;
6093     case UCVTF_dw_fixed: {
6094       WriteDRegister(dst,
6095                      UFixedToDouble(ReadRegister<uint32_t>(src), fbits, round));
6096       break;
6097     }
6098     case SCVTF_sx_fixed:
6099       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), fbits, round));
6100       break;
6101     case SCVTF_sw_fixed:
6102       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), fbits, round));
6103       break;
6104     case UCVTF_sx_fixed:
6105       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), fbits, round));
6106       break;
6107     case UCVTF_sw_fixed: {
6108       WriteSRegister(dst,
6109                      UFixedToFloat(ReadRegister<uint32_t>(src), fbits, round));
6110       break;
6111     }
6112     case SCVTF_hx_fixed:
6113       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), fbits, round));
6114       break;
6115     case SCVTF_hw_fixed:
6116       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), fbits, round));
6117       break;
6118     case UCVTF_hx_fixed:
6119       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), fbits, round));
6120       break;
6121     case UCVTF_hw_fixed: {
6122       WriteHRegister(dst,
6123                      UFixedToFloat16(ReadRegister<uint32_t>(src),
6124                                      fbits,
6125                                      round));
6126       break;
6127     }
6128     case FCVTZS_xd_fixed:
6129       WriteXRegister(dst,
6130                      FPToInt64(ReadDRegister(src) * std::pow(2.0, fbits),
6131                                FPZero));
6132       break;
6133     case FCVTZS_wd_fixed:
6134       WriteWRegister(dst,
6135                      FPToInt32(ReadDRegister(src) * std::pow(2.0, fbits),
6136                                FPZero));
6137       break;
6138     case FCVTZU_xd_fixed:
6139       WriteXRegister(dst,
6140                      FPToUInt64(ReadDRegister(src) * std::pow(2.0, fbits),
6141                                 FPZero));
6142       break;
6143     case FCVTZU_wd_fixed:
6144       WriteWRegister(dst,
6145                      FPToUInt32(ReadDRegister(src) * std::pow(2.0, fbits),
6146                                 FPZero));
6147       break;
6148     case FCVTZS_xs_fixed:
6149       WriteXRegister(dst,
6150                      FPToInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
6151                                FPZero));
6152       break;
6153     case FCVTZS_ws_fixed:
6154       WriteWRegister(dst,
6155                      FPToInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
6156                                FPZero));
6157       break;
6158     case FCVTZU_xs_fixed:
6159       WriteXRegister(dst,
6160                      FPToUInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
6161                                 FPZero));
6162       break;
6163     case FCVTZU_ws_fixed:
6164       WriteWRegister(dst,
6165                      FPToUInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
6166                                 FPZero));
6167       break;
6168     case FCVTZS_xh_fixed: {
6169       double output =
6170           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
6171       WriteXRegister(dst, FPToInt64(output, FPZero));
6172       break;
6173     }
6174     case FCVTZS_wh_fixed: {
6175       double output =
6176           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
6177       WriteWRegister(dst, FPToInt32(output, FPZero));
6178       break;
6179     }
6180     case FCVTZU_xh_fixed: {
6181       double output =
6182           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
6183       WriteXRegister(dst, FPToUInt64(output, FPZero));
6184       break;
6185     }
6186     case FCVTZU_wh_fixed: {
6187       double output =
6188           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
6189       WriteWRegister(dst, FPToUInt32(output, FPZero));
6190       break;
6191     }
6192     default:
6193       VIXL_UNREACHABLE();
6194   }
6195 }
6196 
6197 
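// FCMP/FCMPE compare two FP registers, or a register against +0.0 for the
// _zero forms, and set NZCV from the result. The E variants pass EnableTrap
// to FPCompare: architecturally they treat any NaN operand as an invalid
// operation, whereas the non-E forms only do so for signalling NaNs.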
6198 void Simulator::VisitFPCompare(const Instruction* instr) {
6199   AssertSupportedFPCR();
6200 
6201   FPTrapFlags trap = DisableTrap;
6202   switch (instr->Mask(FPCompareMask)) {
6203     case FCMPE_h:
6204       trap = EnableTrap;
6205       VIXL_FALLTHROUGH();
6206     case FCMP_h:
6207       FPCompare(ReadHRegister(instr->GetRn()),
6208                 ReadHRegister(instr->GetRm()),
6209                 trap);
6210       break;
6211     case FCMPE_s:
6212       trap = EnableTrap;
6213       VIXL_FALLTHROUGH();
6214     case FCMP_s:
6215       FPCompare(ReadSRegister(instr->GetRn()),
6216                 ReadSRegister(instr->GetRm()),
6217                 trap);
6218       break;
6219     case FCMPE_d:
6220       trap = EnableTrap;
6221       VIXL_FALLTHROUGH();
6222     case FCMP_d:
6223       FPCompare(ReadDRegister(instr->GetRn()),
6224                 ReadDRegister(instr->GetRm()),
6225                 trap);
6226       break;
6227     case FCMPE_h_zero:
6228       trap = EnableTrap;
6229       VIXL_FALLTHROUGH();
6230     case FCMP_h_zero:
6231       FPCompare(ReadHRegister(instr->GetRn()), SimFloat16(0.0), trap);
6232       break;
6233     case FCMPE_s_zero:
6234       trap = EnableTrap;
6235       VIXL_FALLTHROUGH();
6236     case FCMP_s_zero:
6237       FPCompare(ReadSRegister(instr->GetRn()), 0.0f, trap);
6238       break;
6239     case FCMPE_d_zero:
6240       trap = EnableTrap;
6241       VIXL_FALLTHROUGH();
6242     case FCMP_d_zero:
6243       FPCompare(ReadDRegister(instr->GetRn()), 0.0, trap);
6244       break;
6245     default:
6246       VIXL_UNIMPLEMENTED();
6247   }
6248 }
6249 
6250 
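// FCCMP/FCCMPE: if the condition holds, perform the FP comparison and set
// NZCV from its result; otherwise write the immediate nzcv field directly to
// the flags. The E variants use the signalling comparison, as for FCMPE.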
6251 void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
6252   AssertSupportedFPCR();
6253 
6254   FPTrapFlags trap = DisableTrap;
6255   switch (instr->Mask(FPConditionalCompareMask)) {
6256     case FCCMPE_h:
6257       trap = EnableTrap;
6258       VIXL_FALLTHROUGH();
6259     case FCCMP_h:
6260       if (ConditionPassed(instr->GetCondition())) {
6261         FPCompare(ReadHRegister(instr->GetRn()),
6262                   ReadHRegister(instr->GetRm()),
6263                   trap);
6264       } else {
6265         ReadNzcv().SetFlags(instr->GetNzcv());
6266         LogSystemRegister(NZCV);
6267       }
6268       break;
6269     case FCCMPE_s:
6270       trap = EnableTrap;
6271       VIXL_FALLTHROUGH();
6272     case FCCMP_s:
6273       if (ConditionPassed(instr->GetCondition())) {
6274         FPCompare(ReadSRegister(instr->GetRn()),
6275                   ReadSRegister(instr->GetRm()),
6276                   trap);
6277       } else {
6278         ReadNzcv().SetFlags(instr->GetNzcv());
6279         LogSystemRegister(NZCV);
6280       }
6281       break;
6282     case FCCMPE_d:
6283       trap = EnableTrap;
6284       VIXL_FALLTHROUGH();
6285     case FCCMP_d:
6286       if (ConditionPassed(instr->GetCondition())) {
6287         FPCompare(ReadDRegister(instr->GetRn()),
6288                   ReadDRegister(instr->GetRm()),
6289                   trap);
6290       } else {
6291         ReadNzcv().SetFlags(instr->GetNzcv());
6292         LogSystemRegister(NZCV);
6293       }
6294       break;
6295     default:
6296       VIXL_UNIMPLEMENTED();
6297   }
6298 }
6299 
6300 
6301 void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
6302   AssertSupportedFPCR();
6303 
6304   Instr selected;
6305   if (ConditionPassed(instr->GetCondition())) {
6306     selected = instr->GetRn();
6307   } else {
6308     selected = instr->GetRm();
6309   }
6310 
6311   switch (instr->Mask(FPConditionalSelectMask)) {
6312     case FCSEL_h:
6313       WriteHRegister(instr->GetRd(), ReadHRegister(selected));
6314       break;
6315     case FCSEL_s:
6316       WriteSRegister(instr->GetRd(), ReadSRegister(selected));
6317       break;
6318     case FCSEL_d:
6319       WriteDRegister(instr->GetRd(), ReadDRegister(selected));
6320       break;
6321     default:
6322       VIXL_UNIMPLEMENTED();
6323   }
6324 }
6325 
6326 
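// Single-source FP data processing. FMOV, FABS, FNEG, FCVT and FSQRT return
// directly from the switch below; the FRINT* cases only select the rounding
// mode, frint mode and inexact behaviour, then fall through to the common
// frint() call at the end of the function.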
6327 void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
6328   AssertSupportedFPCR();
6329 
6330   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
6331   VectorFormat vform;
6332   switch (instr->Mask(FPTypeMask)) {
6333     default:
6334       VIXL_UNREACHABLE_OR_FALLTHROUGH();
6335     case FP64:
6336       vform = kFormatD;
6337       break;
6338     case FP32:
6339       vform = kFormatS;
6340       break;
6341     case FP16:
6342       vform = kFormatH;
6343       break;
6344   }
6345 
6346   SimVRegister& rd = ReadVRegister(instr->GetRd());
6347   SimVRegister& rn = ReadVRegister(instr->GetRn());
6348   bool inexact_exception = false;
6349   FrintMode frint_mode = kFrintToInteger;
6350 
6351   unsigned fd = instr->GetRd();
6352   unsigned fn = instr->GetRn();
6353 
6354   switch (instr->Mask(FPDataProcessing1SourceMask)) {
6355     case FMOV_h:
6356       WriteHRegister(fd, ReadHRegister(fn));
6357       return;
6358     case FMOV_s:
6359       WriteSRegister(fd, ReadSRegister(fn));
6360       return;
6361     case FMOV_d:
6362       WriteDRegister(fd, ReadDRegister(fn));
6363       return;
6364     case FABS_h:
6365     case FABS_s:
6366     case FABS_d:
6367       fabs_(vform, ReadVRegister(fd), ReadVRegister(fn));
6368       // Explicitly log the register update whilst we have type information.
6369       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6370       return;
6371     case FNEG_h:
6372     case FNEG_s:
6373     case FNEG_d:
6374       fneg(vform, ReadVRegister(fd), ReadVRegister(fn));
6375       // Explicitly log the register update whilst we have type information.
6376       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6377       return;
6378     case FCVT_ds:
6379       WriteDRegister(fd, FPToDouble(ReadSRegister(fn), ReadDN()));
6380       return;
6381     case FCVT_sd:
6382       WriteSRegister(fd, FPToFloat(ReadDRegister(fn), FPTieEven, ReadDN()));
6383       return;
6384     case FCVT_hs:
6385       WriteHRegister(fd,
6386                      Float16ToRawbits(
6387                          FPToFloat16(ReadSRegister(fn), FPTieEven, ReadDN())));
6388       return;
6389     case FCVT_sh:
6390       WriteSRegister(fd, FPToFloat(ReadHRegister(fn), ReadDN()));
6391       return;
6392     case FCVT_dh:
6393       WriteDRegister(fd, FPToDouble(ReadHRegister(fn), ReadDN()));
6394       return;
6395     case FCVT_hd:
6396       WriteHRegister(fd,
6397                      Float16ToRawbits(
6398                          FPToFloat16(ReadDRegister(fn), FPTieEven, ReadDN())));
6399       return;
6400     case FSQRT_h:
6401     case FSQRT_s:
6402     case FSQRT_d:
6403       fsqrt(vform, rd, rn);
6404       // Explicitly log the register update whilst we have type information.
6405       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6406       return;
6407     case FRINT32X_s:
6408     case FRINT32X_d:
6409       inexact_exception = true;
6410       frint_mode = kFrintToInt32;
6411       break;  // Use FPCR rounding mode.
6412     case FRINT64X_s:
6413     case FRINT64X_d:
6414       inexact_exception = true;
6415       frint_mode = kFrintToInt64;
6416       break;  // Use FPCR rounding mode.
6417     case FRINT32Z_s:
6418     case FRINT32Z_d:
6419       inexact_exception = true;
6420       frint_mode = kFrintToInt32;
6421       fpcr_rounding = FPZero;
6422       break;
6423     case FRINT64Z_s:
6424     case FRINT64Z_d:
6425       inexact_exception = true;
6426       frint_mode = kFrintToInt64;
6427       fpcr_rounding = FPZero;
6428       break;
6429     case FRINTI_h:
6430     case FRINTI_s:
6431     case FRINTI_d:
6432       break;  // Use FPCR rounding mode.
6433     case FRINTX_h:
6434     case FRINTX_s:
6435     case FRINTX_d:
6436       inexact_exception = true;
6437       break;
6438     case FRINTA_h:
6439     case FRINTA_s:
6440     case FRINTA_d:
6441       fpcr_rounding = FPTieAway;
6442       break;
6443     case FRINTM_h:
6444     case FRINTM_s:
6445     case FRINTM_d:
6446       fpcr_rounding = FPNegativeInfinity;
6447       break;
6448     case FRINTN_h:
6449     case FRINTN_s:
6450     case FRINTN_d:
6451       fpcr_rounding = FPTieEven;
6452       break;
6453     case FRINTP_h:
6454     case FRINTP_s:
6455     case FRINTP_d:
6456       fpcr_rounding = FPPositiveInfinity;
6457       break;
6458     case FRINTZ_h:
6459     case FRINTZ_s:
6460     case FRINTZ_d:
6461       fpcr_rounding = FPZero;
6462       break;
6463     default:
6464       VIXL_UNIMPLEMENTED();
6465   }
6466 
6467   // Only FRINT* instructions fall through the switch above.
6468   frint(vform, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
6469   // Explicitly log the register update whilst we have type information.
6470   LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6471 }
6472 
6473 
6474 void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
6475   AssertSupportedFPCR();
6476 
6477   VectorFormat vform;
6478   switch (instr->Mask(FPTypeMask)) {
6479     default:
6480       VIXL_UNREACHABLE_OR_FALLTHROUGH();
6481     case FP64:
6482       vform = kFormatD;
6483       break;
6484     case FP32:
6485       vform = kFormatS;
6486       break;
6487     case FP16:
6488       vform = kFormatH;
6489       break;
6490   }
6491   SimVRegister& rd = ReadVRegister(instr->GetRd());
6492   SimVRegister& rn = ReadVRegister(instr->GetRn());
6493   SimVRegister& rm = ReadVRegister(instr->GetRm());
6494 
6495   switch (instr->Mask(FPDataProcessing2SourceMask)) {
6496     case FADD_h:
6497     case FADD_s:
6498     case FADD_d:
6499       fadd(vform, rd, rn, rm);
6500       break;
6501     case FSUB_h:
6502     case FSUB_s:
6503     case FSUB_d:
6504       fsub(vform, rd, rn, rm);
6505       break;
6506     case FMUL_h:
6507     case FMUL_s:
6508     case FMUL_d:
6509       fmul(vform, rd, rn, rm);
6510       break;
6511     case FNMUL_h:
6512     case FNMUL_s:
6513     case FNMUL_d:
6514       fnmul(vform, rd, rn, rm);
6515       break;
6516     case FDIV_h:
6517     case FDIV_s:
6518     case FDIV_d:
6519       fdiv(vform, rd, rn, rm);
6520       break;
6521     case FMAX_h:
6522     case FMAX_s:
6523     case FMAX_d:
6524       fmax(vform, rd, rn, rm);
6525       break;
6526     case FMIN_h:
6527     case FMIN_s:
6528     case FMIN_d:
6529       fmin(vform, rd, rn, rm);
6530       break;
6531     case FMAXNM_h:
6532     case FMAXNM_s:
6533     case FMAXNM_d:
6534       fmaxnm(vform, rd, rn, rm);
6535       break;
6536     case FMINNM_h:
6537     case FMINNM_s:
6538     case FMINNM_d:
6539       fminnm(vform, rd, rn, rm);
6540       break;
6541     default:
6542       VIXL_UNREACHABLE();
6543   }
6544   // Explicitly log the register update whilst we have type information.
6545   LogVRegister(instr->GetRd(), GetPrintRegisterFormatFP(vform));
6546 }
6547 
6548 
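// Fused multiply-accumulate: fd = fa + (fn * fm) for FMADD, with operand
// negations giving FMSUB, FNMADD and FNMSUB, all routed through FPMulAdd.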
6549 void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
6550   AssertSupportedFPCR();
6551 
6552   unsigned fd = instr->GetRd();
6553   unsigned fn = instr->GetRn();
6554   unsigned fm = instr->GetRm();
6555   unsigned fa = instr->GetRa();
6556 
6557   switch (instr->Mask(FPDataProcessing3SourceMask)) {
6558     // fd = fa +/- (fn * fm)
6559     case FMADD_h:
6560       WriteHRegister(fd,
6561                      FPMulAdd(ReadHRegister(fa),
6562                               ReadHRegister(fn),
6563                               ReadHRegister(fm)));
6564       break;
6565     case FMSUB_h:
6566       WriteHRegister(fd,
6567                      FPMulAdd(ReadHRegister(fa),
6568                               -ReadHRegister(fn),
6569                               ReadHRegister(fm)));
6570       break;
6571     case FMADD_s:
6572       WriteSRegister(fd,
6573                      FPMulAdd(ReadSRegister(fa),
6574                               ReadSRegister(fn),
6575                               ReadSRegister(fm)));
6576       break;
6577     case FMSUB_s:
6578       WriteSRegister(fd,
6579                      FPMulAdd(ReadSRegister(fa),
6580                               -ReadSRegister(fn),
6581                               ReadSRegister(fm)));
6582       break;
6583     case FMADD_d:
6584       WriteDRegister(fd,
6585                      FPMulAdd(ReadDRegister(fa),
6586                               ReadDRegister(fn),
6587                               ReadDRegister(fm)));
6588       break;
6589     case FMSUB_d:
6590       WriteDRegister(fd,
6591                      FPMulAdd(ReadDRegister(fa),
6592                               -ReadDRegister(fn),
6593                               ReadDRegister(fm)));
6594       break;
6595     // Negated variants of the above.
6596     case FNMADD_h:
6597       WriteHRegister(fd,
6598                      FPMulAdd(-ReadHRegister(fa),
6599                               -ReadHRegister(fn),
6600                               ReadHRegister(fm)));
6601       break;
6602     case FNMSUB_h:
6603       WriteHRegister(fd,
6604                      FPMulAdd(-ReadHRegister(fa),
6605                               ReadHRegister(fn),
6606                               ReadHRegister(fm)));
6607       break;
6608     case FNMADD_s:
6609       WriteSRegister(fd,
6610                      FPMulAdd(-ReadSRegister(fa),
6611                               -ReadSRegister(fn),
6612                               ReadSRegister(fm)));
6613       break;
6614     case FNMSUB_s:
6615       WriteSRegister(fd,
6616                      FPMulAdd(-ReadSRegister(fa),
6617                               ReadSRegister(fn),
6618                               ReadSRegister(fm)));
6619       break;
6620     case FNMADD_d:
6621       WriteDRegister(fd,
6622                      FPMulAdd(-ReadDRegister(fa),
6623                               -ReadDRegister(fn),
6624                               ReadDRegister(fm)));
6625       break;
6626     case FNMSUB_d:
6627       WriteDRegister(fd,
6628                      FPMulAdd(-ReadDRegister(fa),
6629                               ReadDRegister(fn),
6630                               ReadDRegister(fm)));
6631       break;
6632     default:
6633       VIXL_UNIMPLEMENTED();
6634   }
6635 }
6636 
6637 
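// Returns true if either operand is a NaN; in that case the propagated NaN
// has already been written to fd, so the caller can skip the usual
// arithmetic. Half-precision operands are not handled by this helper.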
6638 bool Simulator::FPProcessNaNs(const Instruction* instr) {
6639   unsigned fd = instr->GetRd();
6640   unsigned fn = instr->GetRn();
6641   unsigned fm = instr->GetRm();
6642   bool done = false;
6643 
6644   if (instr->Mask(FP64) == FP64) {
6645     double result = FPProcessNaNs(ReadDRegister(fn), ReadDRegister(fm));
6646     if (IsNaN(result)) {
6647       WriteDRegister(fd, result);
6648       done = true;
6649     }
6650   } else if (instr->Mask(FP32) == FP32) {
6651     float result = FPProcessNaNs(ReadSRegister(fn), ReadSRegister(fm));
6652     if (IsNaN(result)) {
6653       WriteSRegister(fd, result);
6654       done = true;
6655     }
6656   } else {
6657     VIXL_ASSERT(instr->Mask(FP16) == FP16);
6658     VIXL_UNIMPLEMENTED();
6659   }
6660 
6661   return done;
6662 }
6663 
6664 
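// Cache maintenance operations by virtual address (e.g. DC CVAC, IC IVAU).
// The simulator has no caches to maintain, so these are modelled only as an
// access check on the supplied address.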
6665 void Simulator::SysOp_W(int op, int64_t val) {
6666   switch (op) {
6667     case IVAU:
6668     case CVAC:
6669     case CVAU:
6670     case CVAP:
6671     case CVADP:
6672     case CIVAC:
6673     case CGVAC:
6674     case CGDVAC:
6675     case CGVAP:
6676     case CGDVAP:
6677     case CIGVAC:
6678     case CIGDVAC: {
6679       // Perform a placeholder memory access to ensure that we have read access
6680       // to the specified address. The read access does not require a tag match,
6681       // so temporarily disable MTE.
6682       bool mte_enabled = MetaDataDepot::MetaDataMTE::IsActive();
6683       MetaDataDepot::MetaDataMTE::SetActive(false);
6684       volatile uint8_t y = MemRead<uint8_t>(val);
6685       MetaDataDepot::MetaDataMTE::SetActive(mte_enabled);
6686       USE(y);
6687       // TODO: Implement ZVA, GVA, GZVA.
6688       break;
6689     }
6690     default:
6691       VIXL_UNIMPLEMENTED();
6692   }
6693 }
6694 
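// Helper for the hint-space PAC/AUT instructions: dst is x17 (the *1716
// forms) or x30 (the *SP and *Z forms), and src selects the modifier: x16,
// sp, or -1 meaning a zero modifier (the *Z forms). pac_fn is either AddPAC
// or AuthPAC.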
6695 void Simulator::PACHelper(int dst,
6696                           int src,
6697                           PACKey key,
6698                           decltype(&Simulator::AddPAC) pac_fn) {
6699   VIXL_ASSERT((dst == 17) || (dst == 30));
6700   VIXL_ASSERT((src == -1) || (src == 16) || (src == 31));
6701 
6702   uint64_t modifier = (src == -1) ? 0 : ReadXRegister(src, Reg31IsStackPointer);
6703   uint64_t result =
6704       (this->*pac_fn)(ReadXRegister(dst), modifier, key, kInstructionPointer);
6705   WriteXRegister(dst, result);
6706 }
6707 
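// System instructions: flag manipulation (CFINV, AXFLAG, XAFLAG), MSR/MRS
// system register moves, hints (NOP, BTI, PAC/AUT and friends), barriers,
// and SYS cache maintenance. Dispatch is on the decoded form hash.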
6708 void Simulator::VisitSystem(const Instruction* instr) {
6709   PACKey pac_key = kPACKeyIA;  // Default key for PAC/AUTH handling.
6710 
6711   switch (form_hash_) {
6712     case "cfinv_m_pstate"_h:
6713       ReadNzcv().SetC(!ReadC());
6714       break;
6715     case "axflag_m_pstate"_h:
6716       ReadNzcv().SetN(0);
6717       ReadNzcv().SetZ(ReadNzcv().GetZ() | ReadNzcv().GetV());
6718       ReadNzcv().SetC(ReadNzcv().GetC() & ~ReadNzcv().GetV());
6719       ReadNzcv().SetV(0);
6720       break;
6721     case "xaflag_m_pstate"_h: {
6722       // Compute all new flag values before writing, as each depends on the old flags.
6723       uint32_t n = (~ReadNzcv().GetC() & ~ReadNzcv().GetZ()) & 1;
6724       uint32_t z = ReadNzcv().GetZ() & ReadNzcv().GetC();
6725       uint32_t c = ReadNzcv().GetC() | ReadNzcv().GetZ();
6726       uint32_t v = ~ReadNzcv().GetC() & ReadNzcv().GetZ();
6727       ReadNzcv().SetN(n);
6728       ReadNzcv().SetZ(z);
6729       ReadNzcv().SetC(c);
6730       ReadNzcv().SetV(v);
6731       break;
6732     }
6733     case "xpaclri_hi_hints"_h:
6734       WriteXRegister(30, StripPAC(ReadXRegister(30), kInstructionPointer));
6735       break;
6736     case "clrex_bn_barriers"_h:
6737       PrintExclusiveAccessWarning();
6738       ClearLocalMonitor();
6739       break;
6740     case "msr_sr_systemmove"_h:
6741       switch (instr->GetImmSystemRegister()) {
6742         case NZCV:
6743           ReadNzcv().SetRawValue(ReadWRegister(instr->GetRt()));
6744           LogSystemRegister(NZCV);
6745           break;
6746         case FPCR:
6747           ReadFpcr().SetRawValue(ReadWRegister(instr->GetRt()));
6748           LogSystemRegister(FPCR);
6749           break;
6750         default:
6751           VIXL_UNIMPLEMENTED();
6752       }
6753       break;
6754     case "mrs_rs_systemmove"_h:
6755       switch (instr->GetImmSystemRegister()) {
6756         case NZCV:
6757           WriteXRegister(instr->GetRt(), ReadNzcv().GetRawValue());
6758           break;
6759         case FPCR:
6760           WriteXRegister(instr->GetRt(), ReadFpcr().GetRawValue());
6761           break;
6762         case RNDR:
6763         case RNDRRS: {
6764           uint64_t high = jrand48(rand_state_);
6765           uint64_t low = jrand48(rand_state_);
6766           uint64_t rand_num = (high << 32) | (low & 0xffffffff);
6767           WriteXRegister(instr->GetRt(), rand_num);
6768           // Simulate successful random number generation.
6769           // TODO: Occasionally report failure, as real hardware may fail
6770           // to return a random number within a reasonable time.
6771           ReadNzcv().SetRawValue(NoFlag);
6772           LogSystemRegister(NZCV);
6773           break;
6774         }
6775         default:
6776           VIXL_UNIMPLEMENTED();
6777       }
6778       break;
6779     case "nop_hi_hints"_h:
6780     case "esb_hi_hints"_h:
6781     case "csdb_hi_hints"_h:
6782       break;
6783     case "bti_hb_hints"_h:
6784       switch (instr->GetImmHint()) {
6785         case BTI_jc:
6786           break;
6787         case BTI:
6788           if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
6789             VIXL_ABORT_WITH_MSG("Executing BTI with wrong BType.");
6790           }
6791           break;
6792         case BTI_c:
6793           if (PcIsInGuardedPage() &&
6794               (ReadBType() == BranchFromGuardedNotToIP)) {
6795             VIXL_ABORT_WITH_MSG("Executing BTI c with wrong BType.");
6796           }
6797           break;
6798         case BTI_j:
6799           if (PcIsInGuardedPage() && (ReadBType() == BranchAndLink)) {
6800             VIXL_ABORT_WITH_MSG("Executing BTI j with wrong BType.");
6801           }
6802           break;
6803         default:
6804           VIXL_UNREACHABLE();
6805       }
6806       return;
6807     case "pacib1716_hi_hints"_h:
6808       pac_key = kPACKeyIB;
6809       VIXL_FALLTHROUGH();
6810     case "pacia1716_hi_hints"_h:
6811       PACHelper(17, 16, pac_key, &Simulator::AddPAC);
6812       break;
6813     case "pacibsp_hi_hints"_h:
6814       pac_key = kPACKeyIB;
6815       VIXL_FALLTHROUGH();
6816     case "paciasp_hi_hints"_h:
6817       PACHelper(30, 31, pac_key, &Simulator::AddPAC);
6818 
6819       // Check that BType allows PACI[AB]SP instructions.
6820       if (PcIsInGuardedPage()) {
6821         switch (ReadBType()) {
6822           case BranchFromGuardedNotToIP:
6823           // TODO: This case depends on the value of SCTLR_EL1.BT0, which we
6824           // assume here to be zero. This allows execution of PACI[AB]SP when
6825           // BTYPE is BranchFromGuardedNotToIP (0b11).
6826           case DefaultBType:
6827           case BranchFromUnguardedOrToIP:
6828           case BranchAndLink:
6829             break;
6830         }
6831       }
6832       break;
6833     case "pacibz_hi_hints"_h:
6834       pac_key = kPACKeyIB;
6835       VIXL_FALLTHROUGH();
6836     case "paciaz_hi_hints"_h:
6837       PACHelper(30, -1, pac_key, &Simulator::AddPAC);
6838       break;
6839     case "autib1716_hi_hints"_h:
6840       pac_key = kPACKeyIB;
6841       VIXL_FALLTHROUGH();
6842     case "autia1716_hi_hints"_h:
6843       PACHelper(17, 16, pac_key, &Simulator::AuthPAC);
6844       break;
6845     case "autibsp_hi_hints"_h:
6846       pac_key = kPACKeyIB;
6847       VIXL_FALLTHROUGH();
6848     case "autiasp_hi_hints"_h:
6849       PACHelper(30, 31, pac_key, &Simulator::AuthPAC);
6850       break;
6851     case "autibz_hi_hints"_h:
6852       pac_key = kPACKeyIB;
6853       VIXL_FALLTHROUGH();
6854     case "autiaz_hi_hints"_h:
6855       PACHelper(30, -1, pac_key, &Simulator::AuthPAC);
6856       break;
6857     case "dsb_bo_barriers"_h:
6858     case "dmb_bo_barriers"_h:
6859     case "isb_bi_barriers"_h:
6860       __sync_synchronize();
6861       break;
6862     case "sys_cr_systeminstrs"_h:
6863       SysOp_W(instr->GetSysOp(), ReadXRegister(instr->GetRt()));
6864       break;
6865     default:
6866       VIXL_UNIMPLEMENTED();
6867   }
6868 }
6869 
6870 
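// Exception-generating instructions. HLT with one of the simulator's
// recognised immediates acts as a pseudo-instruction (unreachable, trace,
// log, printf, runtime calls, CPU feature control, MTE state); any other HLT,
// and BRK when the interactive debugger is not enabled, falls back to a host
// breakpoint.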
6871 void Simulator::VisitException(const Instruction* instr) {
6872   switch (instr->Mask(ExceptionMask)) {
6873     case HLT:
6874       switch (instr->GetImmException()) {
6875         case kUnreachableOpcode:
6876           DoUnreachable(instr);
6877           return;
6878         case kTraceOpcode:
6879           DoTrace(instr);
6880           return;
6881         case kLogOpcode:
6882           DoLog(instr);
6883           return;
6884         case kPrintfOpcode:
6885           DoPrintf(instr);
6886           return;
6887         case kRuntimeCallOpcode:
6888           DoRuntimeCall(instr);
6889           return;
6890         case kSetCPUFeaturesOpcode:
6891         case kEnableCPUFeaturesOpcode:
6892         case kDisableCPUFeaturesOpcode:
6893           DoConfigureCPUFeatures(instr);
6894           return;
6895         case kSaveCPUFeaturesOpcode:
6896           DoSaveCPUFeatures(instr);
6897           return;
6898         case kRestoreCPUFeaturesOpcode:
6899           DoRestoreCPUFeatures(instr);
6900           return;
6901         case kMTEActive:
6902           MetaDataDepot::MetaDataMTE::SetActive(true);
6903           return;
6904         case kMTEInactive:
6905           MetaDataDepot::MetaDataMTE::SetActive(false);
6906           return;
6907         default:
6908           HostBreakpoint();
6909           return;
6910       }
6911     case BRK:
6912       if (debugger_enabled_) {
6913         uint64_t next_instr =
6914             reinterpret_cast<uint64_t>(pc_->GetNextInstruction());
6915         if (!debugger_->IsBreakpoint(next_instr)) {
6916           debugger_->RegisterBreakpoint(next_instr);
6917         }
6918       } else {
6919         HostBreakpoint();
6920       }
6921       return;
6922     default:
6923       VIXL_UNIMPLEMENTED();
6924   }
6925 }
6926 
6927 
6928 void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
6929   VisitUnimplemented(instr);
6930 }
6931 
6932 
6933 void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
6934   VisitUnimplemented(instr);
6935 }
6936 
6937 
6938 void Simulator::VisitCryptoAES(const Instruction* instr) {
6939   VisitUnimplemented(instr);
6940 }
6941 
6942 
6943 void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
6944   NEONFormatDecoder nfd(instr);
6945   VectorFormat vf = nfd.GetVectorFormat();
6946 
6947   static const NEONFormatMap map_lp =
6948       {{23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}};
6949   VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp);
6950 
6951   static const NEONFormatMap map_fcvtl = {{22}, {NF_4S, NF_2D}};
6952   VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl);
6953 
6954   static const NEONFormatMap map_fcvtn = {{22, 30},
6955                                           {NF_4H, NF_8H, NF_2S, NF_4S}};
6956   VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn);
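  // vf_lp is used by the lengthening pairwise ops (SADDLP, UADDLP, SADALP and
  // UADALP), vf_fcvtl by the widening FCVTL{2}, and vf_fcvtn by the narrowing
  // FCVTN{2} and FCVTXN{2}.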
6957 
6958   SimVRegister& rd = ReadVRegister(instr->GetRd());
6959   SimVRegister& rn = ReadVRegister(instr->GetRn());
6960 
6961   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
6962     // These instructions all use a two bit size field, except NOT and RBIT,
6963     // which use the field to encode the operation.
6964     switch (instr->Mask(NEON2RegMiscMask)) {
6965       case NEON_REV64:
6966         rev64(vf, rd, rn);
6967         break;
6968       case NEON_REV32:
6969         rev32(vf, rd, rn);
6970         break;
6971       case NEON_REV16:
6972         rev16(vf, rd, rn);
6973         break;
6974       case NEON_SUQADD:
6975         suqadd(vf, rd, rd, rn);
6976         break;
6977       case NEON_USQADD:
6978         usqadd(vf, rd, rd, rn);
6979         break;
6980       case NEON_CLS:
6981         cls(vf, rd, rn);
6982         break;
6983       case NEON_CLZ:
6984         clz(vf, rd, rn);
6985         break;
6986       case NEON_CNT:
6987         cnt(vf, rd, rn);
6988         break;
6989       case NEON_SQABS:
6990         abs(vf, rd, rn).SignedSaturate(vf);
6991         break;
6992       case NEON_SQNEG:
6993         neg(vf, rd, rn).SignedSaturate(vf);
6994         break;
6995       case NEON_CMGT_zero:
6996         cmp(vf, rd, rn, 0, gt);
6997         break;
6998       case NEON_CMGE_zero:
6999         cmp(vf, rd, rn, 0, ge);
7000         break;
7001       case NEON_CMEQ_zero:
7002         cmp(vf, rd, rn, 0, eq);
7003         break;
7004       case NEON_CMLE_zero:
7005         cmp(vf, rd, rn, 0, le);
7006         break;
7007       case NEON_CMLT_zero:
7008         cmp(vf, rd, rn, 0, lt);
7009         break;
7010       case NEON_ABS:
7011         abs(vf, rd, rn);
7012         break;
7013       case NEON_NEG:
7014         neg(vf, rd, rn);
7015         break;
7016       case NEON_SADDLP:
7017         saddlp(vf_lp, rd, rn);
7018         break;
7019       case NEON_UADDLP:
7020         uaddlp(vf_lp, rd, rn);
7021         break;
7022       case NEON_SADALP:
7023         sadalp(vf_lp, rd, rn);
7024         break;
7025       case NEON_UADALP:
7026         uadalp(vf_lp, rd, rn);
7027         break;
7028       case NEON_RBIT_NOT:
7029         vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
7030         switch (instr->GetFPType()) {
7031           case 0:
7032             not_(vf, rd, rn);
7033             break;
7034           case 1:
7035             rbit(vf, rd, rn);
7036             break;
7037           default:
7038             VIXL_UNIMPLEMENTED();
7039         }
7040         break;
7041     }
7042   } else {
7043     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap());
7044     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
7045     bool inexact_exception = false;
7046     FrintMode frint_mode = kFrintToInteger;
7047 
7048     // These instructions all use a one bit size field, except XTN, SQXTUN,
7049     // SHLL, SQXTN and UQXTN, which use a two bit size field.
7050     switch (instr->Mask(NEON2RegMiscFPMask)) {
7051       case NEON_FABS:
7052         fabs_(fpf, rd, rn);
7053         return;
7054       case NEON_FNEG:
7055         fneg(fpf, rd, rn);
7056         return;
7057       case NEON_FSQRT:
7058         fsqrt(fpf, rd, rn);
7059         return;
7060       case NEON_FCVTL:
7061         if (instr->Mask(NEON_Q)) {
7062           fcvtl2(vf_fcvtl, rd, rn);
7063         } else {
7064           fcvtl(vf_fcvtl, rd, rn);
7065         }
7066         return;
7067       case NEON_FCVTN:
7068         if (instr->Mask(NEON_Q)) {
7069           fcvtn2(vf_fcvtn, rd, rn);
7070         } else {
7071           fcvtn(vf_fcvtn, rd, rn);
7072         }
7073         return;
7074       case NEON_FCVTXN:
7075         if (instr->Mask(NEON_Q)) {
7076           fcvtxn2(vf_fcvtn, rd, rn);
7077         } else {
7078           fcvtxn(vf_fcvtn, rd, rn);
7079         }
7080         return;
7081 
7082       // The following instructions break from the switch statement, rather
7083       // than return.
7084       case NEON_FRINT32X:
7085         inexact_exception = true;
7086         frint_mode = kFrintToInt32;
7087         break;  // Use FPCR rounding mode.
7088       case NEON_FRINT32Z:
7089         inexact_exception = true;
7090         frint_mode = kFrintToInt32;
7091         fpcr_rounding = FPZero;
7092         break;
7093       case NEON_FRINT64X:
7094         inexact_exception = true;
7095         frint_mode = kFrintToInt64;
7096         break;  // Use FPCR rounding mode.
7097       case NEON_FRINT64Z:
7098         inexact_exception = true;
7099         frint_mode = kFrintToInt64;
7100         fpcr_rounding = FPZero;
7101         break;
7102       case NEON_FRINTI:
7103         break;  // Use FPCR rounding mode.
7104       case NEON_FRINTX:
7105         inexact_exception = true;
7106         break;
7107       case NEON_FRINTA:
7108         fpcr_rounding = FPTieAway;
7109         break;
7110       case NEON_FRINTM:
7111         fpcr_rounding = FPNegativeInfinity;
7112         break;
7113       case NEON_FRINTN:
7114         fpcr_rounding = FPTieEven;
7115         break;
7116       case NEON_FRINTP:
7117         fpcr_rounding = FPPositiveInfinity;
7118         break;
7119       case NEON_FRINTZ:
7120         fpcr_rounding = FPZero;
7121         break;
7122 
7123       case NEON_FCVTNS:
7124         fcvts(fpf, rd, rn, FPTieEven);
7125         return;
7126       case NEON_FCVTNU:
7127         fcvtu(fpf, rd, rn, FPTieEven);
7128         return;
7129       case NEON_FCVTPS:
7130         fcvts(fpf, rd, rn, FPPositiveInfinity);
7131         return;
7132       case NEON_FCVTPU:
7133         fcvtu(fpf, rd, rn, FPPositiveInfinity);
7134         return;
7135       case NEON_FCVTMS:
7136         fcvts(fpf, rd, rn, FPNegativeInfinity);
7137         return;
7138       case NEON_FCVTMU:
7139         fcvtu(fpf, rd, rn, FPNegativeInfinity);
7140         return;
7141       case NEON_FCVTZS:
7142         fcvts(fpf, rd, rn, FPZero);
7143         return;
7144       case NEON_FCVTZU:
7145         fcvtu(fpf, rd, rn, FPZero);
7146         return;
7147       case NEON_FCVTAS:
7148         fcvts(fpf, rd, rn, FPTieAway);
7149         return;
7150       case NEON_FCVTAU:
7151         fcvtu(fpf, rd, rn, FPTieAway);
7152         return;
7153       case NEON_SCVTF:
7154         scvtf(fpf, rd, rn, 0, fpcr_rounding);
7155         return;
7156       case NEON_UCVTF:
7157         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
7158         return;
7159       case NEON_URSQRTE:
7160         ursqrte(fpf, rd, rn);
7161         return;
7162       case NEON_URECPE:
7163         urecpe(fpf, rd, rn);
7164         return;
7165       case NEON_FRSQRTE:
7166         frsqrte(fpf, rd, rn);
7167         return;
7168       case NEON_FRECPE:
7169         frecpe(fpf, rd, rn, fpcr_rounding);
7170         return;
7171       case NEON_FCMGT_zero:
7172         fcmp_zero(fpf, rd, rn, gt);
7173         return;
7174       case NEON_FCMGE_zero:
7175         fcmp_zero(fpf, rd, rn, ge);
7176         return;
7177       case NEON_FCMEQ_zero:
7178         fcmp_zero(fpf, rd, rn, eq);
7179         return;
7180       case NEON_FCMLE_zero:
7181         fcmp_zero(fpf, rd, rn, le);
7182         return;
7183       case NEON_FCMLT_zero:
7184         fcmp_zero(fpf, rd, rn, lt);
7185         return;
7186       default:
7187         if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
7188             (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
7189           switch (instr->Mask(NEON2RegMiscMask)) {
7190             case NEON_XTN:
7191               xtn(vf, rd, rn);
7192               return;
7193             case NEON_SQXTN:
7194               sqxtn(vf, rd, rn);
7195               return;
7196             case NEON_UQXTN:
7197               uqxtn(vf, rd, rn);
7198               return;
7199             case NEON_SQXTUN:
7200               sqxtun(vf, rd, rn);
7201               return;
7202             case NEON_SHLL:
7203               vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7204               if (instr->Mask(NEON_Q)) {
7205                 shll2(vf, rd, rn);
7206               } else {
7207                 shll(vf, rd, rn);
7208               }
7209               return;
7210             default:
7211               VIXL_UNIMPLEMENTED();
7212           }
7213         } else {
7214           VIXL_UNIMPLEMENTED();
7215         }
7216     }
7217 
7218     // Only FRINT* instructions fall through the switch above.
7219     frint(fpf, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
7220   }
7221 }
7222 
7223 
7224 void Simulator::VisitNEON2RegMiscFP16(const Instruction* instr) {
7225   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
7226   NEONFormatDecoder nfd(instr);
7227   VectorFormat fpf = nfd.GetVectorFormat(&map_half);
7228 
7229   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
7230 
7231   SimVRegister& rd = ReadVRegister(instr->GetRd());
7232   SimVRegister& rn = ReadVRegister(instr->GetRn());
7233 
7234   switch (instr->Mask(NEON2RegMiscFP16Mask)) {
7235     case NEON_SCVTF_H:
7236       scvtf(fpf, rd, rn, 0, fpcr_rounding);
7237       return;
7238     case NEON_UCVTF_H:
7239       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
7240       return;
7241     case NEON_FCVTNS_H:
7242       fcvts(fpf, rd, rn, FPTieEven);
7243       return;
7244     case NEON_FCVTNU_H:
7245       fcvtu(fpf, rd, rn, FPTieEven);
7246       return;
7247     case NEON_FCVTPS_H:
7248       fcvts(fpf, rd, rn, FPPositiveInfinity);
7249       return;
7250     case NEON_FCVTPU_H:
7251       fcvtu(fpf, rd, rn, FPPositiveInfinity);
7252       return;
7253     case NEON_FCVTMS_H:
7254       fcvts(fpf, rd, rn, FPNegativeInfinity);
7255       return;
7256     case NEON_FCVTMU_H:
7257       fcvtu(fpf, rd, rn, FPNegativeInfinity);
7258       return;
7259     case NEON_FCVTZS_H:
7260       fcvts(fpf, rd, rn, FPZero);
7261       return;
7262     case NEON_FCVTZU_H:
7263       fcvtu(fpf, rd, rn, FPZero);
7264       return;
7265     case NEON_FCVTAS_H:
7266       fcvts(fpf, rd, rn, FPTieAway);
7267       return;
7268     case NEON_FCVTAU_H:
7269       fcvtu(fpf, rd, rn, FPTieAway);
7270       return;
7271     case NEON_FRINTI_H:
7272       frint(fpf, rd, rn, fpcr_rounding, false);
7273       return;
7274     case NEON_FRINTX_H:
7275       frint(fpf, rd, rn, fpcr_rounding, true);
7276       return;
7277     case NEON_FRINTA_H:
7278       frint(fpf, rd, rn, FPTieAway, false);
7279       return;
7280     case NEON_FRINTM_H:
7281       frint(fpf, rd, rn, FPNegativeInfinity, false);
7282       return;
7283     case NEON_FRINTN_H:
7284       frint(fpf, rd, rn, FPTieEven, false);
7285       return;
7286     case NEON_FRINTP_H:
7287       frint(fpf, rd, rn, FPPositiveInfinity, false);
7288       return;
7289     case NEON_FRINTZ_H:
7290       frint(fpf, rd, rn, FPZero, false);
7291       return;
7292     case NEON_FABS_H:
7293       fabs_(fpf, rd, rn);
7294       return;
7295     case NEON_FNEG_H:
7296       fneg(fpf, rd, rn);
7297       return;
7298     case NEON_FSQRT_H:
7299       fsqrt(fpf, rd, rn);
7300       return;
7301     case NEON_FRSQRTE_H:
7302       frsqrte(fpf, rd, rn);
7303       return;
7304     case NEON_FRECPE_H:
7305       frecpe(fpf, rd, rn, fpcr_rounding);
7306       return;
7307     case NEON_FCMGT_H_zero:
7308       fcmp_zero(fpf, rd, rn, gt);
7309       return;
7310     case NEON_FCMGE_H_zero:
7311       fcmp_zero(fpf, rd, rn, ge);
7312       return;
7313     case NEON_FCMEQ_H_zero:
7314       fcmp_zero(fpf, rd, rn, eq);
7315       return;
7316     case NEON_FCMLE_H_zero:
7317       fcmp_zero(fpf, rd, rn, le);
7318       return;
7319     case NEON_FCMLT_H_zero:
7320       fcmp_zero(fpf, rd, rn, lt);
7321       return;
7322     default:
7323       VIXL_UNIMPLEMENTED();
7324       return;
7325   }
7326 }
7327 
7328 
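// NEON "three registers, same type" instructions fall into three groups: the
// bitwise logical ops, the FP ops, and the integer ops, each with its own
// format map and sub-switch below.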
7329 void Simulator::VisitNEON3Same(const Instruction* instr) {
7330   NEONFormatDecoder nfd(instr);
7331   SimVRegister& rd = ReadVRegister(instr->GetRd());
7332   SimVRegister& rn = ReadVRegister(instr->GetRn());
7333   SimVRegister& rm = ReadVRegister(instr->GetRm());
7334 
7335   if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
7336     VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
7337     switch (instr->Mask(NEON3SameLogicalMask)) {
7338       case NEON_AND:
7339         and_(vf, rd, rn, rm);
7340         break;
7341       case NEON_ORR:
7342         orr(vf, rd, rn, rm);
7343         break;
7344       case NEON_ORN:
7345         orn(vf, rd, rn, rm);
7346         break;
7347       case NEON_EOR:
7348         eor(vf, rd, rn, rm);
7349         break;
7350       case NEON_BIC:
7351         bic(vf, rd, rn, rm);
7352         break;
7353       case NEON_BIF:
7354         bif(vf, rd, rn, rm);
7355         break;
7356       case NEON_BIT:
7357         bit(vf, rd, rn, rm);
7358         break;
7359       case NEON_BSL:
7360         bsl(vf, rd, rd, rn, rm);
7361         break;
7362       default:
7363         VIXL_UNIMPLEMENTED();
7364     }
7365   } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
7366     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
7367     switch (instr->Mask(NEON3SameFPMask)) {
7368       case NEON_FADD:
7369         fadd(vf, rd, rn, rm);
7370         break;
7371       case NEON_FSUB:
7372         fsub(vf, rd, rn, rm);
7373         break;
7374       case NEON_FMUL:
7375         fmul(vf, rd, rn, rm);
7376         break;
7377       case NEON_FDIV:
7378         fdiv(vf, rd, rn, rm);
7379         break;
7380       case NEON_FMAX:
7381         fmax(vf, rd, rn, rm);
7382         break;
7383       case NEON_FMIN:
7384         fmin(vf, rd, rn, rm);
7385         break;
7386       case NEON_FMAXNM:
7387         fmaxnm(vf, rd, rn, rm);
7388         break;
7389       case NEON_FMINNM:
7390         fminnm(vf, rd, rn, rm);
7391         break;
7392       case NEON_FMLA:
7393         fmla(vf, rd, rd, rn, rm);
7394         break;
7395       case NEON_FMLS:
7396         fmls(vf, rd, rd, rn, rm);
7397         break;
7398       case NEON_FMULX:
7399         fmulx(vf, rd, rn, rm);
7400         break;
7401       case NEON_FACGE:
7402         fabscmp(vf, rd, rn, rm, ge);
7403         break;
7404       case NEON_FACGT:
7405         fabscmp(vf, rd, rn, rm, gt);
7406         break;
7407       case NEON_FCMEQ:
7408         fcmp(vf, rd, rn, rm, eq);
7409         break;
7410       case NEON_FCMGE:
7411         fcmp(vf, rd, rn, rm, ge);
7412         break;
7413       case NEON_FCMGT:
7414         fcmp(vf, rd, rn, rm, gt);
7415         break;
7416       case NEON_FRECPS:
7417         frecps(vf, rd, rn, rm);
7418         break;
7419       case NEON_FRSQRTS:
7420         frsqrts(vf, rd, rn, rm);
7421         break;
7422       case NEON_FABD:
7423         fabd(vf, rd, rn, rm);
7424         break;
7425       case NEON_FADDP:
7426         faddp(vf, rd, rn, rm);
7427         break;
7428       case NEON_FMAXP:
7429         fmaxp(vf, rd, rn, rm);
7430         break;
7431       case NEON_FMAXNMP:
7432         fmaxnmp(vf, rd, rn, rm);
7433         break;
7434       case NEON_FMINP:
7435         fminp(vf, rd, rn, rm);
7436         break;
7437       case NEON_FMINNMP:
7438         fminnmp(vf, rd, rn, rm);
7439         break;
7440       default:
7441         // FMLAL{2} and FMLSL{2} have special-case encodings.
7442         switch (instr->Mask(NEON3SameFHMMask)) {
7443           case NEON_FMLAL:
7444             fmlal(vf, rd, rn, rm);
7445             break;
7446           case NEON_FMLAL2:
7447             fmlal2(vf, rd, rn, rm);
7448             break;
7449           case NEON_FMLSL:
7450             fmlsl(vf, rd, rn, rm);
7451             break;
7452           case NEON_FMLSL2:
7453             fmlsl2(vf, rd, rn, rm);
7454             break;
7455           default:
7456             VIXL_UNIMPLEMENTED();
7457         }
7458     }
7459   } else {
7460     VectorFormat vf = nfd.GetVectorFormat();
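    // The integer helpers return a LogicVRegister, so saturation, halving and
    // rounding are applied as chained post-operations on the result, e.g.
    // add(...).UnsignedSaturate(vf) for UQADD, or
    // ushl(...).Round(vf).UnsignedSaturate(vf) for UQRSHL.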
7461     switch (instr->Mask(NEON3SameMask)) {
7462       case NEON_ADD:
7463         add(vf, rd, rn, rm);
7464         break;
7465       case NEON_ADDP:
7466         addp(vf, rd, rn, rm);
7467         break;
7468       case NEON_CMEQ:
7469         cmp(vf, rd, rn, rm, eq);
7470         break;
7471       case NEON_CMGE:
7472         cmp(vf, rd, rn, rm, ge);
7473         break;
7474       case NEON_CMGT:
7475         cmp(vf, rd, rn, rm, gt);
7476         break;
7477       case NEON_CMHI:
7478         cmp(vf, rd, rn, rm, hi);
7479         break;
7480       case NEON_CMHS:
7481         cmp(vf, rd, rn, rm, hs);
7482         break;
7483       case NEON_CMTST:
7484         cmptst(vf, rd, rn, rm);
7485         break;
7486       case NEON_MLS:
7487         mls(vf, rd, rd, rn, rm);
7488         break;
7489       case NEON_MLA:
7490         mla(vf, rd, rd, rn, rm);
7491         break;
7492       case NEON_MUL:
7493         mul(vf, rd, rn, rm);
7494         break;
7495       case NEON_PMUL:
7496         pmul(vf, rd, rn, rm);
7497         break;
7498       case NEON_SMAX:
7499         smax(vf, rd, rn, rm);
7500         break;
7501       case NEON_SMAXP:
7502         smaxp(vf, rd, rn, rm);
7503         break;
7504       case NEON_SMIN:
7505         smin(vf, rd, rn, rm);
7506         break;
7507       case NEON_SMINP:
7508         sminp(vf, rd, rn, rm);
7509         break;
7510       case NEON_SUB:
7511         sub(vf, rd, rn, rm);
7512         break;
7513       case NEON_UMAX:
7514         umax(vf, rd, rn, rm);
7515         break;
7516       case NEON_UMAXP:
7517         umaxp(vf, rd, rn, rm);
7518         break;
7519       case NEON_UMIN:
7520         umin(vf, rd, rn, rm);
7521         break;
7522       case NEON_UMINP:
7523         uminp(vf, rd, rn, rm);
7524         break;
7525       case NEON_SSHL:
7526         sshl(vf, rd, rn, rm);
7527         break;
7528       case NEON_USHL:
7529         ushl(vf, rd, rn, rm);
7530         break;
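      // The boolean argument selects the signed (SABD, true) or unsigned
      // (UABD, false) absolute difference.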
7531       case NEON_SABD:
7532         absdiff(vf, rd, rn, rm, true);
7533         break;
7534       case NEON_UABD:
7535         absdiff(vf, rd, rn, rm, false);
7536         break;
7537       case NEON_SABA:
7538         saba(vf, rd, rn, rm);
7539         break;
7540       case NEON_UABA:
7541         uaba(vf, rd, rn, rm);
7542         break;
7543       case NEON_UQADD:
7544         add(vf, rd, rn, rm).UnsignedSaturate(vf);
7545         break;
7546       case NEON_SQADD:
7547         add(vf, rd, rn, rm).SignedSaturate(vf);
7548         break;
7549       case NEON_UQSUB:
7550         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
7551         break;
7552       case NEON_SQSUB:
7553         sub(vf, rd, rn, rm).SignedSaturate(vf);
7554         break;
7555       case NEON_SQDMULH:
7556         sqdmulh(vf, rd, rn, rm);
7557         break;
7558       case NEON_SQRDMULH:
7559         sqrdmulh(vf, rd, rn, rm);
7560         break;
7561       case NEON_UQSHL:
7562         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
7563         break;
7564       case NEON_SQSHL:
7565         sshl(vf, rd, rn, rm).SignedSaturate(vf);
7566         break;
7567       case NEON_URSHL:
7568         ushl(vf, rd, rn, rm).Round(vf);
7569         break;
7570       case NEON_SRSHL:
7571         sshl(vf, rd, rn, rm).Round(vf);
7572         break;
7573       case NEON_UQRSHL:
7574         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
7575         break;
7576       case NEON_SQRSHL:
7577         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
7578         break;
7579       case NEON_UHADD:
7580         add(vf, rd, rn, rm).Uhalve(vf);
7581         break;
7582       case NEON_URHADD:
7583         add(vf, rd, rn, rm).Uhalve(vf).Round(vf);
7584         break;
7585       case NEON_SHADD:
7586         add(vf, rd, rn, rm).Halve(vf);
7587         break;
7588       case NEON_SRHADD:
7589         add(vf, rd, rn, rm).Halve(vf).Round(vf);
7590         break;
7591       case NEON_UHSUB:
7592         sub(vf, rd, rn, rm).Uhalve(vf);
7593         break;
7594       case NEON_SHSUB:
7595         sub(vf, rd, rn, rm).Halve(vf);
7596         break;
7597       default:
7598         VIXL_UNIMPLEMENTED();
7599     }
7600   }
7601 }
7602 
7603 
7604 void Simulator::VisitNEON3SameFP16(const Instruction* instr) {
7605   NEONFormatDecoder nfd(instr);
7606   SimVRegister& rd = ReadVRegister(instr->GetRd());
7607   SimVRegister& rn = ReadVRegister(instr->GetRn());
7608   SimVRegister& rm = ReadVRegister(instr->GetRm());
7609 
7610   VectorFormat vf = nfd.GetVectorFormat(nfd.FP16FormatMap());
7611   switch (instr->Mask(NEON3SameFP16Mask)) {
7612 #define SIM_FUNC(A, B) \
7613   case NEON_##A##_H:   \
7614     B(vf, rd, rn, rm); \
7615     break;
7616     SIM_FUNC(FMAXNM, fmaxnm);
7617     SIM_FUNC(FADD, fadd);
7618     SIM_FUNC(FMULX, fmulx);
7619     SIM_FUNC(FMAX, fmax);
7620     SIM_FUNC(FRECPS, frecps);
7621     SIM_FUNC(FMINNM, fminnm);
7622     SIM_FUNC(FSUB, fsub);
7623     SIM_FUNC(FMIN, fmin);
7624     SIM_FUNC(FRSQRTS, frsqrts);
7625     SIM_FUNC(FMAXNMP, fmaxnmp);
7626     SIM_FUNC(FADDP, faddp);
7627     SIM_FUNC(FMUL, fmul);
7628     SIM_FUNC(FMAXP, fmaxp);
7629     SIM_FUNC(FDIV, fdiv);
7630     SIM_FUNC(FMINNMP, fminnmp);
7631     SIM_FUNC(FABD, fabd);
7632     SIM_FUNC(FMINP, fminp);
7633 #undef SIM_FUNC
7634     case NEON_FMLA_H:
7635       fmla(vf, rd, rd, rn, rm);
7636       break;
7637     case NEON_FMLS_H:
7638       fmls(vf, rd, rd, rn, rm);
7639       break;
7640     case NEON_FCMEQ_H:
7641       fcmp(vf, rd, rn, rm, eq);
7642       break;
7643     case NEON_FCMGE_H:
7644       fcmp(vf, rd, rn, rm, ge);
7645       break;
7646     case NEON_FACGE_H:
7647       fabscmp(vf, rd, rn, rm, ge);
7648       break;
7649     case NEON_FCMGT_H:
7650       fcmp(vf, rd, rn, rm, gt);
7651       break;
7652     case NEON_FACGT_H:
7653       fabscmp(vf, rd, rn, rm, gt);
7654       break;
7655     default:
7656       VIXL_UNIMPLEMENTED();
7657       break;
7658   }
7659 }
7660 
7661 void Simulator::VisitNEON3SameExtra(const Instruction* instr) {
7662   NEONFormatDecoder nfd(instr);
7663   SimVRegister& rd = ReadVRegister(instr->GetRd());
7664   SimVRegister& rn = ReadVRegister(instr->GetRn());
7665   SimVRegister& rm = ReadVRegister(instr->GetRm());
7666   int rot = 0;
7667   VectorFormat vf = nfd.GetVectorFormat();
7668 
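  // fcmla takes a rotation of 0, 90, 180 or 270 degrees; fcadd allows only
  // 90 or 270.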
7669   switch (form_hash_) {
7670     case "fcmla_asimdsame2_c"_h:
7671       rot = instr->GetImmRotFcmlaVec();
7672       fcmla(vf, rd, rn, rm, rd, rot);
7673       break;
7674     case "fcadd_asimdsame2_c"_h:
7675       rot = instr->GetImmRotFcadd();
7676       fcadd(vf, rd, rn, rm, rot);
7677       break;
7678     case "sdot_asimdsame2_d"_h:
7679       sdot(vf, rd, rn, rm);
7680       break;
7681     case "udot_asimdsame2_d"_h:
7682       udot(vf, rd, rn, rm);
7683       break;
7684     case "usdot_asimdsame2_d"_h:
7685       usdot(vf, rd, rn, rm);
7686       break;
7687     case "sqrdmlah_asimdsame2_only"_h:
7688       sqrdmlah(vf, rd, rn, rm);
7689       break;
7690     case "sqrdmlsh_asimdsame2_only"_h:
7691       sqrdmlsh(vf, rd, rn, rm);
7692       break;
7693   }
7694 }
7695 
7696 
7697 void Simulator::VisitNEON3Different(const Instruction* instr) {
7698   NEONFormatDecoder nfd(instr);
7699   VectorFormat vf = nfd.GetVectorFormat();
7700   VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7701 
7702   SimVRegister& rd = ReadVRegister(instr->GetRd());
7703   SimVRegister& rn = ReadVRegister(instr->GetRn());
7704   SimVRegister& rm = ReadVRegister(instr->GetRm());
7705 
7706   switch (instr->Mask(NEON3DifferentMask)) {
7707     case NEON_PMULL:
7708       pmull(vf_l, rd, rn, rm);
7709       break;
7710     case NEON_PMULL2:
7711       pmull2(vf_l, rd, rn, rm);
7712       break;
7713     case NEON_UADDL:
7714       uaddl(vf_l, rd, rn, rm);
7715       break;
7716     case NEON_UADDL2:
7717       uaddl2(vf_l, rd, rn, rm);
7718       break;
7719     case NEON_SADDL:
7720       saddl(vf_l, rd, rn, rm);
7721       break;
7722     case NEON_SADDL2:
7723       saddl2(vf_l, rd, rn, rm);
7724       break;
7725     case NEON_USUBL:
7726       usubl(vf_l, rd, rn, rm);
7727       break;
7728     case NEON_USUBL2:
7729       usubl2(vf_l, rd, rn, rm);
7730       break;
7731     case NEON_SSUBL:
7732       ssubl(vf_l, rd, rn, rm);
7733       break;
7734     case NEON_SSUBL2:
7735       ssubl2(vf_l, rd, rn, rm);
7736       break;
7737     case NEON_SABAL:
7738       sabal(vf_l, rd, rn, rm);
7739       break;
7740     case NEON_SABAL2:
7741       sabal2(vf_l, rd, rn, rm);
7742       break;
7743     case NEON_UABAL:
7744       uabal(vf_l, rd, rn, rm);
7745       break;
7746     case NEON_UABAL2:
7747       uabal2(vf_l, rd, rn, rm);
7748       break;
7749     case NEON_SABDL:
7750       sabdl(vf_l, rd, rn, rm);
7751       break;
7752     case NEON_SABDL2:
7753       sabdl2(vf_l, rd, rn, rm);
7754       break;
7755     case NEON_UABDL:
7756       uabdl(vf_l, rd, rn, rm);
7757       break;
7758     case NEON_UABDL2:
7759       uabdl2(vf_l, rd, rn, rm);
7760       break;
7761     case NEON_SMLAL:
7762       smlal(vf_l, rd, rn, rm);
7763       break;
7764     case NEON_SMLAL2:
7765       smlal2(vf_l, rd, rn, rm);
7766       break;
7767     case NEON_UMLAL:
7768       umlal(vf_l, rd, rn, rm);
7769       break;
7770     case NEON_UMLAL2:
7771       umlal2(vf_l, rd, rn, rm);
7772       break;
7773     case NEON_SMLSL:
7774       smlsl(vf_l, rd, rn, rm);
7775       break;
7776     case NEON_SMLSL2:
7777       smlsl2(vf_l, rd, rn, rm);
7778       break;
7779     case NEON_UMLSL:
7780       umlsl(vf_l, rd, rn, rm);
7781       break;
7782     case NEON_UMLSL2:
7783       umlsl2(vf_l, rd, rn, rm);
7784       break;
7785     case NEON_SMULL:
7786       smull(vf_l, rd, rn, rm);
7787       break;
7788     case NEON_SMULL2:
7789       smull2(vf_l, rd, rn, rm);
7790       break;
7791     case NEON_UMULL:
7792       umull(vf_l, rd, rn, rm);
7793       break;
7794     case NEON_UMULL2:
7795       umull2(vf_l, rd, rn, rm);
7796       break;
7797     case NEON_SQDMLAL:
7798       sqdmlal(vf_l, rd, rn, rm);
7799       break;
7800     case NEON_SQDMLAL2:
7801       sqdmlal2(vf_l, rd, rn, rm);
7802       break;
7803     case NEON_SQDMLSL:
7804       sqdmlsl(vf_l, rd, rn, rm);
7805       break;
7806     case NEON_SQDMLSL2:
7807       sqdmlsl2(vf_l, rd, rn, rm);
7808       break;
7809     case NEON_SQDMULL:
7810       sqdmull(vf_l, rd, rn, rm);
7811       break;
7812     case NEON_SQDMULL2:
7813       sqdmull2(vf_l, rd, rn, rm);
7814       break;
7815     case NEON_UADDW:
7816       uaddw(vf_l, rd, rn, rm);
7817       break;
7818     case NEON_UADDW2:
7819       uaddw2(vf_l, rd, rn, rm);
7820       break;
7821     case NEON_SADDW:
7822       saddw(vf_l, rd, rn, rm);
7823       break;
7824     case NEON_SADDW2:
7825       saddw2(vf_l, rd, rn, rm);
7826       break;
7827     case NEON_USUBW:
7828       usubw(vf_l, rd, rn, rm);
7829       break;
7830     case NEON_USUBW2:
7831       usubw2(vf_l, rd, rn, rm);
7832       break;
7833     case NEON_SSUBW:
7834       ssubw(vf_l, rd, rn, rm);
7835       break;
7836     case NEON_SSUBW2:
7837       ssubw2(vf_l, rd, rn, rm);
7838       break;
7839     case NEON_ADDHN:
7840       addhn(vf, rd, rn, rm);
7841       break;
7842     case NEON_ADDHN2:
7843       addhn2(vf, rd, rn, rm);
7844       break;
7845     case NEON_RADDHN:
7846       raddhn(vf, rd, rn, rm);
7847       break;
7848     case NEON_RADDHN2:
7849       raddhn2(vf, rd, rn, rm);
7850       break;
7851     case NEON_SUBHN:
7852       subhn(vf, rd, rn, rm);
7853       break;
7854     case NEON_SUBHN2:
7855       subhn2(vf, rd, rn, rm);
7856       break;
7857     case NEON_RSUBHN:
7858       rsubhn(vf, rd, rn, rm);
7859       break;
7860     case NEON_RSUBHN2:
7861       rsubhn2(vf, rd, rn, rm);
7862       break;
7863     default:
7864       VIXL_UNIMPLEMENTED();
7865   }
7866 }
7867 
7868 
7869 void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
7870   NEONFormatDecoder nfd(instr);
7871 
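  // Bit 30 (Q) selects between the 4H and 8H half-precision forms.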
7872   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
7873 
7874   SimVRegister& rd = ReadVRegister(instr->GetRd());
7875   SimVRegister& rn = ReadVRegister(instr->GetRn());
7876 
7877   if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) {
7878     VectorFormat vf = nfd.GetVectorFormat(&map_half);
7879     switch (instr->Mask(NEONAcrossLanesFP16Mask)) {
7880       case NEON_FMAXV_H:
7881         fmaxv(vf, rd, rn);
7882         break;
7883       case NEON_FMINV_H:
7884         fminv(vf, rd, rn);
7885         break;
7886       case NEON_FMAXNMV_H:
7887         fmaxnmv(vf, rd, rn);
7888         break;
7889       case NEON_FMINNMV_H:
7890         fminnmv(vf, rd, rn);
7891         break;
7892       default:
7893         VIXL_UNIMPLEMENTED();
7894     }
7895   } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
    // These across-lanes instructions produce a scalar result; the vector
    // format passed here describes the input operand.
7897     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
7898 
7899     switch (instr->Mask(NEONAcrossLanesFPMask)) {
7900       case NEON_FMAXV:
7901         fmaxv(vf, rd, rn);
7902         break;
7903       case NEON_FMINV:
7904         fminv(vf, rd, rn);
7905         break;
7906       case NEON_FMAXNMV:
7907         fmaxnmv(vf, rd, rn);
7908         break;
7909       case NEON_FMINNMV:
7910         fminnmv(vf, rd, rn);
7911         break;
7912       default:
7913         VIXL_UNIMPLEMENTED();
7914     }
7915   } else {
7916     VectorFormat vf = nfd.GetVectorFormat();
7917 
7918     switch (instr->Mask(NEONAcrossLanesMask)) {
7919       case NEON_ADDV:
7920         addv(vf, rd, rn);
7921         break;
7922       case NEON_SMAXV:
7923         smaxv(vf, rd, rn);
7924         break;
7925       case NEON_SMINV:
7926         sminv(vf, rd, rn);
7927         break;
7928       case NEON_UMAXV:
7929         umaxv(vf, rd, rn);
7930         break;
7931       case NEON_UMINV:
7932         uminv(vf, rd, rn);
7933         break;
7934       case NEON_SADDLV:
7935         saddlv(vf, rd, rn);
7936         break;
7937       case NEON_UADDLV:
7938         uaddlv(vf, rd, rn);
7939         break;
7940       default:
7941         VIXL_UNIMPLEMENTED();
7942     }
7943   }
7944 }
7945 
7946 void Simulator::SimulateNEONMulByElementLong(const Instruction* instr) {
7947   NEONFormatDecoder nfd(instr);
7948   VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7949 
7950   SimVRegister& rd = ReadVRegister(instr->GetRd());
7951   SimVRegister& rn = ReadVRegister(instr->GetRn());
7952 
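  // The lane index is encoded in H:L; H-sized lanes append the M bit and
  // restrict Rm to V0-V15.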
7953   int rm_reg = instr->GetRm();
7954   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7955   if (instr->GetNEONSize() == 1) {
7956     rm_reg = instr->GetRmLow16();
7957     index = (index << 1) | instr->GetNEONM();
7958   }
7959   SimVRegister& rm = ReadVRegister(rm_reg);
7960 
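  // Duplicate the selected element of rm across a temporary register so the
  // widening operation below can treat it as an ordinary vector operand.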
7961   SimVRegister temp;
7962   VectorFormat indexform =
7963       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vf));
7964   dup_element(indexform, temp, rm, index);
7965 
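  // The Q bit selects the second-half ('2') variant of these instructions.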
  bool is_2 = instr->Mask(NEON_Q) != 0;
7967 
7968   switch (form_hash_) {
7969     case "smull_asimdelem_l"_h:
7970       smull(vf, rd, rn, temp, is_2);
7971       break;
7972     case "umull_asimdelem_l"_h:
7973       umull(vf, rd, rn, temp, is_2);
7974       break;
7975     case "smlal_asimdelem_l"_h:
7976       smlal(vf, rd, rn, temp, is_2);
7977       break;
7978     case "umlal_asimdelem_l"_h:
7979       umlal(vf, rd, rn, temp, is_2);
7980       break;
7981     case "smlsl_asimdelem_l"_h:
7982       smlsl(vf, rd, rn, temp, is_2);
7983       break;
7984     case "umlsl_asimdelem_l"_h:
7985       umlsl(vf, rd, rn, temp, is_2);
7986       break;
7987     case "sqdmull_asimdelem_l"_h:
7988       sqdmull(vf, rd, rn, temp, is_2);
7989       break;
7990     case "sqdmlal_asimdelem_l"_h:
7991       sqdmlal(vf, rd, rn, temp, is_2);
7992       break;
7993     case "sqdmlsl_asimdelem_l"_h:
7994       sqdmlsl(vf, rd, rn, temp, is_2);
7995       break;
7996     default:
7997       VIXL_UNREACHABLE();
7998   }
7999 }
8000 
8001 void Simulator::SimulateNEONFPMulByElementLong(const Instruction* instr) {
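  // FMLAL/FMLSL by element multiply half-precision elements and accumulate
  // into single-precision lanes; Q selects a 2S or 4S destination.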
8002   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
8003   SimVRegister& rd = ReadVRegister(instr->GetRd());
8004   SimVRegister& rn = ReadVRegister(instr->GetRn());
8005   SimVRegister& rm = ReadVRegister(instr->GetRmLow16());
8006 
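  // The H:L:M index selects one of the eight half-precision lanes of Rm,
  // which is restricted to V0-V15.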
8007   int index =
8008       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
8009 
8010   switch (form_hash_) {
8011     case "fmlal_asimdelem_lh"_h:
8012       fmlal(vform, rd, rn, rm, index);
8013       break;
8014     case "fmlal2_asimdelem_lh"_h:
8015       fmlal2(vform, rd, rn, rm, index);
8016       break;
8017     case "fmlsl_asimdelem_lh"_h:
8018       fmlsl(vform, rd, rn, rm, index);
8019       break;
8020     case "fmlsl2_asimdelem_lh"_h:
8021       fmlsl2(vform, rd, rn, rm, index);
8022       break;
8023     default:
8024       VIXL_UNREACHABLE();
8025   }
8026 }
8027 
8028 void Simulator::SimulateNEONFPMulByElement(const Instruction* instr) {
8029   NEONFormatDecoder nfd(instr);
8030   static const NEONFormatMap map =
8031       {{23, 22, 30},
8032        {NF_4H, NF_8H, NF_UNDEF, NF_UNDEF, NF_2S, NF_4S, NF_UNDEF, NF_2D}};
8033   VectorFormat vform = nfd.GetVectorFormat(&map);
8034 
8035   SimVRegister& rd = ReadVRegister(instr->GetRd());
8036   SimVRegister& rn = ReadVRegister(instr->GetRn());
8037 
8038   int rm_reg = instr->GetRm();
8039   int index =
8040       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
8041 
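  // H-sized lanes use the full H:L:M index and restrict Rm to V0-V15; S and D
  // lanes use progressively fewer index bits.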
8042   if ((vform == kFormat4H) || (vform == kFormat8H)) {
8043     rm_reg &= 0xf;
8044   } else if ((vform == kFormat2S) || (vform == kFormat4S)) {
8045     index >>= 1;
8046   } else {
8047     VIXL_ASSERT(vform == kFormat2D);
8048     VIXL_ASSERT(instr->GetNEONL() == 0);
8049     index >>= 2;
8050   }
8051 
8052   SimVRegister& rm = ReadVRegister(rm_reg);
8053 
8054   switch (form_hash_) {
8055     case "fmul_asimdelem_rh_h"_h:
8056     case "fmul_asimdelem_r_sd"_h:
8057       fmul(vform, rd, rn, rm, index);
8058       break;
8059     case "fmla_asimdelem_rh_h"_h:
8060     case "fmla_asimdelem_r_sd"_h:
8061       fmla(vform, rd, rn, rm, index);
8062       break;
8063     case "fmls_asimdelem_rh_h"_h:
8064     case "fmls_asimdelem_r_sd"_h:
8065       fmls(vform, rd, rn, rm, index);
8066       break;
8067     case "fmulx_asimdelem_rh_h"_h:
8068     case "fmulx_asimdelem_r_sd"_h:
8069       fmulx(vform, rd, rn, rm, index);
8070       break;
8071     default:
8072       VIXL_UNREACHABLE();
8073   }
8074 }
8075 
8076 void Simulator::SimulateNEONComplexMulByElement(const Instruction* instr) {
8077   VectorFormat vform = instr->GetNEONQ() ? kFormat8H : kFormat4H;
8078   SimVRegister& rd = ReadVRegister(instr->GetRd());
8079   SimVRegister& rn = ReadVRegister(instr->GetRn());
8080   SimVRegister& rm = ReadVRegister(instr->GetRm());
8081   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
8082 
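  // fcmla by element indexes a complex (real, imaginary) pair of elements
  // rather than a single lane.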
8083   switch (form_hash_) {
8084     case "fcmla_asimdelem_c_s"_h:
8085       vform = kFormat4S;
8086       index >>= 1;
8087       VIXL_FALLTHROUGH();
8088     case "fcmla_asimdelem_c_h"_h:
8089       fcmla(vform, rd, rn, rm, index, instr->GetImmRotFcmlaSca());
8090       break;
8091     default:
8092       VIXL_UNREACHABLE();
8093   }
8094 }
8095 
8096 void Simulator::SimulateNEONDotProdByElement(const Instruction* instr) {
8097   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
8098 
8099   SimVRegister& rd = ReadVRegister(instr->GetRd());
8100   SimVRegister& rn = ReadVRegister(instr->GetRn());
8101   SimVRegister& rm = ReadVRegister(instr->GetRm());
8102   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
8103 
8104   SimVRegister temp;
  // NEON indexed `dot` allows the index value to exceed the register size,
  // so promote the format to a Q-sized vector format before duplicating.
8107   dup_elements_to_segments(VectorFormatFillQ(vform), temp, rm, index);
8108 
8109   switch (form_hash_) {
8110     case "sdot_asimdelem_d"_h:
8111       sdot(vform, rd, rn, temp);
8112       break;
8113     case "udot_asimdelem_d"_h:
8114       udot(vform, rd, rn, temp);
8115       break;
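    // sudot is implemented via the usdot helper with its source operands
    // swapped.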
8116     case "sudot_asimdelem_d"_h:
8117       usdot(vform, rd, temp, rn);
8118       break;
8119     case "usdot_asimdelem_d"_h:
8120       usdot(vform, rd, rn, temp);
8121       break;
8122   }
8123 }
8124 
8125 void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
8126   NEONFormatDecoder nfd(instr);
8127   VectorFormat vform = nfd.GetVectorFormat();
8128 
8129   SimVRegister& rd = ReadVRegister(instr->GetRd());
8130   SimVRegister& rn = ReadVRegister(instr->GetRn());
8131 
8132   int rm_reg = instr->GetRm();
8133   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
8134 
8135   if ((vform == kFormat4H) || (vform == kFormat8H)) {
8136     rm_reg &= 0xf;
8137     index = (index << 1) | instr->GetNEONM();
8138   }
8139 
8140   SimVRegister& rm = ReadVRegister(rm_reg);
8141 
8142   switch (form_hash_) {
8143     case "mul_asimdelem_r"_h:
8144       mul(vform, rd, rn, rm, index);
8145       break;
8146     case "mla_asimdelem_r"_h:
8147       mla(vform, rd, rn, rm, index);
8148       break;
8149     case "mls_asimdelem_r"_h:
8150       mls(vform, rd, rn, rm, index);
8151       break;
8152     case "sqdmulh_asimdelem_r"_h:
8153       sqdmulh(vform, rd, rn, rm, index);
8154       break;
8155     case "sqrdmulh_asimdelem_r"_h:
8156       sqrdmulh(vform, rd, rn, rm, index);
8157       break;
8158     case "sqrdmlah_asimdelem_r"_h:
8159       sqrdmlah(vform, rd, rn, rm, index);
8160       break;
8161     case "sqrdmlsh_asimdelem_r"_h:
8162       sqrdmlsh(vform, rd, rn, rm, index);
8163       break;
8164   }
8165 }
8166 
8167 
8168 void Simulator::VisitNEONCopy(const Instruction* instr) {
8169   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap());
8170   VectorFormat vf = nfd.GetVectorFormat();
8171 
8172   SimVRegister& rd = ReadVRegister(instr->GetRd());
8173   SimVRegister& rn = ReadVRegister(instr->GetRn());
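  // The position of the lowest set bit of imm5 encodes the element size; the
  // bits above it encode the element index.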
8174   int imm5 = instr->GetImmNEON5();
8175   int tz = CountTrailingZeros(imm5, 32);
8176   int reg_index = ExtractSignedBitfield32(31, tz + 1, imm5);
8177 
8178   if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
8179     int imm4 = instr->GetImmNEON4();
8180     int rn_index = ExtractSignedBitfield32(31, tz, imm4);
8181     ins_element(vf, rd, reg_index, rn, rn_index);
8182   } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
8183     ins_immediate(vf, rd, reg_index, ReadXRegister(instr->GetRn()));
8184   } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
8185     uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
8186     value &= MaxUintFromFormat(vf);
8187     WriteXRegister(instr->GetRd(), value);
8188   } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) {
8189     int64_t value = LogicVRegister(rn).Int(vf, reg_index);
8190     if (instr->GetNEONQ()) {
8191       WriteXRegister(instr->GetRd(), value);
8192     } else {
8193       WriteWRegister(instr->GetRd(), (int32_t)value);
8194     }
8195   } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
8196     dup_element(vf, rd, rn, reg_index);
8197   } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
8198     dup_immediate(vf, rd, ReadXRegister(instr->GetRn()));
8199   } else {
8200     VIXL_UNIMPLEMENTED();
8201   }
8202 }
8203 
8204 
8205 void Simulator::VisitNEONExtract(const Instruction* instr) {
8206   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
8207   VectorFormat vf = nfd.GetVectorFormat();
8208   SimVRegister& rd = ReadVRegister(instr->GetRd());
8209   SimVRegister& rn = ReadVRegister(instr->GetRn());
8210   SimVRegister& rm = ReadVRegister(instr->GetRm());
8211   if (instr->Mask(NEONExtractMask) == NEON_EXT) {
8212     int index = instr->GetImmNEONExt();
8213     ext(vf, rd, rn, rm, index);
8214   } else {
8215     VIXL_UNIMPLEMENTED();
8216   }
8217 }
8218 
8219 
8220 void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
8221                                                AddrMode addr_mode) {
8222   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
8223   VectorFormat vf = nfd.GetVectorFormat();
8224 
8225   uint64_t addr_base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
8226   int reg_size = RegisterSizeInBytesFromFormat(vf);
8227 
8228   int reg[4];
8229   uint64_t addr[4];
8230   for (int i = 0; i < 4; i++) {
8231     reg[i] = (instr->GetRt() + i) % kNumberOfVRegisters;
8232     addr[i] = addr_base + (i * reg_size);
8233   }
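  // struct_parts is the number of registers in each structure; reg_count is
  // the total number of registers transferred. They differ only for the
  // multi-register LD1/ST1 forms.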
8234   int struct_parts = 1;
8235   int reg_count = 1;
8236   bool log_read = true;
8237 
8238   // Bit 23 determines whether this is an offset or post-index addressing mode.
8239   // In offset mode, bits 20 to 16 should be zero; these bits encode the
8240   // register or immediate in post-index mode.
8241   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
8242     VIXL_UNREACHABLE();
8243   }
8244 
8245   // We use the PostIndex mask here, as it works in this case for both Offset
8246   // and PostIndex addressing.
8247   switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
8248     case NEON_LD1_4v:
8249     case NEON_LD1_4v_post:
8250       ld1(vf, ReadVRegister(reg[3]), addr[3]);
8251       reg_count++;
8252       VIXL_FALLTHROUGH();
8253     case NEON_LD1_3v:
8254     case NEON_LD1_3v_post:
8255       ld1(vf, ReadVRegister(reg[2]), addr[2]);
8256       reg_count++;
8257       VIXL_FALLTHROUGH();
8258     case NEON_LD1_2v:
8259     case NEON_LD1_2v_post:
8260       ld1(vf, ReadVRegister(reg[1]), addr[1]);
8261       reg_count++;
8262       VIXL_FALLTHROUGH();
8263     case NEON_LD1_1v:
8264     case NEON_LD1_1v_post:
8265       ld1(vf, ReadVRegister(reg[0]), addr[0]);
8266       break;
8267     case NEON_ST1_4v:
8268     case NEON_ST1_4v_post:
8269       st1(vf, ReadVRegister(reg[3]), addr[3]);
8270       reg_count++;
8271       VIXL_FALLTHROUGH();
8272     case NEON_ST1_3v:
8273     case NEON_ST1_3v_post:
8274       st1(vf, ReadVRegister(reg[2]), addr[2]);
8275       reg_count++;
8276       VIXL_FALLTHROUGH();
8277     case NEON_ST1_2v:
8278     case NEON_ST1_2v_post:
8279       st1(vf, ReadVRegister(reg[1]), addr[1]);
8280       reg_count++;
8281       VIXL_FALLTHROUGH();
8282     case NEON_ST1_1v:
8283     case NEON_ST1_1v_post:
8284       st1(vf, ReadVRegister(reg[0]), addr[0]);
8285       log_read = false;
8286       break;
8287     case NEON_LD2_post:
8288     case NEON_LD2:
8289       ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
8290       struct_parts = 2;
8291       reg_count = 2;
8292       break;
8293     case NEON_ST2:
8294     case NEON_ST2_post:
8295       st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
8296       struct_parts = 2;
8297       reg_count = 2;
8298       log_read = false;
8299       break;
8300     case NEON_LD3_post:
8301     case NEON_LD3:
8302       ld3(vf,
8303           ReadVRegister(reg[0]),
8304           ReadVRegister(reg[1]),
8305           ReadVRegister(reg[2]),
8306           addr[0]);
8307       struct_parts = 3;
8308       reg_count = 3;
8309       break;
8310     case NEON_ST3:
8311     case NEON_ST3_post:
8312       st3(vf,
8313           ReadVRegister(reg[0]),
8314           ReadVRegister(reg[1]),
8315           ReadVRegister(reg[2]),
8316           addr[0]);
8317       struct_parts = 3;
8318       reg_count = 3;
8319       log_read = false;
8320       break;
8321     case NEON_ST4:
8322     case NEON_ST4_post:
8323       st4(vf,
8324           ReadVRegister(reg[0]),
8325           ReadVRegister(reg[1]),
8326           ReadVRegister(reg[2]),
8327           ReadVRegister(reg[3]),
8328           addr[0]);
8329       struct_parts = 4;
8330       reg_count = 4;
8331       log_read = false;
8332       break;
8333     case NEON_LD4_post:
8334     case NEON_LD4:
8335       ld4(vf,
8336           ReadVRegister(reg[0]),
8337           ReadVRegister(reg[1]),
8338           ReadVRegister(reg[2]),
8339           ReadVRegister(reg[3]),
8340           addr[0]);
8341       struct_parts = 4;
8342       reg_count = 4;
8343       break;
8344     default:
8345       VIXL_UNIMPLEMENTED();
8346   }
8347 
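  // Trace the register updates (for loads) or the memory writes (for stores).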
8348   bool do_trace = log_read ? ShouldTraceVRegs() : ShouldTraceWrites();
8349   if (do_trace) {
8350     PrintRegisterFormat print_format =
8351         GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
8352     const char* op;
8353     if (log_read) {
8354       op = "<-";
8355     } else {
8356       op = "->";
8357       // Stores don't represent a change to the source register's value, so only
8358       // print the relevant part of the value.
8359       print_format = GetPrintRegPartial(print_format);
8360     }
8361 
8362     VIXL_ASSERT((struct_parts == reg_count) || (struct_parts == 1));
8363     for (int s = reg_count - struct_parts; s >= 0; s -= struct_parts) {
8364       uintptr_t address = addr_base + (s * RegisterSizeInBytesFromFormat(vf));
8365       PrintVStructAccess(reg[s], struct_parts, print_format, op, address);
8366     }
8367   }
8368 
8369   if (addr_mode == PostIndex) {
8370     int rm = instr->GetRm();
    // The immediate post-index addressing mode is indicated by rm = 31.
8372     // The immediate is implied by the number of vector registers used.
8373     addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count)
8374                             : ReadXRegister(rm);
8375     WriteXRegister(instr->GetRn(),
8376                    addr_base,
8377                    LogRegWrites,
8378                    Reg31IsStackPointer);
8379   } else {
8380     VIXL_ASSERT(addr_mode == Offset);
8381   }
8382 }
8383 
8384 
8385 void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
8386   NEONLoadStoreMultiStructHelper(instr, Offset);
8387 }
8388 
8389 
8390 void Simulator::VisitNEONLoadStoreMultiStructPostIndex(
8391     const Instruction* instr) {
8392   NEONLoadStoreMultiStructHelper(instr, PostIndex);
8393 }
8394 
8395 
8396 void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
8397                                                 AddrMode addr_mode) {
8398   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
8399   int rt = instr->GetRt();
8400 
8401   // Bit 23 determines whether this is an offset or post-index addressing mode.
8402   // In offset mode, bits 20 to 16 should be zero; these bits encode the
8403   // register or immediate in post-index mode.
8404   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
8405     VIXL_UNREACHABLE();
8406   }
8407 
8408   // We use the PostIndex mask here, as it works in this case for both Offset
8409   // and PostIndex addressing.
8410   bool do_load = false;
8411 
8412   bool replicating = false;
8413 
8414   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
8415   VectorFormat vf_t = nfd.GetVectorFormat();
8416 
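  // vf describes the access size; it defaults to B-sized lanes and is widened
  // below. vf_t (the full transfer format) is only needed for the replicating
  // LD1R-LD4R forms.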
8417   VectorFormat vf = kFormat16B;
8418   switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
8419     case NEON_LD1_b:
8420     case NEON_LD1_b_post:
8421     case NEON_LD2_b:
8422     case NEON_LD2_b_post:
8423     case NEON_LD3_b:
8424     case NEON_LD3_b_post:
8425     case NEON_LD4_b:
8426     case NEON_LD4_b_post:
8427       do_load = true;
8428       VIXL_FALLTHROUGH();
8429     case NEON_ST1_b:
8430     case NEON_ST1_b_post:
8431     case NEON_ST2_b:
8432     case NEON_ST2_b_post:
8433     case NEON_ST3_b:
8434     case NEON_ST3_b_post:
8435     case NEON_ST4_b:
8436     case NEON_ST4_b_post:
8437       break;
8438 
8439     case NEON_LD1_h:
8440     case NEON_LD1_h_post:
8441     case NEON_LD2_h:
8442     case NEON_LD2_h_post:
8443     case NEON_LD3_h:
8444     case NEON_LD3_h_post:
8445     case NEON_LD4_h:
8446     case NEON_LD4_h_post:
8447       do_load = true;
8448       VIXL_FALLTHROUGH();
8449     case NEON_ST1_h:
8450     case NEON_ST1_h_post:
8451     case NEON_ST2_h:
8452     case NEON_ST2_h_post:
8453     case NEON_ST3_h:
8454     case NEON_ST3_h_post:
8455     case NEON_ST4_h:
8456     case NEON_ST4_h_post:
8457       vf = kFormat8H;
8458       break;
8459     case NEON_LD1_s:
8460     case NEON_LD1_s_post:
8461     case NEON_LD2_s:
8462     case NEON_LD2_s_post:
8463     case NEON_LD3_s:
8464     case NEON_LD3_s_post:
8465     case NEON_LD4_s:
8466     case NEON_LD4_s_post:
8467       do_load = true;
8468       VIXL_FALLTHROUGH();
8469     case NEON_ST1_s:
8470     case NEON_ST1_s_post:
8471     case NEON_ST2_s:
8472     case NEON_ST2_s_post:
8473     case NEON_ST3_s:
8474     case NEON_ST3_s_post:
8475     case NEON_ST4_s:
8476     case NEON_ST4_s_post: {
8477       VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
8478       VIXL_STATIC_ASSERT((NEON_LD1_s_post | (1 << NEONLSSize_offset)) ==
8479                          NEON_LD1_d_post);
8480       VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
8481       VIXL_STATIC_ASSERT((NEON_ST1_s_post | (1 << NEONLSSize_offset)) ==
8482                          NEON_ST1_d_post);
8483       vf = ((instr->GetNEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D;
8484       break;
8485     }
8486 
8487     case NEON_LD1R:
8488     case NEON_LD1R_post:
8489     case NEON_LD2R:
8490     case NEON_LD2R_post:
8491     case NEON_LD3R:
8492     case NEON_LD3R_post:
8493     case NEON_LD4R:
8494     case NEON_LD4R_post:
8495       vf = vf_t;
8496       do_load = true;
8497       replicating = true;
8498       break;
8499 
8500     default:
8501       VIXL_UNIMPLEMENTED();
8502   }
8503 
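  // The lane number is encoded in the Q, S and size fields; larger lane sizes
  // use fewer of those bits.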
8504   int index_shift = LaneSizeInBytesLog2FromFormat(vf);
8505   int lane = instr->GetNEONLSIndex(index_shift);
8506   int reg_count = 0;
8507   int rt2 = (rt + 1) % kNumberOfVRegisters;
8508   int rt3 = (rt2 + 1) % kNumberOfVRegisters;
8509   int rt4 = (rt3 + 1) % kNumberOfVRegisters;
8510   switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
8511     case NEONLoadStoreSingle1:
8512       reg_count = 1;
8513       if (replicating) {
8514         VIXL_ASSERT(do_load);
8515         ld1r(vf, ReadVRegister(rt), addr);
8516       } else if (do_load) {
8517         ld1(vf, ReadVRegister(rt), lane, addr);
8518       } else {
8519         st1(vf, ReadVRegister(rt), lane, addr);
8520       }
8521       break;
8522     case NEONLoadStoreSingle2:
8523       reg_count = 2;
8524       if (replicating) {
8525         VIXL_ASSERT(do_load);
8526         ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr);
8527       } else if (do_load) {
8528         ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
8529       } else {
8530         st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
8531       }
8532       break;
8533     case NEONLoadStoreSingle3:
8534       reg_count = 3;
8535       if (replicating) {
8536         VIXL_ASSERT(do_load);
8537         ld3r(vf,
8538              ReadVRegister(rt),
8539              ReadVRegister(rt2),
8540              ReadVRegister(rt3),
8541              addr);
8542       } else if (do_load) {
8543         ld3(vf,
8544             ReadVRegister(rt),
8545             ReadVRegister(rt2),
8546             ReadVRegister(rt3),
8547             lane,
8548             addr);
8549       } else {
8550         st3(vf,
8551             ReadVRegister(rt),
8552             ReadVRegister(rt2),
8553             ReadVRegister(rt3),
8554             lane,
8555             addr);
8556       }
8557       break;
8558     case NEONLoadStoreSingle4:
8559       reg_count = 4;
8560       if (replicating) {
8561         VIXL_ASSERT(do_load);
8562         ld4r(vf,
8563              ReadVRegister(rt),
8564              ReadVRegister(rt2),
8565              ReadVRegister(rt3),
8566              ReadVRegister(rt4),
8567              addr);
8568       } else if (do_load) {
8569         ld4(vf,
8570             ReadVRegister(rt),
8571             ReadVRegister(rt2),
8572             ReadVRegister(rt3),
8573             ReadVRegister(rt4),
8574             lane,
8575             addr);
8576       } else {
8577         st4(vf,
8578             ReadVRegister(rt),
8579             ReadVRegister(rt2),
8580             ReadVRegister(rt3),
8581             ReadVRegister(rt4),
8582             lane,
8583             addr);
8584       }
8585       break;
8586     default:
8587       VIXL_UNIMPLEMENTED();
8588   }
8589 
8590   // Trace registers and/or memory writes.
8591   PrintRegisterFormat print_format =
8592       GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
8593   if (do_load) {
8594     if (ShouldTraceVRegs()) {
8595       if (replicating) {
8596         PrintVReplicatingStructAccess(rt, reg_count, print_format, "<-", addr);
8597       } else {
8598         PrintVSingleStructAccess(rt, reg_count, lane, print_format, "<-", addr);
8599       }
8600     }
8601   } else {
8602     if (ShouldTraceWrites()) {
8603       // Stores don't represent a change to the source register's value, so only
8604       // print the relevant part of the value.
8605       print_format = GetPrintRegPartial(print_format);
8606       PrintVSingleStructAccess(rt, reg_count, lane, print_format, "->", addr);
8607     }
8608   }
8609 
8610   if (addr_mode == PostIndex) {
8611     int rm = instr->GetRm();
8612     int lane_size = LaneSizeInBytesFromFormat(vf);
8613     WriteXRegister(instr->GetRn(),
8614                    addr + ((rm == 31) ? (reg_count * lane_size)
8615                                       : ReadXRegister(rm)),
8616                    LogRegWrites,
8617                    Reg31IsStackPointer);
8618   }
8619 }
8620 
8621 
8622 void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
8623   NEONLoadStoreSingleStructHelper(instr, Offset);
8624 }
8625 
8626 
8627 void Simulator::VisitNEONLoadStoreSingleStructPostIndex(
8628     const Instruction* instr) {
8629   NEONLoadStoreSingleStructHelper(instr, PostIndex);
8630 }
8631 
8632 
8633 void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) {
8634   SimVRegister& rd = ReadVRegister(instr->GetRd());
8635   int cmode = instr->GetNEONCmode();
8636   int cmode_3_1 = (cmode >> 1) & 7;
8637   int cmode_3 = (cmode >> 3) & 1;
8638   int cmode_2 = (cmode >> 2) & 1;
8639   int cmode_1 = (cmode >> 1) & 1;
8640   int cmode_0 = cmode & 1;
8641   int half_enc = instr->ExtractBit(11);
8642   int q = instr->GetNEONQ();
8643   int op_bit = instr->GetNEONModImmOp();
8644   uint64_t imm8 = instr->GetImmNEONabcdefgh();
8645   // Find the format and immediate value
8646   uint64_t imm = 0;
8647   VectorFormat vform = kFormatUndefined;
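  // cmode<3:1> selects the element size and how imm8 is expanded (shift
  // amount, shifting ones, byte mask or FP constant).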
8648   switch (cmode_3_1) {
8649     case 0x0:
8650     case 0x1:
8651     case 0x2:
8652     case 0x3:
8653       vform = (q == 1) ? kFormat4S : kFormat2S;
8654       imm = imm8 << (8 * cmode_3_1);
8655       break;
8656     case 0x4:
8657     case 0x5:
8658       vform = (q == 1) ? kFormat8H : kFormat4H;
8659       imm = imm8 << (8 * cmode_1);
8660       break;
8661     case 0x6:
8662       vform = (q == 1) ? kFormat4S : kFormat2S;
8663       if (cmode_0 == 0) {
8664         imm = imm8 << 8 | 0x000000ff;
8665       } else {
8666         imm = imm8 << 16 | 0x0000ffff;
8667       }
8668       break;
8669     case 0x7:
8670       if (cmode_0 == 0 && op_bit == 0) {
8671         vform = q ? kFormat16B : kFormat8B;
8672         imm = imm8;
8673       } else if (cmode_0 == 0 && op_bit == 1) {
8674         vform = q ? kFormat2D : kFormat1D;
8675         imm = 0;
8676         for (int i = 0; i < 8; ++i) {
8677           if (imm8 & (1 << i)) {
8678             imm |= (UINT64_C(0xff) << (8 * i));
8679           }
8680         }
8681       } else {  // cmode_0 == 1, cmode == 0xf.
8682         if (half_enc == 1) {
8683           vform = q ? kFormat8H : kFormat4H;
8684           imm = Float16ToRawbits(instr->GetImmNEONFP16());
8685         } else if (op_bit == 0) {
8686           vform = q ? kFormat4S : kFormat2S;
8687           imm = FloatToRawbits(instr->GetImmNEONFP32());
8688         } else if (q == 1) {
8689           vform = kFormat2D;
8690           imm = DoubleToRawbits(instr->GetImmNEONFP64());
8691         } else {
8692           VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf));
8693           VisitUnallocated(instr);
8694         }
8695       }
8696       break;
8697     default:
8698       VIXL_UNREACHABLE();
8699       break;
8700   }
8701 
8702   // Find the operation
8703   NEONModifiedImmediateOp op;
8704   if (cmode_3 == 0) {
8705     if (cmode_0 == 0) {
8706       op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8707     } else {  // cmode<0> == '1'
8708       op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
8709     }
8710   } else {  // cmode<3> == '1'
8711     if (cmode_2 == 0) {
8712       if (cmode_0 == 0) {
8713         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8714       } else {  // cmode<0> == '1'
8715         op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
8716       }
8717     } else {  // cmode<2> == '1'
8718       if (cmode_1 == 0) {
8719         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8720       } else {  // cmode<1> == '1'
        // Both cmode<0> values decode to MOVI here.
        op = NEONModifiedImmediate_MOVI;
8726       }
8727     }
8728   }
8729 
8730   // Call the logic function
8731   if (op == NEONModifiedImmediate_ORR) {
8732     orr(vform, rd, rd, imm);
8733   } else if (op == NEONModifiedImmediate_BIC) {
8734     bic(vform, rd, rd, imm);
8735   } else if (op == NEONModifiedImmediate_MOVI) {
8736     movi(vform, rd, imm);
8737   } else if (op == NEONModifiedImmediate_MVNI) {
8738     mvni(vform, rd, imm);
8739   } else {
8740     VisitUnimplemented(instr);
8741   }
8742 }
8743 
8744 
8745 void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
8746   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8747   VectorFormat vf = nfd.GetVectorFormat();
8748 
8749   SimVRegister& rd = ReadVRegister(instr->GetRd());
8750   SimVRegister& rn = ReadVRegister(instr->GetRn());
8751 
8752   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
    // These instructions all use a two-bit size field, except NOT and RBIT,
    // which use the field to encode the operation.
8755     switch (instr->Mask(NEONScalar2RegMiscMask)) {
8756       case NEON_CMEQ_zero_scalar:
8757         cmp(vf, rd, rn, 0, eq);
8758         break;
8759       case NEON_CMGE_zero_scalar:
8760         cmp(vf, rd, rn, 0, ge);
8761         break;
8762       case NEON_CMGT_zero_scalar:
8763         cmp(vf, rd, rn, 0, gt);
8764         break;
8765       case NEON_CMLT_zero_scalar:
8766         cmp(vf, rd, rn, 0, lt);
8767         break;
8768       case NEON_CMLE_zero_scalar:
8769         cmp(vf, rd, rn, 0, le);
8770         break;
8771       case NEON_ABS_scalar:
8772         abs(vf, rd, rn);
8773         break;
8774       case NEON_SQABS_scalar:
8775         abs(vf, rd, rn).SignedSaturate(vf);
8776         break;
8777       case NEON_NEG_scalar:
8778         neg(vf, rd, rn);
8779         break;
8780       case NEON_SQNEG_scalar:
8781         neg(vf, rd, rn).SignedSaturate(vf);
8782         break;
8783       case NEON_SUQADD_scalar:
8784         suqadd(vf, rd, rd, rn);
8785         break;
8786       case NEON_USQADD_scalar:
8787         usqadd(vf, rd, rd, rn);
8788         break;
8789       default:
8790         VIXL_UNIMPLEMENTED();
8791         break;
8792     }
8793   } else {
8794     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
8795     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
8796 
    // These instructions all use a one-bit size field, except SQXTUN, SQXTN
    // and UQXTN, which use a two-bit size field.
8799     switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
8800       case NEON_FRECPE_scalar:
8801         frecpe(fpf, rd, rn, fpcr_rounding);
8802         break;
8803       case NEON_FRECPX_scalar:
8804         frecpx(fpf, rd, rn);
8805         break;
8806       case NEON_FRSQRTE_scalar:
8807         frsqrte(fpf, rd, rn);
8808         break;
8809       case NEON_FCMGT_zero_scalar:
8810         fcmp_zero(fpf, rd, rn, gt);
8811         break;
8812       case NEON_FCMGE_zero_scalar:
8813         fcmp_zero(fpf, rd, rn, ge);
8814         break;
8815       case NEON_FCMEQ_zero_scalar:
8816         fcmp_zero(fpf, rd, rn, eq);
8817         break;
8818       case NEON_FCMLE_zero_scalar:
8819         fcmp_zero(fpf, rd, rn, le);
8820         break;
8821       case NEON_FCMLT_zero_scalar:
8822         fcmp_zero(fpf, rd, rn, lt);
8823         break;
8824       case NEON_SCVTF_scalar:
8825         scvtf(fpf, rd, rn, 0, fpcr_rounding);
8826         break;
8827       case NEON_UCVTF_scalar:
8828         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
8829         break;
8830       case NEON_FCVTNS_scalar:
8831         fcvts(fpf, rd, rn, FPTieEven);
8832         break;
8833       case NEON_FCVTNU_scalar:
8834         fcvtu(fpf, rd, rn, FPTieEven);
8835         break;
8836       case NEON_FCVTPS_scalar:
8837         fcvts(fpf, rd, rn, FPPositiveInfinity);
8838         break;
8839       case NEON_FCVTPU_scalar:
8840         fcvtu(fpf, rd, rn, FPPositiveInfinity);
8841         break;
8842       case NEON_FCVTMS_scalar:
8843         fcvts(fpf, rd, rn, FPNegativeInfinity);
8844         break;
8845       case NEON_FCVTMU_scalar:
8846         fcvtu(fpf, rd, rn, FPNegativeInfinity);
8847         break;
8848       case NEON_FCVTZS_scalar:
8849         fcvts(fpf, rd, rn, FPZero);
8850         break;
8851       case NEON_FCVTZU_scalar:
8852         fcvtu(fpf, rd, rn, FPZero);
8853         break;
8854       case NEON_FCVTAS_scalar:
8855         fcvts(fpf, rd, rn, FPTieAway);
8856         break;
8857       case NEON_FCVTAU_scalar:
8858         fcvtu(fpf, rd, rn, FPTieAway);
8859         break;
8860       case NEON_FCVTXN_scalar:
8861         // Unlike all of the other FP instructions above, fcvtxn encodes dest
8862         // size S as size<0>=1. There's only one case, so we ignore the form.
8863         VIXL_ASSERT(instr->ExtractBit(22) == 1);
8864         fcvtxn(kFormatS, rd, rn);
8865         break;
8866       default:
8867         switch (instr->Mask(NEONScalar2RegMiscMask)) {
8868           case NEON_SQXTN_scalar:
8869             sqxtn(vf, rd, rn);
8870             break;
8871           case NEON_UQXTN_scalar:
8872             uqxtn(vf, rd, rn);
8873             break;
8874           case NEON_SQXTUN_scalar:
8875             sqxtun(vf, rd, rn);
8876             break;
8877           default:
8878             VIXL_UNIMPLEMENTED();
8879         }
8880     }
8881   }
8882 }
8883 
8884 
8885 void Simulator::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
8886   VectorFormat fpf = kFormatH;
8887   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
8888 
8889   SimVRegister& rd = ReadVRegister(instr->GetRd());
8890   SimVRegister& rn = ReadVRegister(instr->GetRn());
8891 
8892   switch (instr->Mask(NEONScalar2RegMiscFP16Mask)) {
8893     case NEON_FRECPE_H_scalar:
8894       frecpe(fpf, rd, rn, fpcr_rounding);
8895       break;
8896     case NEON_FRECPX_H_scalar:
8897       frecpx(fpf, rd, rn);
8898       break;
8899     case NEON_FRSQRTE_H_scalar:
8900       frsqrte(fpf, rd, rn);
8901       break;
8902     case NEON_FCMGT_H_zero_scalar:
8903       fcmp_zero(fpf, rd, rn, gt);
8904       break;
8905     case NEON_FCMGE_H_zero_scalar:
8906       fcmp_zero(fpf, rd, rn, ge);
8907       break;
8908     case NEON_FCMEQ_H_zero_scalar:
8909       fcmp_zero(fpf, rd, rn, eq);
8910       break;
8911     case NEON_FCMLE_H_zero_scalar:
8912       fcmp_zero(fpf, rd, rn, le);
8913       break;
8914     case NEON_FCMLT_H_zero_scalar:
8915       fcmp_zero(fpf, rd, rn, lt);
8916       break;
8917     case NEON_SCVTF_H_scalar:
8918       scvtf(fpf, rd, rn, 0, fpcr_rounding);
8919       break;
8920     case NEON_UCVTF_H_scalar:
8921       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
8922       break;
8923     case NEON_FCVTNS_H_scalar:
8924       fcvts(fpf, rd, rn, FPTieEven);
8925       break;
8926     case NEON_FCVTNU_H_scalar:
8927       fcvtu(fpf, rd, rn, FPTieEven);
8928       break;
8929     case NEON_FCVTPS_H_scalar:
8930       fcvts(fpf, rd, rn, FPPositiveInfinity);
8931       break;
8932     case NEON_FCVTPU_H_scalar:
8933       fcvtu(fpf, rd, rn, FPPositiveInfinity);
8934       break;
8935     case NEON_FCVTMS_H_scalar:
8936       fcvts(fpf, rd, rn, FPNegativeInfinity);
8937       break;
8938     case NEON_FCVTMU_H_scalar:
8939       fcvtu(fpf, rd, rn, FPNegativeInfinity);
8940       break;
8941     case NEON_FCVTZS_H_scalar:
8942       fcvts(fpf, rd, rn, FPZero);
8943       break;
8944     case NEON_FCVTZU_H_scalar:
8945       fcvtu(fpf, rd, rn, FPZero);
8946       break;
8947     case NEON_FCVTAS_H_scalar:
8948       fcvts(fpf, rd, rn, FPTieAway);
8949       break;
8950     case NEON_FCVTAU_H_scalar:
8951       fcvtu(fpf, rd, rn, FPTieAway);
8952       break;
8953   }
8954 }
8955 
8956 
8957 void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
8958   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
8959   VectorFormat vf = nfd.GetVectorFormat();
8960 
8961   SimVRegister& rd = ReadVRegister(instr->GetRd());
8962   SimVRegister& rn = ReadVRegister(instr->GetRn());
8963   SimVRegister& rm = ReadVRegister(instr->GetRm());
8964   switch (instr->Mask(NEONScalar3DiffMask)) {
8965     case NEON_SQDMLAL_scalar:
8966       sqdmlal(vf, rd, rn, rm);
8967       break;
8968     case NEON_SQDMLSL_scalar:
8969       sqdmlsl(vf, rd, rn, rm);
8970       break;
8971     case NEON_SQDMULL_scalar:
8972       sqdmull(vf, rd, rn, rm);
8973       break;
8974     default:
8975       VIXL_UNIMPLEMENTED();
8976   }
8977 }
8978 
8979 
8980 void Simulator::VisitNEONScalar3Same(const Instruction* instr) {
8981   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8982   VectorFormat vf = nfd.GetVectorFormat();
8983 
8984   SimVRegister& rd = ReadVRegister(instr->GetRd());
8985   SimVRegister& rn = ReadVRegister(instr->GetRn());
8986   SimVRegister& rm = ReadVRegister(instr->GetRm());
8987 
8988   if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
8989     vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
8990     switch (instr->Mask(NEONScalar3SameFPMask)) {
8991       case NEON_FMULX_scalar:
8992         fmulx(vf, rd, rn, rm);
8993         break;
8994       case NEON_FACGE_scalar:
8995         fabscmp(vf, rd, rn, rm, ge);
8996         break;
8997       case NEON_FACGT_scalar:
8998         fabscmp(vf, rd, rn, rm, gt);
8999         break;
9000       case NEON_FCMEQ_scalar:
9001         fcmp(vf, rd, rn, rm, eq);
9002         break;
9003       case NEON_FCMGE_scalar:
9004         fcmp(vf, rd, rn, rm, ge);
9005         break;
9006       case NEON_FCMGT_scalar:
9007         fcmp(vf, rd, rn, rm, gt);
9008         break;
9009       case NEON_FRECPS_scalar:
9010         frecps(vf, rd, rn, rm);
9011         break;
9012       case NEON_FRSQRTS_scalar:
9013         frsqrts(vf, rd, rn, rm);
9014         break;
9015       case NEON_FABD_scalar:
9016         fabd(vf, rd, rn, rm);
9017         break;
9018       default:
9019         VIXL_UNIMPLEMENTED();
9020     }
9021   } else {
9022     switch (instr->Mask(NEONScalar3SameMask)) {
9023       case NEON_ADD_scalar:
9024         add(vf, rd, rn, rm);
9025         break;
9026       case NEON_SUB_scalar:
9027         sub(vf, rd, rn, rm);
9028         break;
9029       case NEON_CMEQ_scalar:
9030         cmp(vf, rd, rn, rm, eq);
9031         break;
9032       case NEON_CMGE_scalar:
9033         cmp(vf, rd, rn, rm, ge);
9034         break;
9035       case NEON_CMGT_scalar:
9036         cmp(vf, rd, rn, rm, gt);
9037         break;
9038       case NEON_CMHI_scalar:
9039         cmp(vf, rd, rn, rm, hi);
9040         break;
9041       case NEON_CMHS_scalar:
9042         cmp(vf, rd, rn, rm, hs);
9043         break;
9044       case NEON_CMTST_scalar:
9045         cmptst(vf, rd, rn, rm);
9046         break;
9047       case NEON_USHL_scalar:
9048         ushl(vf, rd, rn, rm);
9049         break;
9050       case NEON_SSHL_scalar:
9051         sshl(vf, rd, rn, rm);
9052         break;
9053       case NEON_SQDMULH_scalar:
9054         sqdmulh(vf, rd, rn, rm);
9055         break;
9056       case NEON_SQRDMULH_scalar:
9057         sqrdmulh(vf, rd, rn, rm);
9058         break;
9059       case NEON_UQADD_scalar:
9060         add(vf, rd, rn, rm).UnsignedSaturate(vf);
9061         break;
9062       case NEON_SQADD_scalar:
9063         add(vf, rd, rn, rm).SignedSaturate(vf);
9064         break;
9065       case NEON_UQSUB_scalar:
9066         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
9067         break;
9068       case NEON_SQSUB_scalar:
9069         sub(vf, rd, rn, rm).SignedSaturate(vf);
9070         break;
9071       case NEON_UQSHL_scalar:
9072         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
9073         break;
9074       case NEON_SQSHL_scalar:
9075         sshl(vf, rd, rn, rm).SignedSaturate(vf);
9076         break;
9077       case NEON_URSHL_scalar:
9078         ushl(vf, rd, rn, rm).Round(vf);
9079         break;
9080       case NEON_SRSHL_scalar:
9081         sshl(vf, rd, rn, rm).Round(vf);
9082         break;
9083       case NEON_UQRSHL_scalar:
9084         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
9085         break;
9086       case NEON_SQRSHL_scalar:
9087         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
9088         break;
9089       default:
9090         VIXL_UNIMPLEMENTED();
9091     }
9092   }
9093 }
9094 
9095 void Simulator::VisitNEONScalar3SameFP16(const Instruction* instr) {
9096   SimVRegister& rd = ReadVRegister(instr->GetRd());
9097   SimVRegister& rn = ReadVRegister(instr->GetRn());
9098   SimVRegister& rm = ReadVRegister(instr->GetRm());
9099 
9100   switch (instr->Mask(NEONScalar3SameFP16Mask)) {
9101     case NEON_FABD_H_scalar:
9102       fabd(kFormatH, rd, rn, rm);
9103       break;
9104     case NEON_FMULX_H_scalar:
9105       fmulx(kFormatH, rd, rn, rm);
9106       break;
9107     case NEON_FCMEQ_H_scalar:
9108       fcmp(kFormatH, rd, rn, rm, eq);
9109       break;
9110     case NEON_FCMGE_H_scalar:
9111       fcmp(kFormatH, rd, rn, rm, ge);
9112       break;
9113     case NEON_FCMGT_H_scalar:
9114       fcmp(kFormatH, rd, rn, rm, gt);
9115       break;
9116     case NEON_FACGE_H_scalar:
9117       fabscmp(kFormatH, rd, rn, rm, ge);
9118       break;
9119     case NEON_FACGT_H_scalar:
9120       fabscmp(kFormatH, rd, rn, rm, gt);
9121       break;
9122     case NEON_FRECPS_H_scalar:
9123       frecps(kFormatH, rd, rn, rm);
9124       break;
9125     case NEON_FRSQRTS_H_scalar:
9126       frsqrts(kFormatH, rd, rn, rm);
9127       break;
9128     default:
9129       VIXL_UNREACHABLE();
9130   }
9131 }
9132 
9133 
9134 void Simulator::VisitNEONScalar3SameExtra(const Instruction* instr) {
9135   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
9136   VectorFormat vf = nfd.GetVectorFormat();
9137 
9138   SimVRegister& rd = ReadVRegister(instr->GetRd());
9139   SimVRegister& rn = ReadVRegister(instr->GetRn());
9140   SimVRegister& rm = ReadVRegister(instr->GetRm());
9141 
9142   switch (instr->Mask(NEONScalar3SameExtraMask)) {
9143     case NEON_SQRDMLAH_scalar:
9144       sqrdmlah(vf, rd, rn, rm);
9145       break;
9146     case NEON_SQRDMLSH_scalar:
9147       sqrdmlsh(vf, rd, rn, rm);
9148       break;
9149     default:
9150       VIXL_UNIMPLEMENTED();
9151   }
9152 }
9153 
9154 void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
9155   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
9156   VectorFormat vf = nfd.GetVectorFormat();
9157   VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
9158 
9159   SimVRegister& rd = ReadVRegister(instr->GetRd());
9160   SimVRegister& rn = ReadVRegister(instr->GetRn());
9161   ByElementOp Op = NULL;
9162 
9163   int rm_reg = instr->GetRm();
9164   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
9165   if (instr->GetNEONSize() == 1) {
9166     rm_reg &= 0xf;
9167     index = (index << 1) | instr->GetNEONM();
9168   }
9169 
9170   switch (instr->Mask(NEONScalarByIndexedElementMask)) {
9171     case NEON_SQDMULL_byelement_scalar:
9172       Op = &Simulator::sqdmull;
9173       break;
9174     case NEON_SQDMLAL_byelement_scalar:
9175       Op = &Simulator::sqdmlal;
9176       break;
9177     case NEON_SQDMLSL_byelement_scalar:
9178       Op = &Simulator::sqdmlsl;
9179       break;
9180     case NEON_SQDMULH_byelement_scalar:
9181       Op = &Simulator::sqdmulh;
9182       vf = vf_r;
9183       break;
9184     case NEON_SQRDMULH_byelement_scalar:
9185       Op = &Simulator::sqrdmulh;
9186       vf = vf_r;
9187       break;
9188     case NEON_SQRDMLAH_byelement_scalar:
9189       Op = &Simulator::sqrdmlah;
9190       vf = vf_r;
9191       break;
9192     case NEON_SQRDMLSH_byelement_scalar:
9193       Op = &Simulator::sqrdmlsh;
9194       vf = vf_r;
9195       break;
9196     default:
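      // FP by-element forms: the half-precision form uses the full H:L:M
      // index and a 4-bit Rm; the wider forms use H:L (single) or H alone
      // (double).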
9197       vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
9198       index = instr->GetNEONH();
9199       if (instr->GetFPType() == 0) {
9200         index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
9201         rm_reg &= 0xf;
9202         vf = kFormatH;
9203       } else if ((instr->GetFPType() & 1) == 0) {
9204         index = (index << 1) | instr->GetNEONL();
9205       }
9206       switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
9207         case NEON_FMUL_H_byelement_scalar:
9208         case NEON_FMUL_byelement_scalar:
9209           Op = &Simulator::fmul;
9210           break;
9211         case NEON_FMLA_H_byelement_scalar:
9212         case NEON_FMLA_byelement_scalar:
9213           Op = &Simulator::fmla;
9214           break;
9215         case NEON_FMLS_H_byelement_scalar:
9216         case NEON_FMLS_byelement_scalar:
9217           Op = &Simulator::fmls;
9218           break;
9219         case NEON_FMULX_H_byelement_scalar:
9220         case NEON_FMULX_byelement_scalar:
9221           Op = &Simulator::fmulx;
9222           break;
9223         default:
9224           VIXL_UNIMPLEMENTED();
9225       }
9226   }
9227 
9228   (this->*Op)(vf, rd, rn, ReadVRegister(rm_reg), index);
9229 }
9230 
9231 
9232 void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
9233   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
9234   VectorFormat vf = nfd.GetVectorFormat();
9235 
9236   SimVRegister& rd = ReadVRegister(instr->GetRd());
9237   SimVRegister& rn = ReadVRegister(instr->GetRn());
9238 
9239   if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
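         // The lowest set bit of imm5 selects the lane size; the bits above it
         // give the index of the source element.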
9240     int imm5 = instr->GetImmNEON5();
9241     int tz = CountTrailingZeros(imm5, 32);
9242     int rn_index = ExtractSignedBitfield32(31, tz + 1, imm5);
9243     dup_element(vf, rd, rn, rn_index);
9244   } else {
9245     VIXL_UNIMPLEMENTED();
9246   }
9247 }
9248 
9249 
9250 void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
9251   NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarPairwiseFormatMap());
9252   VectorFormat vf = nfd.GetVectorFormat();
9253 
9254   SimVRegister& rd = ReadVRegister(instr->GetRd());
9255   SimVRegister& rn = ReadVRegister(instr->GetRn());
9256   switch (instr->Mask(NEONScalarPairwiseMask)) {
9257     case NEON_ADDP_scalar: {
9258       // All pairwise operations except ADDP use bit U to differentiate FP16
9259       // from FP32/FP64 variations.
9260       NEONFormatDecoder nfd_addp(instr, NEONFormatDecoder::FPScalarFormatMap());
9261       addp(nfd_addp.GetVectorFormat(), rd, rn);
9262       break;
9263     }
9264     case NEON_FADDP_h_scalar:
9265     case NEON_FADDP_scalar:
9266       faddp(vf, rd, rn);
9267       break;
9268     case NEON_FMAXP_h_scalar:
9269     case NEON_FMAXP_scalar:
9270       fmaxp(vf, rd, rn);
9271       break;
9272     case NEON_FMAXNMP_h_scalar:
9273     case NEON_FMAXNMP_scalar:
9274       fmaxnmp(vf, rd, rn);
9275       break;
9276     case NEON_FMINP_h_scalar:
9277     case NEON_FMINP_scalar:
9278       fminp(vf, rd, rn);
9279       break;
9280     case NEON_FMINNMP_h_scalar:
9281     case NEON_FMINNMP_scalar:
9282       fminnmp(vf, rd, rn);
9283       break;
9284     default:
9285       VIXL_UNIMPLEMENTED();
9286   }
9287 }
9288 
9289 
9290 void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
9291   SimVRegister& rd = ReadVRegister(instr->GetRd());
9292   SimVRegister& rn = ReadVRegister(instr->GetRn());
9293   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
9294 
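       // 0001->B, 001x->H, 01xx->S, 1xxx->D, all others undefined.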
9295   static const NEONFormatMap map = {{22, 21, 20, 19},
9296                                     {NF_UNDEF,
9297                                      NF_B,
9298                                      NF_H,
9299                                      NF_H,
9300                                      NF_S,
9301                                      NF_S,
9302                                      NF_S,
9303                                      NF_S,
9304                                      NF_D,
9305                                      NF_D,
9306                                      NF_D,
9307                                      NF_D,
9308                                      NF_D,
9309                                      NF_D,
9310                                      NF_D,
9311                                      NF_D}};
9312   NEONFormatDecoder nfd(instr, &map);
9313   VectorFormat vf = nfd.GetVectorFormat();
9314 
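       // immh:immb encodes both the lane size and the shift distance. The lane
       // size (in bits) is 8 << HighestSetBit(immh); a right shift distance is
       // (2 * lane_size) - immh:immb and a left shift distance is
       // immh:immb - lane_size.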
9315   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
9316   int immh_immb = instr->GetImmNEONImmhImmb();
9317   int right_shift = (16 << highest_set_bit) - immh_immb;
9318   int left_shift = immh_immb - (8 << highest_set_bit);
9319   switch (instr->Mask(NEONScalarShiftImmediateMask)) {
9320     case NEON_SHL_scalar:
9321       shl(vf, rd, rn, left_shift);
9322       break;
9323     case NEON_SLI_scalar:
9324       sli(vf, rd, rn, left_shift);
9325       break;
9326     case NEON_SQSHL_imm_scalar:
9327       sqshl(vf, rd, rn, left_shift);
9328       break;
9329     case NEON_UQSHL_imm_scalar:
9330       uqshl(vf, rd, rn, left_shift);
9331       break;
9332     case NEON_SQSHLU_scalar:
9333       sqshlu(vf, rd, rn, left_shift);
9334       break;
9335     case NEON_SRI_scalar:
9336       sri(vf, rd, rn, right_shift);
9337       break;
9338     case NEON_SSHR_scalar:
9339       sshr(vf, rd, rn, right_shift);
9340       break;
9341     case NEON_USHR_scalar:
9342       ushr(vf, rd, rn, right_shift);
9343       break;
9344     case NEON_SRSHR_scalar:
9345       sshr(vf, rd, rn, right_shift).Round(vf);
9346       break;
9347     case NEON_URSHR_scalar:
9348       ushr(vf, rd, rn, right_shift).Round(vf);
9349       break;
9350     case NEON_SSRA_scalar:
9351       ssra(vf, rd, rn, right_shift);
9352       break;
9353     case NEON_USRA_scalar:
9354       usra(vf, rd, rn, right_shift);
9355       break;
9356     case NEON_SRSRA_scalar:
9357       srsra(vf, rd, rn, right_shift);
9358       break;
9359     case NEON_URSRA_scalar:
9360       ursra(vf, rd, rn, right_shift);
9361       break;
9362     case NEON_UQSHRN_scalar:
9363       uqshrn(vf, rd, rn, right_shift);
9364       break;
9365     case NEON_UQRSHRN_scalar:
9366       uqrshrn(vf, rd, rn, right_shift);
9367       break;
9368     case NEON_SQSHRN_scalar:
9369       sqshrn(vf, rd, rn, right_shift);
9370       break;
9371     case NEON_SQRSHRN_scalar:
9372       sqrshrn(vf, rd, rn, right_shift);
9373       break;
9374     case NEON_SQSHRUN_scalar:
9375       sqshrun(vf, rd, rn, right_shift);
9376       break;
9377     case NEON_SQRSHRUN_scalar:
9378       sqrshrun(vf, rd, rn, right_shift);
9379       break;
9380     case NEON_FCVTZS_imm_scalar:
9381       fcvts(vf, rd, rn, FPZero, right_shift);
9382       break;
9383     case NEON_FCVTZU_imm_scalar:
9384       fcvtu(vf, rd, rn, FPZero, right_shift);
9385       break;
9386     case NEON_SCVTF_imm_scalar:
9387       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
9388       break;
9389     case NEON_UCVTF_imm_scalar:
9390       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
9391       break;
9392     default:
9393       VIXL_UNIMPLEMENTED();
9394   }
9395 }
9396 
9397 
9398 void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
9399   SimVRegister& rd = ReadVRegister(instr->GetRd());
9400   SimVRegister& rn = ReadVRegister(instr->GetRn());
9401   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
9402 
9403   // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
9404   // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
9405   static const NEONFormatMap map = {{22, 21, 20, 19, 30},
9406                                     {NF_UNDEF, NF_UNDEF, NF_8B,    NF_16B,
9407                                      NF_4H,    NF_8H,    NF_4H,    NF_8H,
9408                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
9409                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
9410                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9411                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9412                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9413                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D}};
9414   NEONFormatDecoder nfd(instr, &map);
9415   VectorFormat vf = nfd.GetVectorFormat();
9416 
9417   // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
9418   static const NEONFormatMap map_l =
9419       {{22, 21, 20, 19},
9420        {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
9421   VectorFormat vf_l = nfd.GetVectorFormat(&map_l);
9422 
9423   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
9424   int immh_immb = instr->GetImmNEONImmhImmb();
9425   int right_shift = (16 << highest_set_bit) - immh_immb;
9426   int left_shift = immh_immb - (8 << highest_set_bit);
9427 
9428   switch (instr->Mask(NEONShiftImmediateMask)) {
9429     case NEON_SHL:
9430       shl(vf, rd, rn, left_shift);
9431       break;
9432     case NEON_SLI:
9433       sli(vf, rd, rn, left_shift);
9434       break;
9435     case NEON_SQSHLU:
9436       sqshlu(vf, rd, rn, left_shift);
9437       break;
9438     case NEON_SRI:
9439       sri(vf, rd, rn, right_shift);
9440       break;
9441     case NEON_SSHR:
9442       sshr(vf, rd, rn, right_shift);
9443       break;
9444     case NEON_USHR:
9445       ushr(vf, rd, rn, right_shift);
9446       break;
9447     case NEON_SRSHR:
9448       sshr(vf, rd, rn, right_shift).Round(vf);
9449       break;
9450     case NEON_URSHR:
9451       ushr(vf, rd, rn, right_shift).Round(vf);
9452       break;
9453     case NEON_SSRA:
9454       ssra(vf, rd, rn, right_shift);
9455       break;
9456     case NEON_USRA:
9457       usra(vf, rd, rn, right_shift);
9458       break;
9459     case NEON_SRSRA:
9460       srsra(vf, rd, rn, right_shift);
9461       break;
9462     case NEON_URSRA:
9463       ursra(vf, rd, rn, right_shift);
9464       break;
9465     case NEON_SQSHL_imm:
9466       sqshl(vf, rd, rn, left_shift);
9467       break;
9468     case NEON_UQSHL_imm:
9469       uqshl(vf, rd, rn, left_shift);
9470       break;
9471     case NEON_SCVTF_imm:
9472       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
9473       break;
9474     case NEON_UCVTF_imm:
9475       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
9476       break;
9477     case NEON_FCVTZS_imm:
9478       fcvts(vf, rd, rn, FPZero, right_shift);
9479       break;
9480     case NEON_FCVTZU_imm:
9481       fcvtu(vf, rd, rn, FPZero, right_shift);
9482       break;
9483     case NEON_SSHLL:
9484       vf = vf_l;
9485       if (instr->Mask(NEON_Q)) {
9486         sshll2(vf, rd, rn, left_shift);
9487       } else {
9488         sshll(vf, rd, rn, left_shift);
9489       }
9490       break;
9491     case NEON_USHLL:
9492       vf = vf_l;
9493       if (instr->Mask(NEON_Q)) {
9494         ushll2(vf, rd, rn, left_shift);
9495       } else {
9496         ushll(vf, rd, rn, left_shift);
9497       }
9498       break;
9499     case NEON_SHRN:
9500       if (instr->Mask(NEON_Q)) {
9501         shrn2(vf, rd, rn, right_shift);
9502       } else {
9503         shrn(vf, rd, rn, right_shift);
9504       }
9505       break;
9506     case NEON_RSHRN:
9507       if (instr->Mask(NEON_Q)) {
9508         rshrn2(vf, rd, rn, right_shift);
9509       } else {
9510         rshrn(vf, rd, rn, right_shift);
9511       }
9512       break;
9513     case NEON_UQSHRN:
9514       if (instr->Mask(NEON_Q)) {
9515         uqshrn2(vf, rd, rn, right_shift);
9516       } else {
9517         uqshrn(vf, rd, rn, right_shift);
9518       }
9519       break;
9520     case NEON_UQRSHRN:
9521       if (instr->Mask(NEON_Q)) {
9522         uqrshrn2(vf, rd, rn, right_shift);
9523       } else {
9524         uqrshrn(vf, rd, rn, right_shift);
9525       }
9526       break;
9527     case NEON_SQSHRN:
9528       if (instr->Mask(NEON_Q)) {
9529         sqshrn2(vf, rd, rn, right_shift);
9530       } else {
9531         sqshrn(vf, rd, rn, right_shift);
9532       }
9533       break;
9534     case NEON_SQRSHRN:
9535       if (instr->Mask(NEON_Q)) {
9536         sqrshrn2(vf, rd, rn, right_shift);
9537       } else {
9538         sqrshrn(vf, rd, rn, right_shift);
9539       }
9540       break;
9541     case NEON_SQSHRUN:
9542       if (instr->Mask(NEON_Q)) {
9543         sqshrun2(vf, rd, rn, right_shift);
9544       } else {
9545         sqshrun(vf, rd, rn, right_shift);
9546       }
9547       break;
9548     case NEON_SQRSHRUN:
9549       if (instr->Mask(NEON_Q)) {
9550         sqrshrun2(vf, rd, rn, right_shift);
9551       } else {
9552         sqrshrun(vf, rd, rn, right_shift);
9553       }
9554       break;
9555     default:
9556       VIXL_UNIMPLEMENTED();
9557   }
9558 }
9559 
9560 
9561 void Simulator::VisitNEONTable(const Instruction* instr) {
9562   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
9563   VectorFormat vf = nfd.GetVectorFormat();
9564 
9565   SimVRegister& rd = ReadVRegister(instr->GetRd());
9566   SimVRegister& rn = ReadVRegister(instr->GetRn());
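       // The second and subsequent table registers are consecutive, wrapping
       // from V31 back to V0.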
9567   SimVRegister& rn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfVRegisters);
9568   SimVRegister& rn3 = ReadVRegister((instr->GetRn() + 2) % kNumberOfVRegisters);
9569   SimVRegister& rn4 = ReadVRegister((instr->GetRn() + 3) % kNumberOfVRegisters);
9570   SimVRegister& rm = ReadVRegister(instr->GetRm());
9571 
9572   switch (instr->Mask(NEONTableMask)) {
9573     case NEON_TBL_1v:
9574       tbl(vf, rd, rn, rm);
9575       break;
9576     case NEON_TBL_2v:
9577       tbl(vf, rd, rn, rn2, rm);
9578       break;
9579     case NEON_TBL_3v:
9580       tbl(vf, rd, rn, rn2, rn3, rm);
9581       break;
9582     case NEON_TBL_4v:
9583       tbl(vf, rd, rn, rn2, rn3, rn4, rm);
9584       break;
9585     case NEON_TBX_1v:
9586       tbx(vf, rd, rn, rm);
9587       break;
9588     case NEON_TBX_2v:
9589       tbx(vf, rd, rn, rn2, rm);
9590       break;
9591     case NEON_TBX_3v:
9592       tbx(vf, rd, rn, rn2, rn3, rm);
9593       break;
9594     case NEON_TBX_4v:
9595       tbx(vf, rd, rn, rn2, rn3, rn4, rm);
9596       break;
9597     default:
9598       VIXL_UNIMPLEMENTED();
9599   }
9600 }
9601 
9602 
9603 void Simulator::VisitNEONPerm(const Instruction* instr) {
9604   NEONFormatDecoder nfd(instr);
9605   VectorFormat vf = nfd.GetVectorFormat();
9606 
9607   SimVRegister& rd = ReadVRegister(instr->GetRd());
9608   SimVRegister& rn = ReadVRegister(instr->GetRn());
9609   SimVRegister& rm = ReadVRegister(instr->GetRm());
9610 
9611   switch (instr->Mask(NEONPermMask)) {
9612     case NEON_TRN1:
9613       trn1(vf, rd, rn, rm);
9614       break;
9615     case NEON_TRN2:
9616       trn2(vf, rd, rn, rm);
9617       break;
9618     case NEON_UZP1:
9619       uzp1(vf, rd, rn, rm);
9620       break;
9621     case NEON_UZP2:
9622       uzp2(vf, rd, rn, rm);
9623       break;
9624     case NEON_ZIP1:
9625       zip1(vf, rd, rn, rm);
9626       break;
9627     case NEON_ZIP2:
9628       zip2(vf, rd, rn, rm);
9629       break;
9630     default:
9631       VIXL_UNIMPLEMENTED();
9632   }
9633 }
9634 
9635 void Simulator::VisitSVEAddressGeneration(const Instruction* instr) {
9636   SimVRegister& zd = ReadVRegister(instr->GetRd());
9637   SimVRegister& zn = ReadVRegister(instr->GetRn());
9638   SimVRegister& zm = ReadVRegister(instr->GetRm());
9639   SimVRegister temp;
9640 
9641   VectorFormat vform = kFormatVnD;
9642   mov(vform, temp, zm);
9643 
9644   switch (instr->Mask(SVEAddressGenerationMask)) {
9645     case ADR_z_az_d_s32_scaled:
9646       sxt(vform, temp, temp, kSRegSize);
9647       break;
9648     case ADR_z_az_d_u32_scaled:
9649       uxt(vform, temp, temp, kSRegSize);
9650       break;
9651     case ADR_z_az_s_same_scaled:
9652       vform = kFormatVnS;
9653       break;
9654     case ADR_z_az_d_same_scaled:
9655       // Nothing to do.
9656       break;
9657     default:
9658       VIXL_UNIMPLEMENTED();
9659       break;
9660   }
9661 
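       // Bits 11:10 give the scaling: the (possibly extended) offsets in zm are
       // shifted left by 0-3 before being added to the base addresses in zn.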
9662   int shift_amount = instr->ExtractBits(11, 10);
9663   shl(vform, temp, temp, shift_amount);
9664   add(vform, zd, zn, temp);
9665 }
9666 
9667 void Simulator::VisitSVEBitwiseLogicalWithImm_Unpredicated(
9668     const Instruction* instr) {
9669   Instr op = instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask);
9670   switch (op) {
9671     case AND_z_zi:
9672     case EOR_z_zi:
9673     case ORR_z_zi: {
9674       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
9675       uint64_t imm = instr->GetSVEImmLogical();
9676       // A valid immediate must have at least one bit set.
9677       VIXL_ASSERT(imm != 0);
9678       SVEBitwiseImmHelper(static_cast<SVEBitwiseLogicalWithImm_UnpredicatedOp>(
9679                               op),
9680                           SVEFormatFromLaneSizeInBytesLog2(lane_size),
9681                           ReadVRegister(instr->GetRd()),
9682                           imm);
9683       break;
9684     }
9685     default:
9686       VIXL_UNIMPLEMENTED();
9687       break;
9688   }
9689 }
9690 
9691 void Simulator::VisitSVEBroadcastBitmaskImm(const Instruction* instr) {
9692   switch (instr->Mask(SVEBroadcastBitmaskImmMask)) {
9693     case DUPM_z_i: {
9694       // DUPM uses the same lane size and immediate encoding as bitwise logical
9695       // immediate instructions.
9696       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
9697       uint64_t imm = instr->GetSVEImmLogical();
9698       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9699       dup_immediate(vform, ReadVRegister(instr->GetRd()), imm);
9700       break;
9701     }
9702     default:
9703       VIXL_UNIMPLEMENTED();
9704       break;
9705   }
9706 }
9707 
9708 void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
9709   SimVRegister& zd = ReadVRegister(instr->GetRd());
9710   SimVRegister& zn = ReadVRegister(instr->GetRn());
9711   SimVRegister& zm = ReadVRegister(instr->GetRm());
9712   Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask);
9713 
9714   LogicalOp logical_op = LogicalOpMask;
9715   switch (op) {
9716     case AND_z_zz:
9717       logical_op = AND;
9718       break;
9719     case BIC_z_zz:
9720       logical_op = BIC;
9721       break;
9722     case EOR_z_zz:
9723       logical_op = EOR;
9724       break;
9725     case ORR_z_zz:
9726       logical_op = ORR;
9727       break;
9728     default:
9729       VIXL_UNIMPLEMENTED();
9730       break;
9731   }
9732   // The lane size of the registers is irrelevant to the bitwise operations,
9733   // so perform the operation on D-sized lanes.
9734   SVEBitwiseLogicalUnpredicatedHelper(logical_op, kFormatVnD, zd, zn, zm);
9735 }
9736 
9737 void Simulator::VisitSVEBitwiseShiftByImm_Predicated(const Instruction* instr) {
9738   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9739   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9740 
9741   SimVRegister scratch;
9742   SimVRegister result;
9743 
9744   bool for_division = false;
9745   Shift shift_op = NO_SHIFT;
9746   switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
9747     case ASRD_z_p_zi:
9748       shift_op = ASR;
9749       for_division = true;
9750       break;
9751     case ASR_z_p_zi:
9752       shift_op = ASR;
9753       break;
9754     case LSL_z_p_zi:
9755       shift_op = LSL;
9756       break;
9757     case LSR_z_p_zi:
9758       shift_op = LSR;
9759       break;
9760     default:
9761       VIXL_UNIMPLEMENTED();
9762       break;
9763   }
9764 
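       // The helper returns the shift distance and the lane size, expressed as
       // log2 of the size in bytes.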
9765   std::pair<int, int> shift_and_lane_size =
9766       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
9767   unsigned lane_size = shift_and_lane_size.second;
9768   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9769   int shift_dist = shift_and_lane_size.first;
9770 
9771   if ((shift_op == ASR) && for_division) {
9772     asrd(vform, result, zdn, shift_dist);
9773   } else {
9774     if (shift_op == LSL) {
9775       // Shift distance is computed differently for LSL. Convert the result.
9776       shift_dist = (8 << lane_size) - shift_dist;
9777     }
9778     dup_immediate(vform, scratch, shift_dist);
9779     SVEBitwiseShiftHelper(shift_op, vform, result, zdn, scratch, false);
9780   }
9781   mov_merging(vform, zdn, pg, result);
9782 }
9783 
9784 void Simulator::VisitSVEBitwiseShiftByVector_Predicated(
9785     const Instruction* instr) {
9786   VectorFormat vform = instr->GetSVEVectorFormat();
9787   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9788   SimVRegister& zm = ReadVRegister(instr->GetRn());
9789   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9790   SimVRegister result;
9791 
9792   // SVE uses the whole (saturated) lane for the shift amount.
9793   bool shift_in_ls_byte = false;
9794 
9795   switch (form_hash_) {
9796     case "asrr_z_p_zz"_h:
9797       sshr(vform, result, zm, zdn);
9798       break;
9799     case "asr_z_p_zz"_h:
9800       sshr(vform, result, zdn, zm);
9801       break;
9802     case "lslr_z_p_zz"_h:
9803       sshl(vform, result, zm, zdn, shift_in_ls_byte);
9804       break;
9805     case "lsl_z_p_zz"_h:
9806       sshl(vform, result, zdn, zm, shift_in_ls_byte);
9807       break;
9808     case "lsrr_z_p_zz"_h:
9809       ushr(vform, result, zm, zdn);
9810       break;
9811     case "lsr_z_p_zz"_h:
9812       ushr(vform, result, zdn, zm);
9813       break;
9814     case "sqrshl_z_p_zz"_h:
9815       sshl(vform, result, zdn, zm, shift_in_ls_byte)
9816           .Round(vform)
9817           .SignedSaturate(vform);
9818       break;
9819     case "sqrshlr_z_p_zz"_h:
9820       sshl(vform, result, zm, zdn, shift_in_ls_byte)
9821           .Round(vform)
9822           .SignedSaturate(vform);
9823       break;
9824     case "sqshl_z_p_zz"_h:
9825       sshl(vform, result, zdn, zm, shift_in_ls_byte).SignedSaturate(vform);
9826       break;
9827     case "sqshlr_z_p_zz"_h:
9828       sshl(vform, result, zm, zdn, shift_in_ls_byte).SignedSaturate(vform);
9829       break;
9830     case "srshl_z_p_zz"_h:
9831       sshl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
9832       break;
9833     case "srshlr_z_p_zz"_h:
9834       sshl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
9835       break;
9836     case "uqrshl_z_p_zz"_h:
9837       ushl(vform, result, zdn, zm, shift_in_ls_byte)
9838           .Round(vform)
9839           .UnsignedSaturate(vform);
9840       break;
9841     case "uqrshlr_z_p_zz"_h:
9842       ushl(vform, result, zm, zdn, shift_in_ls_byte)
9843           .Round(vform)
9844           .UnsignedSaturate(vform);
9845       break;
9846     case "uqshl_z_p_zz"_h:
9847       ushl(vform, result, zdn, zm, shift_in_ls_byte).UnsignedSaturate(vform);
9848       break;
9849     case "uqshlr_z_p_zz"_h:
9850       ushl(vform, result, zm, zdn, shift_in_ls_byte).UnsignedSaturate(vform);
9851       break;
9852     case "urshl_z_p_zz"_h:
9853       ushl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
9854       break;
9855     case "urshlr_z_p_zz"_h:
9856       ushl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
9857       break;
9858     default:
9859       VIXL_UNIMPLEMENTED();
9860       break;
9861   }
9862   mov_merging(vform, zdn, pg, result);
9863 }
9864 
9865 void Simulator::VisitSVEBitwiseShiftByWideElements_Predicated(
9866     const Instruction* instr) {
9867   VectorFormat vform = instr->GetSVEVectorFormat();
9868   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9869   SimVRegister& zm = ReadVRegister(instr->GetRn());
9870   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9871 
9872   SimVRegister result;
9873   Shift shift_op = ASR;
9874 
9875   switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
9876     case ASR_z_p_zw:
9877       break;
9878     case LSL_z_p_zw:
9879       shift_op = LSL;
9880       break;
9881     case LSR_z_p_zw:
9882       shift_op = LSR;
9883       break;
9884     default:
9885       VIXL_UNIMPLEMENTED();
9886       break;
9887   }
9888   SVEBitwiseShiftHelper(shift_op,
9889                         vform,
9890                         result,
9891                         zdn,
9892                         zm,
9893                         /* is_wide_elements = */ true);
9894   mov_merging(vform, zdn, pg, result);
9895 }
9896 
9897 void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) {
9898   SimVRegister& zd = ReadVRegister(instr->GetRd());
9899   SimVRegister& zn = ReadVRegister(instr->GetRn());
9900 
9901   Shift shift_op = NO_SHIFT;
9902   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
9903     case ASR_z_zi:
9904     case ASR_z_zw:
9905       shift_op = ASR;
9906       break;
9907     case LSL_z_zi:
9908     case LSL_z_zw:
9909       shift_op = LSL;
9910       break;
9911     case LSR_z_zi:
9912     case LSR_z_zw:
9913       shift_op = LSR;
9914       break;
9915     default:
9916       VIXL_UNIMPLEMENTED();
9917       break;
9918   }
9919 
9920   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
9921     case ASR_z_zi:
9922     case LSL_z_zi:
9923     case LSR_z_zi: {
9924       SimVRegister scratch;
9925       std::pair<int, int> shift_and_lane_size =
9926           instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
9927       unsigned lane_size = shift_and_lane_size.second;
9928       VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
9929       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9930       int shift_dist = shift_and_lane_size.first;
9931       if (shift_op == LSL) {
9932         // Shift distance is computed differently for LSL. Convert the result.
9933         shift_dist = (8 << lane_size) - shift_dist;
9934       }
9935       dup_immediate(vform, scratch, shift_dist);
9936       SVEBitwiseShiftHelper(shift_op, vform, zd, zn, scratch, false);
9937       break;
9938     }
9939     case ASR_z_zw:
9940     case LSL_z_zw:
9941     case LSR_z_zw:
9942       SVEBitwiseShiftHelper(shift_op,
9943                             instr->GetSVEVectorFormat(),
9944                             zd,
9945                             zn,
9946                             ReadVRegister(instr->GetRm()),
9947                             true);
9948       break;
9949     default:
9950       VIXL_UNIMPLEMENTED();
9951       break;
9952   }
9953 }
9954 
9955 void Simulator::VisitSVEIncDecRegisterByElementCount(const Instruction* instr) {
9956   // Although the instructions have a separate encoding class, the lane size is
9957   // encoded in the same way as most other SVE instructions.
9958   VectorFormat vform = instr->GetSVEVectorFormat();
9959 
9960   int pattern = instr->GetImmSVEPredicateConstraint();
9961   int count = GetPredicateConstraintLaneCount(vform, pattern);
9962   int multiplier = instr->ExtractBits(19, 16) + 1;
9963 
9964   switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) {
9965     case DECB_r_rs:
9966     case DECD_r_rs:
9967     case DECH_r_rs:
9968     case DECW_r_rs:
9969       count = -count;
9970       break;
9971     case INCB_r_rs:
9972     case INCD_r_rs:
9973     case INCH_r_rs:
9974     case INCW_r_rs:
9975       // Nothing to do.
9976       break;
9977     default:
9978       VIXL_UNIMPLEMENTED();
9979       return;
9980   }
9981 
9982   WriteXRegister(instr->GetRd(),
9983                  IncDecN(ReadXRegister(instr->GetRd()),
9984                          count * multiplier,
9985                          kXRegSize));
9986 }
9987 
9988 void Simulator::VisitSVEIncDecVectorByElementCount(const Instruction* instr) {
9989   VectorFormat vform = instr->GetSVEVectorFormat();
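       // These instructions have no byte-element form.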
9990   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
9991     VIXL_UNIMPLEMENTED();
9992   }
9993 
9994   int pattern = instr->GetImmSVEPredicateConstraint();
9995   int count = GetPredicateConstraintLaneCount(vform, pattern);
9996   int multiplier = instr->ExtractBits(19, 16) + 1;
9997 
9998   switch (instr->Mask(SVEIncDecVectorByElementCountMask)) {
9999     case DECD_z_zs:
10000     case DECH_z_zs:
10001     case DECW_z_zs:
10002       count = -count;
10003       break;
10004     case INCD_z_zs:
10005     case INCH_z_zs:
10006     case INCW_z_zs:
10007       // Nothing to do.
10008       break;
10009     default:
10010       VIXL_UNIMPLEMENTED();
10011       break;
10012   }
10013 
10014   SimVRegister& zd = ReadVRegister(instr->GetRd());
10015   SimVRegister scratch;
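        // Broadcast the signed element-count delta, then add it to every lane.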
10016   dup_immediate(vform,
10017                 scratch,
10018                 IncDecN(0,
10019                         count * multiplier,
10020                         LaneSizeInBitsFromFormat(vform)));
10021   add(vform, zd, zd, scratch);
10022 }
10023 
10024 void Simulator::VisitSVESaturatingIncDecRegisterByElementCount(
10025     const Instruction* instr) {
10026   // Although the instructions have a separate encoding class, the lane size is
10027   // encoded in the same way as most other SVE instructions.
10028   VectorFormat vform = instr->GetSVEVectorFormat();
10029 
10030   int pattern = instr->GetImmSVEPredicateConstraint();
10031   int count = GetPredicateConstraintLaneCount(vform, pattern);
10032   int multiplier = instr->ExtractBits(19, 16) + 1;
10033 
10034   unsigned width = kXRegSize;
10035   bool is_signed = false;
10036 
10037   switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) {
10038     case SQDECB_r_rs_sx:
10039     case SQDECD_r_rs_sx:
10040     case SQDECH_r_rs_sx:
10041     case SQDECW_r_rs_sx:
10042       width = kWRegSize;
10043       VIXL_FALLTHROUGH();
10044     case SQDECB_r_rs_x:
10045     case SQDECD_r_rs_x:
10046     case SQDECH_r_rs_x:
10047     case SQDECW_r_rs_x:
10048       is_signed = true;
10049       count = -count;
10050       break;
10051     case SQINCB_r_rs_sx:
10052     case SQINCD_r_rs_sx:
10053     case SQINCH_r_rs_sx:
10054     case SQINCW_r_rs_sx:
10055       width = kWRegSize;
10056       VIXL_FALLTHROUGH();
10057     case SQINCB_r_rs_x:
10058     case SQINCD_r_rs_x:
10059     case SQINCH_r_rs_x:
10060     case SQINCW_r_rs_x:
10061       is_signed = true;
10062       break;
10063     case UQDECB_r_rs_uw:
10064     case UQDECD_r_rs_uw:
10065     case UQDECH_r_rs_uw:
10066     case UQDECW_r_rs_uw:
10067       width = kWRegSize;
10068       VIXL_FALLTHROUGH();
10069     case UQDECB_r_rs_x:
10070     case UQDECD_r_rs_x:
10071     case UQDECH_r_rs_x:
10072     case UQDECW_r_rs_x:
10073       count = -count;
10074       break;
10075     case UQINCB_r_rs_uw:
10076     case UQINCD_r_rs_uw:
10077     case UQINCH_r_rs_uw:
10078     case UQINCW_r_rs_uw:
10079       width = kWRegSize;
10080       VIXL_FALLTHROUGH();
10081     case UQINCB_r_rs_x:
10082     case UQINCD_r_rs_x:
10083     case UQINCH_r_rs_x:
10084     case UQINCW_r_rs_x:
10085       // Nothing to do.
10086       break;
10087     default:
10088       VIXL_UNIMPLEMENTED();
10089       break;
10090   }
10091 
10092   WriteXRegister(instr->GetRd(),
10093                  IncDecN(ReadXRegister(instr->GetRd()),
10094                          count * multiplier,
10095                          width,
10096                          true,
10097                          is_signed));
10098 }
10099 
10100 void Simulator::VisitSVESaturatingIncDecVectorByElementCount(
10101     const Instruction* instr) {
10102   VectorFormat vform = instr->GetSVEVectorFormat();
10103   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10104     VIXL_UNIMPLEMENTED();
10105   }
10106 
10107   int pattern = instr->GetImmSVEPredicateConstraint();
10108   int count = GetPredicateConstraintLaneCount(vform, pattern);
10109   int multiplier = instr->ExtractBits(19, 16) + 1;
10110 
10111   SimVRegister& zd = ReadVRegister(instr->GetRd());
10112   SimVRegister scratch;
10113   dup_immediate(vform,
10114                 scratch,
10115                 IncDecN(0,
10116                         count * multiplier,
10117                         LaneSizeInBitsFromFormat(vform)));
10118 
10119   switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) {
10120     case SQDECD_z_zs:
10121     case SQDECH_z_zs:
10122     case SQDECW_z_zs:
10123       sub(vform, zd, zd, scratch).SignedSaturate(vform);
10124       break;
10125     case SQINCD_z_zs:
10126     case SQINCH_z_zs:
10127     case SQINCW_z_zs:
10128       add(vform, zd, zd, scratch).SignedSaturate(vform);
10129       break;
10130     case UQDECD_z_zs:
10131     case UQDECH_z_zs:
10132     case UQDECW_z_zs:
10133       sub(vform, zd, zd, scratch).UnsignedSaturate(vform);
10134       break;
10135     case UQINCD_z_zs:
10136     case UQINCH_z_zs:
10137     case UQINCW_z_zs:
10138       add(vform, zd, zd, scratch).UnsignedSaturate(vform);
10139       break;
10140     default:
10141       VIXL_UNIMPLEMENTED();
10142       break;
10143   }
10144 }
10145 
10146 void Simulator::VisitSVEElementCount(const Instruction* instr) {
10147   switch (instr->Mask(SVEElementCountMask)) {
10148     case CNTB_r_s:
10149     case CNTD_r_s:
10150     case CNTH_r_s:
10151     case CNTW_r_s:
10152       // All handled below.
10153       break;
10154     default:
10155       VIXL_UNIMPLEMENTED();
10156       break;
10157   }
10158 
10159   // Although the instructions have a separate encoding class, the lane size is
10160   // encoded in the same way as most other SVE instructions.
10161   VectorFormat vform = instr->GetSVEVectorFormat();
10162 
10163   int pattern = instr->GetImmSVEPredicateConstraint();
10164   int count = GetPredicateConstraintLaneCount(vform, pattern);
10165   int multiplier = instr->ExtractBits(19, 16) + 1;
10166   WriteXRegister(instr->GetRd(), count * multiplier);
10167 }
10168 
10169 void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) {
10170   VectorFormat vform = instr->GetSVEVectorFormat();
10171   SimVRegister& vdn = ReadVRegister(instr->GetRd());
10172   SimVRegister& zm = ReadVRegister(instr->GetRn());
10173   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10174 
10175   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10176 
10177   switch (instr->Mask(SVEFPAccumulatingReductionMask)) {
10178     case FADDA_v_p_z:
10179       fadda(vform, vdn, pg, zm);
10180       break;
10181     default:
10182       VIXL_UNIMPLEMENTED();
10183       break;
10184   }
10185 }
10186 
10187 void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) {
10188   VectorFormat vform = instr->GetSVEVectorFormat();
10189   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10190   SimVRegister& zm = ReadVRegister(instr->GetRn());
10191   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10192 
10193   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10194 
10195   SimVRegister result;
10196   switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) {
10197     case FABD_z_p_zz:
10198       fabd(vform, result, zdn, zm);
10199       break;
10200     case FADD_z_p_zz:
10201       fadd(vform, result, zdn, zm);
10202       break;
10203     case FDIVR_z_p_zz:
10204       fdiv(vform, result, zm, zdn);
10205       break;
10206     case FDIV_z_p_zz:
10207       fdiv(vform, result, zdn, zm);
10208       break;
10209     case FMAXNM_z_p_zz:
10210       fmaxnm(vform, result, zdn, zm);
10211       break;
10212     case FMAX_z_p_zz:
10213       fmax(vform, result, zdn, zm);
10214       break;
10215     case FMINNM_z_p_zz:
10216       fminnm(vform, result, zdn, zm);
10217       break;
10218     case FMIN_z_p_zz:
10219       fmin(vform, result, zdn, zm);
10220       break;
10221     case FMULX_z_p_zz:
10222       fmulx(vform, result, zdn, zm);
10223       break;
10224     case FMUL_z_p_zz:
10225       fmul(vform, result, zdn, zm);
10226       break;
10227     case FSCALE_z_p_zz:
10228       fscale(vform, result, zdn, zm);
10229       break;
10230     case FSUBR_z_p_zz:
10231       fsub(vform, result, zm, zdn);
10232       break;
10233     case FSUB_z_p_zz:
10234       fsub(vform, result, zdn, zm);
10235       break;
10236     default:
10237       VIXL_UNIMPLEMENTED();
10238       break;
10239   }
10240   mov_merging(vform, zdn, pg, result);
10241 }
10242 
10243 void Simulator::VisitSVEFPArithmeticWithImm_Predicated(
10244     const Instruction* instr) {
10245   VectorFormat vform = instr->GetSVEVectorFormat();
10246   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10247     VIXL_UNIMPLEMENTED();
10248   }
10249 
10250   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10251   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10252   SimVRegister result;
10253 
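        // The single immediate bit selects one of two implicit operands:
        // 0.5 or 1.0 for add/sub, 0.0 or 1.0 for min/max, 0.5 or 2.0 for mul.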
10254   int i1 = instr->ExtractBit(5);
10255   SimVRegister add_sub_imm, min_max_imm, mul_imm;
10256   uint64_t half = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 0.5);
10257   uint64_t one = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 1.0);
10258   uint64_t two = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 2.0);
10259   dup_immediate(vform, add_sub_imm, i1 ? one : half);
10260   dup_immediate(vform, min_max_imm, i1 ? one : 0);
10261   dup_immediate(vform, mul_imm, i1 ? two : half);
10262 
10263   switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
10264     case FADD_z_p_zs:
10265       fadd(vform, result, zdn, add_sub_imm);
10266       break;
10267     case FMAXNM_z_p_zs:
10268       fmaxnm(vform, result, zdn, min_max_imm);
10269       break;
10270     case FMAX_z_p_zs:
10271       fmax(vform, result, zdn, min_max_imm);
10272       break;
10273     case FMINNM_z_p_zs:
10274       fminnm(vform, result, zdn, min_max_imm);
10275       break;
10276     case FMIN_z_p_zs:
10277       fmin(vform, result, zdn, min_max_imm);
10278       break;
10279     case FMUL_z_p_zs:
10280       fmul(vform, result, zdn, mul_imm);
10281       break;
10282     case FSUBR_z_p_zs:
10283       fsub(vform, result, add_sub_imm, zdn);
10284       break;
10285     case FSUB_z_p_zs:
10286       fsub(vform, result, zdn, add_sub_imm);
10287       break;
10288     default:
10289       VIXL_UNIMPLEMENTED();
10290       break;
10291   }
10292   mov_merging(vform, zdn, pg, result);
10293 }
10294 
10295 void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) {
10296   VectorFormat vform = instr->GetSVEVectorFormat();
10297   SimVRegister& zd = ReadVRegister(instr->GetRd());
10298   SimVRegister& zm = ReadVRegister(instr->GetRn());
10299 
10300   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10301 
10302   switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) {
10303     case FTMAD_z_zzi:
10304       ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16));
10305       break;
10306     default:
10307       VIXL_UNIMPLEMENTED();
10308       break;
10309   }
10310 }
10311 
10312 void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) {
10313   VectorFormat vform = instr->GetSVEVectorFormat();
10314   SimVRegister& zd = ReadVRegister(instr->GetRd());
10315   SimVRegister& zn = ReadVRegister(instr->GetRn());
10316   SimVRegister& zm = ReadVRegister(instr->GetRm());
10317 
10318   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10319 
10320   switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) {
10321     case FADD_z_zz:
10322       fadd(vform, zd, zn, zm);
10323       break;
10324     case FMUL_z_zz:
10325       fmul(vform, zd, zn, zm);
10326       break;
10327     case FRECPS_z_zz:
10328       frecps(vform, zd, zn, zm);
10329       break;
10330     case FRSQRTS_z_zz:
10331       frsqrts(vform, zd, zn, zm);
10332       break;
10333     case FSUB_z_zz:
10334       fsub(vform, zd, zn, zm);
10335       break;
10336     case FTSMUL_z_zz:
10337       ftsmul(vform, zd, zn, zm);
10338       break;
10339     default:
10340       VIXL_UNIMPLEMENTED();
10341       break;
10342   }
10343 }
10344 
10345 void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) {
10346   SimPRegister& pd = ReadPRegister(instr->GetPd());
10347   SimVRegister& zn = ReadVRegister(instr->GetRn());
10348   SimVRegister& zm = ReadVRegister(instr->GetRm());
10349   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10350   VectorFormat vform = instr->GetSVEVectorFormat();
10351   SimVRegister result;
10352 
10353   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10354 
10355   switch (instr->Mask(SVEFPCompareVectorsMask)) {
10356     case FACGE_p_p_zz:
10357       fabscmp(vform, result, zn, zm, ge);
10358       break;
10359     case FACGT_p_p_zz:
10360       fabscmp(vform, result, zn, zm, gt);
10361       break;
10362     case FCMEQ_p_p_zz:
10363       fcmp(vform, result, zn, zm, eq);
10364       break;
10365     case FCMGE_p_p_zz:
10366       fcmp(vform, result, zn, zm, ge);
10367       break;
10368     case FCMGT_p_p_zz:
10369       fcmp(vform, result, zn, zm, gt);
10370       break;
10371     case FCMNE_p_p_zz:
10372       fcmp(vform, result, zn, zm, ne);
10373       break;
10374     case FCMUO_p_p_zz:
10375       fcmp(vform, result, zn, zm, uo);
10376       break;
10377     default:
10378       VIXL_UNIMPLEMENTED();
10379       break;
10380   }
10381 
10382   ExtractFromSimVRegister(vform, pd, result);
10383   mov_zeroing(pd, pg, pd);
10384 }
10385 
10386 void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) {
10387   SimPRegister& pd = ReadPRegister(instr->GetPd());
10388   SimVRegister& zn = ReadVRegister(instr->GetRn());
10389   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10390   VectorFormat vform = instr->GetSVEVectorFormat();
10391 
10392   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10393 
10394   SimVRegister result;
10395   SimVRegister zeros;
10396   dup_immediate(kFormatVnD, zeros, 0);
10397 
10398   switch (instr->Mask(SVEFPCompareWithZeroMask)) {
10399     case FCMEQ_p_p_z0:
10400       fcmp(vform, result, zn, zeros, eq);
10401       break;
10402     case FCMGE_p_p_z0:
10403       fcmp(vform, result, zn, zeros, ge);
10404       break;
10405     case FCMGT_p_p_z0:
10406       fcmp(vform, result, zn, zeros, gt);
10407       break;
10408     case FCMLE_p_p_z0:
10409       fcmp(vform, result, zn, zeros, le);
10410       break;
10411     case FCMLT_p_p_z0:
10412       fcmp(vform, result, zn, zeros, lt);
10413       break;
10414     case FCMNE_p_p_z0:
10415       fcmp(vform, result, zn, zeros, ne);
10416       break;
10417     default:
10418       VIXL_UNIMPLEMENTED();
10419       break;
10420   }
10421 
10422   ExtractFromSimVRegister(vform, pd, result);
10423   mov_zeroing(pd, pg, pd);
10424 }
10425 
10426 void Simulator::VisitSVEFPComplexAddition(const Instruction* instr) {
10427   VectorFormat vform = instr->GetSVEVectorFormat();
10428 
10429   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10430     VIXL_UNIMPLEMENTED();
10431   }
10432 
10433   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10434   SimVRegister& zm = ReadVRegister(instr->GetRn());
10435   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10436   int rot = instr->ExtractBit(16);
10437 
10438   SimVRegister result;
10439 
10440   switch (instr->Mask(SVEFPComplexAdditionMask)) {
10441     case FCADD_z_p_zz:
10442       fcadd(vform, result, zdn, zm, rot);
10443       break;
10444     default:
10445       VIXL_UNIMPLEMENTED();
10446       break;
10447   }
10448   mov_merging(vform, zdn, pg, result);
10449 }
10450 
10451 void Simulator::VisitSVEFPComplexMulAdd(const Instruction* instr) {
10452   VectorFormat vform = instr->GetSVEVectorFormat();
10453 
10454   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10455     VIXL_UNIMPLEMENTED();
10456   }
10457 
10458   SimVRegister& zda = ReadVRegister(instr->GetRd());
10459   SimVRegister& zn = ReadVRegister(instr->GetRn());
10460   SimVRegister& zm = ReadVRegister(instr->GetRm());
10461   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10462   int rot = instr->ExtractBits(14, 13);
10463 
10464   SimVRegister result;
10465 
10466   switch (instr->Mask(SVEFPComplexMulAddMask)) {
10467     case FCMLA_z_p_zzz:
10468       fcmla(vform, result, zn, zm, zda, rot);
10469       break;
10470     default:
10471       VIXL_UNIMPLEMENTED();
10472       break;
10473   }
10474   mov_merging(vform, zda, pg, result);
10475 }
10476 
10477 void Simulator::VisitSVEFPComplexMulAddIndex(const Instruction* instr) {
10478   SimVRegister& zda = ReadVRegister(instr->GetRd());
10479   SimVRegister& zn = ReadVRegister(instr->GetRn());
10480   int rot = instr->ExtractBits(11, 10);
10481   unsigned zm_code = instr->GetRm();
10482   int index = -1;
10483   VectorFormat vform, vform_dup;
10484 
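        // The element index is packed into the top bits of the Zm field: Z0-Z7
        // with a two-bit index for halfword lanes, Z0-Z15 with a one-bit index
        // for word lanes.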
10485   switch (instr->Mask(SVEFPComplexMulAddIndexMask)) {
10486     case FCMLA_z_zzzi_h:
10487       vform = kFormatVnH;
10488       vform_dup = kFormatVnS;
10489       index = zm_code >> 3;
10490       zm_code &= 0x7;
10491       break;
10492     case FCMLA_z_zzzi_s:
10493       vform = kFormatVnS;
10494       vform_dup = kFormatVnD;
10495       index = zm_code >> 4;
10496       zm_code &= 0xf;
10497       break;
10498     default:
10499       VIXL_UNIMPLEMENTED();
10500       break;
10501   }
10502 
10503   if (index >= 0) {
10504     SimVRegister temp;
10505     dup_elements_to_segments(vform_dup, temp, ReadVRegister(zm_code), index);
10506     fcmla(vform, zda, zn, temp, zda, rot);
10507   }
10508 }
10509 
10510 typedef LogicVRegister (Simulator::*FastReduceFn)(VectorFormat vform,
10511                                                   LogicVRegister dst,
10512                                                   const LogicVRegister& src);
10513 
10514 void Simulator::VisitSVEFPFastReduction(const Instruction* instr) {
10515   VectorFormat vform = instr->GetSVEVectorFormat();
10516   SimVRegister& vd = ReadVRegister(instr->GetRd());
10517   SimVRegister& zn = ReadVRegister(instr->GetRn());
10518   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10519   int lane_size = LaneSizeInBitsFromFormat(vform);
10520 
10521   uint64_t inactive_value = 0;
10522   FastReduceFn fn = nullptr;
10523 
10524   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10525 
10526   switch (instr->Mask(SVEFPFastReductionMask)) {
10527     case FADDV_v_p_z:
10528       fn = &Simulator::faddv;
10529       break;
10530     case FMAXNMV_v_p_z:
10531       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
10532       fn = &Simulator::fmaxnmv;
10533       break;
10534     case FMAXV_v_p_z:
10535       inactive_value = FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
10536       fn = &Simulator::fmaxv;
10537       break;
10538     case FMINNMV_v_p_z:
10539       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
10540       fn = &Simulator::fminnmv;
10541       break;
10542     case FMINV_v_p_z:
10543       inactive_value = FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
10544       fn = &Simulator::fminv;
10545       break;
10546     default:
10547       VIXL_UNIMPLEMENTED();
10548       break;
10549   }
10550 
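        // Fill inactive lanes with a value that does not affect the result of
        // the reduction before operating on the whole vector.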
10551   SimVRegister scratch;
10552   dup_immediate(vform, scratch, inactive_value);
10553   mov_merging(vform, scratch, pg, zn);
10554   if (fn != nullptr) (this->*fn)(vform, vd, scratch);
10555 }
10556 
10557 void Simulator::VisitSVEFPMulIndex(const Instruction* instr) {
10558   VectorFormat vform = kFormatUndefined;
10559 
10560   switch (instr->Mask(SVEFPMulIndexMask)) {
10561     case FMUL_z_zzi_d:
10562       vform = kFormatVnD;
10563       break;
10564     case FMUL_z_zzi_h_i3h:
10565     case FMUL_z_zzi_h:
10566       vform = kFormatVnH;
10567       break;
10568     case FMUL_z_zzi_s:
10569       vform = kFormatVnS;
10570       break;
10571     default:
10572       VIXL_UNIMPLEMENTED();
10573       break;
10574   }
10575 
10576   SimVRegister& zd = ReadVRegister(instr->GetRd());
10577   SimVRegister& zn = ReadVRegister(instr->GetRn());
10578   SimVRegister temp;
10579 
10580   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
10581   fmul(vform, zd, zn, temp);
10582 }
10583 
10584 void Simulator::VisitSVEFPMulAdd(const Instruction* instr) {
10585   VectorFormat vform = instr->GetSVEVectorFormat();
10586   SimVRegister& zd = ReadVRegister(instr->GetRd());
10587   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10588   SimVRegister result;
10589 
10590   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10591 
10592   if (instr->ExtractBit(15) == 0) {
10593     // Floating-point multiply-accumulate writing addend.
10594     SimVRegister& zm = ReadVRegister(instr->GetRm());
10595     SimVRegister& zn = ReadVRegister(instr->GetRn());
10596 
10597     switch (instr->Mask(SVEFPMulAddMask)) {
10598       // zda = zda + zn * zm
10599       case FMLA_z_p_zzz:
10600         fmla(vform, result, zd, zn, zm);
10601         break;
10602       // zda = -zda + -zn * zm
10603       case FNMLA_z_p_zzz:
10604         fneg(vform, result, zd);
10605         fmls(vform, result, result, zn, zm);
10606         break;
10607       // zda = zda + -zn * zm
10608       case FMLS_z_p_zzz:
10609         fmls(vform, result, zd, zn, zm);
10610         break;
10611       // zda = -zda + zn * zm
10612       case FNMLS_z_p_zzz:
10613         fneg(vform, result, zd);
10614         fmla(vform, result, result, zn, zm);
10615         break;
10616       default:
10617         VIXL_UNIMPLEMENTED();
10618         break;
10619     }
10620   } else {
10621     // Floating-point multiply-accumulate writing multiplicand.
10622     SimVRegister& za = ReadVRegister(instr->GetRm());
10623     SimVRegister& zm = ReadVRegister(instr->GetRn());
10624 
10625     switch (instr->Mask(SVEFPMulAddMask)) {
10626       // zdn = za + zdn * zm
10627       case FMAD_z_p_zzz:
10628         fmla(vform, result, za, zd, zm);
10629         break;
10630       // zdn = -za + -zdn * zm
10631       case FNMAD_z_p_zzz:
10632         fneg(vform, result, za);
10633         fmls(vform, result, result, zd, zm);
10634         break;
10635       // zdn = za + -zdn * zm
10636       case FMSB_z_p_zzz:
10637         fmls(vform, result, za, zd, zm);
10638         break;
10639       // zdn = -za + zdn * zm
10640       case FNMSB_z_p_zzz:
10641         fneg(vform, result, za);
10642         fmla(vform, result, result, zd, zm);
10643         break;
10644       default:
10645         VIXL_UNIMPLEMENTED();
10646         break;
10647     }
10648   }
10649 
10650   mov_merging(vform, zd, pg, result);
10651 }
10652 
10653 void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) {
10654   VectorFormat vform = kFormatUndefined;
10655 
10656   switch (instr->Mask(SVEFPMulAddIndexMask)) {
10657     case FMLA_z_zzzi_d:
10658     case FMLS_z_zzzi_d:
10659       vform = kFormatVnD;
10660       break;
10661     case FMLA_z_zzzi_s:
10662     case FMLS_z_zzzi_s:
10663       vform = kFormatVnS;
10664       break;
10665     case FMLA_z_zzzi_h:
10666     case FMLS_z_zzzi_h:
10667     case FMLA_z_zzzi_h_i3h:
10668     case FMLS_z_zzzi_h_i3h:
10669       vform = kFormatVnH;
10670       break;
10671     default:
10672       VIXL_UNIMPLEMENTED();
10673       break;
10674   }
10675 
10676   SimVRegister& zd = ReadVRegister(instr->GetRd());
10677   SimVRegister& zn = ReadVRegister(instr->GetRn());
10678   SimVRegister temp;
10679 
10680   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
10681   if (instr->ExtractBit(10) == 1) {
10682     fmls(vform, zd, zd, zn, temp);
10683   } else {
10684     fmla(vform, zd, zd, zn, temp);
10685   }
10686 }
10687 
10688 void Simulator::VisitSVEFPConvertToInt(const Instruction* instr) {
10689   SimVRegister& zd = ReadVRegister(instr->GetRd());
10690   SimVRegister& zn = ReadVRegister(instr->GetRn());
10691   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10692   int dst_data_size;
10693   int src_data_size;
10694 
10695   switch (instr->Mask(SVEFPConvertToIntMask)) {
10696     case FCVTZS_z_p_z_d2w:
10697     case FCVTZU_z_p_z_d2w:
10698       dst_data_size = kSRegSize;
10699       src_data_size = kDRegSize;
10700       break;
10701     case FCVTZS_z_p_z_d2x:
10702     case FCVTZU_z_p_z_d2x:
10703       dst_data_size = kDRegSize;
10704       src_data_size = kDRegSize;
10705       break;
10706     case FCVTZS_z_p_z_fp162h:
10707     case FCVTZU_z_p_z_fp162h:
10708       dst_data_size = kHRegSize;
10709       src_data_size = kHRegSize;
10710       break;
10711     case FCVTZS_z_p_z_fp162w:
10712     case FCVTZU_z_p_z_fp162w:
10713       dst_data_size = kSRegSize;
10714       src_data_size = kHRegSize;
10715       break;
10716     case FCVTZS_z_p_z_fp162x:
10717     case FCVTZU_z_p_z_fp162x:
10718       dst_data_size = kDRegSize;
10719       src_data_size = kHRegSize;
10720       break;
10721     case FCVTZS_z_p_z_s2w:
10722     case FCVTZU_z_p_z_s2w:
10723       dst_data_size = kSRegSize;
10724       src_data_size = kSRegSize;
10725       break;
10726     case FCVTZS_z_p_z_s2x:
10727     case FCVTZU_z_p_z_s2x:
10728       dst_data_size = kDRegSize;
10729       src_data_size = kSRegSize;
10730       break;
10731     default:
10732       VIXL_UNIMPLEMENTED();
10733       dst_data_size = 0;
10734       src_data_size = 0;
10735       break;
10736   }
10737 
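        // Operate on lanes wide enough to hold the larger of the source and
        // destination data sizes.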
10738   VectorFormat vform =
10739       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
10740 
10741   if (instr->ExtractBit(16) == 0) {
10742     fcvts(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
10743   } else {
10744     fcvtu(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
10745   }
10746 }
10747 
10748 void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) {
10749   SimVRegister& zd = ReadVRegister(instr->GetRd());
10750   SimVRegister& zn = ReadVRegister(instr->GetRn());
10751   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10752   VectorFormat dst_data_size = kFormatUndefined;
10753   VectorFormat src_data_size = kFormatUndefined;
10754 
10755   switch (instr->Mask(SVEFPConvertPrecisionMask)) {
10756     case FCVT_z_p_z_d2h:
10757       dst_data_size = kFormatVnH;
10758       src_data_size = kFormatVnD;
10759       break;
10760     case FCVT_z_p_z_d2s:
10761       dst_data_size = kFormatVnS;
10762       src_data_size = kFormatVnD;
10763       break;
10764     case FCVT_z_p_z_h2d:
10765       dst_data_size = kFormatVnD;
10766       src_data_size = kFormatVnH;
10767       break;
10768     case FCVT_z_p_z_h2s:
10769       dst_data_size = kFormatVnS;
10770       src_data_size = kFormatVnH;
10771       break;
10772     case FCVT_z_p_z_s2d:
10773       dst_data_size = kFormatVnD;
10774       src_data_size = kFormatVnS;
10775       break;
10776     case FCVT_z_p_z_s2h:
10777       dst_data_size = kFormatVnH;
10778       src_data_size = kFormatVnS;
10779       break;
10780     default:
10781       VIXL_UNIMPLEMENTED();
10782       break;
10783   }
10784 
10785   fcvt(dst_data_size, src_data_size, zd, pg, zn);
10786 }
10787 
10788 void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) {
10789   SimVRegister& zd = ReadVRegister(instr->GetRd());
10790   SimVRegister& zn = ReadVRegister(instr->GetRn());
10791   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10792   VectorFormat vform = instr->GetSVEVectorFormat();
10793   SimVRegister result;
10794 
10795   switch (instr->Mask(SVEFPUnaryOpMask)) {
10796     case FRECPX_z_p_z:
10797       frecpx(vform, result, zn);
10798       break;
10799     case FSQRT_z_p_z:
10800       fsqrt(vform, result, zn);
10801       break;
10802     default:
10803       VIXL_UNIMPLEMENTED();
10804       break;
10805   }
10806   mov_merging(vform, zd, pg, result);
10807 }
10808 
10809 void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) {
10810   SimVRegister& zd = ReadVRegister(instr->GetRd());
10811   SimVRegister& zn = ReadVRegister(instr->GetRn());
10812   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10813   VectorFormat vform = instr->GetSVEVectorFormat();
10814   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10815   bool exact_exception = false;
10816 
10817   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10818 
10819   switch (instr->Mask(SVEFPRoundToIntegralValueMask)) {
10820     case FRINTA_z_p_z:
10821       fpcr_rounding = FPTieAway;
10822       break;
10823     case FRINTI_z_p_z:
10824       break;  // Use FPCR rounding mode.
10825     case FRINTM_z_p_z:
10826       fpcr_rounding = FPNegativeInfinity;
10827       break;
10828     case FRINTN_z_p_z:
10829       fpcr_rounding = FPTieEven;
10830       break;
10831     case FRINTP_z_p_z:
10832       fpcr_rounding = FPPositiveInfinity;
10833       break;
10834     case FRINTX_z_p_z:
10835       exact_exception = true;
10836       break;
10837     case FRINTZ_z_p_z:
10838       fpcr_rounding = FPZero;
10839       break;
10840     default:
10841       VIXL_UNIMPLEMENTED();
10842       break;
10843   }
10844 
10845   SimVRegister result;
10846   frint(vform, result, zn, fpcr_rounding, exact_exception, kFrintToInteger);
10847   mov_merging(vform, zd, pg, result);
10848 }
10849 
10850 void Simulator::VisitSVEIntConvertToFP(const Instruction* instr) {
10851   SimVRegister& zd = ReadVRegister(instr->GetRd());
10852   SimVRegister& zn = ReadVRegister(instr->GetRn());
10853   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10854   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10855   int dst_data_size;
10856   int src_data_size;
10857 
10858   switch (instr->Mask(SVEIntConvertToFPMask)) {
10859     case SCVTF_z_p_z_h2fp16:
10860     case UCVTF_z_p_z_h2fp16:
10861       dst_data_size = kHRegSize;
10862       src_data_size = kHRegSize;
10863       break;
10864     case SCVTF_z_p_z_w2d:
10865     case UCVTF_z_p_z_w2d:
10866       dst_data_size = kDRegSize;
10867       src_data_size = kSRegSize;
10868       break;
10869     case SCVTF_z_p_z_w2fp16:
10870     case UCVTF_z_p_z_w2fp16:
10871       dst_data_size = kHRegSize;
10872       src_data_size = kSRegSize;
10873       break;
10874     case SCVTF_z_p_z_w2s:
10875     case UCVTF_z_p_z_w2s:
10876       dst_data_size = kSRegSize;
10877       src_data_size = kSRegSize;
10878       break;
10879     case SCVTF_z_p_z_x2d:
10880     case UCVTF_z_p_z_x2d:
10881       dst_data_size = kDRegSize;
10882       src_data_size = kDRegSize;
10883       break;
10884     case SCVTF_z_p_z_x2fp16:
10885     case UCVTF_z_p_z_x2fp16:
10886       dst_data_size = kHRegSize;
10887       src_data_size = kDRegSize;
10888       break;
10889     case SCVTF_z_p_z_x2s:
10890     case UCVTF_z_p_z_x2s:
10891       dst_data_size = kSRegSize;
10892       src_data_size = kDRegSize;
10893       break;
10894     default:
10895       VIXL_UNIMPLEMENTED();
10896       dst_data_size = 0;
10897       src_data_size = 0;
10898       break;
10899   }
10900 
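  // The conversion is performed using the wider of the source and destination
  // lane sizes; bit 16 of the encoding selects UCVTF rather than SCVTF.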
10901   VectorFormat vform =
10902       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
10903 
10904   if (instr->ExtractBit(16) == 0) {
10905     scvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
10906   } else {
10907     ucvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
10908   }
10909 }
10910 
10911 void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) {
10912   VectorFormat vform = instr->GetSVEVectorFormat();
10913   SimVRegister& zd = ReadVRegister(instr->GetRd());
10914   SimVRegister& zn = ReadVRegister(instr->GetRn());
10915   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10916 
10917   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10918 
10919   switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) {
10920     case FRECPE_z_z:
10921       frecpe(vform, zd, zn, fpcr_rounding);
10922       break;
10923     case FRSQRTE_z_z:
10924       frsqrte(vform, zd, zn);
10925       break;
10926     default:
10927       VIXL_UNIMPLEMENTED();
10928       break;
10929   }
10930 }
10931 
10932 void Simulator::VisitSVEIncDecByPredicateCount(const Instruction* instr) {
10933   VectorFormat vform = instr->GetSVEVectorFormat();
10934   SimPRegister& pg = ReadPRegister(instr->ExtractBits(8, 5));
10935 
10936   int count = CountActiveLanes(vform, pg);
10937 
10938   if (instr->ExtractBit(11) == 0) {
10939     SimVRegister& zdn = ReadVRegister(instr->GetRd());
10940     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
10941       case DECP_z_p_z:
10942         sub_uint(vform, zdn, zdn, count);
10943         break;
10944       case INCP_z_p_z:
10945         add_uint(vform, zdn, zdn, count);
10946         break;
10947       case SQDECP_z_p_z:
10948         sub_uint(vform, zdn, zdn, count).SignedSaturate(vform);
10949         break;
10950       case SQINCP_z_p_z:
10951         add_uint(vform, zdn, zdn, count).SignedSaturate(vform);
10952         break;
10953       case UQDECP_z_p_z:
10954         sub_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
10955         break;
10956       case UQINCP_z_p_z:
10957         add_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
10958         break;
10959       default:
10960         VIXL_UNIMPLEMENTED();
10961         break;
10962     }
10963   } else {
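    // Scalar forms: for the saturating variants, bit 17 selects decrement,
    // bit 16 clear selects a signed result, and bit 10 selects a 64-bit (X)
    // destination. The plain INCP/DECP forms use bit 16 for decrement and are
    // always 64-bit.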
10964     bool is_saturating = (instr->ExtractBit(18) == 0);
10965     bool decrement =
10966         is_saturating ? instr->ExtractBit(17) : instr->ExtractBit(16);
10967     bool is_signed = (instr->ExtractBit(16) == 0);
10968     bool sf = is_saturating ? (instr->ExtractBit(10) != 0) : true;
10969     unsigned width = sf ? kXRegSize : kWRegSize;
10970 
10971     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
10972       case DECP_r_p_r:
10973       case INCP_r_p_r:
10974       case SQDECP_r_p_r_sx:
10975       case SQDECP_r_p_r_x:
10976       case SQINCP_r_p_r_sx:
10977       case SQINCP_r_p_r_x:
10978       case UQDECP_r_p_r_uw:
10979       case UQDECP_r_p_r_x:
10980       case UQINCP_r_p_r_uw:
10981       case UQINCP_r_p_r_x:
10982         WriteXRegister(instr->GetRd(),
10983                        IncDecN(ReadXRegister(instr->GetRd()),
10984                                decrement ? -count : count,
10985                                width,
10986                                is_saturating,
10987                                is_signed));
10988         break;
10989       default:
10990         VIXL_UNIMPLEMENTED();
10991         break;
10992     }
10993   }
10994 }
10995 
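// Add `delta` to the least-significant `n` bits of `acc`, optionally
// saturating (signed or unsigned) at the n-bit limits, and sign-extend the
// result to 64 bits when it is signed and negative.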
10996 uint64_t Simulator::IncDecN(uint64_t acc,
10997                             int64_t delta,
10998                             unsigned n,
10999                             bool is_saturating,
11000                             bool is_signed) {
11001   VIXL_ASSERT(n <= 64);
11002   VIXL_ASSERT(IsIntN(n, delta));
11003 
11004   uint64_t sign_mask = UINT64_C(1) << (n - 1);
11005   uint64_t mask = GetUintMask(n);
11006 
11007   acc &= mask;  // Ignore initial accumulator high bits.
11008   uint64_t result = (acc + delta) & mask;
11009 
11010   bool result_negative = ((result & sign_mask) != 0);
11011 
11012   if (is_saturating) {
11013     if (is_signed) {
11014       bool acc_negative = ((acc & sign_mask) != 0);
11015       bool delta_negative = delta < 0;
11016 
11017       // If the signs of the operands are the same, but different from the
11018       // result, there was an overflow.
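      // For example, with n == 8, 0x7f + 1 wraps to 0x80 (negative), so the
      // result saturates to the largest positive value, 0x7f.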
11019       if ((acc_negative == delta_negative) &&
11020           (acc_negative != result_negative)) {
11021         if (result_negative) {
11022           // Saturate to [..., INT<n>_MAX].
11023           result_negative = false;
11024           result = mask & ~sign_mask;  // E.g. 0x000000007fffffff
11025         } else {
11026           // Saturate to [INT<n>_MIN, ...].
11027           result_negative = true;
11028           result = ~mask | sign_mask;  // E.g. 0xffffffff80000000
11029         }
11030       }
11031     } else {
11032       if ((delta < 0) && (result > acc)) {
11033         // Saturate to [0, ...].
11034         result = 0;
11035       } else if ((delta > 0) && (result < acc)) {
11036         // Saturate to [..., UINT<n>_MAX].
11037         result = mask;
11038       }
11039     }
11040   }
11041 
11042   // Sign-extend if necessary.
11043   if (result_negative && is_signed) result |= ~mask;
11044 
11045   return result;
11046 }
11047 
11048 void Simulator::VisitSVEIndexGeneration(const Instruction* instr) {
11049   VectorFormat vform = instr->GetSVEVectorFormat();
11050   SimVRegister& zd = ReadVRegister(instr->GetRd());
11051   switch (instr->Mask(SVEIndexGenerationMask)) {
11052     case INDEX_z_ii:
11053     case INDEX_z_ir:
11054     case INDEX_z_ri:
11055     case INDEX_z_rr: {
11056       uint64_t start = instr->ExtractBit(10) ? ReadXRegister(instr->GetRn())
11057                                              : instr->ExtractSignedBits(9, 5);
11058       uint64_t step = instr->ExtractBit(11) ? ReadXRegister(instr->GetRm())
11059                                             : instr->ExtractSignedBits(20, 16);
11060       index(vform, zd, start, step);
11061       break;
11062     }
11063     default:
11064       VIXL_UNIMPLEMENTED();
11065       break;
11066   }
11067 }
11068 
11069 void Simulator::VisitSVEIntArithmeticUnpredicated(const Instruction* instr) {
11070   VectorFormat vform = instr->GetSVEVectorFormat();
11071   SimVRegister& zd = ReadVRegister(instr->GetRd());
11072   SimVRegister& zn = ReadVRegister(instr->GetRn());
11073   SimVRegister& zm = ReadVRegister(instr->GetRm());
11074   switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) {
11075     case ADD_z_zz:
11076       add(vform, zd, zn, zm);
11077       break;
11078     case SQADD_z_zz:
11079       add(vform, zd, zn, zm).SignedSaturate(vform);
11080       break;
11081     case SQSUB_z_zz:
11082       sub(vform, zd, zn, zm).SignedSaturate(vform);
11083       break;
11084     case SUB_z_zz:
11085       sub(vform, zd, zn, zm);
11086       break;
11087     case UQADD_z_zz:
11088       add(vform, zd, zn, zm).UnsignedSaturate(vform);
11089       break;
11090     case UQSUB_z_zz:
11091       sub(vform, zd, zn, zm).UnsignedSaturate(vform);
11092       break;
11093     default:
11094       VIXL_UNIMPLEMENTED();
11095       break;
11096   }
11097 }
11098 
11099 void Simulator::VisitSVEIntAddSubtractVectors_Predicated(
11100     const Instruction* instr) {
11101   VectorFormat vform = instr->GetSVEVectorFormat();
11102   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11103   SimVRegister& zm = ReadVRegister(instr->GetRn());
11104   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11105   SimVRegister result;
11106 
11107   switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
11108     case ADD_z_p_zz:
11109       add(vform, result, zdn, zm);
11110       break;
11111     case SUBR_z_p_zz:
11112       sub(vform, result, zm, zdn);
11113       break;
11114     case SUB_z_p_zz:
11115       sub(vform, result, zdn, zm);
11116       break;
11117     default:
11118       VIXL_UNIMPLEMENTED();
11119       break;
11120   }
11121   mov_merging(vform, zdn, pg, result);
11122 }
11123 
11124 void Simulator::VisitSVEBitwiseLogical_Predicated(const Instruction* instr) {
11125   VectorFormat vform = instr->GetSVEVectorFormat();
11126   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11127   SimVRegister& zm = ReadVRegister(instr->GetRn());
11128   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11129   SimVRegister result;
11130 
11131   switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) {
11132     case AND_z_p_zz:
11133       SVEBitwiseLogicalUnpredicatedHelper(AND, vform, result, zdn, zm);
11134       break;
11135     case BIC_z_p_zz:
11136       SVEBitwiseLogicalUnpredicatedHelper(BIC, vform, result, zdn, zm);
11137       break;
11138     case EOR_z_p_zz:
11139       SVEBitwiseLogicalUnpredicatedHelper(EOR, vform, result, zdn, zm);
11140       break;
11141     case ORR_z_p_zz:
11142       SVEBitwiseLogicalUnpredicatedHelper(ORR, vform, result, zdn, zm);
11143       break;
11144     default:
11145       VIXL_UNIMPLEMENTED();
11146       break;
11147   }
11148   mov_merging(vform, zdn, pg, result);
11149 }
11150 
11151 void Simulator::VisitSVEIntMulVectors_Predicated(const Instruction* instr) {
11152   VectorFormat vform = instr->GetSVEVectorFormat();
11153   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11154   SimVRegister& zm = ReadVRegister(instr->GetRn());
11155   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11156   SimVRegister result;
11157 
11158   switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) {
11159     case MUL_z_p_zz:
11160       mul(vform, result, zdn, zm);
11161       break;
11162     case SMULH_z_p_zz:
11163       smulh(vform, result, zdn, zm);
11164       break;
11165     case UMULH_z_p_zz:
11166       umulh(vform, result, zdn, zm);
11167       break;
11168     default:
11169       VIXL_UNIMPLEMENTED();
11170       break;
11171   }
11172   mov_merging(vform, zdn, pg, result);
11173 }
11174 
11175 void Simulator::VisitSVEIntMinMaxDifference_Predicated(
11176     const Instruction* instr) {
11177   VectorFormat vform = instr->GetSVEVectorFormat();
11178   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11179   SimVRegister& zm = ReadVRegister(instr->GetRn());
11180   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11181   SimVRegister result;
11182 
11183   switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) {
11184     case SABD_z_p_zz:
11185       absdiff(vform, result, zdn, zm, true);
11186       break;
11187     case SMAX_z_p_zz:
11188       smax(vform, result, zdn, zm);
11189       break;
11190     case SMIN_z_p_zz:
11191       smin(vform, result, zdn, zm);
11192       break;
11193     case UABD_z_p_zz:
11194       absdiff(vform, result, zdn, zm, false);
11195       break;
11196     case UMAX_z_p_zz:
11197       umax(vform, result, zdn, zm);
11198       break;
11199     case UMIN_z_p_zz:
11200       umin(vform, result, zdn, zm);
11201       break;
11202     default:
11203       VIXL_UNIMPLEMENTED();
11204       break;
11205   }
11206   mov_merging(vform, zdn, pg, result);
11207 }
11208 
11209 void Simulator::VisitSVEIntMulImm_Unpredicated(const Instruction* instr) {
11210   VectorFormat vform = instr->GetSVEVectorFormat();
11211   SimVRegister& zd = ReadVRegister(instr->GetRd());
11212   SimVRegister scratch;
11213 
11214   switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) {
11215     case MUL_z_zi:
11216       dup_immediate(vform, scratch, instr->GetImmSVEIntWideSigned());
11217       mul(vform, zd, zd, scratch);
11218       break;
11219     default:
11220       VIXL_UNIMPLEMENTED();
11221       break;
11222   }
11223 }
11224 
11225 void Simulator::VisitSVEIntDivideVectors_Predicated(const Instruction* instr) {
11226   VectorFormat vform = instr->GetSVEVectorFormat();
11227   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11228   SimVRegister& zm = ReadVRegister(instr->GetRn());
11229   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11230   SimVRegister result;
11231 
11232   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
11233 
11234   switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
11235     case SDIVR_z_p_zz:
11236       sdiv(vform, result, zm, zdn);
11237       break;
11238     case SDIV_z_p_zz:
11239       sdiv(vform, result, zdn, zm);
11240       break;
11241     case UDIVR_z_p_zz:
11242       udiv(vform, result, zm, zdn);
11243       break;
11244     case UDIV_z_p_zz:
11245       udiv(vform, result, zdn, zm);
11246       break;
11247     default:
11248       VIXL_UNIMPLEMENTED();
11249       break;
11250   }
11251   mov_merging(vform, zdn, pg, result);
11252 }
11253 
11254 void Simulator::VisitSVEIntMinMaxImm_Unpredicated(const Instruction* instr) {
11255   VectorFormat vform = instr->GetSVEVectorFormat();
11256   SimVRegister& zd = ReadVRegister(instr->GetRd());
11257   SimVRegister scratch;
11258 
11259   uint64_t unsigned_imm = instr->GetImmSVEIntWideUnsigned();
11260   int64_t signed_imm = instr->GetImmSVEIntWideSigned();
11261 
11262   switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
11263     case SMAX_z_zi:
11264       dup_immediate(vform, scratch, signed_imm);
11265       smax(vform, zd, zd, scratch);
11266       break;
11267     case SMIN_z_zi:
11268       dup_immediate(vform, scratch, signed_imm);
11269       smin(vform, zd, zd, scratch);
11270       break;
11271     case UMAX_z_zi:
11272       dup_immediate(vform, scratch, unsigned_imm);
11273       umax(vform, zd, zd, scratch);
11274       break;
11275     case UMIN_z_zi:
11276       dup_immediate(vform, scratch, unsigned_imm);
11277       umin(vform, zd, zd, scratch);
11278       break;
11279     default:
11280       VIXL_UNIMPLEMENTED();
11281       break;
11282   }
11283 }
11284 
11285 void Simulator::VisitSVEIntCompareScalarCountAndLimit(
11286     const Instruction* instr) {
11287   unsigned rn_code = instr->GetRn();
11288   unsigned rm_code = instr->GetRm();
11289   SimPRegister& pd = ReadPRegister(instr->GetPd());
11290   VectorFormat vform = instr->GetSVEVectorFormat();
11291 
11292   bool is_64_bit = instr->ExtractBit(12) == 1;
11293   int rsize = is_64_bit ? kXRegSize : kWRegSize;
11294   uint64_t mask = is_64_bit ? kXRegMask : kWRegMask;
11295 
11296   uint64_t usrc1 = ReadXRegister(rn_code);
11297   int64_t ssrc2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
11298   uint64_t usrc2 = ssrc2 & mask;
11299 
11300   bool reverse = (form_hash_ == "whilege_p_p_rr"_h) ||
11301                  (form_hash_ == "whilegt_p_p_rr"_h) ||
11302                  (form_hash_ == "whilehi_p_p_rr"_h) ||
11303                  (form_hash_ == "whilehs_p_p_rr"_h);
11304 
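  // A lane is set while the comparison has held for every preceding element;
  // the first failing element clears that lane and all subsequent lanes. The
  // "reverse" forms count downwards and fill the predicate from the top lane.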
11305   int lane_count = LaneCountFromFormat(vform);
11306   bool last = true;
11307   for (int i = 0; i < lane_count; i++) {
11308     usrc1 &= mask;
11309     int64_t ssrc1 = ExtractSignedBitfield64(rsize - 1, 0, usrc1);
11310 
11311     bool cond = false;
11312     switch (form_hash_) {
11313       case "whilele_p_p_rr"_h:
11314         cond = ssrc1 <= ssrc2;
11315         break;
11316       case "whilelo_p_p_rr"_h:
11317         cond = usrc1 < usrc2;
11318         break;
11319       case "whilels_p_p_rr"_h:
11320         cond = usrc1 <= usrc2;
11321         break;
11322       case "whilelt_p_p_rr"_h:
11323         cond = ssrc1 < ssrc2;
11324         break;
11325       case "whilege_p_p_rr"_h:
11326         cond = ssrc1 >= ssrc2;
11327         break;
11328       case "whilegt_p_p_rr"_h:
11329         cond = ssrc1 > ssrc2;
11330         break;
11331       case "whilehi_p_p_rr"_h:
11332         cond = usrc1 > usrc2;
11333         break;
11334       case "whilehs_p_p_rr"_h:
11335         cond = usrc1 >= usrc2;
11336         break;
11337       default:
11338         VIXL_UNIMPLEMENTED();
11339         break;
11340     }
11341     last = last && cond;
11342     LogicPRegister dst(pd);
11343     int lane = reverse ? ((lane_count - 1) - i) : i;
11344     dst.SetActive(vform, lane, last);
11345     usrc1 += reverse ? -1 : 1;
11346   }
11347 
11348   PredTest(vform, GetPTrue(), pd);
11349   LogSystemRegister(NZCV);
11350 }
11351 
11352 void Simulator::VisitSVEConditionallyTerminateScalars(
11353     const Instruction* instr) {
11354   unsigned rn_code = instr->GetRn();
11355   unsigned rm_code = instr->GetRm();
11356   bool is_64_bit = instr->ExtractBit(22) == 1;
11357   uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code);
11358   uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
11359   bool term = false;
11360   switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) {
11361     case CTERMEQ_rr:
11362       term = src1 == src2;
11363       break;
11364     case CTERMNE_rr:
11365       term = src1 != src2;
11366       break;
11367     default:
11368       VIXL_UNIMPLEMENTED();
11369       break;
11370   }
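  // Set N when the termination condition holds; otherwise set V if the C flag
  // is clear.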
11371   ReadNzcv().SetN(term ? 1 : 0);
11372   ReadNzcv().SetV(term ? 0 : !ReadC());
11373   LogSystemRegister(NZCV);
11374 }
11375 
11376 void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) {
11377   bool commute_inputs = false;
11378   Condition cond = al;
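  // CMPLE and CMPLT are implemented as CMPGE and CMPGT with the operands
  // commuted.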
11379   switch (instr->Mask(SVEIntCompareSignedImmMask)) {
11380     case CMPEQ_p_p_zi:
11381       cond = eq;
11382       break;
11383     case CMPGE_p_p_zi:
11384       cond = ge;
11385       break;
11386     case CMPGT_p_p_zi:
11387       cond = gt;
11388       break;
11389     case CMPLE_p_p_zi:
11390       cond = ge;
11391       commute_inputs = true;
11392       break;
11393     case CMPLT_p_p_zi:
11394       cond = gt;
11395       commute_inputs = true;
11396       break;
11397     case CMPNE_p_p_zi:
11398       cond = ne;
11399       break;
11400     default:
11401       VIXL_UNIMPLEMENTED();
11402       break;
11403   }
11404 
11405   VectorFormat vform = instr->GetSVEVectorFormat();
11406   SimVRegister src2;
11407   dup_immediate(vform,
11408                 src2,
11409                 ExtractSignedBitfield64(4, 0, instr->ExtractBits(20, 16)));
11410   SVEIntCompareVectorsHelper(cond,
11411                              vform,
11412                              ReadPRegister(instr->GetPd()),
11413                              ReadPRegister(instr->GetPgLow8()),
11414                              commute_inputs ? src2
11415                                             : ReadVRegister(instr->GetRn()),
11416                              commute_inputs ? ReadVRegister(instr->GetRn())
11417                                             : src2);
11418 }
11419 
11420 void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) {
11421   bool commute_inputs = false;
11422   Condition cond = al;
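  // CMPLO and CMPLS are implemented as CMPHI and CMPHS with the operands
  // commuted.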
11423   switch (instr->Mask(SVEIntCompareUnsignedImmMask)) {
11424     case CMPHI_p_p_zi:
11425       cond = hi;
11426       break;
11427     case CMPHS_p_p_zi:
11428       cond = hs;
11429       break;
11430     case CMPLO_p_p_zi:
11431       cond = hi;
11432       commute_inputs = true;
11433       break;
11434     case CMPLS_p_p_zi:
11435       cond = hs;
11436       commute_inputs = true;
11437       break;
11438     default:
11439       VIXL_UNIMPLEMENTED();
11440       break;
11441   }
11442 
11443   VectorFormat vform = instr->GetSVEVectorFormat();
11444   SimVRegister src2;
11445   dup_immediate(vform, src2, instr->ExtractBits(20, 14));
11446   SVEIntCompareVectorsHelper(cond,
11447                              vform,
11448                              ReadPRegister(instr->GetPd()),
11449                              ReadPRegister(instr->GetPgLow8()),
11450                              commute_inputs ? src2
11451                                             : ReadVRegister(instr->GetRn()),
11452                              commute_inputs ? ReadVRegister(instr->GetRn())
11453                                             : src2);
11454 }
11455 
11456 void Simulator::VisitSVEIntCompareVectors(const Instruction* instr) {
11457   Instr op = instr->Mask(SVEIntCompareVectorsMask);
11458   bool is_wide_elements = false;
11459   switch (op) {
11460     case CMPEQ_p_p_zw:
11461     case CMPGE_p_p_zw:
11462     case CMPGT_p_p_zw:
11463     case CMPHI_p_p_zw:
11464     case CMPHS_p_p_zw:
11465     case CMPLE_p_p_zw:
11466     case CMPLO_p_p_zw:
11467     case CMPLS_p_p_zw:
11468     case CMPLT_p_p_zw:
11469     case CMPNE_p_p_zw:
11470       is_wide_elements = true;
11471       break;
11472   }
11473 
11474   Condition cond;
11475   switch (op) {
11476     case CMPEQ_p_p_zw:
11477     case CMPEQ_p_p_zz:
11478       cond = eq;
11479       break;
11480     case CMPGE_p_p_zw:
11481     case CMPGE_p_p_zz:
11482       cond = ge;
11483       break;
11484     case CMPGT_p_p_zw:
11485     case CMPGT_p_p_zz:
11486       cond = gt;
11487       break;
11488     case CMPHI_p_p_zw:
11489     case CMPHI_p_p_zz:
11490       cond = hi;
11491       break;
11492     case CMPHS_p_p_zw:
11493     case CMPHS_p_p_zz:
11494       cond = hs;
11495       break;
11496     case CMPNE_p_p_zw:
11497     case CMPNE_p_p_zz:
11498       cond = ne;
11499       break;
11500     case CMPLE_p_p_zw:
11501       cond = le;
11502       break;
11503     case CMPLO_p_p_zw:
11504       cond = lo;
11505       break;
11506     case CMPLS_p_p_zw:
11507       cond = ls;
11508       break;
11509     case CMPLT_p_p_zw:
11510       cond = lt;
11511       break;
11512     default:
11513       VIXL_UNIMPLEMENTED();
11514       cond = al;
11515       break;
11516   }
11517 
11518   SVEIntCompareVectorsHelper(cond,
11519                              instr->GetSVEVectorFormat(),
11520                              ReadPRegister(instr->GetPd()),
11521                              ReadPRegister(instr->GetPgLow8()),
11522                              ReadVRegister(instr->GetRn()),
11523                              ReadVRegister(instr->GetRm()),
11524                              is_wide_elements);
11525 }
11526 
11527 void Simulator::VisitSVEFPExponentialAccelerator(const Instruction* instr) {
11528   VectorFormat vform = instr->GetSVEVectorFormat();
11529   SimVRegister& zd = ReadVRegister(instr->GetRd());
11530   SimVRegister& zn = ReadVRegister(instr->GetRn());
11531 
11532   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
11533               (vform == kFormatVnD));
11534 
11535   switch (instr->Mask(SVEFPExponentialAcceleratorMask)) {
11536     case FEXPA_z_z:
11537       fexpa(vform, zd, zn);
11538       break;
11539     default:
11540       VIXL_UNIMPLEMENTED();
11541       break;
11542   }
11543 }
11544 
11545 void Simulator::VisitSVEFPTrigSelectCoefficient(const Instruction* instr) {
11546   VectorFormat vform = instr->GetSVEVectorFormat();
11547   SimVRegister& zd = ReadVRegister(instr->GetRd());
11548   SimVRegister& zn = ReadVRegister(instr->GetRn());
11549   SimVRegister& zm = ReadVRegister(instr->GetRm());
11550 
11551   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
11552               (vform == kFormatVnD));
11553 
11554   switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) {
11555     case FTSSEL_z_zz:
11556       ftssel(vform, zd, zn, zm);
11557       break;
11558     default:
11559       VIXL_UNIMPLEMENTED();
11560       break;
11561   }
11562 }
11563 
11564 void Simulator::VisitSVEConstructivePrefix_Unpredicated(
11565     const Instruction* instr) {
11566   SimVRegister& zd = ReadVRegister(instr->GetRd());
11567   SimVRegister& zn = ReadVRegister(instr->GetRn());
11568 
11569   switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) {
11570     case MOVPRFX_z_z:
11571       mov(kFormatVnD, zd, zn);  // The lane size is arbitrary.
11572       break;
11573     default:
11574       VIXL_UNIMPLEMENTED();
11575       break;
11576   }
11577 }
11578 
11579 void Simulator::VisitSVEIntMulAddPredicated(const Instruction* instr) {
11580   VectorFormat vform = instr->GetSVEVectorFormat();
11581 
11582   SimVRegister& zd = ReadVRegister(instr->GetRd());
11583   SimVRegister& zm = ReadVRegister(instr->GetRm());
11584 
11585   SimVRegister result;
11586   switch (instr->Mask(SVEIntMulAddPredicatedMask)) {
11587     case MLA_z_p_zzz:
11588       mla(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
11589       break;
11590     case MLS_z_p_zzz:
11591       mls(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
11592       break;
11593     case MAD_z_p_zzz:
11594       // 'za' is encoded in 'Rn'.
11595       mla(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
11596       break;
11597     case MSB_z_p_zzz: {
11598       // 'za' is encoded in 'Rn'.
11599       mls(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
11600       break;
11601     }
11602     default:
11603       VIXL_UNIMPLEMENTED();
11604       break;
11605   }
11606   mov_merging(vform, zd, ReadPRegister(instr->GetPgLow8()), result);
11607 }
11608 
11609 void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) {
11610   VectorFormat vform = instr->GetSVEVectorFormat();
11611   SimVRegister& zda = ReadVRegister(instr->GetRd());
11612   SimVRegister& zn = ReadVRegister(instr->GetRn());
11613   SimVRegister& zm = ReadVRegister(instr->GetRm());
11614 
11615   switch (form_hash_) {
11616     case "sdot_z_zzz"_h:
11617       sdot(vform, zda, zn, zm);
11618       break;
11619     case "udot_z_zzz"_h:
11620       udot(vform, zda, zn, zm);
11621       break;
11622     case "usdot_z_zzz_s"_h:
11623       usdot(vform, zda, zn, zm);
11624       break;
11625     default:
11626       VIXL_UNIMPLEMENTED();
11627       break;
11628   }
11629 }
11630 
11631 void Simulator::VisitSVEMovprfx(const Instruction* instr) {
11632   VectorFormat vform = instr->GetSVEVectorFormat();
11633   SimVRegister& zn = ReadVRegister(instr->GetRn());
11634   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11635   SimVRegister& zd = ReadVRegister(instr->GetRd());
11636 
11637   switch (instr->Mask(SVEMovprfxMask)) {
11638     case MOVPRFX_z_p_z:
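      // Bit 16 set selects the merging (/m) form; otherwise the inactive
      // lanes are zeroed (/z).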
11639       if (instr->ExtractBit(16)) {
11640         mov_merging(vform, zd, pg, zn);
11641       } else {
11642         mov_zeroing(vform, zd, pg, zn);
11643       }
11644       break;
11645     default:
11646       VIXL_UNIMPLEMENTED();
11647       break;
11648   }
11649 }
11650 
11651 void Simulator::VisitSVEIntReduction(const Instruction* instr) {
11652   VectorFormat vform = instr->GetSVEVectorFormat();
11653   SimVRegister& vd = ReadVRegister(instr->GetRd());
11654   SimVRegister& zn = ReadVRegister(instr->GetRn());
11655   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11656 
11657   if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) {
11658     switch (instr->Mask(SVEIntReductionLogicalMask)) {
11659       case ANDV_r_p_z:
11660         andv(vform, vd, pg, zn);
11661         break;
11662       case EORV_r_p_z:
11663         eorv(vform, vd, pg, zn);
11664         break;
11665       case ORV_r_p_z:
11666         orv(vform, vd, pg, zn);
11667         break;
11668       default:
11669         VIXL_UNIMPLEMENTED();
11670         break;
11671     }
11672   } else {
11673     switch (instr->Mask(SVEIntReductionMask)) {
11674       case SADDV_r_p_z:
11675         saddv(vform, vd, pg, zn);
11676         break;
11677       case SMAXV_r_p_z:
11678         smaxv(vform, vd, pg, zn);
11679         break;
11680       case SMINV_r_p_z:
11681         sminv(vform, vd, pg, zn);
11682         break;
11683       case UADDV_r_p_z:
11684         uaddv(vform, vd, pg, zn);
11685         break;
11686       case UMAXV_r_p_z:
11687         umaxv(vform, vd, pg, zn);
11688         break;
11689       case UMINV_r_p_z:
11690         uminv(vform, vd, pg, zn);
11691         break;
11692       default:
11693         VIXL_UNIMPLEMENTED();
11694         break;
11695     }
11696   }
11697 }
11698 
11699 void Simulator::VisitSVEIntUnaryArithmeticPredicated(const Instruction* instr) {
11700   VectorFormat vform = instr->GetSVEVectorFormat();
11701   SimVRegister& zn = ReadVRegister(instr->GetRn());
11702 
11703   SimVRegister result;
11704   switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) {
11705     case ABS_z_p_z:
11706       abs(vform, result, zn);
11707       break;
11708     case CLS_z_p_z:
11709       cls(vform, result, zn);
11710       break;
11711     case CLZ_z_p_z:
11712       clz(vform, result, zn);
11713       break;
11714     case CNOT_z_p_z:
11715       cnot(vform, result, zn);
11716       break;
11717     case CNT_z_p_z:
11718       cnt(vform, result, zn);
11719       break;
11720     case FABS_z_p_z:
11721       fabs_(vform, result, zn);
11722       break;
11723     case FNEG_z_p_z:
11724       fneg(vform, result, zn);
11725       break;
11726     case NEG_z_p_z:
11727       neg(vform, result, zn);
11728       break;
11729     case NOT_z_p_z:
11730       not_(vform, result, zn);
11731       break;
11732     case SXTB_z_p_z:
11733     case SXTH_z_p_z:
11734     case SXTW_z_p_z:
11735       sxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
11736       break;
11737     case UXTB_z_p_z:
11738     case UXTH_z_p_z:
11739     case UXTW_z_p_z:
11740       uxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
11741       break;
11742     default:
11743       VIXL_UNIMPLEMENTED();
11744       break;
11745   }
11746 
11747   SimVRegister& zd = ReadVRegister(instr->GetRd());
11748   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11749   mov_merging(vform, zd, pg, result);
11750 }
11751 
11752 void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) {
11753   // There is only one instruction in this group.
11754   VIXL_ASSERT(instr->Mask(SVECopyFPImm_PredicatedMask) == FCPY_z_p_i);
11755 
11756   VectorFormat vform = instr->GetSVEVectorFormat();
11757   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
11758   SimVRegister& zd = ReadVRegister(instr->GetRd());
11759 
11760   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
11761 
11762   SimVRegister result;
11763   switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
11764     case FCPY_z_p_i: {
11765       int imm8 = instr->ExtractBits(12, 5);
11766       uint64_t value = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform),
11767                                            Instruction::Imm8ToFP64(imm8));
11768       dup_immediate(vform, result, value);
11769       break;
11770     }
11771     default:
11772       VIXL_UNIMPLEMENTED();
11773       break;
11774   }
11775   mov_merging(vform, zd, pg, result);
11776 }
11777 
11778 void Simulator::VisitSVEIntAddSubtractImm_Unpredicated(
11779     const Instruction* instr) {
11780   VectorFormat vform = instr->GetSVEVectorFormat();
11781   SimVRegister& zd = ReadVRegister(instr->GetRd());
11782   SimVRegister scratch;
11783 
11784   uint64_t imm = instr->GetImmSVEIntWideUnsigned();
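  // Bit 13 is the shift flag: when set, the immediate is shifted left by 8.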
11785   imm <<= instr->ExtractBit(13) * 8;
11786 
11787   switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
11788     case ADD_z_zi:
11789       add_uint(vform, zd, zd, imm);
11790       break;
11791     case SQADD_z_zi:
11792       add_uint(vform, zd, zd, imm).SignedSaturate(vform);
11793       break;
11794     case SQSUB_z_zi:
11795       sub_uint(vform, zd, zd, imm).SignedSaturate(vform);
11796       break;
11797     case SUBR_z_zi:
11798       dup_immediate(vform, scratch, imm);
11799       sub(vform, zd, scratch, zd);
11800       break;
11801     case SUB_z_zi:
11802       sub_uint(vform, zd, zd, imm);
11803       break;
11804     case UQADD_z_zi:
11805       add_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
11806       break;
11807     case UQSUB_z_zi:
11808       sub_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
11809       break;
11810     default:
      VIXL_UNIMPLEMENTED();
      break;
11812   }
11813 }
11814 
11815 void Simulator::VisitSVEBroadcastIntImm_Unpredicated(const Instruction* instr) {
11816   SimVRegister& zd = ReadVRegister(instr->GetRd());
11817 
11818   VectorFormat format = instr->GetSVEVectorFormat();
11819   int64_t imm = instr->GetImmSVEIntWideSigned();
11820   int shift = instr->ExtractBit(13) * 8;
11821   imm *= 1 << shift;
11822 
11823   switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) {
11824     case DUP_z_i:
11825       // The encoding of byte-sized lanes with lsl #8 is undefined.
11826       if ((format == kFormatVnB) && (shift == 8)) {
11827         VIXL_UNIMPLEMENTED();
11828       } else {
11829         dup_immediate(format, zd, imm);
11830       }
11831       break;
11832     default:
11833       VIXL_UNIMPLEMENTED();
11834       break;
11835   }
11836 }
11837 
11838 void Simulator::VisitSVEBroadcastFPImm_Unpredicated(const Instruction* instr) {
11839   VectorFormat vform = instr->GetSVEVectorFormat();
11840   SimVRegister& zd = ReadVRegister(instr->GetRd());
11841 
11842   switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) {
11843     case FDUP_z_i:
11844       switch (vform) {
11845         case kFormatVnH:
11846           dup_immediate(vform, zd, Float16ToRawbits(instr->GetSVEImmFP16()));
11847           break;
11848         case kFormatVnS:
11849           dup_immediate(vform, zd, FloatToRawbits(instr->GetSVEImmFP32()));
11850           break;
11851         case kFormatVnD:
11852           dup_immediate(vform, zd, DoubleToRawbits(instr->GetSVEImmFP64()));
11853           break;
11854         default:
11855           VIXL_UNIMPLEMENTED();
11856       }
11857       break;
11858     default:
11859       VIXL_UNIMPLEMENTED();
11860       break;
11861   }
11862 }
11863 
11864 void Simulator::VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets(
11865     const Instruction* instr) {
11866   switch (instr->Mask(
11867       SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) {
11868     case LD1H_z_p_bz_s_x32_scaled:
11869     case LD1SH_z_p_bz_s_x32_scaled:
11870     case LDFF1H_z_p_bz_s_x32_scaled:
11871     case LDFF1SH_z_p_bz_s_x32_scaled:
11872       break;
11873     default:
11874       VIXL_UNIMPLEMENTED();
11875       break;
11876   }
11877 
11878   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11879   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11880 }
11881 
11882 void Simulator::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets(
11883     const Instruction* instr) {
11884   switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) {
11885     case LD1B_z_p_bz_s_x32_unscaled:
11886     case LD1H_z_p_bz_s_x32_unscaled:
11887     case LD1SB_z_p_bz_s_x32_unscaled:
11888     case LD1SH_z_p_bz_s_x32_unscaled:
11889     case LD1W_z_p_bz_s_x32_unscaled:
11890     case LDFF1B_z_p_bz_s_x32_unscaled:
11891     case LDFF1H_z_p_bz_s_x32_unscaled:
11892     case LDFF1SB_z_p_bz_s_x32_unscaled:
11893     case LDFF1SH_z_p_bz_s_x32_unscaled:
11894     case LDFF1W_z_p_bz_s_x32_unscaled:
11895       break;
11896     default:
11897       VIXL_UNIMPLEMENTED();
11898       break;
11899   }
11900 
11901   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11902   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11903 }
11904 
11905 void Simulator::VisitSVE32BitGatherLoad_VectorPlusImm(
11906     const Instruction* instr) {
11907   switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) {
11908     case LD1B_z_p_ai_s:
11909       VIXL_UNIMPLEMENTED();
11910       break;
11911     case LD1H_z_p_ai_s:
11912       VIXL_UNIMPLEMENTED();
11913       break;
11914     case LD1SB_z_p_ai_s:
11915       VIXL_UNIMPLEMENTED();
11916       break;
11917     case LD1SH_z_p_ai_s:
11918       VIXL_UNIMPLEMENTED();
11919       break;
11920     case LD1W_z_p_ai_s:
11921       VIXL_UNIMPLEMENTED();
11922       break;
11923     case LDFF1B_z_p_ai_s:
11924       VIXL_UNIMPLEMENTED();
11925       break;
11926     case LDFF1H_z_p_ai_s:
11927       VIXL_UNIMPLEMENTED();
11928       break;
11929     case LDFF1SB_z_p_ai_s:
11930       VIXL_UNIMPLEMENTED();
11931       break;
11932     case LDFF1SH_z_p_ai_s:
11933       VIXL_UNIMPLEMENTED();
11934       break;
11935     case LDFF1W_z_p_ai_s:
11936       VIXL_UNIMPLEMENTED();
11937       break;
11938     default:
11939       VIXL_UNIMPLEMENTED();
11940       break;
11941   }
11942 }
11943 
11944 void Simulator::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets(
11945     const Instruction* instr) {
11946   switch (
11947       instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) {
11948     case LD1W_z_p_bz_s_x32_scaled:
11949     case LDFF1W_z_p_bz_s_x32_scaled:
11950       break;
11951     default:
11952       VIXL_UNIMPLEMENTED();
11953       break;
11954   }
11955 
11956   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11957   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11958 }
11959 
11960 void Simulator::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets(
11961     const Instruction* instr) {
11962   switch (
11963       instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) {
11964     // Ignore prefetch hint instructions.
11965     case PRFB_i_p_bz_s_x32_scaled:
11966     case PRFD_i_p_bz_s_x32_scaled:
11967     case PRFH_i_p_bz_s_x32_scaled:
11968     case PRFW_i_p_bz_s_x32_scaled:
11969       break;
11970     default:
11971       VIXL_UNIMPLEMENTED();
11972       break;
11973   }
11974 }
11975 
11976 void Simulator::VisitSVE32BitGatherPrefetch_VectorPlusImm(
11977     const Instruction* instr) {
11978   switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) {
11979     // Ignore prefetch hint instructions.
11980     case PRFB_i_p_ai_s:
11981     case PRFD_i_p_ai_s:
11982     case PRFH_i_p_ai_s:
11983     case PRFW_i_p_ai_s:
11984       break;
11985     default:
11986       VIXL_UNIMPLEMENTED();
11987       break;
11988   }
11989 }
11990 
11991 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusImm(
11992     const Instruction* instr) {
11993   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) {
11994     // Ignore prefetch hint instructions.
11995     case PRFB_i_p_bi_s:
11996     case PRFD_i_p_bi_s:
11997     case PRFH_i_p_bi_s:
11998     case PRFW_i_p_bi_s:
11999       break;
12000     default:
12001       VIXL_UNIMPLEMENTED();
12002       break;
12003   }
12004 }
12005 
12006 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusScalar(
12007     const Instruction* instr) {
12008   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) {
12009     // Ignore prefetch hint instructions.
12010     case PRFB_i_p_br_s:
12011     case PRFD_i_p_br_s:
12012     case PRFH_i_p_br_s:
12013     case PRFW_i_p_br_s:
12014       if (instr->GetRm() == kZeroRegCode) {
12015         VIXL_UNIMPLEMENTED();
12016       }
12017       break;
12018     default:
12019       VIXL_UNIMPLEMENTED();
12020       break;
12021   }
12022 }
12023 
12024 void Simulator::VisitSVELoadAndBroadcastElement(const Instruction* instr) {
12025   bool is_signed;
12026   switch (instr->Mask(SVELoadAndBroadcastElementMask)) {
12027     case LD1RB_z_p_bi_u8:
12028     case LD1RB_z_p_bi_u16:
12029     case LD1RB_z_p_bi_u32:
12030     case LD1RB_z_p_bi_u64:
12031     case LD1RH_z_p_bi_u16:
12032     case LD1RH_z_p_bi_u32:
12033     case LD1RH_z_p_bi_u64:
12034     case LD1RW_z_p_bi_u32:
12035     case LD1RW_z_p_bi_u64:
12036     case LD1RD_z_p_bi_u64:
12037       is_signed = false;
12038       break;
12039     case LD1RSB_z_p_bi_s16:
12040     case LD1RSB_z_p_bi_s32:
12041     case LD1RSB_z_p_bi_s64:
12042     case LD1RSH_z_p_bi_s32:
12043     case LD1RSH_z_p_bi_s64:
12044     case LD1RSW_z_p_bi_s64:
12045       is_signed = true;
12046       break;
12047     default:
12048       // This encoding group is complete, so no other values should be possible.
12049       VIXL_UNREACHABLE();
12050       is_signed = false;
12051       break;
12052   }
12053 
12054   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12055   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed, 13);
12056   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12057   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12058   uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2;
12059   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset;
12060   VectorFormat unpack_vform =
12061       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
12062   SimVRegister temp;
12063   ld1r(vform, unpack_vform, temp, base, is_signed);
12064   mov_zeroing(vform,
12065               ReadVRegister(instr->GetRt()),
12066               ReadPRegister(instr->GetPgLow8()),
12067               temp);
12068 }
12069 
12070 void Simulator::VisitSVELoadPredicateRegister(const Instruction* instr) {
12071   switch (instr->Mask(SVELoadPredicateRegisterMask)) {
12072     case LDR_p_bi: {
12073       SimPRegister& pt = ReadPRegister(instr->GetPt());
12074       int pl = GetPredicateLengthInBytes();
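      // The signed 9-bit offset is split across the encoding (bits 21:16 and
      // 12:10) and is scaled by the predicate length in bytes.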
12075       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
12076       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
12077       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12078       uint64_t address = base + multiplier * pl;
12079       for (int i = 0; i < pl; i++) {
12080         pt.Insert(i, MemRead<uint8_t>(address + i));
12081       }
12082       LogPRead(instr->GetPt(), address);
12083       break;
12084     }
12085     default:
12086       VIXL_UNIMPLEMENTED();
12087       break;
12088   }
12089 }
12090 
12091 void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) {
12092   switch (instr->Mask(SVELoadVectorRegisterMask)) {
12093     case LDR_z_bi: {
12094       SimVRegister& zt = ReadVRegister(instr->GetRt());
12095       int vl = GetVectorLengthInBytes();
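      // The signed 9-bit offset is split across the encoding (bits 21:16 and
      // 12:10) and is scaled by the vector length in bytes.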
12096       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
12097       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
12098       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12099       uint64_t address = base + multiplier * vl;
12100       for (int i = 0; i < vl; i++) {
12101         zt.Insert(i, MemRead<uint8_t>(address + i));
12102       }
12103       LogZRead(instr->GetRt(), address);
12104       break;
12105     }
12106     default:
12107       VIXL_UNIMPLEMENTED();
12108       break;
12109   }
12110 }
12111 
12112 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
12113     const Instruction* instr) {
12114   switch (instr->Mask(
12115       SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
12116     case LD1D_z_p_bz_d_x32_scaled:
12117     case LD1H_z_p_bz_d_x32_scaled:
12118     case LD1SH_z_p_bz_d_x32_scaled:
12119     case LD1SW_z_p_bz_d_x32_scaled:
12120     case LD1W_z_p_bz_d_x32_scaled:
12121     case LDFF1H_z_p_bz_d_x32_scaled:
12122     case LDFF1W_z_p_bz_d_x32_scaled:
12123     case LDFF1D_z_p_bz_d_x32_scaled:
12124     case LDFF1SH_z_p_bz_d_x32_scaled:
12125     case LDFF1SW_z_p_bz_d_x32_scaled:
12126       break;
12127     default:
12128       VIXL_UNIMPLEMENTED();
12129       break;
12130   }
12131 
12132   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
12133   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
12134 }
12135 
12136 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
12137     const Instruction* instr) {
12138   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
12139     case LD1D_z_p_bz_d_64_scaled:
12140     case LD1H_z_p_bz_d_64_scaled:
12141     case LD1SH_z_p_bz_d_64_scaled:
12142     case LD1SW_z_p_bz_d_64_scaled:
12143     case LD1W_z_p_bz_d_64_scaled:
12144     case LDFF1H_z_p_bz_d_64_scaled:
12145     case LDFF1W_z_p_bz_d_64_scaled:
12146     case LDFF1D_z_p_bz_d_64_scaled:
12147     case LDFF1SH_z_p_bz_d_64_scaled:
12148     case LDFF1SW_z_p_bz_d_64_scaled:
12149       break;
12150     default:
12151       VIXL_UNIMPLEMENTED();
12152       break;
12153   }
12154 
12155   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, SVE_LSL);
12156 }
12157 
12158 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
12159     const Instruction* instr) {
12160   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
12161     case LD1B_z_p_bz_d_64_unscaled:
12162     case LD1D_z_p_bz_d_64_unscaled:
12163     case LD1H_z_p_bz_d_64_unscaled:
12164     case LD1SB_z_p_bz_d_64_unscaled:
12165     case LD1SH_z_p_bz_d_64_unscaled:
12166     case LD1SW_z_p_bz_d_64_unscaled:
12167     case LD1W_z_p_bz_d_64_unscaled:
12168     case LDFF1B_z_p_bz_d_64_unscaled:
12169     case LDFF1D_z_p_bz_d_64_unscaled:
12170     case LDFF1H_z_p_bz_d_64_unscaled:
12171     case LDFF1SB_z_p_bz_d_64_unscaled:
12172     case LDFF1SH_z_p_bz_d_64_unscaled:
12173     case LDFF1SW_z_p_bz_d_64_unscaled:
12174     case LDFF1W_z_p_bz_d_64_unscaled:
12175       break;
12176     default:
12177       VIXL_UNIMPLEMENTED();
12178       break;
12179   }
12180 
12181   SVEGatherLoadScalarPlusVectorHelper(instr,
12182                                       kFormatVnD,
12183                                       NO_SVE_OFFSET_MODIFIER);
12184 }
12185 
12186 void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
12187     const Instruction* instr) {
12188   switch (instr->Mask(
12189       SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
12190     case LD1B_z_p_bz_d_x32_unscaled:
12191     case LD1D_z_p_bz_d_x32_unscaled:
12192     case LD1H_z_p_bz_d_x32_unscaled:
12193     case LD1SB_z_p_bz_d_x32_unscaled:
12194     case LD1SH_z_p_bz_d_x32_unscaled:
12195     case LD1SW_z_p_bz_d_x32_unscaled:
12196     case LD1W_z_p_bz_d_x32_unscaled:
12197     case LDFF1B_z_p_bz_d_x32_unscaled:
12198     case LDFF1H_z_p_bz_d_x32_unscaled:
12199     case LDFF1W_z_p_bz_d_x32_unscaled:
12200     case LDFF1D_z_p_bz_d_x32_unscaled:
12201     case LDFF1SB_z_p_bz_d_x32_unscaled:
12202     case LDFF1SH_z_p_bz_d_x32_unscaled:
12203     case LDFF1SW_z_p_bz_d_x32_unscaled:
12204       break;
12205     default:
12206       VIXL_UNIMPLEMENTED();
12207       break;
12208   }
12209 
12210   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
12211   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
12212 }
12213 
12214 void Simulator::VisitSVE64BitGatherLoad_VectorPlusImm(
12215     const Instruction* instr) {
12216   switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) {
12217     case LD1B_z_p_ai_d:
12218     case LD1D_z_p_ai_d:
12219     case LD1H_z_p_ai_d:
12220     case LD1SB_z_p_ai_d:
12221     case LD1SH_z_p_ai_d:
12222     case LD1SW_z_p_ai_d:
12223     case LD1W_z_p_ai_d:
12224     case LDFF1B_z_p_ai_d:
12225     case LDFF1D_z_p_ai_d:
12226     case LDFF1H_z_p_ai_d:
12227     case LDFF1SB_z_p_ai_d:
12228     case LDFF1SH_z_p_ai_d:
12229     case LDFF1SW_z_p_ai_d:
12230     case LDFF1W_z_p_ai_d:
12231       break;
12232     default:
12233       VIXL_UNIMPLEMENTED();
12234       break;
12235   }
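  // Bit 14 clear indicates a sign-extending load; bit 13 set indicates a
  // first-fault (LDFF1*) form.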
12236   bool is_signed = instr->ExtractBit(14) == 0;
12237   bool is_ff = instr->ExtractBit(13) == 1;
12238   // Note that these instructions don't use the Dtype encoding.
12239   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
12240   uint64_t imm = instr->ExtractBits(20, 16) << msize_in_bytes_log2;
12241   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
12242   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12243   if (is_ff) {
12244     VIXL_UNIMPLEMENTED();
12245   } else {
12246     SVEStructuredLoadHelper(kFormatVnD,
12247                             ReadPRegister(instr->GetPgLow8()),
12248                             instr->GetRt(),
12249                             addr,
12250                             is_signed);
12251   }
12252 }
12253 
12254 void Simulator::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets(
12255     const Instruction* instr) {
12256   switch (
12257       instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) {
12258     // Ignore prefetch hint instructions.
12259     case PRFB_i_p_bz_d_64_scaled:
12260     case PRFD_i_p_bz_d_64_scaled:
12261     case PRFH_i_p_bz_d_64_scaled:
12262     case PRFW_i_p_bz_d_64_scaled:
12263       break;
12264     default:
12265       VIXL_UNIMPLEMENTED();
12266       break;
12267   }
12268 }
12269 
12270 void Simulator::
12271     VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets(
12272         const Instruction* instr) {
12273   switch (instr->Mask(
12274       SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
12275     // Ignore prefetch hint instructions.
12276     case PRFB_i_p_bz_d_x32_scaled:
12277     case PRFD_i_p_bz_d_x32_scaled:
12278     case PRFH_i_p_bz_d_x32_scaled:
12279     case PRFW_i_p_bz_d_x32_scaled:
12280       break;
12281     default:
12282       VIXL_UNIMPLEMENTED();
12283       break;
12284   }
12285 }
12286 
12287 void Simulator::VisitSVE64BitGatherPrefetch_VectorPlusImm(
12288     const Instruction* instr) {
12289   switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) {
12290     // Ignore prefetch hint instructions.
12291     case PRFB_i_p_ai_d:
12292     case PRFD_i_p_ai_d:
12293     case PRFH_i_p_ai_d:
12294     case PRFW_i_p_ai_d:
12295       break;
12296     default:
12297       VIXL_UNIMPLEMENTED();
12298       break;
12299   }
12300 }
12301 
12302 void Simulator::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar(
12303     const Instruction* instr) {
12304   bool is_signed;
12305   switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
12306     case LDFF1B_z_p_br_u8:
12307     case LDFF1B_z_p_br_u16:
12308     case LDFF1B_z_p_br_u32:
12309     case LDFF1B_z_p_br_u64:
12310     case LDFF1H_z_p_br_u16:
12311     case LDFF1H_z_p_br_u32:
12312     case LDFF1H_z_p_br_u64:
12313     case LDFF1W_z_p_br_u32:
12314     case LDFF1W_z_p_br_u64:
12315     case LDFF1D_z_p_br_u64:
12316       is_signed = false;
12317       break;
12318     case LDFF1SB_z_p_br_s16:
12319     case LDFF1SB_z_p_br_s32:
12320     case LDFF1SB_z_p_br_s64:
12321     case LDFF1SH_z_p_br_s32:
12322     case LDFF1SH_z_p_br_s64:
12323     case LDFF1SW_z_p_br_s64:
12324       is_signed = true;
12325       break;
12326     default:
12327       // This encoding group is complete, so no other values should be possible.
12328       VIXL_UNREACHABLE();
12329       is_signed = false;
12330       break;
12331   }
12332 
12333   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12334   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
12335   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12336   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12337   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12338   uint64_t offset = ReadXRegister(instr->GetRm());
12339   offset <<= msize_in_bytes_log2;
12340   LogicSVEAddressVector addr(base + offset);
12341   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12342   SVEFaultTolerantLoadHelper(vform,
12343                              ReadPRegister(instr->GetPgLow8()),
12344                              instr->GetRt(),
12345                              addr,
12346                              kSVEFirstFaultLoad,
12347                              is_signed);
12348 }
12349 
12350 void Simulator::VisitSVEContiguousNonFaultLoad_ScalarPlusImm(
12351     const Instruction* instr) {
12352   bool is_signed = false;
12353   switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) {
12354     case LDNF1B_z_p_bi_u16:
12355     case LDNF1B_z_p_bi_u32:
12356     case LDNF1B_z_p_bi_u64:
12357     case LDNF1B_z_p_bi_u8:
12358     case LDNF1D_z_p_bi_u64:
12359     case LDNF1H_z_p_bi_u16:
12360     case LDNF1H_z_p_bi_u32:
12361     case LDNF1H_z_p_bi_u64:
12362     case LDNF1W_z_p_bi_u32:
12363     case LDNF1W_z_p_bi_u64:
12364       break;
12365     case LDNF1SB_z_p_bi_s16:
12366     case LDNF1SB_z_p_bi_s32:
12367     case LDNF1SB_z_p_bi_s64:
12368     case LDNF1SH_z_p_bi_s32:
12369     case LDNF1SH_z_p_bi_s64:
12370     case LDNF1SW_z_p_bi_s64:
12371       is_signed = true;
12372       break;
12373     default:
12374       VIXL_UNIMPLEMENTED();
12375       break;
12376   }
12377   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12378   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
12379   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12380   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12381   int vl = GetVectorLengthInBytes();
12382   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
12383   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12384   uint64_t offset =
12385       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
12386   LogicSVEAddressVector addr(base + offset);
12387   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12388   SVEFaultTolerantLoadHelper(vform,
12389                              ReadPRegister(instr->GetPgLow8()),
12390                              instr->GetRt(),
12391                              addr,
12392                              kSVENonFaultLoad,
12393                              is_signed);
12394 }
12395 
12396 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm(
12397     const Instruction* instr) {
12398   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12399   VectorFormat vform = kFormatUndefined;
12400 
12401   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) {
12402     case LDNT1B_z_p_bi_contiguous:
12403       vform = kFormatVnB;
12404       break;
12405     case LDNT1D_z_p_bi_contiguous:
12406       vform = kFormatVnD;
12407       break;
12408     case LDNT1H_z_p_bi_contiguous:
12409       vform = kFormatVnH;
12410       break;
12411     case LDNT1W_z_p_bi_contiguous:
12412       vform = kFormatVnS;
12413       break;
12414     default:
12415       VIXL_UNIMPLEMENTED();
12416       break;
12417   }
12418   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12419   int vl = GetVectorLengthInBytes();
12420   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12421   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
12422   LogicSVEAddressVector addr(base + offset);
12423   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12424   SVEStructuredLoadHelper(vform,
12425                           pg,
12426                           instr->GetRt(),
12427                           addr,
12428                           /* is_signed = */ false);
12429 }
12430 
12431 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar(
12432     const Instruction* instr) {
12433   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12434   VectorFormat vform = kFormatUndefined;
12435 
12436   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) {
12437     case LDNT1B_z_p_br_contiguous:
12438       vform = kFormatVnB;
12439       break;
12440     case LDNT1D_z_p_br_contiguous:
12441       vform = kFormatVnD;
12442       break;
12443     case LDNT1H_z_p_br_contiguous:
12444       vform = kFormatVnH;
12445       break;
12446     case LDNT1W_z_p_br_contiguous:
12447       vform = kFormatVnS;
12448       break;
12449     default:
12450       VIXL_UNIMPLEMENTED();
12451       break;
12452   }
12453   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12454   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12455   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
12456   LogicSVEAddressVector addr(base + offset);
12457   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12458   SVEStructuredLoadHelper(vform,
12459                           pg,
12460                           instr->GetRt(),
12461                           addr,
12462                           /* is_signed = */ false);
12463 }
12464 
12465 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm(
12466     const Instruction* instr) {
12467   SimVRegister& zt = ReadVRegister(instr->GetRt());
12468   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12469 
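  // LD1RQ* replicates a 16-byte block and LD1RO* a 32-byte block: the block
  // is loaded, inactive elements are zeroed under pg, and the result is
  // broadcast to fill the whole vector.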
12470   uint64_t dwords = 2;
12471   VectorFormat vform_dst = kFormatVnQ;
12472   if ((form_hash_ == "ld1rob_z_p_bi_u8"_h) ||
12473       (form_hash_ == "ld1roh_z_p_bi_u16"_h) ||
12474       (form_hash_ == "ld1row_z_p_bi_u32"_h) ||
12475       (form_hash_ == "ld1rod_z_p_bi_u64"_h)) {
12476     dwords = 4;
12477     vform_dst = kFormatVnO;
12478   }
12479 
12480   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12481   uint64_t offset =
12482       instr->ExtractSignedBits(19, 16) * dwords * kDRegSizeInBytes;
12483   int msz = instr->ExtractBits(24, 23);
12484   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12485 
12486   for (unsigned i = 0; i < dwords; i++) {
12487     ld1(kFormatVnD, zt, i, addr + offset + (i * kDRegSizeInBytes));
12488   }
12489   mov_zeroing(vform, zt, pg, zt);
12490   dup_element(vform_dst, zt, zt, 0);
12491 }
12492 
12493 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar(
12494     const Instruction* instr) {
12495   SimVRegister& zt = ReadVRegister(instr->GetRt());
12496   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12497 
12498   uint64_t bytes = 16;
12499   VectorFormat vform_dst = kFormatVnQ;
12500   if ((form_hash_ == "ld1rob_z_p_br_contiguous"_h) ||
12501       (form_hash_ == "ld1roh_z_p_br_contiguous"_h) ||
12502       (form_hash_ == "ld1row_z_p_br_contiguous"_h) ||
12503       (form_hash_ == "ld1rod_z_p_br_contiguous"_h)) {
12504     bytes = 32;
12505     vform_dst = kFormatVnO;
12506   }
12507 
12508   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12509   uint64_t offset = ReadXRegister(instr->GetRm());
12510   int msz = instr->ExtractBits(24, 23);
12511   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12512   offset <<= msz;
12513   for (unsigned i = 0; i < bytes; i++) {
12514     ld1(kFormatVnB, zt, i, addr + offset + i);
12515   }
12516   mov_zeroing(vform, zt, pg, zt);
12517   dup_element(vform_dst, zt, zt, 0);
12518 }
12519 
12520 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm(
12521     const Instruction* instr) {
12522   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) {
12523     case LD2B_z_p_bi_contiguous:
12524     case LD2D_z_p_bi_contiguous:
12525     case LD2H_z_p_bi_contiguous:
12526     case LD2W_z_p_bi_contiguous:
12527     case LD3B_z_p_bi_contiguous:
12528     case LD3D_z_p_bi_contiguous:
12529     case LD3H_z_p_bi_contiguous:
12530     case LD3W_z_p_bi_contiguous:
12531     case LD4B_z_p_bi_contiguous:
12532     case LD4D_z_p_bi_contiguous:
12533     case LD4H_z_p_bi_contiguous:
12534     case LD4W_z_p_bi_contiguous: {
12535       int vl = GetVectorLengthInBytes();
12536       int msz = instr->ExtractBits(24, 23);
12537       int reg_count = instr->ExtractBits(22, 21) + 1;
12538       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
12539       LogicSVEAddressVector addr(
12540           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12541       addr.SetMsizeInBytesLog2(msz);
12542       addr.SetRegCount(reg_count);
12543       SVEStructuredLoadHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
12544                               ReadPRegister(instr->GetPgLow8()),
12545                               instr->GetRt(),
12546                               addr);
12547       break;
12548     }
12549     default:
12550       VIXL_UNIMPLEMENTED();
12551       break;
12552   }
12553 }
12554 
12555 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusScalar(
12556     const Instruction* instr) {
12557   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) {
12558     case LD2B_z_p_br_contiguous:
12559     case LD2D_z_p_br_contiguous:
12560     case LD2H_z_p_br_contiguous:
12561     case LD2W_z_p_br_contiguous:
12562     case LD3B_z_p_br_contiguous:
12563     case LD3D_z_p_br_contiguous:
12564     case LD3H_z_p_br_contiguous:
12565     case LD3W_z_p_br_contiguous:
12566     case LD4B_z_p_br_contiguous:
12567     case LD4D_z_p_br_contiguous:
12568     case LD4H_z_p_br_contiguous:
12569     case LD4W_z_p_br_contiguous: {
12570       int msz = instr->ExtractBits(24, 23);
12571       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
12572       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12573       LogicSVEAddressVector addr(
12574           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12575       addr.SetMsizeInBytesLog2(msz);
12576       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
12577       SVEStructuredLoadHelper(vform,
12578                               ReadPRegister(instr->GetPgLow8()),
12579                               instr->GetRt(),
12580                               addr,
12581                               false);
12582       break;
12583     }
12584     default:
12585       VIXL_UNIMPLEMENTED();
12586       break;
12587   }
12588 }
12589 
12590 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
12591     const Instruction* instr) {
12592   switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
12593     case ST1H_z_p_bz_s_x32_scaled:
12594     case ST1W_z_p_bz_s_x32_scaled: {
12595       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12596       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12597       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12598       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12599       SVEOffsetModifier mod =
12600           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12601       LogicSVEAddressVector addr(base,
12602                                  &ReadVRegister(instr->GetRm()),
12603                                  kFormatVnS,
12604                                  mod,
12605                                  scale);
12606       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12607       SVEStructuredStoreHelper(kFormatVnS,
12608                                ReadPRegister(instr->GetPgLow8()),
12609                                instr->GetRt(),
12610                                addr);
12611       break;
12612     }
12613     default:
12614       VIXL_UNIMPLEMENTED();
12615       break;
12616   }
12617 }
12618 
12619 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
12620     const Instruction* instr) {
12621   switch (
12622       instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
12623     case ST1B_z_p_bz_s_x32_unscaled:
12624     case ST1H_z_p_bz_s_x32_unscaled:
12625     case ST1W_z_p_bz_s_x32_unscaled: {
12626       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12627       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12628       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12629       SVEOffsetModifier mod =
12630           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12631       LogicSVEAddressVector addr(base,
12632                                  &ReadVRegister(instr->GetRm()),
12633                                  kFormatVnS,
12634                                  mod);
12635       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12636       SVEStructuredStoreHelper(kFormatVnS,
12637                                ReadPRegister(instr->GetPgLow8()),
12638                                instr->GetRt(),
12639                                addr);
12640       break;
12641     }
12642     default:
12643       VIXL_UNIMPLEMENTED();
12644       break;
12645   }
12646 }
12647 
12648 void Simulator::VisitSVE32BitScatterStore_VectorPlusImm(
12649     const Instruction* instr) {
12650   int msz = 0;
12651   switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) {
12652     case ST1B_z_p_ai_s:
12653       msz = 0;
12654       break;
12655     case ST1H_z_p_ai_s:
12656       msz = 1;
12657       break;
12658     case ST1W_z_p_ai_s:
12659       msz = 2;
12660       break;
12661     default:
12662       VIXL_UNIMPLEMENTED();
12663       break;
12664   }
12665   uint64_t imm = instr->ExtractBits(20, 16) << msz;
12666   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnS);
12667   addr.SetMsizeInBytesLog2(msz);
12668   SVEStructuredStoreHelper(kFormatVnS,
12669                            ReadPRegister(instr->GetPgLow8()),
12670                            instr->GetRt(),
12671                            addr);
12672 }
12673 
12674 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets(
12675     const Instruction* instr) {
12676   switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) {
12677     case ST1D_z_p_bz_d_64_scaled:
12678     case ST1H_z_p_bz_d_64_scaled:
12679     case ST1W_z_p_bz_d_64_scaled: {
12680       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12681       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12682       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12683       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12684       LogicSVEAddressVector addr(base,
12685                                  &ReadVRegister(instr->GetRm()),
12686                                  kFormatVnD,
12687                                  SVE_LSL,
12688                                  scale);
12689       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12690       SVEStructuredStoreHelper(kFormatVnD,
12691                                ReadPRegister(instr->GetPgLow8()),
12692                                instr->GetRt(),
12693                                addr);
12694       break;
12695     }
12696     default:
12697       VIXL_UNIMPLEMENTED();
12698       break;
12699   }
12700 }
12701 
12702 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets(
12703     const Instruction* instr) {
12704   switch (
12705       instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) {
12706     case ST1B_z_p_bz_d_64_unscaled:
12707     case ST1D_z_p_bz_d_64_unscaled:
12708     case ST1H_z_p_bz_d_64_unscaled:
12709     case ST1W_z_p_bz_d_64_unscaled: {
12710       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12711       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12712       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12713       LogicSVEAddressVector addr(base,
12714                                  &ReadVRegister(instr->GetRm()),
12715                                  kFormatVnD,
12716                                  NO_SVE_OFFSET_MODIFIER);
12717       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12718       SVEStructuredStoreHelper(kFormatVnD,
12719                                ReadPRegister(instr->GetPgLow8()),
12720                                instr->GetRt(),
12721                                addr);
12722       break;
12723     }
12724     default:
12725       VIXL_UNIMPLEMENTED();
12726       break;
12727   }
12728 }
12729 
12730 void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
12731     const Instruction* instr) {
12732   switch (instr->Mask(
12733       SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
12734     case ST1D_z_p_bz_d_x32_scaled:
12735     case ST1H_z_p_bz_d_x32_scaled:
12736     case ST1W_z_p_bz_d_x32_scaled: {
12737       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12738       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12739       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12740       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12741       SVEOffsetModifier mod =
12742           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12743       LogicSVEAddressVector addr(base,
12744                                  &ReadVRegister(instr->GetRm()),
12745                                  kFormatVnD,
12746                                  mod,
12747                                  scale);
12748       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12749       SVEStructuredStoreHelper(kFormatVnD,
12750                                ReadPRegister(instr->GetPgLow8()),
12751                                instr->GetRt(),
12752                                addr);
12753       break;
12754     }
12755     default:
12756       VIXL_UNIMPLEMENTED();
12757       break;
12758   }
12759 }
12760 
12761 void Simulator::
12762     VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets(
12763         const Instruction* instr) {
12764   switch (instr->Mask(
12765       SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
12766     case ST1B_z_p_bz_d_x32_unscaled:
12767     case ST1D_z_p_bz_d_x32_unscaled:
12768     case ST1H_z_p_bz_d_x32_unscaled:
12769     case ST1W_z_p_bz_d_x32_unscaled: {
12770       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12771       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12772       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12773       SVEOffsetModifier mod =
12774           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12775       LogicSVEAddressVector addr(base,
12776                                  &ReadVRegister(instr->GetRm()),
12777                                  kFormatVnD,
12778                                  mod);
12779       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12780       SVEStructuredStoreHelper(kFormatVnD,
12781                                ReadPRegister(instr->GetPgLow8()),
12782                                instr->GetRt(),
12783                                addr);
12784       break;
12785     }
12786     default:
12787       VIXL_UNIMPLEMENTED();
12788       break;
12789   }
12790 }
12791 
12792 void Simulator::VisitSVE64BitScatterStore_VectorPlusImm(
12793     const Instruction* instr) {
12794   int msz = 0;
12795   switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) {
12796     case ST1B_z_p_ai_d:
12797       msz = 0;
12798       break;
12799     case ST1D_z_p_ai_d:
12800       msz = 3;
12801       break;
12802     case ST1H_z_p_ai_d:
12803       msz = 1;
12804       break;
12805     case ST1W_z_p_ai_d:
12806       msz = 2;
12807       break;
12808     default:
12809       VIXL_UNIMPLEMENTED();
12810       break;
12811   }
12812   uint64_t imm = instr->ExtractBits(20, 16) << msz;
12813   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
12814   addr.SetMsizeInBytesLog2(msz);
12815   SVEStructuredStoreHelper(kFormatVnD,
12816                            ReadPRegister(instr->GetPgLow8()),
12817                            instr->GetRt(),
12818                            addr);
12819 }
12820 
12821 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusImm(
12822     const Instruction* instr) {
12823   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12824   VectorFormat vform = kFormatUndefined;
12825 
12826   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) {
12827     case STNT1B_z_p_bi_contiguous:
12828       vform = kFormatVnB;
12829       break;
12830     case STNT1D_z_p_bi_contiguous:
12831       vform = kFormatVnD;
12832       break;
12833     case STNT1H_z_p_bi_contiguous:
12834       vform = kFormatVnH;
12835       break;
12836     case STNT1W_z_p_bi_contiguous:
12837       vform = kFormatVnS;
12838       break;
12839     default:
12840       VIXL_UNIMPLEMENTED();
12841       break;
12842   }
12843   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12844   int vl = GetVectorLengthInBytes();
12845   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12846   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
12847   LogicSVEAddressVector addr(base + offset);
12848   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12849   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
12850 }
12851 
12852 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar(
12853     const Instruction* instr) {
12854   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12855   VectorFormat vform = kFormatUndefined;
12856 
12857   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) {
12858     case STNT1B_z_p_br_contiguous:
12859       vform = kFormatVnB;
12860       break;
12861     case STNT1D_z_p_br_contiguous:
12862       vform = kFormatVnD;
12863       break;
12864     case STNT1H_z_p_br_contiguous:
12865       vform = kFormatVnH;
12866       break;
12867     case STNT1W_z_p_br_contiguous:
12868       vform = kFormatVnS;
12869       break;
12870     default:
12871       VIXL_UNIMPLEMENTED();
12872       break;
12873   }
12874   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12875   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12876   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
12877   LogicSVEAddressVector addr(base + offset);
12878   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12879   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
12880 }
12881 
12882 void Simulator::VisitSVEContiguousStore_ScalarPlusImm(
12883     const Instruction* instr) {
12884   switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) {
12885     case ST1B_z_p_bi:
12886     case ST1D_z_p_bi:
12887     case ST1H_z_p_bi:
12888     case ST1W_z_p_bi: {
12889       int vl = GetVectorLengthInBytes();
12890       int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12891       int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false);
12892       VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
12893       int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
12894       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
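      // The immediate is a multiple of the memory footprint of one register:
      // (VL / esize) elements of msize bytes each, which is less than VL for
      // narrowing stores.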
12895       uint64_t offset =
12896           (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
12897       VectorFormat vform =
12898           SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12899       LogicSVEAddressVector addr(base + offset);
12900       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12901       SVEStructuredStoreHelper(vform,
12902                                ReadPRegister(instr->GetPgLow8()),
12903                                instr->GetRt(),
12904                                addr);
12905       break;
12906     }
12907     default:
12908       VIXL_UNIMPLEMENTED();
12909       break;
12910   }
12911 }
12912 
12913 void Simulator::VisitSVEContiguousStore_ScalarPlusScalar(
12914     const Instruction* instr) {
12915   switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) {
12916     case ST1B_z_p_br:
12917     case ST1D_z_p_br:
12918     case ST1H_z_p_br:
12919     case ST1W_z_p_br: {
12920       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12921       uint64_t offset = ReadXRegister(instr->GetRm());
12922       offset <<= instr->ExtractBits(24, 23);
12923       VectorFormat vform =
12924           SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21));
12925       LogicSVEAddressVector addr(base + offset);
12926       addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23));
12927       SVEStructuredStoreHelper(vform,
12928                                ReadPRegister(instr->GetPgLow8()),
12929                                instr->GetRt(),
12930                                addr);
12931       break;
12932     }
12933     default:
12934       VIXL_UNIMPLEMENTED();
12935       break;
12936   }
12937 }
12938 
12939 void Simulator::VisitSVECopySIMDFPScalarRegisterToVector_Predicated(
12940     const Instruction* instr) {
12941   VectorFormat vform = instr->GetSVEVectorFormat();
12942   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12943   SimVRegister z_result;
12944 
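  // CPY (SIMD&FP scalar, predicated) broadcasts element 0 of Vn to the active
  // elements of Zd, leaving inactive elements unchanged.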
12945   switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) {
12946     case CPY_z_p_v:
12947       dup_element(vform, z_result, ReadVRegister(instr->GetRn()), 0);
12948       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
12949       break;
12950     default:
12951       VIXL_UNIMPLEMENTED();
12952       break;
12953   }
12954 }
12955 
12956 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusImm(
12957     const Instruction* instr) {
12958   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) {
12959     case ST2B_z_p_bi_contiguous:
12960     case ST2D_z_p_bi_contiguous:
12961     case ST2H_z_p_bi_contiguous:
12962     case ST2W_z_p_bi_contiguous:
12963     case ST3B_z_p_bi_contiguous:
12964     case ST3D_z_p_bi_contiguous:
12965     case ST3H_z_p_bi_contiguous:
12966     case ST3W_z_p_bi_contiguous:
12967     case ST4B_z_p_bi_contiguous:
12968     case ST4D_z_p_bi_contiguous:
12969     case ST4H_z_p_bi_contiguous:
12970     case ST4W_z_p_bi_contiguous: {
12971       int vl = GetVectorLengthInBytes();
12972       int msz = instr->ExtractBits(24, 23);
12973       int reg_count = instr->ExtractBits(22, 21) + 1;
12974       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
12975       LogicSVEAddressVector addr(
12976           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12977       addr.SetMsizeInBytesLog2(msz);
12978       addr.SetRegCount(reg_count);
12979       SVEStructuredStoreHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
12980                                ReadPRegister(instr->GetPgLow8()),
12981                                instr->GetRt(),
12982                                addr);
12983       break;
12984     }
12985     default:
12986       VIXL_UNIMPLEMENTED();
12987       break;
12988   }
12989 }
12990 
12991 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusScalar(
12992     const Instruction* instr) {
12993   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) {
12994     case ST2B_z_p_br_contiguous:
12995     case ST2D_z_p_br_contiguous:
12996     case ST2H_z_p_br_contiguous:
12997     case ST2W_z_p_br_contiguous:
12998     case ST3B_z_p_br_contiguous:
12999     case ST3D_z_p_br_contiguous:
13000     case ST3H_z_p_br_contiguous:
13001     case ST3W_z_p_br_contiguous:
13002     case ST4B_z_p_br_contiguous:
13003     case ST4D_z_p_br_contiguous:
13004     case ST4H_z_p_br_contiguous:
13005     case ST4W_z_p_br_contiguous: {
13006       int msz = instr->ExtractBits(24, 23);
13007       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
13008       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
13009       LogicSVEAddressVector addr(
13010           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
13011       addr.SetMsizeInBytesLog2(msz);
13012       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
13013       SVEStructuredStoreHelper(vform,
13014                                ReadPRegister(instr->GetPgLow8()),
13015                                instr->GetRt(),
13016                                addr);
13017       break;
13018     }
13019     default:
13020       VIXL_UNIMPLEMENTED();
13021       break;
13022   }
13023 }
13024 
13025 void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) {
13026   switch (instr->Mask(SVEStorePredicateRegisterMask)) {
13027     case STR_p_bi: {
13028       SimPRegister& pt = ReadPRegister(instr->GetPt());
13029       int pl = GetPredicateLengthInBytes();
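      // The 9-bit signed immediate is split across two fields and is scaled
      // by the predicate length in bytes.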
13030       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
13031       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
13032       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
13033       uint64_t address = base + multiplier * pl;
13034       for (int i = 0; i < pl; i++) {
13035         MemWrite(address + i, pt.GetLane<uint8_t>(i));
13036       }
13037       LogPWrite(instr->GetPt(), address);
13038       break;
13039     }
13040     default:
13041       VIXL_UNIMPLEMENTED();
13042       break;
13043   }
13044 }
13045 
13046 void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) {
13047   switch (instr->Mask(SVEStoreVectorRegisterMask)) {
13048     case STR_z_bi: {
13049       SimVRegister& zt = ReadVRegister(instr->GetRt());
13050       int vl = GetVectorLengthInBytes();
13051       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
13052       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
13053       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
13054       uint64_t address = base + multiplier * vl;
13055       for (int i = 0; i < vl; i++) {
13056         MemWrite(address + i, zt.GetLane<uint8_t>(i));
13057       }
13058       LogZWrite(instr->GetRt(), address);
13059       break;
13060     }
13061     default:
13062       VIXL_UNIMPLEMENTED();
13063       break;
13064   }
13065 }
13066 
13067 void Simulator::VisitSVEMulIndex(const Instruction* instr) {
13068   VectorFormat vform = instr->GetSVEVectorFormat();
13069   SimVRegister& zda = ReadVRegister(instr->GetRd());
13070   SimVRegister& zn = ReadVRegister(instr->GetRn());
13071   std::pair<int, int> zm_and_index = instr->GetSVEMulZmAndIndex();
13072   SimVRegister zm = ReadVRegister(zm_and_index.first);
13073   int index = zm_and_index.second;
13074 
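  // Within each 128-bit segment of zm, replicate the element at `index`
  // across the whole segment, so the dot products below work per segment.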
13075   SimVRegister temp;
13076   dup_elements_to_segments(vform, temp, zm, index);
13077 
13078   switch (form_hash_) {
13079     case "sdot_z_zzzi_d"_h:
13080     case "sdot_z_zzzi_s"_h:
13081       sdot(vform, zda, zn, temp);
13082       break;
13083     case "udot_z_zzzi_d"_h:
13084     case "udot_z_zzzi_s"_h:
13085       udot(vform, zda, zn, temp);
13086       break;
13087     case "sudot_z_zzzi_s"_h:
13088       usdot(vform, zda, temp, zn);
13089       break;
13090     case "usdot_z_zzzi_s"_h:
13091       usdot(vform, zda, zn, temp);
13092       break;
13093     default:
13094       VIXL_UNIMPLEMENTED();
13095       break;
13096   }
13097 }
13098 
13099 void Simulator::SimulateMatrixMul(const Instruction* instr) {
13100   VectorFormat vform = kFormatVnS;
13101   SimVRegister& dn = ReadVRegister(instr->GetRd());
13102   SimVRegister& n = ReadVRegister(instr->GetRn());
13103   SimVRegister& m = ReadVRegister(instr->GetRm());
13104 
13105   bool n_signed = false;
13106   bool m_signed = false;
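  // MMLA multiplies a 2x8 matrix of (un)signed bytes from the first source by
  // an 8x2 matrix from the second, accumulating the 2x2 result into the
  // 32-bit destination elements. For SVE this is done per 128-bit segment;
  // the *_asimdsame2_g forms are the 128-bit NEON variants.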
13107   switch (form_hash_) {
13108     case "smmla_asimdsame2_g"_h:
13109       vform = kFormat4S;
13110       VIXL_FALLTHROUGH();
13111     case "smmla_z_zzz"_h:
13112       n_signed = m_signed = true;
13113       break;
13114     case "ummla_asimdsame2_g"_h:
13115       vform = kFormat4S;
13116       VIXL_FALLTHROUGH();
13117     case "ummla_z_zzz"_h:
13118       // Nothing to do.
13119       break;
13120     case "usmmla_asimdsame2_g"_h:
13121       vform = kFormat4S;
13122       VIXL_FALLTHROUGH();
13123     case "usmmla_z_zzz"_h:
13124       m_signed = true;
13125       break;
13126     default:
13127       VIXL_UNIMPLEMENTED();
13128       break;
13129   }
13130   matmul(vform, dn, n, m, n_signed, m_signed);
13131 }
13132 
13133 void Simulator::SimulateSVEFPMatrixMul(const Instruction* instr) {
13134   VectorFormat vform = instr->GetSVEVectorFormat();
13135   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13136   SimVRegister& zn = ReadVRegister(instr->GetRn());
13137   SimVRegister& zm = ReadVRegister(instr->GetRm());
13138 
13139   switch (form_hash_) {
13140     case "fmmla_z_zzz_s"_h:
13141     case "fmmla_z_zzz_d"_h:
13142       fmatmul(vform, zdn, zn, zm);
13143       break;
13144     default:
13145       VIXL_UNIMPLEMENTED();
13146       break;
13147   }
13148 }
13149 
13150 void Simulator::VisitSVEPartitionBreakCondition(const Instruction* instr) {
13151   SimPRegister& pd = ReadPRegister(instr->GetPd());
13152   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13153   SimPRegister& pn = ReadPRegister(instr->GetPn());
13154   SimPRegister result;
13155 
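  // BRKA sets elements up to and including the first true active element of
  // pn; BRKB sets only the elements strictly before it.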
13156   switch (instr->Mask(SVEPartitionBreakConditionMask)) {
13157     case BRKAS_p_p_p_z:
13158     case BRKA_p_p_p:
13159       brka(result, pg, pn);
13160       break;
13161     case BRKBS_p_p_p_z:
13162     case BRKB_p_p_p:
13163       brkb(result, pg, pn);
13164       break;
13165     default:
13166       VIXL_UNIMPLEMENTED();
13167       break;
13168   }
13169 
13170   if (instr->ExtractBit(4) == 1) {
13171     mov_merging(pd, pg, result);
13172   } else {
13173     mov_zeroing(pd, pg, result);
13174   }
13175 
  // Set the NZCV flags if needed.
13177   if (instr->ExtractBit(22) == 1) {
13178     PredTest(kFormatVnB, pg, pd);
13179   }
13180 }
13181 
13182 void Simulator::VisitSVEPropagateBreakToNextPartition(
13183     const Instruction* instr) {
13184   SimPRegister& pdm = ReadPRegister(instr->GetPd());
13185   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13186   SimPRegister& pn = ReadPRegister(instr->GetPn());
13187 
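  // BRKN leaves pdm unchanged if the last active element of pn is true;
  // otherwise it sets pdm to all-false.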
13188   switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) {
13189     case BRKNS_p_p_pp:
13190     case BRKN_p_p_pp:
13191       brkn(pdm, pg, pn);
13192       break;
13193     default:
13194       VIXL_UNIMPLEMENTED();
13195       break;
13196   }
13197 
  // Set the NZCV flags if needed.
13199   if (instr->ExtractBit(22) == 1) {
13200     // Note that this ignores `pg`.
13201     PredTest(kFormatVnB, GetPTrue(), pdm);
13202   }
13203 }
13204 
13205 void Simulator::VisitSVEUnpackPredicateElements(const Instruction* instr) {
13206   SimPRegister& pd = ReadPRegister(instr->GetPd());
13207   SimPRegister& pn = ReadPRegister(instr->GetPn());
13208 
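  // Expand pn to a byte-per-lane vector, interleave the selected half with
  // zeroes (doubling each element's width), then repack the result into
  // predicate form.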
13209   SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
13210   SimVRegister zero;
13211   dup_immediate(kFormatVnB, zero, 0);
13212 
13213   switch (instr->Mask(SVEUnpackPredicateElementsMask)) {
13214     case PUNPKHI_p_p:
13215       zip2(kFormatVnB, temp, temp, zero);
13216       break;
13217     case PUNPKLO_p_p:
13218       zip1(kFormatVnB, temp, temp, zero);
13219       break;
13220     default:
13221       VIXL_UNIMPLEMENTED();
13222       break;
13223   }
13224   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
13225 }
13226 
13227 void Simulator::VisitSVEPermutePredicateElements(const Instruction* instr) {
13228   VectorFormat vform = instr->GetSVEVectorFormat();
13229   SimPRegister& pd = ReadPRegister(instr->GetPd());
13230   SimPRegister& pn = ReadPRegister(instr->GetPn());
13231   SimPRegister& pm = ReadPRegister(instr->GetPm());
13232 
13233   SimVRegister temp0 = Simulator::ExpandToSimVRegister(pn);
13234   SimVRegister temp1 = Simulator::ExpandToSimVRegister(pm);
13235 
13236   switch (instr->Mask(SVEPermutePredicateElementsMask)) {
13237     case TRN1_p_pp:
13238       trn1(vform, temp0, temp0, temp1);
13239       break;
13240     case TRN2_p_pp:
13241       trn2(vform, temp0, temp0, temp1);
13242       break;
13243     case UZP1_p_pp:
13244       uzp1(vform, temp0, temp0, temp1);
13245       break;
13246     case UZP2_p_pp:
13247       uzp2(vform, temp0, temp0, temp1);
13248       break;
13249     case ZIP1_p_pp:
13250       zip1(vform, temp0, temp0, temp1);
13251       break;
13252     case ZIP2_p_pp:
13253       zip2(vform, temp0, temp0, temp1);
13254       break;
13255     default:
13256       VIXL_UNIMPLEMENTED();
13257       break;
13258   }
13259   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp0);
13260 }
13261 
13262 void Simulator::VisitSVEReversePredicateElements(const Instruction* instr) {
13263   switch (instr->Mask(SVEReversePredicateElementsMask)) {
13264     case REV_p_p: {
13265       VectorFormat vform = instr->GetSVEVectorFormat();
13266       SimPRegister& pn = ReadPRegister(instr->GetPn());
13267       SimPRegister& pd = ReadPRegister(instr->GetPd());
13268       SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
13269       rev(vform, temp, temp);
13270       Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
13271       break;
13272     }
13273     default:
13274       VIXL_UNIMPLEMENTED();
13275       break;
13276   }
13277 }
13278 
13279 void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) {
13280   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13281   // Second source register "Zm" is encoded where "Zn" would usually be.
13282   SimVRegister& zm = ReadVRegister(instr->GetRn());
13283 
13284   int index = instr->GetSVEExtractImmediate();
13285   int vl = GetVectorLengthInBytes();
13286   index = (index >= vl) ? 0 : index;
13287 
13288   switch (instr->Mask(SVEPermuteVectorExtractMask)) {
13289     case EXT_z_zi_des:
13290       ext(kFormatVnB, zdn, zdn, zm, index);
13291       break;
13292     default:
13293       VIXL_UNIMPLEMENTED();
13294       break;
13295   }
13296 }
13297 
13298 void Simulator::VisitSVEPermuteVectorInterleaving(const Instruction* instr) {
13299   VectorFormat vform = instr->GetSVEVectorFormat();
13300   SimVRegister& zd = ReadVRegister(instr->GetRd());
13301   SimVRegister& zn = ReadVRegister(instr->GetRn());
13302   SimVRegister& zm = ReadVRegister(instr->GetRm());
13303 
13304   switch (instr->Mask(SVEPermuteVectorInterleavingMask)) {
13305     case TRN1_z_zz:
13306       trn1(vform, zd, zn, zm);
13307       break;
13308     case TRN2_z_zz:
13309       trn2(vform, zd, zn, zm);
13310       break;
13311     case UZP1_z_zz:
13312       uzp1(vform, zd, zn, zm);
13313       break;
13314     case UZP2_z_zz:
13315       uzp2(vform, zd, zn, zm);
13316       break;
13317     case ZIP1_z_zz:
13318       zip1(vform, zd, zn, zm);
13319       break;
13320     case ZIP2_z_zz:
13321       zip2(vform, zd, zn, zm);
13322       break;
13323     default:
13324       VIXL_UNIMPLEMENTED();
13325       break;
13326   }
13327 }
13328 
13329 void Simulator::VisitSVEConditionallyBroadcastElementToVector(
13330     const Instruction* instr) {
13331   VectorFormat vform = instr->GetSVEVectorFormat();
13332   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13333   SimVRegister& zm = ReadVRegister(instr->GetRn());
13334   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13335 
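  // CLASTB broadcasts the last active element of zm (offset 0), CLASTA the
  // element following it (offset 1). With no active elements the destination
  // is left unchanged.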
13336   int active_offset = -1;
13337   switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) {
13338     case CLASTA_z_p_zz:
13339       active_offset = 1;
13340       break;
13341     case CLASTB_z_p_zz:
13342       active_offset = 0;
13343       break;
13344     default:
13345       VIXL_UNIMPLEMENTED();
13346       break;
13347   }
13348 
13349   if (active_offset >= 0) {
13350     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13351     if (value.first) {
13352       dup_immediate(vform, zdn, value.second);
13353     } else {
13354       // Trigger a line of trace for the operation, even though it doesn't
13355       // change the register value.
13356       mov(vform, zdn, zdn);
13357     }
13358   }
13359 }
13360 
13361 void Simulator::VisitSVEConditionallyExtractElementToSIMDFPScalar(
13362     const Instruction* instr) {
13363   VectorFormat vform = instr->GetSVEVectorFormat();
13364   SimVRegister& vdn = ReadVRegister(instr->GetRd());
13365   SimVRegister& zm = ReadVRegister(instr->GetRn());
13366   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13367 
13368   int active_offset = -1;
13369   switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) {
13370     case CLASTA_v_p_z:
13371       active_offset = 1;
13372       break;
13373     case CLASTB_v_p_z:
13374       active_offset = 0;
13375       break;
13376     default:
13377       VIXL_UNIMPLEMENTED();
13378       break;
13379   }
13380 
13381   if (active_offset >= 0) {
13382     LogicVRegister dst(vdn);
13383     uint64_t src1_value = dst.Uint(vform, 0);
13384     std::pair<bool, uint64_t> src2_value = clast(vform, pg, zm, active_offset);
13385     dup_immediate(vform, vdn, 0);
13386     dst.SetUint(vform, 0, src2_value.first ? src2_value.second : src1_value);
13387   }
13388 }
13389 
13390 void Simulator::VisitSVEConditionallyExtractElementToGeneralRegister(
13391     const Instruction* instr) {
13392   VectorFormat vform = instr->GetSVEVectorFormat();
13393   SimVRegister& zm = ReadVRegister(instr->GetRn());
13394   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13395 
13396   int active_offset = -1;
13397   switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) {
13398     case CLASTA_r_p_z:
13399       active_offset = 1;
13400       break;
13401     case CLASTB_r_p_z:
13402       active_offset = 0;
13403       break;
13404     default:
13405       VIXL_UNIMPLEMENTED();
13406       break;
13407   }
13408 
13409   if (active_offset >= 0) {
13410     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13411     uint64_t masked_src = ReadXRegister(instr->GetRd()) &
13412                           GetUintMask(LaneSizeInBitsFromFormat(vform));
13413     WriteXRegister(instr->GetRd(), value.first ? value.second : masked_src);
13414   }
13415 }
13416 
13417 void Simulator::VisitSVEExtractElementToSIMDFPScalarRegister(
13418     const Instruction* instr) {
13419   VectorFormat vform = instr->GetSVEVectorFormat();
13420   SimVRegister& vdn = ReadVRegister(instr->GetRd());
13421   SimVRegister& zm = ReadVRegister(instr->GetRn());
13422   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13423 
13424   int active_offset = -1;
13425   switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) {
13426     case LASTA_v_p_z:
13427       active_offset = 1;
13428       break;
13429     case LASTB_v_p_z:
13430       active_offset = 0;
13431       break;
13432     default:
13433       VIXL_UNIMPLEMENTED();
13434       break;
13435   }
13436 
13437   if (active_offset >= 0) {
13438     LogicVRegister dst(vdn);
13439     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13440     dup_immediate(vform, vdn, 0);
13441     dst.SetUint(vform, 0, value.second);
13442   }
13443 }
13444 
13445 void Simulator::VisitSVEExtractElementToGeneralRegister(
13446     const Instruction* instr) {
13447   VectorFormat vform = instr->GetSVEVectorFormat();
13448   SimVRegister& zm = ReadVRegister(instr->GetRn());
13449   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13450 
13451   int active_offset = -1;
13452   switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) {
13453     case LASTA_r_p_z:
13454       active_offset = 1;
13455       break;
13456     case LASTB_r_p_z:
13457       active_offset = 0;
13458       break;
13459     default:
13460       VIXL_UNIMPLEMENTED();
13461       break;
13462   }
13463 
13464   if (active_offset >= 0) {
13465     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13466     WriteXRegister(instr->GetRd(), value.second);
13467   }
13468 }
13469 
13470 void Simulator::VisitSVECompressActiveElements(const Instruction* instr) {
13471   VectorFormat vform = instr->GetSVEVectorFormat();
13472   SimVRegister& zd = ReadVRegister(instr->GetRd());
13473   SimVRegister& zn = ReadVRegister(instr->GetRn());
13474   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13475 
13476   switch (instr->Mask(SVECompressActiveElementsMask)) {
13477     case COMPACT_z_p_z:
13478       compact(vform, zd, pg, zn);
13479       break;
13480     default:
13481       VIXL_UNIMPLEMENTED();
13482       break;
13483   }
13484 }
13485 
13486 void Simulator::VisitSVECopyGeneralRegisterToVector_Predicated(
13487     const Instruction* instr) {
13488   VectorFormat vform = instr->GetSVEVectorFormat();
13489   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13490   SimVRegister z_result;
13491 
13492   switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) {
13493     case CPY_z_p_r:
13494       dup_immediate(vform,
13495                     z_result,
13496                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
13497       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
13498       break;
13499     default:
13500       VIXL_UNIMPLEMENTED();
13501       break;
13502   }
13503 }
13504 
13505 void Simulator::VisitSVECopyIntImm_Predicated(const Instruction* instr) {
13506   VectorFormat vform = instr->GetSVEVectorFormat();
13507   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
13508   SimVRegister& zd = ReadVRegister(instr->GetRd());
13509 
13510   SimVRegister result;
13511   switch (instr->Mask(SVECopyIntImm_PredicatedMask)) {
13512     case CPY_z_p_i: {
13513       // Use unsigned arithmetic to avoid undefined behaviour during the shift.
13514       uint64_t imm8 = instr->GetImmSVEIntWideSigned();
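      // Bit 13 is the shift bit: when set, the immediate is shifted left by
      // eight bits before being broadcast.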
13515       dup_immediate(vform, result, imm8 << (instr->ExtractBit(13) * 8));
13516       break;
13517     }
13518     default:
13519       VIXL_UNIMPLEMENTED();
13520       break;
13521   }
13522 
13523   if (instr->ExtractBit(14) != 0) {
13524     mov_merging(vform, zd, pg, result);
13525   } else {
13526     mov_zeroing(vform, zd, pg, result);
13527   }
13528 }
13529 
13530 void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) {
13531   SimVRegister& zd = ReadVRegister(instr->GetRd());
13532   SimVRegister& zn = ReadVRegister(instr->GetRn());
13533   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13534   SimVRegister result;
13535 
  // In NEON, the chunk size in which elements are REVersed is given by the
  // instruction mnemonic, and the element size is attached to the register.
  // SVE reverses these semantics; the mapping to the logic functions below
  // accounts for this.
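  // For example, REVH on D-sized chunks reverses the halfwords within each
  // doubleword, which maps to rev64 with halfword elements below.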
13540   VectorFormat chunk_form = instr->GetSVEVectorFormat();
13541   VectorFormat element_form = kFormatUndefined;
13542 
13543   switch (instr->Mask(SVEReverseWithinElementsMask)) {
13544     case RBIT_z_p_z:
13545       rbit(chunk_form, result, zn);
13546       break;
13547     case REVB_z_z:
13548       VIXL_ASSERT((chunk_form == kFormatVnH) || (chunk_form == kFormatVnS) ||
13549                   (chunk_form == kFormatVnD));
13550       element_form = kFormatVnB;
13551       break;
13552     case REVH_z_z:
13553       VIXL_ASSERT((chunk_form == kFormatVnS) || (chunk_form == kFormatVnD));
13554       element_form = kFormatVnH;
13555       break;
13556     case REVW_z_z:
13557       VIXL_ASSERT(chunk_form == kFormatVnD);
13558       element_form = kFormatVnS;
13559       break;
13560     default:
13561       VIXL_UNIMPLEMENTED();
13562       break;
13563   }
13564 
13565   if (instr->Mask(SVEReverseWithinElementsMask) != RBIT_z_p_z) {
13566     VIXL_ASSERT(element_form != kFormatUndefined);
13567     switch (chunk_form) {
13568       case kFormatVnH:
13569         rev16(element_form, result, zn);
13570         break;
13571       case kFormatVnS:
13572         rev32(element_form, result, zn);
13573         break;
13574       case kFormatVnD:
13575         rev64(element_form, result, zn);
13576         break;
13577       default:
13578         VIXL_UNIMPLEMENTED();
13579     }
13580   }
13581 
13582   mov_merging(chunk_form, zd, pg, result);
13583 }
13584 
13585 void Simulator::VisitSVEVectorSplice(const Instruction* instr) {
13586   VectorFormat vform = instr->GetSVEVectorFormat();
13587   SimVRegister& zd = ReadVRegister(instr->GetRd());
13588   SimVRegister& zn = ReadVRegister(instr->GetRn());
13589   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
13590   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13591 
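  // SPLICE copies the active segment of the first operand (from its first to
  // its last active element) to the low end of the result and fills any
  // remaining elements from the low elements of the second operand.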
13592   switch (form_hash_) {
13593     case "splice_z_p_zz_des"_h:
13594       splice(vform, zd, pg, zd, zn);
13595       break;
13596     case "splice_z_p_zz_con"_h:
13597       splice(vform, zd, pg, zn, zn2);
13598       break;
13599     default:
13600       VIXL_UNIMPLEMENTED();
13601       break;
13602   }
13603 }
13604 
13605 void Simulator::VisitSVEBroadcastGeneralRegister(const Instruction* instr) {
13606   SimVRegister& zd = ReadVRegister(instr->GetRd());
13607   switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) {
13608     case DUP_z_r:
13609       dup_immediate(instr->GetSVEVectorFormat(),
13610                     zd,
13611                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
13612       break;
13613     default:
13614       VIXL_UNIMPLEMENTED();
13615       break;
13616   }
13617 }
13618 
13619 void Simulator::VisitSVEInsertSIMDFPScalarRegister(const Instruction* instr) {
13620   SimVRegister& zd = ReadVRegister(instr->GetRd());
13621   VectorFormat vform = instr->GetSVEVectorFormat();
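  // INSR shifts the existing elements of Zdn up by one place and inserts the
  // new value at element 0.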
13622   switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
13623     case INSR_z_v:
13624       insr(vform, zd, ReadDRegisterBits(instr->GetRn()));
13625       break;
13626     default:
13627       VIXL_UNIMPLEMENTED();
13628       break;
13629   }
13630 }
13631 
13632 void Simulator::VisitSVEInsertGeneralRegister(const Instruction* instr) {
13633   SimVRegister& zd = ReadVRegister(instr->GetRd());
13634   VectorFormat vform = instr->GetSVEVectorFormat();
13635   switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
13636     case INSR_z_r:
13637       insr(vform, zd, ReadXRegister(instr->GetRn()));
13638       break;
13639     default:
13640       VIXL_UNIMPLEMENTED();
13641       break;
13642   }
13643 }
13644 
13645 void Simulator::VisitSVEBroadcastIndexElement(const Instruction* instr) {
13646   SimVRegister& zd = ReadVRegister(instr->GetRd());
13647   switch (instr->Mask(SVEBroadcastIndexElementMask)) {
13648     case DUP_z_zi: {
13649       std::pair<int, int> index_and_lane_size =
13650           instr->GetSVEPermuteIndexAndLaneSizeLog2();
13651       int index = index_and_lane_size.first;
13652       int lane_size_in_bytes_log_2 = index_and_lane_size.second;
13653       VectorFormat vform =
13654           SVEFormatFromLaneSizeInBytesLog2(lane_size_in_bytes_log_2);
13655       if ((index < 0) || (index >= LaneCountFromFormat(vform))) {
        // The index is out of bounds, so zero the destination register.
13657         dup_immediate(kFormatVnD, zd, 0);
13658       } else {
13659         dup_element(vform, zd, ReadVRegister(instr->GetRn()), index);
13660       }
13661       return;
13662     }
13663     default:
13664       VIXL_UNIMPLEMENTED();
13665       break;
13666   }
13667 }
13668 
13669 void Simulator::VisitSVEReverseVectorElements(const Instruction* instr) {
13670   SimVRegister& zd = ReadVRegister(instr->GetRd());
13671   VectorFormat vform = instr->GetSVEVectorFormat();
13672   switch (instr->Mask(SVEReverseVectorElementsMask)) {
13673     case REV_z_z:
13674       rev(vform, zd, ReadVRegister(instr->GetRn()));
13675       break;
13676     default:
13677       VIXL_UNIMPLEMENTED();
13678       break;
13679   }
13680 }
13681 
13682 void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) {
13683   SimVRegister& zd = ReadVRegister(instr->GetRd());
13684   VectorFormat vform = instr->GetSVEVectorFormat();
13685   switch (instr->Mask(SVEUnpackVectorElementsMask)) {
13686     case SUNPKHI_z_z:
13687       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kSignedExtend);
13688       break;
13689     case SUNPKLO_z_z:
13690       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kSignedExtend);
13691       break;
13692     case UUNPKHI_z_z:
13693       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kUnsignedExtend);
13694       break;
13695     case UUNPKLO_z_z:
13696       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kUnsignedExtend);
13697       break;
13698     default:
13699       VIXL_UNIMPLEMENTED();
13700       break;
13701   }
13702 }
13703 
13704 void Simulator::VisitSVETableLookup(const Instruction* instr) {
13705   VectorFormat vform = instr->GetSVEVectorFormat();
13706   SimVRegister& zd = ReadVRegister(instr->GetRd());
13707   SimVRegister& zn = ReadVRegister(instr->GetRn());
13708   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
13709   SimVRegister& zm = ReadVRegister(instr->GetRm());
13710 
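  // The two-register TBL form indexes into the table formed by the pair
  // {Zn, Zn+1}, with the register number wrapping modulo 32.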
13711   switch (form_hash_) {
13712     case "tbl_z_zz_1"_h:
13713       tbl(vform, zd, zn, zm);
13714       break;
13715     case "tbl_z_zz_2"_h:
13716       tbl(vform, zd, zn, zn2, zm);
13717       break;
13718     case "tbx_z_zz"_h:
13719       tbx(vform, zd, zn, zm);
13720       break;
13721     default:
13722       VIXL_UNIMPLEMENTED();
13723       break;
13724   }
13725 }
13726 
13727 void Simulator::VisitSVEPredicateCount(const Instruction* instr) {
13728   VectorFormat vform = instr->GetSVEVectorFormat();
13729   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13730   SimPRegister& pn = ReadPRegister(instr->GetPn());
13731 
13732   switch (instr->Mask(SVEPredicateCountMask)) {
13733     case CNTP_r_p_p: {
13734       WriteXRegister(instr->GetRd(), CountActiveAndTrueLanes(vform, pg, pn));
13735       break;
13736     }
13737     default:
13738       VIXL_UNIMPLEMENTED();
13739       break;
13740   }
13741 }
13742 
13743 void Simulator::VisitSVEPredicateLogical(const Instruction* instr) {
13744   Instr op = instr->Mask(SVEPredicateLogicalMask);
13745   SimPRegister& pd = ReadPRegister(instr->GetPd());
13746   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13747   SimPRegister& pn = ReadPRegister(instr->GetPn());
13748   SimPRegister& pm = ReadPRegister(instr->GetPm());
13749   SimPRegister result;
13750   switch (op) {
13751     case ANDS_p_p_pp_z:
13752     case AND_p_p_pp_z:
13753     case BICS_p_p_pp_z:
13754     case BIC_p_p_pp_z:
13755     case EORS_p_p_pp_z:
13756     case EOR_p_p_pp_z:
13757     case NANDS_p_p_pp_z:
13758     case NAND_p_p_pp_z:
13759     case NORS_p_p_pp_z:
13760     case NOR_p_p_pp_z:
13761     case ORNS_p_p_pp_z:
13762     case ORN_p_p_pp_z:
13763     case ORRS_p_p_pp_z:
13764     case ORR_p_p_pp_z:
13765       SVEPredicateLogicalHelper(static_cast<SVEPredicateLogicalOp>(op),
13766                                 result,
13767                                 pn,
13768                                 pm);
13769       break;
13770     case SEL_p_p_pp:
13771       sel(pd, pg, pn, pm);
13772       return;
13773     default:
13774       VIXL_UNIMPLEMENTED();
13775       break;
13776   }
13777 
13778   mov_zeroing(pd, pg, result);
13779   if (instr->Mask(SVEPredicateLogicalSetFlagsBit) != 0) {
13780     PredTest(kFormatVnB, pg, pd);
13781   }
13782 }
13783 
13784 void Simulator::VisitSVEPredicateFirstActive(const Instruction* instr) {
13785   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
13786   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13787   switch (instr->Mask(SVEPredicateFirstActiveMask)) {
13788     case PFIRST_p_p_p:
13789       pfirst(pdn, pg, pdn);
13790       // TODO: Is this broken when pg == pdn?
13791       PredTest(kFormatVnB, pg, pdn);
13792       break;
13793     default:
13794       VIXL_UNIMPLEMENTED();
13795       break;
13796   }
13797 }
13798 
13799 void Simulator::VisitSVEPredicateInitialize(const Instruction* instr) {
13800   // This group only contains PTRUE{S}, and there are no unallocated encodings.
13801   VIXL_STATIC_ASSERT(
13802       SVEPredicateInitializeMask ==
13803       (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit));
13804   VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) ||
13805               (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s));
13806 
13807   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13808   VectorFormat vform = instr->GetSVEVectorFormat();
13809 
13810   ptrue(vform, pdn, instr->GetImmSVEPredicateConstraint());
13811   if (instr->ExtractBit(16)) PredTest(vform, pdn, pdn);
13812 }
13813 
13814 void Simulator::VisitSVEPredicateNextActive(const Instruction* instr) {
13815   // This group only contains PNEXT, and there are no unallocated encodings.
13816   VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask);
13817   VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p);
13818 
13819   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
13820   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13821   VectorFormat vform = instr->GetSVEVectorFormat();
13822 
13823   pnext(vform, pdn, pg, pdn);
13824   // TODO: Is this broken when pg == pdn?
13825   PredTest(vform, pg, pdn);
13826 }
13827 
13828 void Simulator::VisitSVEPredicateReadFromFFR_Predicated(
13829     const Instruction* instr) {
13830   LogicPRegister pd(ReadPRegister(instr->GetPd()));
13831   LogicPRegister pg(ReadPRegister(instr->GetPn()));
13832   FlagsUpdate flags = LeaveFlags;
13833   switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) {
13834     case RDFFR_p_p_f:
13835       // Do nothing.
13836       break;
13837     case RDFFRS_p_p_f:
13838       flags = SetFlags;
13839       break;
13840     default:
13841       VIXL_UNIMPLEMENTED();
13842       break;
13843   }
13844 
13845   LogicPRegister ffr(ReadFFR());
13846   mov_zeroing(pd, pg, ffr);
13847 
13848   if (flags == SetFlags) {
13849     PredTest(kFormatVnB, pg, pd);
13850   }
13851 }
13852 
13853 void Simulator::VisitSVEPredicateReadFromFFR_Unpredicated(
13854     const Instruction* instr) {
13855   LogicPRegister pd(ReadPRegister(instr->GetPd()));
13856   LogicPRegister ffr(ReadFFR());
13857   switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) {
13858     case RDFFR_p_f:
13859       mov(pd, ffr);
13860       break;
13861     default:
13862       VIXL_UNIMPLEMENTED();
13863       break;
13864   }
13865 }
13866 
13867 void Simulator::VisitSVEPredicateTest(const Instruction* instr) {
13868   switch (instr->Mask(SVEPredicateTestMask)) {
13869     case PTEST_p_p:
13870       PredTest(kFormatVnB,
13871                ReadPRegister(instr->ExtractBits(13, 10)),
13872                ReadPRegister(instr->GetPn()));
13873       break;
13874     default:
13875       VIXL_UNIMPLEMENTED();
13876       break;
13877   }
13878 }
13879 
13880 void Simulator::VisitSVEPredicateZero(const Instruction* instr) {
13881   switch (instr->Mask(SVEPredicateZeroMask)) {
13882     case PFALSE_p:
13883       pfalse(ReadPRegister(instr->GetPd()));
13884       break;
13885     default:
13886       VIXL_UNIMPLEMENTED();
13887       break;
13888   }
13889 }
13890 
13891 void Simulator::VisitSVEPropagateBreak(const Instruction* instr) {
13892   SimPRegister& pd = ReadPRegister(instr->GetPd());
13893   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13894   SimPRegister& pn = ReadPRegister(instr->GetPn());
13895   SimPRegister& pm = ReadPRegister(instr->GetPm());
13896 
13897   bool set_flags = false;
13898   switch (instr->Mask(SVEPropagateBreakMask)) {
13899     case BRKPAS_p_p_pp:
13900       set_flags = true;
13901       VIXL_FALLTHROUGH();
13902     case BRKPA_p_p_pp:
13903       brkpa(pd, pg, pn, pm);
13904       break;
13905     case BRKPBS_p_p_pp:
13906       set_flags = true;
13907       VIXL_FALLTHROUGH();
13908     case BRKPB_p_p_pp:
13909       brkpb(pd, pg, pn, pm);
13910       break;
13911     default:
13912       VIXL_UNIMPLEMENTED();
13913       break;
13914   }
13915 
13916   if (set_flags) {
13917     PredTest(kFormatVnB, pg, pd);
13918   }
13919 }
13920 
13921 void Simulator::VisitSVEStackFrameAdjustment(const Instruction* instr) {
13922   uint64_t length = 0;
13923   switch (instr->Mask(SVEStackFrameAdjustmentMask)) {
13924     case ADDPL_r_ri:
13925       length = GetPredicateLengthInBytes();
13926       break;
13927     case ADDVL_r_ri:
13928       length = GetVectorLengthInBytes();
13929       break;
13930     default:
13931       VIXL_UNIMPLEMENTED();
13932   }
13933   uint64_t base = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);
13934   WriteXRegister(instr->GetRd(),
13935                  base + (length * instr->GetImmSVEVLScale()),
13936                  LogRegWrites,
13937                  Reg31IsStackPointer);
13938 }
13939 
13940 void Simulator::VisitSVEStackFrameSize(const Instruction* instr) {
13941   int64_t scale = instr->GetImmSVEVLScale();
13942 
13943   switch (instr->Mask(SVEStackFrameSizeMask)) {
13944     case RDVL_r_i:
13945       WriteXRegister(instr->GetRd(), GetVectorLengthInBytes() * scale);
13946       break;
13947     default:
13948       VIXL_UNIMPLEMENTED();
13949   }
13950 }
13951 
13952 void Simulator::VisitSVEVectorSelect(const Instruction* instr) {
13953   // The only instruction in this group is `sel`, and there are no unused
13954   // encodings.
13955   VIXL_ASSERT(instr->Mask(SVEVectorSelectMask) == SEL_z_p_zz);
13956 
13957   VectorFormat vform = instr->GetSVEVectorFormat();
13958   SimVRegister& zd = ReadVRegister(instr->GetRd());
13959   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13960   SimVRegister& zn = ReadVRegister(instr->GetRn());
13961   SimVRegister& zm = ReadVRegister(instr->GetRm());
13962 
13963   sel(vform, zd, pg, zn, zm);
13964 }
13965 
13966 void Simulator::VisitSVEFFRInitialise(const Instruction* instr) {
13967   switch (instr->Mask(SVEFFRInitialiseMask)) {
13968     case SETFFR_f: {
13969       LogicPRegister ffr(ReadFFR());
13970       ffr.SetAllBits();
13971       break;
13972     }
13973     default:
13974       VIXL_UNIMPLEMENTED();
13975       break;
13976   }
13977 }
13978 
13979 void Simulator::VisitSVEFFRWriteFromPredicate(const Instruction* instr) {
13980   switch (instr->Mask(SVEFFRWriteFromPredicateMask)) {
13981     case WRFFR_f_p: {
13982       SimPRegister pn(ReadPRegister(instr->GetPn()));
13983       bool last_active = true;
13984       for (unsigned i = 0; i < pn.GetSizeInBits(); i++) {
13985         bool active = pn.GetBit(i);
13986         if (active && !last_active) {
13987           // `pn` is non-monotonic. This is UNPREDICTABLE.
13988           VIXL_ABORT();
13989         }
13990         last_active = active;
13991       }
13992       mov(ReadFFR(), pn);
13993       break;
13994     }
13995     default:
13996       VIXL_UNIMPLEMENTED();
13997       break;
13998   }
13999 }
14000 
14001 void Simulator::VisitSVEContiguousLoad_ScalarPlusImm(const Instruction* instr) {
14002   bool is_signed;
14003   switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) {
14004     case LD1B_z_p_bi_u8:
14005     case LD1B_z_p_bi_u16:
14006     case LD1B_z_p_bi_u32:
14007     case LD1B_z_p_bi_u64:
14008     case LD1H_z_p_bi_u16:
14009     case LD1H_z_p_bi_u32:
14010     case LD1H_z_p_bi_u64:
14011     case LD1W_z_p_bi_u32:
14012     case LD1W_z_p_bi_u64:
14013     case LD1D_z_p_bi_u64:
14014       is_signed = false;
14015       break;
14016     case LD1SB_z_p_bi_s16:
14017     case LD1SB_z_p_bi_s32:
14018     case LD1SB_z_p_bi_s64:
14019     case LD1SH_z_p_bi_s32:
14020     case LD1SH_z_p_bi_s64:
14021     case LD1SW_z_p_bi_s64:
14022       is_signed = true;
14023       break;
14024     default:
14025       // This encoding group is complete, so no other values should be possible.
14026       VIXL_UNREACHABLE();
14027       is_signed = false;
14028       break;
14029   }
14030 
14031   int vl = GetVectorLengthInBytes();
14032   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
14033   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
14034   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
14035   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
14036   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
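  // The immediate is a signed multiple of the vector length, scaled down for
  // unpacked forms where the element size exceeds the memory access size.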
14037   uint64_t offset =
14038       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
14039   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
14040   LogicSVEAddressVector addr(base + offset);
14041   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
14042   SVEStructuredLoadHelper(vform,
14043                           ReadPRegister(instr->GetPgLow8()),
14044                           instr->GetRt(),
14045                           addr,
14046                           is_signed);
14047 }
14048 
14049 void Simulator::VisitSVEContiguousLoad_ScalarPlusScalar(
14050     const Instruction* instr) {
14051   bool is_signed;
14052   switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
14053     case LD1B_z_p_br_u8:
14054     case LD1B_z_p_br_u16:
14055     case LD1B_z_p_br_u32:
14056     case LD1B_z_p_br_u64:
14057     case LD1H_z_p_br_u16:
14058     case LD1H_z_p_br_u32:
14059     case LD1H_z_p_br_u64:
14060     case LD1W_z_p_br_u32:
14061     case LD1W_z_p_br_u64:
14062     case LD1D_z_p_br_u64:
14063       is_signed = false;
14064       break;
14065     case LD1SB_z_p_br_s16:
14066     case LD1SB_z_p_br_s32:
14067     case LD1SB_z_p_br_s64:
14068     case LD1SH_z_p_br_s32:
14069     case LD1SH_z_p_br_s64:
14070     case LD1SW_z_p_br_s64:
14071       is_signed = true;
14072       break;
14073     default:
14074       // This encoding group is complete, so no other values should be possible.
14075       VIXL_UNREACHABLE();
14076       is_signed = false;
14077       break;
14078   }
14079 
14080   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
14081   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
14082   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
14083   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
14084   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
14085   uint64_t offset = ReadXRegister(instr->GetRm());
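  // Xm holds an element index, so scale it by the memory access size to form a
  // byte offset.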
14086   offset <<= msize_in_bytes_log2;
14087   LogicSVEAddressVector addr(base + offset);
14088   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
14089   SVEStructuredLoadHelper(vform,
14090                           ReadPRegister(instr->GetPgLow8()),
14091                           instr->GetRt(),
14092                           addr,
14093                           is_signed);
14094 }
14095 
14096 void Simulator::DoUnreachable(const Instruction* instr) {
14097   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
14098               (instr->GetImmException() == kUnreachableOpcode));
14099 
14100   fprintf(stream_,
14101           "Hit UNREACHABLE marker at pc=%p.\n",
14102           reinterpret_cast<const void*>(instr));
14103   abort();
14104 }
14105 
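// IRG: insert a random allocation tag into the address in Xn (or SP), passing
// the low 16 bits of Xm to the tag generator as the set of tags to exclude, and
// write the result to Xd (or SP).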
14106 void Simulator::Simulate_XdSP_XnSP_Xm(const Instruction* instr) {
14107   VIXL_ASSERT(form_hash_ == Hash("irg_64i_dp_2src"));
14108   uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
14109   uint64_t rm = ReadXRegister(instr->GetRm());
14110   uint64_t tag = GenerateRandomTag(rm & 0xffff);
14111   uint64_t new_val = GetAddressWithAllocationTag(rn, tag);
14112   WriteXRegister(instr->GetRd(), new_val, LogRegWrites, Reg31IsStackPointer);
14113 }
14114 
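// ADDG/SUBG: add or subtract a multiple of the tag granule to the address in
// Xn (or SP), adjust its allocation tag by the encoded tag offset (skipping
// excluded tags), and write the result to Xd (or SP).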
14115 void Simulator::SimulateMTEAddSubTag(const Instruction* instr) {
14116   uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
14117   uint64_t rn_tag = GetAllocationTagFromAddress(rn);
14118   uint64_t tag_offset = instr->ExtractBits(13, 10);
14119   // TODO: implement GCR_EL1.Exclude to provide a tag exclusion list.
14120   uint64_t new_tag = ChooseNonExcludedTag(rn_tag, tag_offset);
14121 
14122   uint64_t offset = instr->ExtractBits(21, 16) * kMTETagGranuleInBytes;
14123   int carry = 0;
14124   if (form_hash_ == Hash("subg_64_addsub_immtags")) {
14125     offset = ~offset;
14126     carry = 1;
14127   } else {
14128     VIXL_ASSERT(form_hash_ == Hash("addg_64_addsub_immtags"));
14129   }
14130   uint64_t new_val =
14131       AddWithCarry(kXRegSize, /* set_flags = */ false, rn, offset, carry);
14132   new_val = GetAddressWithAllocationTag(new_val, new_tag);
14133   WriteXRegister(instr->GetRd(), new_val, LogRegWrites, Reg31IsStackPointer);
14134 }
14135 
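// GMI: set the bit corresponding to the allocation tag of the address in Xn (or
// SP) in the mask value from Xm, and write the result to Xd.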
14136 void Simulator::SimulateMTETagMaskInsert(const Instruction* instr) {
14137   VIXL_ASSERT(form_hash_ == Hash("gmi_64g_dp_2src"));
14138   uint64_t mask = ReadXRegister(instr->GetRm());
14139   uint64_t tag = GetAllocationTagFromAddress(
14140       ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
14141   uint64_t mask_bit = 1 << tag;
14142   WriteXRegister(instr->GetRd(), mask | mask_bit);
14143 }
14144 
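// SUBP/SUBPS: subtract the sign-extended 56-bit address part of Xm from that of
// Xn, writing the difference to Xd. The S-suffixed form also sets the flags.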
14145 void Simulator::SimulateMTESubPointer(const Instruction* instr) {
14146   uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
14147   uint64_t rm = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);
14148 
14149   VIXL_ASSERT((form_hash_ == Hash("subps_64s_dp_2src")) ||
14150               (form_hash_ == Hash("subp_64s_dp_2src")));
14151   bool set_flags = (form_hash_ == Hash("subps_64s_dp_2src"));
14152 
14153   rn = ExtractSignedBitfield64(55, 0, rn);
14154   rm = ExtractSignedBitfield64(55, 0, rm);
14155   uint64_t new_val = AddWithCarry(kXRegSize, set_flags, rn, ~rm, 1);
14156   WriteXRegister(instr->GetRd(), new_val);
14157 }
14158 
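// STGP: store the pair Xt, Xt2 to a tag-granule-aligned address and set that
// granule's allocation tag from the address in Xn (or SP).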
14159 void Simulator::SimulateMTEStoreTagPair(const Instruction* instr) {
14160   uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
14161   uint64_t rt = ReadXRegister(instr->GetRt());
14162   uint64_t rt2 = ReadXRegister(instr->GetRt2());
14163   int offset = instr->GetImmLSPair() * static_cast<int>(kMTETagGranuleInBytes);
14164 
14165   AddrMode addr_mode = Offset;
14166   switch (form_hash_) {
14167     case Hash("stgp_64_ldstpair_off"):
14168       // Default is the offset mode.
14169       break;
14170     case Hash("stgp_64_ldstpair_post"):
14171       addr_mode = PostIndex;
14172       break;
14173     case Hash("stgp_64_ldstpair_pre"):
14174       addr_mode = PreIndex;
14175       break;
14176     default:
14177       VIXL_UNIMPLEMENTED();
14178   }
14179 
14180   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addr_mode);
14181   if (!IsAligned(address, kMTETagGranuleInBytes)) {
14182     VIXL_ALIGNMENT_EXCEPTION();
14183   }
14184 
14185   int tag = GetAllocationTagFromAddress(rn);
14186   meta_data_.SetMTETag(address, tag);
14187 
14188   MemWrite<uint64_t>(address, rt);
14189   MemWrite<uint64_t>(address + kXRegSizeInBytes, rt2);
14190 }
14191 
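// STG/ST2G/STZG/STZ2G: set the allocation tag (taken from the address in Xt)
// for one or two tag granules, optionally zeroing the granule contents first,
// with offset, pre-index or post-index addressing.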
14192 void Simulator::SimulateMTEStoreTag(const Instruction* instr) {
14193   uint64_t rt = ReadXRegister(instr->GetRt(), Reg31IsStackPointer);
14194   int offset = instr->GetImmLS() * static_cast<int>(kMTETagGranuleInBytes);
14195 
14196   AddrMode addr_mode = Offset;
14197   switch (form_hash_) {
14198     case Hash("st2g_64soffset_ldsttags"):
14199     case Hash("stg_64soffset_ldsttags"):
14200     case Hash("stz2g_64soffset_ldsttags"):
14201     case Hash("stzg_64soffset_ldsttags"):
14202       // Default is the offset mode.
14203       break;
14204     case Hash("st2g_64spost_ldsttags"):
14205     case Hash("stg_64spost_ldsttags"):
14206     case Hash("stz2g_64spost_ldsttags"):
14207     case Hash("stzg_64spost_ldsttags"):
14208       addr_mode = PostIndex;
14209       break;
14210     case Hash("st2g_64spre_ldsttags"):
14211     case Hash("stg_64spre_ldsttags"):
14212     case Hash("stz2g_64spre_ldsttags"):
14213     case Hash("stzg_64spre_ldsttags"):
14214       addr_mode = PreIndex;
14215       break;
14216     default:
14217       VIXL_UNIMPLEMENTED();
14218   }
14219 
14220   bool is_pair = false;
14221   switch (form_hash_) {
14222     case Hash("st2g_64soffset_ldsttags"):
14223     case Hash("st2g_64spost_ldsttags"):
14224     case Hash("st2g_64spre_ldsttags"):
14225     case Hash("stz2g_64soffset_ldsttags"):
14226     case Hash("stz2g_64spost_ldsttags"):
14227     case Hash("stz2g_64spre_ldsttags"):
14228       is_pair = true;
14229       break;
14230     default:
14231       break;
14232   }
14233 
14234   bool is_zeroing = false;
14235   switch (form_hash_) {
14236     case Hash("stz2g_64soffset_ldsttags"):
14237     case Hash("stz2g_64spost_ldsttags"):
14238     case Hash("stz2g_64spre_ldsttags"):
14239     case Hash("stzg_64soffset_ldsttags"):
14240     case Hash("stzg_64spost_ldsttags"):
14241     case Hash("stzg_64spre_ldsttags"):
14242       is_zeroing = true;
14243       break;
14244     default:
14245       break;
14246   }
14247 
14248   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addr_mode);
14249 
14250   if (is_zeroing) {
14251     if (!IsAligned(address, kMTETagGranuleInBytes)) {
14253       VIXL_ALIGNMENT_EXCEPTION();
14254     }
14255     VIXL_STATIC_ASSERT(kMTETagGranuleInBytes >= sizeof(uint64_t));
14256     VIXL_STATIC_ASSERT(kMTETagGranuleInBytes % sizeof(uint64_t) == 0);
14257 
14258     size_t fill_size = kMTETagGranuleInBytes;
14259     if (is_pair) {
14260       fill_size += kMTETagGranuleInBytes;
14261     }
14262 
14263     size_t fill_offset = 0;
14264     while (fill_offset < fill_size) {
14265       MemWrite<uint64_t>(address + fill_offset, 0);
14266       fill_offset += sizeof(uint64_t);
14267     }
14268   }
14269 
14270   int tag = GetAllocationTagFromAddress(rt);
14271   meta_data_.SetMTETag(address, tag, instr);
14272   if (is_pair) {
14273     meta_data_.SetMTETag(address + kMTETagGranuleInBytes, tag, instr);
14274   }
14275 }
14276 
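// LDG: load the allocation tag of the granule containing the computed address
// and insert it into the address value already held in Xt.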
14277 void Simulator::SimulateMTELoadTag(const Instruction* instr) {
14278   uint64_t rt = ReadXRegister(instr->GetRt());
14279   int offset = instr->GetImmLS() * static_cast<int>(kMTETagGranuleInBytes);
14280 
14281   switch (form_hash_) {
14282     case Hash("ldg_64loffset_ldsttags"):
14283       break;
14284     default:
14285       VIXL_UNIMPLEMENTED();
14286   }
14287 
14288   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, Offset);
14289   address = AlignDown(address, kMTETagGranuleInBytes);
14290   uint64_t tag = meta_data_.GetMTETag(address, instr);
14291   WriteXRegister(instr->GetRt(), GetAddressWithAllocationTag(rt, tag));
14292 }
14293 
14294 void Simulator::SimulateCpyFP(const Instruction* instr) {
14295   MOPSPHelper<"cpy"_h>(instr);
14296   LogSystemRegister(NZCV);
14297 }
14298 
14299 void Simulator::SimulateCpyP(const Instruction* instr) {
14300   MOPSPHelper<"cpy"_h>(instr);
14301 
14302   int d = instr->GetRd();
14303   int n = instr->GetRn();
14304   int s = instr->GetRs();
14305 
14306   // Determine copy direction. For cases in which direction is implementation
14307   // defined, use forward.
14308   bool is_backwards = false;
14309   uint64_t xs = ReadXRegister(s);
14310   uint64_t xd = ReadXRegister(d);
14311   uint64_t xn = ReadXRegister(n);
14312 
14313   // Ignore the top byte of addresses for comparisons. We can use xn as is,
14314   // as it should have zero in bits 63:55.
14315   uint64_t xs_tbi = ExtractUnsignedBitfield64(55, 0, xs);
14316   uint64_t xd_tbi = ExtractUnsignedBitfield64(55, 0, xd);
14317   VIXL_ASSERT(ExtractUnsignedBitfield64(63, 55, xn) == 0);
14318   if ((xs_tbi < xd_tbi) && ((xs_tbi + xn) > xd_tbi)) {
14319     is_backwards = true;
14320     WriteXRegister(s, xs + xn);
14321     WriteXRegister(d, xd + xn);
14322   }
14323 
14324   ReadNzcv().SetN(is_backwards ? 1 : 0);
14325   LogSystemRegister(NZCV);
14326 }
14327 
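// CPYM: copy Xn bytes from the address in Xs to the address in Xd, one byte at
// a time, in the direction chosen by the prologue (recorded in the N flag).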
14328 void Simulator::SimulateCpyM(const Instruction* instr) {
14329   VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"cpy"_h>());
14330   VIXL_ASSERT(instr->IsMOPSMainOf(GetLastExecutedInstruction(), "cpy"_h));
14331 
14332   int d = instr->GetRd();
14333   int n = instr->GetRn();
14334   int s = instr->GetRs();
14335 
14336   uint64_t xd = ReadXRegister(d);
14337   uint64_t xn = ReadXRegister(n);
14338   uint64_t xs = ReadXRegister(s);
14339   bool is_backwards = ReadN();
14340 
14341   int step = 1;
14342   if (is_backwards) {
14343     step = -1;
14344     xs--;
14345     xd--;
14346   }
14347 
14348   while (xn--) {
14349     uint8_t temp = MemRead<uint8_t>(xs);
14350     MemWrite<uint8_t>(xd, temp);
14351     LogMemTransfer(xd, xs, temp);
14352     xs += step;
14353     xd += step;
14354   }
14355 
14356   if (is_backwards) {
14357     xs++;
14358     xd++;
14359   }
14360 
14361   WriteXRegister(d, xd);
14362   WriteXRegister(n, 0);
14363   WriteXRegister(s, xs);
14364 }
14365 
14366 void Simulator::SimulateCpyE(const Instruction* instr) {
14367   USE(instr);
14368   VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"cpy"_h>());
14369   VIXL_ASSERT(instr->IsMOPSEpilogueOf(GetLastExecutedInstruction(), "cpy"_h));
14370   // This implementation does nothing in the epilogue; all copying is completed
14371   // in the "main" part.
14372 }
14373 
14374 void Simulator::SimulateSetP(const Instruction* instr) {
14375   MOPSPHelper<"set"_h>(instr);
14376   LogSystemRegister(NZCV);
14377 }
14378 
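// SETM: store the low byte of Xs to Xn consecutive bytes starting at the
// address in Xd.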
14379 void Simulator::SimulateSetM(const Instruction* instr) {
14380   VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"set"_h>());
14381   VIXL_ASSERT(instr->IsMOPSMainOf(GetLastExecutedInstruction(), "set"_h));
14382 
14383   uint64_t xd = ReadXRegister(instr->GetRd());
14384   uint64_t xn = ReadXRegister(instr->GetRn());
14385   uint64_t xs = ReadXRegister(instr->GetRs());
14386 
14387   while (xn--) {
14388     LogWrite(instr->GetRs(), GetPrintRegPartial(kPrintRegLaneSizeB), xd);
14389     MemWrite<uint8_t>(xd++, xs);
14390   }
14391   WriteXRegister(instr->GetRd(), xd);
14392   WriteXRegister(instr->GetRn(), 0);
14393 }
14394 
14395 void Simulator::SimulateSetE(const Instruction* instr) {
14396   USE(instr);
14397   VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"set"_h>());
14398   VIXL_ASSERT(instr->IsMOPSEpilogueOf(GetLastExecutedInstruction(), "set"_h));
14399   // This implementation does nothing in the epilogue; all setting is completed
14400   // in the "main" part.
14401 }
14402 
14403 void Simulator::SimulateSetGP(const Instruction* instr) {
14404   MOPSPHelper<"setg"_h>(instr);
14405 
14406   uint64_t xd = ReadXRegister(instr->GetRd());
14407   uint64_t xn = ReadXRegister(instr->GetRn());
14408 
14409   if ((xn > 0) && !IsAligned(xd, kMTETagGranuleInBytes)) {
14410     VIXL_ALIGNMENT_EXCEPTION();
14411   }
14412 
14413   if (!IsAligned(xn, kMTETagGranuleInBytes)) {
14414     VIXL_ALIGNMENT_EXCEPTION();
14415   }
14416 
14417   LogSystemRegister(NZCV);
14418 }
14419 
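// SETGM: as SETM, but additionally set the allocation tag (taken from the
// address in Xd) for every tag granule written.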
14420 void Simulator::SimulateSetGM(const Instruction* instr) {
14421   uint64_t xd = ReadXRegister(instr->GetRd());
14422   uint64_t xn = ReadXRegister(instr->GetRn());
14423 
14424   int tag = GetAllocationTagFromAddress(xd);
14425   while (xn) {
14426     meta_data_.SetMTETag(xd, tag);
14427     xd += 16;
14428     xn -= 16;
14429   }
14430   SimulateSetM(instr);
14431 }
14432 
14433 void Simulator::DoTrace(const Instruction* instr) {
14434   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
14435               (instr->GetImmException() == kTraceOpcode));
14436 
14437   // Read the arguments encoded inline in the instruction stream.
14438   uint32_t parameters;
14439   uint32_t command;
14440 
14441   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
14442   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
14443   memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
14444 
14445   switch (command) {
14446     case TRACE_ENABLE:
14447       SetTraceParameters(GetTraceParameters() | parameters);
14448       break;
14449     case TRACE_DISABLE:
14450       SetTraceParameters(GetTraceParameters() & ~parameters);
14451       break;
14452     default:
14453       VIXL_UNREACHABLE();
14454   }
14455 
14456   WritePc(instr->GetInstructionAtOffset(kTraceLength));
14457 }
14458 
14459 
14460 void Simulator::DoLog(const Instruction* instr) {
14461   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
14462               (instr->GetImmException() == kLogOpcode));
14463 
14464   // Read the arguments encoded inline in the instruction stream.
14465   uint32_t parameters;
14466 
14467   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
14468   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
14469 
14470   // We don't support a one-shot LOG_DISASM.
14471   VIXL_ASSERT((parameters & LOG_DISASM) == 0);
14472   // Print the requested information.
14473   if (parameters & LOG_SYSREGS) PrintSystemRegisters();
14474   if (parameters & LOG_REGS) PrintRegisters();
14475   if (parameters & LOG_VREGS) PrintVRegisters();
14476 
14477   WritePc(instr->GetInstructionAtOffset(kLogLength));
14478 }
14479 
14480 
14481 void Simulator::DoPrintf(const Instruction* instr) {
14482   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
14483               (instr->GetImmException() == kPrintfOpcode));
14484 
14485   // Read the arguments encoded inline in the instruction stream.
14486   uint32_t arg_count;
14487   uint32_t arg_pattern_list;
14488   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
14489   memcpy(&arg_count, instr + kPrintfArgCountOffset, sizeof(arg_count));
14490   memcpy(&arg_pattern_list,
14491          instr + kPrintfArgPatternListOffset,
14492          sizeof(arg_pattern_list));
14493 
14494   VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
14495   VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0);
14496 
14497   // We need to call the host printf function with a set of arguments defined by
14498   // arg_pattern_list. Because we don't know the types and sizes of the
14499   // arguments, this is very difficult to do in a robust and portable way. To
14500   // work around the problem, we pick apart the format string, and print one
14501   // format placeholder at a time.
14502 
14503   // Allocate space for the format string. We take a copy, so we can modify it.
14504   // Leave enough space for one extra character per expected argument (plus the
14505   // '\0' termination).
14506   const char* format_base = ReadRegister<const char*>(0);
14507   VIXL_ASSERT(format_base != NULL);
14508   size_t length = strlen(format_base) + 1;
14509   char* const format = allocator_.New<char[]>(length + arg_count);
14510   // A list of chunks, each with exactly one format placeholder.
14511   const char* chunks[kPrintfMaxArgCount];
14512 
14513   // Copy the format string and search for format placeholders.
14514   uint32_t placeholder_count = 0;
14515   char* format_scratch = format;
14516   for (size_t i = 0; i < length; i++) {
14517     if (format_base[i] != '%') {
14518       *format_scratch++ = format_base[i];
14519     } else {
14520       if (format_base[i + 1] == '%') {
14521         // Ignore explicit "%%" sequences.
14522         *format_scratch++ = format_base[i];
14523         i++;
14524         // Chunks after the first are passed as format strings to printf, so we
14525         // need to escape '%' characters in those chunks.
14526         if (placeholder_count > 0) *format_scratch++ = format_base[i];
14527       } else {
14528         VIXL_CHECK(placeholder_count < arg_count);
14529         // Insert '\0' before placeholders, and store their locations.
14530         *format_scratch++ = '\0';
14531         chunks[placeholder_count++] = format_scratch;
14532         *format_scratch++ = format_base[i];
14533       }
14534     }
14535   }
14536   VIXL_CHECK(placeholder_count == arg_count);
14537 
14538   // Finally, call printf with each chunk, passing the appropriate register
14539   // argument. Normally, printf returns the number of bytes transmitted, so we
14540   // can emulate a single printf call by adding the result from each chunk. If
14541   // any call returns a negative (error) value, though, just return that value.
14542 
14543   printf("%s", clr_printf);
14544 
14545   // Because '\0' is inserted before each placeholder, the first string in
14546   // 'format' contains no format placeholders and should be printed literally.
14547   int result = printf("%s", format);
14548   int pcs_r = 1;  // Start at x1. x0 holds the format string.
14549   int pcs_f = 0;  // Start at d0.
14550   if (result >= 0) {
14551     for (uint32_t i = 0; i < placeholder_count; i++) {
14552       int part_result = -1;
14553 
14554       uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits);
14555       arg_pattern &= (1 << kPrintfArgPatternBits) - 1;
14556       switch (arg_pattern) {
14557         case kPrintfArgW:
14558           part_result = printf(chunks[i], ReadWRegister(pcs_r++));
14559           break;
14560         case kPrintfArgX:
14561           part_result = printf(chunks[i], ReadXRegister(pcs_r++));
14562           break;
14563         case kPrintfArgD:
14564           part_result = printf(chunks[i], ReadDRegister(pcs_f++));
14565           break;
14566         default:
14567           VIXL_UNREACHABLE();
14568       }
14569 
14570       if (part_result < 0) {
14571         // Handle error values.
14572         result = part_result;
14573         break;
14574       }
14575 
14576       result += part_result;
14577     }
14578   }
14579 
14580   printf("%s", clr_normal);
14581 
14582   // Printf returns its result in x0 (just like the C library's printf).
14583   WriteXRegister(0, result);
14584 
14585   // The printf parameters are inlined in the code, so skip them.
14586   WritePc(instr->GetInstructionAtOffset(kPrintfLength));
14587 
14588   // Set LR as if we'd just called a native printf function.
14589   WriteLr(ReadPc());
14590   allocator_.DeleteArray(format);
14591 }
14592 
14593 
14594 #ifdef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
14595 void Simulator::DoRuntimeCall(const Instruction* instr) {
14596   VIXL_STATIC_ASSERT(kRuntimeCallAddressSize == sizeof(uintptr_t));
14597   // The appropriate `Simulator::SimulateRuntimeCall()` wrapper and the function
14598   // to call are encoded inline in the instruction stream.
14599   uintptr_t call_wrapper_address =
14600       MemRead<uintptr_t>(instr + kRuntimeCallWrapperOffset);
14601   uintptr_t function_address =
14602       MemRead<uintptr_t>(instr + kRuntimeCallFunctionOffset);
14603   RuntimeCallType call_type = static_cast<RuntimeCallType>(
14604       MemRead<uint32_t>(instr + kRuntimeCallTypeOffset));
14605   auto runtime_call_wrapper =
14606       reinterpret_cast<void (*)(Simulator*, uintptr_t)>(call_wrapper_address);
14607 
14608   if (call_type == kCallRuntime) {
14609     WriteRegister(kLinkRegCode,
14610                   instr->GetInstructionAtOffset(kRuntimeCallLength));
14611   }
14612   runtime_call_wrapper(this, function_address);
14613   // Read the return address from `lr` and write it into `pc`.
14614   WritePc(ReadRegister<Instruction*>(kLinkRegCode));
14615 }
14616 #else
14617 void Simulator::DoRuntimeCall(const Instruction* instr) {
14618   USE(instr);
14619   VIXL_UNREACHABLE();
14620 }
14621 #endif
14622 
14623 
14624 void Simulator::DoConfigureCPUFeatures(const Instruction* instr) {
14625   VIXL_ASSERT(instr->Mask(ExceptionMask) == HLT);
14626 
14627   typedef ConfigureCPUFeaturesElementType ElementType;
14628   VIXL_ASSERT(CPUFeatures::kNumberOfFeatures <
14629               std::numeric_limits<ElementType>::max());
14630 
14631   // k{Set,Enable,Disable}CPUFeatures have the same parameter encoding.
14632 
14633   size_t element_size = sizeof(ElementType);
14634   size_t offset = kConfigureCPUFeaturesListOffset;
14635 
14636   // Read the kNone-terminated list of features.
14637   CPUFeatures parameters;
14638   while (true) {
14639     ElementType feature = MemRead<ElementType>(instr + offset);
14640     offset += element_size;
14641     if (feature == static_cast<ElementType>(CPUFeatures::kNone)) break;
14642     parameters.Combine(static_cast<CPUFeatures::Feature>(feature));
14643   }
14644 
14645   switch (instr->GetImmException()) {
14646     case kSetCPUFeaturesOpcode:
14647       SetCPUFeatures(parameters);
14648       break;
14649     case kEnableCPUFeaturesOpcode:
14650       GetCPUFeatures()->Combine(parameters);
14651       break;
14652     case kDisableCPUFeaturesOpcode:
14653       GetCPUFeatures()->Remove(parameters);
14654       break;
14655     default:
14656       VIXL_UNREACHABLE();
14657       break;
14658   }
14659 
14660   WritePc(instr->GetInstructionAtOffset(AlignUp(offset, kInstructionSize)));
14661 }
14662 
14663 
14664 void Simulator::DoSaveCPUFeatures(const Instruction* instr) {
14665   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
14666               (instr->GetImmException() == kSaveCPUFeaturesOpcode));
14667   USE(instr);
14668 
14669   saved_cpu_features_.push_back(*GetCPUFeatures());
14670 }
14671 
14672 
14673 void Simulator::DoRestoreCPUFeatures(const Instruction* instr) {
14674   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
14675               (instr->GetImmException() == kRestoreCPUFeaturesOpcode));
14676   USE(instr);
14677 
14678   SetCPUFeatures(saved_cpu_features_.back());
14679   saved_cpu_features_.pop_back();
14680 }
14681 
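// Wrap the host `mmap`, masking out PROT_BTI and PROT_MTE before calling it
// and, when PROT_MTE was requested, tagging the new mapping and the returned
// address.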
14682 void* Simulator::Mmap(
14683     void* address, size_t length, int prot, int flags, int fd, off_t offset) {
14684   // The underlying system `mmap` in the simulated environment doesn't recognize
14685   // PROT_BTI and PROT_MTE. Although the kernel probably just ignores bits it
14686   // doesn't know about, it is safer to mask those protections out before
14687   // calling it.
14687   int internal_prot = prot;
14688   prot &= ~(PROT_BTI | PROT_MTE);
14689 
14690   uint64_t address2 = reinterpret_cast<uint64_t>(
14691       mmap(address, length, prot, flags, fd, offset));
14692 
14693   if (internal_prot & PROT_MTE) {
14694     // The address returned by `mmap` isn't tagged.
14695     int tag = static_cast<int>(GenerateRandomTag());
14696     SetGranuleTag(address2, tag, length);
14697     address2 = GetAddressWithAllocationTag(address2, tag);
14698   }
14699 
14700   return reinterpret_cast<void*>(address2);
14701 }
14702 
14703 
14704 int Simulator::Munmap(void* address, size_t length, int prot) {
14705   if (prot & PROT_MTE) {
14706     // Untag the address since `munmap` doesn't recognize the memory tagging
14707     // managed by the Simulator.
14708     address = AddressUntag(address);
14709     CleanGranuleTag(reinterpret_cast<char*>(address), length);
14710   }
14711 
14712   return munmap(address, length);
14713 }
14714 
14715 
14716 }  // namespace aarch64
14717 }  // namespace vixl
14718 
14719 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
14720