1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include <cfloat>
28 #include <cmath>
29 #include <cstdio>
30 #include <cstdlib>
31 #include <cstring>
32 #include <sys/mman.h>
33 
34 #include "test-runner.h"
35 #include "test-utils.h"
36 
37 #include "aarch64/cpu-aarch64.h"
38 #include "aarch64/disasm-aarch64.h"
39 #include "aarch64/macro-assembler-aarch64.h"
40 #include "aarch64/simulator-aarch64.h"
41 #include "aarch64/test-utils-aarch64.h"
42 #include "test-assembler-aarch64.h"
43 
44 namespace vixl {
45 namespace aarch64 {
46 
// Byte (B register) loads and stores through immediate-offset, post-index and
// pre-index addressing. Checks the loaded values, the stored bytes and the
// final value of every base register.
TEST(load_store_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t input[3] = {0x12, 0x23, 0x34};
  uint8_t output[3] = {0, 0, 0};
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // One dedicated base register per access, so that write-back (or its
  // absence) can be verified independently for each addressing mode.
  __ Mov(x17, input_addr);
  __ Mov(x18, output_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, output_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, output_addr);
  // input[1] -> output[0]
  __ Ldr(b0, MemOperand(x17, sizeof(input[0])));
  __ Str(b0, MemOperand(x18, sizeof(output[0]), PostIndex));
  // input[0] -> output[2]
  __ Ldr(b1, MemOperand(x19, sizeof(input[0]), PostIndex));
  __ Str(b1, MemOperand(x20, 2 * sizeof(output[0]), PreIndex));
  // input[2] -> output[1]
  __ Ldr(b2, MemOperand(x21, 2 * sizeof(input[0]), PreIndex));
  __ Str(b2, MemOperand(x22, sizeof(output[0])));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values (all bits above the byte are expected to be zero) and
    // the bytes that ended up in memory.
    ASSERT_EQUAL_128(0, 0x23, q0);
    ASSERT_EQUAL_64(0x23, output[0]);
    ASSERT_EQUAL_128(0, 0x12, q1);
    ASSERT_EQUAL_64(0x12, output[2]);
    ASSERT_EQUAL_128(0, 0x34, q2);
    ASSERT_EQUAL_64(0x34, output[1]);
    // Plain offsets leave the base alone; indexed modes write it back.
    ASSERT_EQUAL_64(input_addr, x17);
    ASSERT_EQUAL_64(output_addr + sizeof(output[0]), x18);
    ASSERT_EQUAL_64(input_addr + sizeof(input[0]), x19);
    ASSERT_EQUAL_64(output_addr + 2 * sizeof(output[0]), x20);
    ASSERT_EQUAL_64(input_addr + 2 * sizeof(input[0]), x21);
    ASSERT_EQUAL_64(output_addr, x22);
  }
}
87 
88 
// Half-word (H register) loads and stores through immediate-offset,
// post-index and pre-index addressing. Checks the loaded values, the stored
// half-words and the final value of every base register.
TEST(load_store_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint16_t input[3] = {0x1234, 0x2345, 0x3456};
  uint16_t output[3] = {0, 0, 0};
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // One dedicated base register per access, so that write-back (or its
  // absence) can be verified independently for each addressing mode.
  __ Mov(x17, input_addr);
  __ Mov(x18, output_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, output_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, output_addr);
  // input[1] -> output[0]
  __ Ldr(h0, MemOperand(x17, sizeof(input[0])));
  __ Str(h0, MemOperand(x18, sizeof(output[0]), PostIndex));
  // input[0] -> output[2]
  __ Ldr(h1, MemOperand(x19, sizeof(input[0]), PostIndex));
  __ Str(h1, MemOperand(x20, 2 * sizeof(output[0]), PreIndex));
  // input[2] -> output[1]
  __ Ldr(h2, MemOperand(x21, 2 * sizeof(input[0]), PreIndex));
  __ Str(h2, MemOperand(x22, sizeof(output[0])));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values (all bits above the half-word are expected to be zero)
    // and the half-words that ended up in memory.
    ASSERT_EQUAL_128(0, 0x2345, q0);
    ASSERT_EQUAL_64(0x2345, output[0]);
    ASSERT_EQUAL_128(0, 0x1234, q1);
    ASSERT_EQUAL_64(0x1234, output[2]);
    ASSERT_EQUAL_128(0, 0x3456, q2);
    ASSERT_EQUAL_64(0x3456, output[1]);
    // Plain offsets leave the base alone; indexed modes write it back.
    ASSERT_EQUAL_64(input_addr, x17);
    ASSERT_EQUAL_64(output_addr + sizeof(output[0]), x18);
    ASSERT_EQUAL_64(input_addr + sizeof(input[0]), x19);
    ASSERT_EQUAL_64(output_addr + 2 * sizeof(output[0]), x20);
    ASSERT_EQUAL_64(input_addr + 2 * sizeof(input[0]), x21);
    ASSERT_EQUAL_64(output_addr, x22);
  }
}
129 
130 
// 128-bit (Q register) loads and stores through immediate-offset, post-index
// and pre-index addressing. The source is three 16-byte groups; each group is
// loaded with one mode and stored to the destination with another.
TEST(load_store_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t input[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, 0x01,
                       0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x21, 0x43,
                       0x65, 0x87, 0xa9, 0xcb, 0xed, 0x0f, 0x12, 0x34, 0x56,
                       0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x24, 0x46, 0x68, 0x8a,
                       0xac, 0xce, 0xe0, 0x02, 0x42, 0x64, 0x86, 0xa8, 0xca,
                       0xec, 0x0e, 0x20};

  uint64_t output[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // One dedicated base register per access, so that write-back (or its
  // absence) can be verified independently for each addressing mode.
  __ Mov(x17, input_addr);
  __ Mov(x18, output_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, output_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, output_addr);
  // input[16..31] -> output[0..1]
  __ Ldr(q0, MemOperand(x17, 16));
  __ Str(q0, MemOperand(x18, 16, PostIndex));
  // input[0..15] -> output[4..5]
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Str(q1, MemOperand(x20, 32, PreIndex));
  // input[32..47] -> output[2..3]
  __ Ldr(q2, MemOperand(x21, 32, PreIndex));
  __ Str(q2, MemOperand(x22, 16));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
    ASSERT_EQUAL_64(0x0fedcba987654321, output[0]);
    ASSERT_EQUAL_64(0xf0debc9a78563412, output[1]);
    ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
    ASSERT_EQUAL_64(0xfedcba9876543210, output[4]);
    ASSERT_EQUAL_64(0xefcdab8967452301, output[5]);
    ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
    ASSERT_EQUAL_64(0x02e0ceac8a684624, output[2]);
    ASSERT_EQUAL_64(0x200eeccaa8866442, output[3]);
    // Plain offsets leave the base alone; indexed modes write it back.
    ASSERT_EQUAL_64(input_addr, x17);
    ASSERT_EQUAL_64(output_addr + 16, x18);
    ASSERT_EQUAL_64(input_addr + 16, x19);
    ASSERT_EQUAL_64(output_addr + 32, x20);
    ASSERT_EQUAL_64(input_addr + 32, x21);
    ASSERT_EQUAL_64(output_addr, x22);
  }
}
179 
180 
TEST(load_store_v_regoffset)181 TEST(load_store_v_regoffset) {
182   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
183 
184   uint8_t src[64];
185   for (unsigned i = 0; i < sizeof(src); i++) {
186     src[i] = i;
187   }
188   uint8_t dst[64];
189   memset(dst, 0, sizeof(dst));
190 
191   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
192   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
193 
194   START();
195   __ Mov(x17, src_base + 16);
196   __ Mov(x18, 1);
197   __ Mov(w19, -1);
198   __ Mov(x20, dst_base - 1);
199 
200   __ Ldr(b0, MemOperand(x17, x18));
201   __ Ldr(b1, MemOperand(x17, x19, SXTW));
202 
203   __ Ldr(h2, MemOperand(x17, x18));
204   __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
205   __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
206   __ Ldr(h5, MemOperand(x17, x18, LSL, 1));
207 
208   __ Ldr(s16, MemOperand(x17, x18));
209   __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
210   __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
211   __ Ldr(s19, MemOperand(x17, x18, LSL, 2));
212 
213   __ Ldr(d20, MemOperand(x17, x18));
214   __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
215   __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
216   __ Ldr(d23, MemOperand(x17, x18, LSL, 3));
217 
218   __ Ldr(q24, MemOperand(x17, x18));
219   __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
220   __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
221   __ Ldr(q27, MemOperand(x17, x18, LSL, 4));
222 
223   // Store [bhsdq]27 to adjacent memory locations, then load again to check.
224   __ Str(b27, MemOperand(x20, x18));
225   __ Str(h27, MemOperand(x20, x18, UXTW, 1));
226   __ Add(x20, x20, 8);
227   __ Str(s27, MemOperand(x20, x19, SXTW, 2));
228   __ Sub(x20, x20, 8);
229   __ Str(d27, MemOperand(x20, x18, LSL, 3));
230   __ Add(x20, x20, 32);
231   __ Str(q27, MemOperand(x20, x19, SXTW, 4));
232 
233   __ Sub(x20, x20, 32);
234   __ Ldr(q6, MemOperand(x20, x18));
235   __ Ldr(q7, MemOperand(x20, x18, LSL, 4));
236 
237   END();
238 
239   if (CAN_RUN()) {
240     RUN();
241 
242     ASSERT_EQUAL_128(0, 0x11, q0);
243     ASSERT_EQUAL_128(0, 0x0f, q1);
244     ASSERT_EQUAL_128(0, 0x1211, q2);
245     ASSERT_EQUAL_128(0, 0x1312, q3);
246     ASSERT_EQUAL_128(0, 0x0f0e, q4);
247     ASSERT_EQUAL_128(0, 0x1312, q5);
248     ASSERT_EQUAL_128(0, 0x14131211, q16);
249     ASSERT_EQUAL_128(0, 0x17161514, q17);
250     ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
251     ASSERT_EQUAL_128(0, 0x17161514, q19);
252     ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
253     ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
254     ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
255     ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
256     ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
257     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
258     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
259     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
260     ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
261     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);
262   }
263 }
264 
// Pair load/store of Q registers: a post-indexed Ldp followed by a
// pre-indexed Stp that writes the two registers back in swapped order.
TEST(ldp_stp_quad) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint64_t input[4] = {0x0123456789abcdef,
                       0xaaaaaaaa55555555,
                       0xfedcba9876543210,
                       0x55555555aaaaaaaa};
  uint64_t output[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  __ Mov(x16, input_addr);
  __ Mov(x17, output_addr);
  __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(input[0]), PostIndex));
  __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(output[1]), PreIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
    ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
    // The pre-indexed store skips the first two destination words.
    ASSERT_EQUAL_64(0, output[0]);
    ASSERT_EQUAL_64(0, output[1]);
    ASSERT_EQUAL_64(0xfedcba9876543210, output[2]);
    ASSERT_EQUAL_64(0x55555555aaaaaaaa, output[3]);
    ASSERT_EQUAL_64(0x0123456789abcdef, output[4]);
    ASSERT_EQUAL_64(0xaaaaaaaa55555555, output[5]);
    // Both bases must have been written back.
    ASSERT_EQUAL_64(input_addr + 4 * sizeof(input[0]), x16);
    ASSERT_EQUAL_64(output_addr + 2 * sizeof(output[1]), x17);
  }
}
298 
// Ld1 (multiple structures) into 64-bit vectors, using one to four registers
// and B/H/S/D lane arrangements. The base address advances by one byte
// between loads.
TEST(neon_ld1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t input[32 + 5];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, input_addr);
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
  __ Ld1(v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps around from v31 to v0.
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination are expected to be zero,
    // including q2 whose upper half was deliberately initialised above.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
    ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
    ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
    ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
    ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
    ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
    ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
    ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
    ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
    ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
  }
}
347 
348 
TEST(neon_ld1_d_postindex)349 TEST(neon_ld1_d_postindex) {
350   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
351 
352   uint8_t src[32 + 5];
353   for (unsigned i = 0; i < sizeof(src); i++) {
354     src[i] = i;
355   }
356   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
357 
358   START();
359   __ Mov(x17, src_base);
360   __ Mov(x18, src_base + 1);
361   __ Mov(x19, src_base + 2);
362   __ Mov(x20, src_base + 3);
363   __ Mov(x21, src_base + 4);
364   __ Mov(x22, src_base + 5);
365   __ Mov(x23, 1);
366   __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
367   __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
368   __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
369   __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
370   __ Ld1(v16.V2S(),
371          v17.V2S(),
372          v18.V2S(),
373          v19.V2S(),
374          MemOperand(x20, 32, PostIndex));
375   __ Ld1(v30.V2S(),
376          v31.V2S(),
377          v0.V2S(),
378          v1.V2S(),
379          MemOperand(x21, 32, PostIndex));
380   __ Ld1(v20.V1D(),
381          v21.V1D(),
382          v22.V1D(),
383          v23.V1D(),
384          MemOperand(x22, 32, PostIndex));
385   END();
386 
387   if (CAN_RUN()) {
388     RUN();
389 
390     ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
391     ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
392     ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
393     ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
394     ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
395     ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
396     ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
397     ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
398     ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
399     ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
400     ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
401     ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
402     ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
403     ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
404     ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
405     ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
406     ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
407     ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
408     ASSERT_EQUAL_64(src_base + 1, x17);
409     ASSERT_EQUAL_64(src_base + 1 + 16, x18);
410     ASSERT_EQUAL_64(src_base + 2 + 24, x19);
411     ASSERT_EQUAL_64(src_base + 3 + 32, x20);
412     ASSERT_EQUAL_64(src_base + 4 + 32, x21);
413     ASSERT_EQUAL_64(src_base + 5 + 32, x22);
414   }
415 }
416 
417 
// Ld1 (multiple structures) into 128-bit vectors, using one to four registers
// and B/H/S/D lane arrangements. The base address advances by one byte
// between loads.
TEST(neon_ld1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, input_addr);
  __ Ld1(v2.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps around from v31 to v0.
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
  }
}
459 
460 
// Post-indexed Ld1 (multiple structures) into 128-bit vectors. Covers a
// register post-index and immediate post-indexes of 32, 48 and 64 bytes, and
// checks the written-back base of every load.
TEST(neon_ld1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, input_addr);
  __ Mov(x18, input_addr + 1);
  __ Mov(x19, input_addr + 2);
  __ Mov(x20, input_addr + 3);
  __ Mov(x21, input_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.
  __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld1(v16.V4S(),
         v17.V4S(),
         v18.V4S(),
         v19.V4S(),
         MemOperand(x20, 64, PostIndex));
  // Register list that wraps around from v31 to v0.
  __ Ld1(v30.V2D(),
         v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
    // Written-back bases: initial address plus the post-index amount.
    ASSERT_EQUAL_64(input_addr + 1, x17);
    ASSERT_EQUAL_64(input_addr + 1 + 32, x18);
    ASSERT_EQUAL_64(input_addr + 2 + 48, x19);
    ASSERT_EQUAL_64(input_addr + 3 + 64, x20);
    ASSERT_EQUAL_64(input_addr + 4 + 64, x21);
  }
}
516 
517 
// Ld1 (single structure) into individual B/H/S/D lanes: first filling whole
// registers lane by lane, then replacing a single lane of a register that
// already holds data.
TEST(neon_ld1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t input[64];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();

  // Test loading whole register by element. Lanes are filled from the highest
  // index down while the address steps by one byte per lane, so for lanes
  // wider than a byte consecutive loads read overlapping source bytes.
  __ Mov(x17, input_addr);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, input_addr);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld1(v1.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, input_addr);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld1(v2.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, input_addr);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld1(v3.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, input_addr);
  __ Ldr(q4, MemOperand(x17));
  __ Ld1(v4.B(), 4, MemOperand(x17));
  __ Ldr(q5, MemOperand(x17));
  __ Ld1(v5.H(), 3, MemOperand(x17));
  __ Ldr(q6, MemOperand(x17));
  __ Ld1(v6.S(), 2, MemOperand(x17));
  __ Ldr(q7, MemOperand(x17));
  __ Ld1(v7.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Registers filled lane by lane.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
    // Pre-loaded registers: only the selected lane may differ.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
  }
}
580 
// Ld2 (multiple structures) into pairs of 64-bit vectors with B, H and S
// lanes. The base address advances by one byte between loads.
TEST(neon_ld2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, input_addr);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register pair that wraps around from v31 to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Elements alternate between the two destination registers; the upper
    // 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);
  }
}
614 
// Post-indexed Ld2 (multiple structures) into pairs of 64-bit vectors.
// Covers a register post-index and a 16-byte immediate post-index, and checks
// the written-back base of every load.
TEST(neon_ld2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t input[32 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, input_addr);
  __ Mov(x18, input_addr + 1);
  __ Mov(x19, input_addr + 2);
  __ Mov(x20, input_addr + 3);
  __ Mov(x21, input_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
  // Note: this load overwrites the v5 produced by the previous one, so only
  // v4 of that pair is checked below.
  __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
  // Register pair that wraps around from v31 to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
    ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);

    // Written-back bases: initial address plus the post-index amount.
    ASSERT_EQUAL_64(input_addr + 1, x17);
    ASSERT_EQUAL_64(input_addr + 1 + 16, x18);
    ASSERT_EQUAL_64(input_addr + 2 + 16, x19);
    ASSERT_EQUAL_64(input_addr + 3 + 16, x20);
    ASSERT_EQUAL_64(input_addr + 4 + 16, x21);
  }
}
658 
659 
// Ld2 (multiple structures) into pairs of 128-bit vectors with B, H, S and D
// lanes. The base address advances by one byte between loads.
TEST(neon_ld2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, input_addr);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register pair that wraps around from v31 to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Elements alternate between the two destination registers.
    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
  }
}
697 
698 
// Post-indexed Ld2 (multiple structures) into pairs of 128-bit vectors.
// Covers a register post-index and a 32-byte immediate post-index, and checks
// the written-back base of every load.
TEST(neon_ld2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, input_addr);
  __ Mov(x18, input_addr + 1);
  __ Mov(x19, input_addr + 2);
  __ Mov(x20, input_addr + 3);
  __ Mov(x21, input_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
  // Register pair that wraps around from v31 to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Elements alternate between the two destination registers.
    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);


    // Written-back bases: initial address plus the post-index amount.
    ASSERT_EQUAL_64(input_addr + 1, x17);
    ASSERT_EQUAL_64(input_addr + 1 + 32, x18);
    ASSERT_EQUAL_64(input_addr + 2 + 32, x19);
    ASSERT_EQUAL_64(input_addr + 3 + 32, x20);
    ASSERT_EQUAL_64(input_addr + 4 + 32, x21);
  }
}
744 
745 
TEST(neon_ld2_lane)746 TEST(neon_ld2_lane) {
747   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
748 
749   uint8_t src[64];
750   for (unsigned i = 0; i < sizeof(src); i++) {
751     src[i] = i;
752   }
753   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
754 
755   START();
756 
757   // Test loading whole register by element.
758   __ Mov(x17, src_base);
759   for (int i = 15; i >= 0; i--) {
760     __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
761     __ Add(x17, x17, 1);
762   }
763 
764   __ Mov(x17, src_base);
765   for (int i = 7; i >= 0; i--) {
766     __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
767     __ Add(x17, x17, 1);
768   }
769 
770   __ Mov(x17, src_base);
771   for (int i = 3; i >= 0; i--) {
772     __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
773     __ Add(x17, x17, 1);
774   }
775 
776   __ Mov(x17, src_base);
777   for (int i = 1; i >= 0; i--) {
778     __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
779     __ Add(x17, x17, 1);
780   }
781 
782   // Test loading a single element into an initialised register.
783   __ Mov(x17, src_base);
784   __ Mov(x4, x17);
785   __ Ldr(q8, MemOperand(x4, 16, PostIndex));
786   __ Ldr(q9, MemOperand(x4));
787   __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
788   __ Mov(x5, x17);
789   __ Ldr(q10, MemOperand(x5, 16, PostIndex));
790   __ Ldr(q11, MemOperand(x5));
791   __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
792   __ Mov(x6, x17);
793   __ Ldr(q12, MemOperand(x6, 16, PostIndex));
794   __ Ldr(q13, MemOperand(x6));
795   __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
796   __ Mov(x7, x17);
797   __ Ldr(q14, MemOperand(x7, 16, PostIndex));
798   __ Ldr(q15, MemOperand(x7));
799   __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));
800 
801   END();
802 
803   if (CAN_RUN()) {
804     RUN();
805 
806     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
807     ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
808     ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
809     ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
810     ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
811     ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
812     ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
813     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
814     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
815     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
816     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
817     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
818     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
819     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
820     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
821     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
822   }
823 }
824 
825 
TEST(neon_ld2_lane_postindex)826 TEST(neon_ld2_lane_postindex) {
827   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
828 
829   uint8_t src[64];
830   for (unsigned i = 0; i < sizeof(src); i++) {
831     src[i] = i;
832   }
833   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
834 
835   START();
836   __ Mov(x17, src_base);
837   __ Mov(x18, src_base);
838   __ Mov(x19, src_base);
839   __ Mov(x20, src_base);
840   __ Mov(x21, src_base);
841   __ Mov(x22, src_base);
842   __ Mov(x23, src_base);
843   __ Mov(x24, src_base);
844 
845   // Test loading whole register by element.
846   for (int i = 15; i >= 0; i--) {
847     __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
848   }
849 
850   for (int i = 7; i >= 0; i--) {
851     __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
852   }
853 
854   for (int i = 3; i >= 0; i--) {
855     __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
856   }
857 
858   for (int i = 1; i >= 0; i--) {
859     __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
860   }
861 
862   // Test loading a single element into an initialised register.
863   __ Mov(x25, 1);
864   __ Mov(x4, x21);
865   __ Ldr(q8, MemOperand(x4, 16, PostIndex));
866   __ Ldr(q9, MemOperand(x4));
867   __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
868   __ Add(x25, x25, 1);
869 
870   __ Mov(x5, x22);
871   __ Ldr(q10, MemOperand(x5, 16, PostIndex));
872   __ Ldr(q11, MemOperand(x5));
873   __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
874   __ Add(x25, x25, 1);
875 
876   __ Mov(x6, x23);
877   __ Ldr(q12, MemOperand(x6, 16, PostIndex));
878   __ Ldr(q13, MemOperand(x6));
879   __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
880   __ Add(x25, x25, 1);
881 
882   __ Mov(x7, x24);
883   __ Ldr(q14, MemOperand(x7, 16, PostIndex));
884   __ Ldr(q15, MemOperand(x7));
885   __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));
886 
887   END();
888 
889   if (CAN_RUN()) {
890     RUN();
891 
892     ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
893     ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
894     ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
895     ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
896     ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
897     ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
898     ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
899     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
900     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
901     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
902     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
903     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
904     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
905     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
906     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
907     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
908 
909 
910     ASSERT_EQUAL_64(src_base + 32, x17);
911     ASSERT_EQUAL_64(src_base + 32, x18);
912     ASSERT_EQUAL_64(src_base + 32, x19);
913     ASSERT_EQUAL_64(src_base + 32, x20);
914     ASSERT_EQUAL_64(src_base + 1, x21);
915     ASSERT_EQUAL_64(src_base + 2, x22);
916     ASSERT_EQUAL_64(src_base + 3, x23);
917     ASSERT_EQUAL_64(src_base + 4, x24);
918   }
919 }
920 
921 
TEST(neon_ld2_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[n] == n, so the expected register contents below can be read off
  // directly from the load address.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 is the only address register needed: every Ld2r below uses plain
  // base-register addressing and the base is advanced with explicit Adds.
  __ Mov(x17, src_base + 1);
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Add(x17, x17, 2);
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Each register is expected to hold one loaded element replicated across
    // all of its lanes; the 64-bit arrangements leave the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
  }
}
968 
969 
TEST(neon_ld2_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the source buffer holds the value n.
  uint8_t src[64];
  for (unsigned offset = 0; offset != sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);

  // Register post-index form shared by three of the loads below: x17 is
  // advanced by the value held in x18 (set to 1 above). The remaining loads
  // use an immediate post-index of twice the element size.
  const MemOperand post_by_x18(x17, x18, PostIndex);

  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
  __ Ld2r(v2.V16B(), v3.V16B(), post_by_x18);
  __ Ld2r(v4.V4H(), v5.V4H(), post_by_x18);
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
  __ Ld2r(v8.V2S(), v9.V2S(), post_by_x18);
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Byte arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    // Halfword arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    // Word arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    // Doubleword arrangement.
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);

    // Initial offset 1 plus post-indexes 2 + 1 + 1 + 4 + 1 + 8 + 16.
    ASSERT_EQUAL_64(src_base + 34, x17);
  }
}
1011 
1012 
TEST(neon_ld3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the source buffer holds the value n.
  uint8_t src[64 + 4];
  for (unsigned offset = 0; offset != sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Every load addresses memory through x17 with no offset. The base is moved
  // on by one byte between loads, so the loads start at offsets 0, 1, 2 and 3.
  const MemOperand at_x17(x17);
  __ Mov(x17, src_base);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), at_x17);
  __ Add(x17, x17, 1);
  // The last register list wraps around from v31 to v0.
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), at_x17);
  END();

  if (CAN_RUN()) {
    RUN();

    // Only the low 64 bits are populated; the upper halves must read as zero.
    // V8B from offset 0.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    // V8B from offset 1.
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    // V4H from offset 2.
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    // V2S from offset 3.
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);
  }
}
1050 
1051 
TEST(neon_ld3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the source buffer holds the value n.
  uint8_t src[32 + 4];
  for (unsigned offset = 0; offset != sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // Immediate post-index used below: three 8-byte registers' worth of data.
  const int kPostIndexBytes = 24;

  START();
  // One base register per load, starting at offsets 0 to 4.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  // x22 supplies the register post-index for the first load.
  __ Mov(x22, 1);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V8B(),
         v6.V8B(),
         v7.V8B(),
         MemOperand(x18, kPostIndexBytes, PostIndex));
  __ Ld3(v8.V4H(),
         v9.V4H(),
         v10.V4H(),
         MemOperand(x19, kPostIndexBytes, PostIndex));
  __ Ld3(v11.V2S(),
         v12.V2S(),
         v13.V2S(),
         MemOperand(x20, kPostIndexBytes, PostIndex));
  // The last register list wraps around from v31 to v0.
  __ Ld3(v31.V2S(),
         v0.V2S(),
         v1.V2S(),
         MemOperand(x21, kPostIndexBytes, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Only the low 64 bits are populated; the upper halves must read as zero.
    // V8B from offset 0.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    // V8B from offset 1.
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    // V4H from offset 2.
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    // V2S from offset 3.
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
    // V2S from offset 4.
    ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
    ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
    ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);

    // Final base addresses: register post-index (x22 == 1) for x17, immediate
    // post-index for the others.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + kPostIndexBytes, x18);
    ASSERT_EQUAL_64(src_base + 2 + kPostIndexBytes, x19);
    ASSERT_EQUAL_64(src_base + 3 + kPostIndexBytes, x20);
    ASSERT_EQUAL_64(src_base + 4 + kPostIndexBytes, x21);
  }
}
1101 
1102 
TEST(neon_ld3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the source buffer holds the value n.
  uint8_t src[64 + 4];
  for (unsigned offset = 0; offset != sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Every load addresses memory through x17 with no offset. The base is moved
  // on by one byte between loads, so the loads start at offsets 0 to 4.
  const MemOperand at_x17(x17);
  __ Mov(x17, src_base);
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), at_x17);
  __ Add(x17, x17, 1);
  // The last register list wraps around from v31 to v0.
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), at_x17);
  END();

  if (CAN_RUN()) {
    RUN();

    // V16B from offset 0.
    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    // V16B from offset 1.
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    // V8H from offset 2.
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    // V4S from offset 3.
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    // V2D from offset 4.
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
  }
}
1145 
1146 
TEST(neon_ld3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the source buffer holds the value n.
  uint8_t src[64 + 4];
  for (unsigned offset = 0; offset != sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // Immediate post-index used below: three 16-byte registers' worth of data.
  const int kPostIndexBytes = 48;

  START();
  // One base register per load, starting at offsets 0 to 4.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  // x22 supplies the register post-index for the first load.
  __ Mov(x22, 1);

  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V16B(),
         v6.V16B(),
         v7.V16B(),
         MemOperand(x18, kPostIndexBytes, PostIndex));
  __ Ld3(v8.V8H(),
         v9.V8H(),
         v10.V8H(),
         MemOperand(x19, kPostIndexBytes, PostIndex));
  __ Ld3(v11.V4S(),
         v12.V4S(),
         v13.V4S(),
         MemOperand(x20, kPostIndexBytes, PostIndex));
  // The last register list wraps around from v31 to v0.
  __ Ld3(v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, kPostIndexBytes, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // V16B from offset 0.
    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    // V16B from offset 1.
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    // V8H from offset 2.
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    // V4S from offset 3.
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    // V2D from offset 4.
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

    // Final base addresses: register post-index (x22 == 1) for x17, immediate
    // post-index for the others.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + kPostIndexBytes, x18);
    ASSERT_EQUAL_64(src_base + 2 + kPostIndexBytes, x19);
    ASSERT_EQUAL_64(src_base + 3 + kPostIndexBytes, x20);
    ASSERT_EQUAL_64(src_base + 4 + kPostIndexBytes, x21);
  }
}
1197 
1198 
TEST(neon_ld3_lane)1199 TEST(neon_ld3_lane) {
1200   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1201 
1202   uint8_t src[64];
1203   for (unsigned i = 0; i < sizeof(src); i++) {
1204     src[i] = i;
1205   }
1206   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1207 
1208   START();
1209 
1210   // Test loading whole register by element.
1211   __ Mov(x17, src_base);
1212   for (int i = 15; i >= 0; i--) {
1213     __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
1214     __ Add(x17, x17, 1);
1215   }
1216 
1217   __ Mov(x17, src_base);
1218   for (int i = 7; i >= 0; i--) {
1219     __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
1220     __ Add(x17, x17, 1);
1221   }
1222 
1223   __ Mov(x17, src_base);
1224   for (int i = 3; i >= 0; i--) {
1225     __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
1226     __ Add(x17, x17, 1);
1227   }
1228 
1229   __ Mov(x17, src_base);
1230   for (int i = 1; i >= 0; i--) {
1231     __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
1232     __ Add(x17, x17, 1);
1233   }
1234 
1235   // Test loading a single element into an initialised register.
1236   __ Mov(x17, src_base);
1237   __ Mov(x4, x17);
1238   __ Ldr(q12, MemOperand(x4, 16, PostIndex));
1239   __ Ldr(q13, MemOperand(x4, 16, PostIndex));
1240   __ Ldr(q14, MemOperand(x4));
1241   __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
1242   __ Mov(x5, x17);
1243   __ Ldr(q15, MemOperand(x5, 16, PostIndex));
1244   __ Ldr(q16, MemOperand(x5, 16, PostIndex));
1245   __ Ldr(q17, MemOperand(x5));
1246   __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
1247   __ Mov(x6, x17);
1248   __ Ldr(q18, MemOperand(x6, 16, PostIndex));
1249   __ Ldr(q19, MemOperand(x6, 16, PostIndex));
1250   __ Ldr(q20, MemOperand(x6));
1251   __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
1252   __ Mov(x7, x17);
1253   __ Ldr(q21, MemOperand(x7, 16, PostIndex));
1254   __ Ldr(q22, MemOperand(x7, 16, PostIndex));
1255   __ Ldr(q23, MemOperand(x7));
1256   __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));
1257 
1258   END();
1259 
1260   if (CAN_RUN()) {
1261     RUN();
1262 
1263     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
1264     ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
1265     ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
1266     ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
1267     ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
1268     ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
1269     ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
1270     ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
1271     ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
1272     ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
1273     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
1274     ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);
1275     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
1276     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
1277     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
1278     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
1279     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
1280     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
1281   }
1282 }
1283 
1284 
TEST(neon_ld3_lane_postindex)1285 TEST(neon_ld3_lane_postindex) {
1286   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1287 
1288   uint8_t src[64];
1289   for (unsigned i = 0; i < sizeof(src); i++) {
1290     src[i] = i;
1291   }
1292   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1293 
1294   START();
1295 
1296   // Test loading whole register by element.
1297   __ Mov(x17, src_base);
1298   __ Mov(x18, src_base);
1299   __ Mov(x19, src_base);
1300   __ Mov(x20, src_base);
1301   __ Mov(x21, src_base);
1302   __ Mov(x22, src_base);
1303   __ Mov(x23, src_base);
1304   __ Mov(x24, src_base);
1305   for (int i = 15; i >= 0; i--) {
1306     __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex));
1307   }
1308 
1309   for (int i = 7; i >= 0; i--) {
1310     __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex));
1311   }
1312 
1313   for (int i = 3; i >= 0; i--) {
1314     __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex));
1315   }
1316 
1317   for (int i = 1; i >= 0; i--) {
1318     __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex));
1319   }
1320 
1321 
1322   // Test loading a single element into an initialised register.
1323   __ Mov(x25, 1);
1324   __ Mov(x4, x21);
1325   __ Ldr(q12, MemOperand(x4, 16, PostIndex));
1326   __ Ldr(q13, MemOperand(x4, 16, PostIndex));
1327   __ Ldr(q14, MemOperand(x4));
1328   __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
1329   __ Add(x25, x25, 1);
1330 
1331   __ Mov(x5, x22);
1332   __ Ldr(q15, MemOperand(x5, 16, PostIndex));
1333   __ Ldr(q16, MemOperand(x5, 16, PostIndex));
1334   __ Ldr(q17, MemOperand(x5));
1335   __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
1336   __ Add(x25, x25, 1);
1337 
1338   __ Mov(x6, x23);
1339   __ Ldr(q18, MemOperand(x6, 16, PostIndex));
1340   __ Ldr(q19, MemOperand(x6, 16, PostIndex));
1341   __ Ldr(q20, MemOperand(x6));
1342   __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
1343   __ Add(x25, x25, 1);
1344 
1345   __ Mov(x7, x24);
1346   __ Ldr(q21, MemOperand(x7, 16, PostIndex));
1347   __ Ldr(q22, MemOperand(x7, 16, PostIndex));
1348   __ Ldr(q23, MemOperand(x7));
1349   __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));
1350 
1351   END();
1352 
1353   if (CAN_RUN()) {
1354     RUN();
1355 
1356     ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
1357     ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
1358     ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
1359     ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
1360     ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
1361     ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
1362     ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
1363     ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
1364     ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
1365     ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
1366     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
1367     ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);
1368     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
1369     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
1370     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
1371     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
1372     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
1373     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
1374     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
1375     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
1376     ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
1377     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
1378     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
1379     ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);
1380 
1381     ASSERT_EQUAL_64(src_base + 48, x17);
1382     ASSERT_EQUAL_64(src_base + 48, x18);
1383     ASSERT_EQUAL_64(src_base + 48, x19);
1384     ASSERT_EQUAL_64(src_base + 48, x20);
1385     ASSERT_EQUAL_64(src_base + 1, x21);
1386     ASSERT_EQUAL_64(src_base + 2, x22);
1387     ASSERT_EQUAL_64(src_base + 3, x23);
1388     ASSERT_EQUAL_64(src_base + 4, x24);
1389   }
1390 }
1391 
1392 
TEST(neon_ld3_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[n] == n, so the expected register contents below can be read off
  // directly from the load address.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 is the only address register needed: every Ld3r below uses plain
  // base-register addressing and the base is advanced with explicit Adds.
  __ Mov(x17, src_base + 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 3);
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Each register is expected to hold one loaded element replicated across
    // all of its lanes; the 64-bit arrangements leave the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
  }
}
1446 
1447 
TEST(neon_ld3_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[n] == n, so the expected register contents below can be read off
  // directly from the load address.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 is the post-indexed base address; x18 supplies the register
  // post-index of 1 used by three of the loads. The other loads use an
  // immediate post-index of three times the element size.
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Each register is expected to hold one loaded element replicated across
    // all of its lanes; the 64-bit arrangements leave the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

    // Initial offset 1 plus post-indexes 3 + 1 + 1 + 6 + 1 + 12 + 24.
    ASSERT_EQUAL_64(src_base + 49, x17);
  }
}
1497 
1498 
TEST(neon_ld4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the source buffer holds the value n.
  uint8_t src[64 + 4];
  for (unsigned offset = 0; offset != sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Every load addresses memory through x17 with no offset. The base is moved
  // on by one byte between loads, so the loads start at offsets 0, 1, 2 and 3.
  const MemOperand at_x17(x17);
  __ Mov(x17, src_base);
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), at_x17);
  __ Add(x17, x17, 1);
  // The last register list wraps around from v31 to v0.
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), at_x17);
  END();

  if (CAN_RUN()) {
    RUN();

    // Only the low 64 bits are populated; the upper halves must read as zero.
    // V8B from offset 0.
    ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
    // V8B from offset 1.
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
    // V4H from offset 2.
    ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
    // V2S from offset 3.
    ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
    ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
    ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
    ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);
  }
}
1540 
1541 
TEST(neon_ld4_d_postindex)1542 TEST(neon_ld4_d_postindex) {
1543   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1544 
1545   uint8_t src[32 + 4];
1546   for (unsigned i = 0; i < sizeof(src); i++) {
1547     src[i] = i;
1548   }
1549   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1550 
1551   START();
1552   __ Mov(x17, src_base);
1553   __ Mov(x18, src_base + 1);
1554   __ Mov(x19, src_base + 2);
1555   __ Mov(x20, src_base + 3);
1556   __ Mov(x21, src_base + 4);
1557   __ Mov(x22, 1);
1558   __ Ld4(v2.V8B(),
1559          v3.V8B(),
1560          v4.V8B(),
1561          v5.V8B(),
1562          MemOperand(x17, x22, PostIndex));
1563   __ Ld4(v6.V8B(),
1564          v7.V8B(),
1565          v8.V8B(),
1566          v9.V8B(),
1567          MemOperand(x18, 32, PostIndex));
1568   __ Ld4(v10.V4H(),
1569          v11.V4H(),
1570          v12.V4H(),
1571          v13.V4H(),
1572          MemOperand(x19, 32, PostIndex));
1573   __ Ld4(v14.V2S(),
1574          v15.V2S(),
1575          v16.V2S(),
1576          v17.V2S(),
1577          MemOperand(x20, 32, PostIndex));
1578   __ Ld4(v30.V2S(),
1579          v31.V2S(),
1580          v0.V2S(),
1581          v1.V2S(),
1582          MemOperand(x21, 32, PostIndex));
1583   END();
1584 
1585   if (CAN_RUN()) {
1586     RUN();
1587 
1588     ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
1589     ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
1590     ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
1591     ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
1592     ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
1593     ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
1594     ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
1595     ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
1596     ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
1597     ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
1598     ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
1599     ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
1600     ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
1601     ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
1602     ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
1603     ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
1604     ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
1605     ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
1606     ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
1607     ASSERT_EQUAL_128(0, 0x2322212013121110, q1);
1608 
1609 
1610     ASSERT_EQUAL_64(src_base + 1, x17);
1611     ASSERT_EQUAL_64(src_base + 1 + 32, x18);
1612     ASSERT_EQUAL_64(src_base + 2 + 32, x19);
1613     ASSERT_EQUAL_64(src_base + 3 + 32, x20);
1614     ASSERT_EQUAL_64(src_base + 4 + 32, x21);
1615   }
1616 }
1617 
1618 
TEST(neon_ld4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the source buffer holds the value n. The four extra bytes cover
  // the last load, which starts at offset 4 and reads 64 bytes.
  uint8_t src[64 + 4];
  for (unsigned offset = 0; offset != sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Every load addresses memory through x17 with no offset. The base is moved
  // on by one byte between loads, so the loads start at offsets 0 to 4.
  const MemOperand at_x17(x17);
  __ Mov(x17, src_base);
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), at_x17);
  __ Add(x17, x17, 1);
  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), at_x17);
  END();

  if (CAN_RUN()) {
    RUN();

    // V16B from offset 0.
    ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
    // V16B from offset 1.
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
    // V8H from offset 2.
    ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
    // V4S from offset 3.
    ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
    // V2D from offset 4.
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
    ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);
  }
}
1666 
1667 
TEST(neon_ld4_q_postindex)1668 TEST(neon_ld4_q_postindex) {
1669   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1670 
1671   uint8_t src[64 + 4];
1672   for (unsigned i = 0; i < sizeof(src); i++) {
1673     src[i] = i;
1674   }
1675   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1676 
1677   START();
1678   __ Mov(x17, src_base);
1679   __ Mov(x18, src_base + 1);
1680   __ Mov(x19, src_base + 2);
1681   __ Mov(x20, src_base + 3);
1682   __ Mov(x21, src_base + 4);
1683   __ Mov(x22, 1);
1684 
1685   __ Ld4(v2.V16B(),
1686          v3.V16B(),
1687          v4.V16B(),
1688          v5.V16B(),
1689          MemOperand(x17, x22, PostIndex));
1690   __ Ld4(v6.V16B(),
1691          v7.V16B(),
1692          v8.V16B(),
1693          v9.V16B(),
1694          MemOperand(x18, 64, PostIndex));
1695   __ Ld4(v10.V8H(),
1696          v11.V8H(),
1697          v12.V8H(),
1698          v13.V8H(),
1699          MemOperand(x19, 64, PostIndex));
1700   __ Ld4(v14.V4S(),
1701          v15.V4S(),
1702          v16.V4S(),
1703          v17.V4S(),
1704          MemOperand(x20, 64, PostIndex));
1705   __ Ld4(v30.V2D(),
1706          v31.V2D(),
1707          v0.V2D(),
1708          v1.V2D(),
1709          MemOperand(x21, 64, PostIndex));
1710   END();
1711 
1712   if (CAN_RUN()) {
1713     RUN();
1714 
1715     ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
1716     ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
1717     ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
1718     ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
1719     ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
1720     ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
1721     ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
1722     ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
1723     ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
1724     ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
1725     ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
1726     ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
1727     ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
1728     ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
1729     ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
1730     ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
1731     ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
1732     ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
1733     ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
1734     ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);
1735 
1736 
1737     ASSERT_EQUAL_64(src_base + 1, x17);
1738     ASSERT_EQUAL_64(src_base + 1 + 64, x18);
1739     ASSERT_EQUAL_64(src_base + 2 + 64, x19);
1740     ASSERT_EQUAL_64(src_base + 3 + 64, x20);
1741     ASSERT_EQUAL_64(src_base + 4 + 64, x21);
1742   }
1743 }
1744 
1745 
TEST(neon_ld4_lane)1746 TEST(neon_ld4_lane) {
1747   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1748 
1749   uint8_t src[64];
1750   for (unsigned i = 0; i < sizeof(src); i++) {
1751     src[i] = i;
1752   }
1753   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1754 
1755   START();
1756 
1757   // Test loading whole register by element.
1758   __ Mov(x17, src_base);
1759   for (int i = 15; i >= 0; i--) {
1760     __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17));
1761     __ Add(x17, x17, 1);
1762   }
1763 
1764   __ Mov(x17, src_base);
1765   for (int i = 7; i >= 0; i--) {
1766     __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17));
1767     __ Add(x17, x17, 1);
1768   }
1769 
1770   __ Mov(x17, src_base);
1771   for (int i = 3; i >= 0; i--) {
1772     __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17));
1773     __ Add(x17, x17, 1);
1774   }
1775 
1776   __ Mov(x17, src_base);
1777   for (int i = 1; i >= 0; i--) {
1778     __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17));
1779     __ Add(x17, x17, 1);
1780   }
1781 
1782   // Test loading a single element into an initialised register.
1783   __ Mov(x17, src_base);
1784   __ Mov(x4, x17);
1785   __ Ldr(q16, MemOperand(x4, 16, PostIndex));
1786   __ Ldr(q17, MemOperand(x4, 16, PostIndex));
1787   __ Ldr(q18, MemOperand(x4, 16, PostIndex));
1788   __ Ldr(q19, MemOperand(x4));
1789   __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));
1790 
1791   __ Mov(x5, x17);
1792   __ Ldr(q20, MemOperand(x5, 16, PostIndex));
1793   __ Ldr(q21, MemOperand(x5, 16, PostIndex));
1794   __ Ldr(q22, MemOperand(x5, 16, PostIndex));
1795   __ Ldr(q23, MemOperand(x5));
1796   __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));
1797 
1798   __ Mov(x6, x17);
1799   __ Ldr(q24, MemOperand(x6, 16, PostIndex));
1800   __ Ldr(q25, MemOperand(x6, 16, PostIndex));
1801   __ Ldr(q26, MemOperand(x6, 16, PostIndex));
1802   __ Ldr(q27, MemOperand(x6));
1803   __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));
1804 
1805   __ Mov(x7, x17);
1806   __ Ldr(q28, MemOperand(x7, 16, PostIndex));
1807   __ Ldr(q29, MemOperand(x7, 16, PostIndex));
1808   __ Ldr(q30, MemOperand(x7, 16, PostIndex));
1809   __ Ldr(q31, MemOperand(x7));
1810   __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));
1811 
1812   END();
1813 
1814   if (CAN_RUN()) {
1815     RUN();
1816 
1817     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
1818     ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
1819     ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
1820     ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
1821     ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
1822     ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
1823     ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
1824     ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
1825     ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
1826     ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
1827     ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
1828     ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
1829     ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
1830     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
1831     ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
1832     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);
1833     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
1834     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
1835     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
1836     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
1837     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
1838     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
1839     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
1840     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
1841     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
1842     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
1843     ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
1844     ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
1845     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
1846     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
1847     ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
1848     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
1849   }
1850 }
1851 
1852 
TEST(neon_ld4_lane_postindex)1853 TEST(neon_ld4_lane_postindex) {
1854   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1855 
1856   uint8_t src[64];
1857   for (unsigned i = 0; i < sizeof(src); i++) {
1858     src[i] = i;
1859   }
1860   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1861 
1862   START();
1863 
1864   // Test loading whole register by element.
1865   __ Mov(x17, src_base);
1866   for (int i = 15; i >= 0; i--) {
1867     __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex));
1868   }
1869 
1870   __ Mov(x18, src_base);
1871   for (int i = 7; i >= 0; i--) {
1872     __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex));
1873   }
1874 
1875   __ Mov(x19, src_base);
1876   for (int i = 3; i >= 0; i--) {
1877     __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex));
1878   }
1879 
1880   __ Mov(x20, src_base);
1881   for (int i = 1; i >= 0; i--) {
1882     __ Ld4(v12.D(),
1883            v13.D(),
1884            v14.D(),
1885            v15.D(),
1886            i,
1887            MemOperand(x20, 32, PostIndex));
1888   }
1889 
1890   // Test loading a single element into an initialised register.
1891   __ Mov(x25, 1);
1892   __ Mov(x21, src_base);
1893   __ Mov(x22, src_base);
1894   __ Mov(x23, src_base);
1895   __ Mov(x24, src_base);
1896 
1897   __ Mov(x4, x21);
1898   __ Ldr(q16, MemOperand(x4, 16, PostIndex));
1899   __ Ldr(q17, MemOperand(x4, 16, PostIndex));
1900   __ Ldr(q18, MemOperand(x4, 16, PostIndex));
1901   __ Ldr(q19, MemOperand(x4));
1902   __ Ld4(v16.B(),
1903          v17.B(),
1904          v18.B(),
1905          v19.B(),
1906          4,
1907          MemOperand(x21, x25, PostIndex));
1908   __ Add(x25, x25, 1);
1909 
1910   __ Mov(x5, x22);
1911   __ Ldr(q20, MemOperand(x5, 16, PostIndex));
1912   __ Ldr(q21, MemOperand(x5, 16, PostIndex));
1913   __ Ldr(q22, MemOperand(x5, 16, PostIndex));
1914   __ Ldr(q23, MemOperand(x5));
1915   __ Ld4(v20.H(),
1916          v21.H(),
1917          v22.H(),
1918          v23.H(),
1919          3,
1920          MemOperand(x22, x25, PostIndex));
1921   __ Add(x25, x25, 1);
1922 
1923   __ Mov(x6, x23);
1924   __ Ldr(q24, MemOperand(x6, 16, PostIndex));
1925   __ Ldr(q25, MemOperand(x6, 16, PostIndex));
1926   __ Ldr(q26, MemOperand(x6, 16, PostIndex));
1927   __ Ldr(q27, MemOperand(x6));
1928   __ Ld4(v24.S(),
1929          v25.S(),
1930          v26.S(),
1931          v27.S(),
1932          2,
1933          MemOperand(x23, x25, PostIndex));
1934   __ Add(x25, x25, 1);
1935 
1936   __ Mov(x7, x24);
1937   __ Ldr(q28, MemOperand(x7, 16, PostIndex));
1938   __ Ldr(q29, MemOperand(x7, 16, PostIndex));
1939   __ Ldr(q30, MemOperand(x7, 16, PostIndex));
1940   __ Ldr(q31, MemOperand(x7));
1941   __ Ld4(v28.D(),
1942          v29.D(),
1943          v30.D(),
1944          v31.D(),
1945          1,
1946          MemOperand(x24, x25, PostIndex));
1947 
1948   END();
1949 
1950   if (CAN_RUN()) {
1951     RUN();
1952 
1953     ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
1954     ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
1955     ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
1956     ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
1957     ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
1958     ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
1959     ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
1960     ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
1961     ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
1962     ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
1963     ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
1964     ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
1965     ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
1966     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
1967     ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
1968     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);
1969     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
1970     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
1971     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
1972     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
1973     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
1974     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
1975     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
1976     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
1977     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
1978     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
1979     ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
1980     ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
1981     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
1982     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
1983     ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
1984     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
1985 
1986     ASSERT_EQUAL_64(src_base + 64, x17);
1987     ASSERT_EQUAL_64(src_base + 64, x18);
1988     ASSERT_EQUAL_64(src_base + 64, x19);
1989     ASSERT_EQUAL_64(src_base + 64, x20);
1990     ASSERT_EQUAL_64(src_base + 1, x21);
1991     ASSERT_EQUAL_64(src_base + 2, x22);
1992     ASSERT_EQUAL_64(src_base + 3, x23);
1993     ASSERT_EQUAL_64(src_base + 4, x24);
1994   }
1995 }
1996 
1997 
TEST(neon_ld4_alllanes)1998 TEST(neon_ld4_alllanes) {
1999   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2000 
2001   uint8_t src[64];
2002   for (unsigned i = 0; i < sizeof(src); i++) {
2003     src[i] = i;
2004   }
2005   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2006 
2007   START();
2008   __ Mov(x17, src_base + 1);
2009   __ Mov(x18, 1);
2010   __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
2011   __ Add(x17, x17, 4);
2012   __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
2013   __ Add(x17, x17, 1);
2014   __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
2015   __ Add(x17, x17, 1);
2016   __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
2017   __ Add(x17, x17, 8);
2018   __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
2019   __ Add(x17, x17, 1);
2020   __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
2021   __ Add(x17, x17, 16);
2022   __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
2023 
2024 
2025   END();
2026 
2027   if (CAN_RUN()) {
2028     RUN();
2029 
2030     ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
2031     ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
2032     ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
2033     ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
2034     ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
2035     ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
2036     ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
2037     ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
2038     ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
2039     ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
2040     ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
2041     ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
2042     ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
2043     ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
2044     ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
2045     ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
2046     ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
2047     ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
2048     ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
2049     ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
2050     ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
2051     ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
2052     ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
2053     ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
2054     ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
2055     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
2056     ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
2057     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
2058   }
2059 }
2060 
2061 
TEST(neon_ld4_alllanes_postindex)2062 TEST(neon_ld4_alllanes_postindex) {
2063   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2064 
2065   uint8_t src[64];
2066   for (unsigned i = 0; i < sizeof(src); i++) {
2067     src[i] = i;
2068   }
2069   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2070   __ Mov(x17, src_base + 1);
2071   __ Mov(x18, 1);
2072 
2073   START();
2074   __ Mov(x17, src_base + 1);
2075   __ Mov(x18, 1);
2076   __ Ld4r(v0.V8B(),
2077           v1.V8B(),
2078           v2.V8B(),
2079           v3.V8B(),
2080           MemOperand(x17, 4, PostIndex));
2081   __ Ld4r(v4.V16B(),
2082           v5.V16B(),
2083           v6.V16B(),
2084           v7.V16B(),
2085           MemOperand(x17, x18, PostIndex));
2086   __ Ld4r(v8.V4H(),
2087           v9.V4H(),
2088           v10.V4H(),
2089           v11.V4H(),
2090           MemOperand(x17, x18, PostIndex));
2091   __ Ld4r(v12.V8H(),
2092           v13.V8H(),
2093           v14.V8H(),
2094           v15.V8H(),
2095           MemOperand(x17, 8, PostIndex));
2096   __ Ld4r(v16.V2S(),
2097           v17.V2S(),
2098           v18.V2S(),
2099           v19.V2S(),
2100           MemOperand(x17, x18, PostIndex));
2101   __ Ld4r(v20.V4S(),
2102           v21.V4S(),
2103           v22.V4S(),
2104           v23.V4S(),
2105           MemOperand(x17, 16, PostIndex));
2106   __ Ld4r(v24.V2D(),
2107           v25.V2D(),
2108           v26.V2D(),
2109           v27.V2D(),
2110           MemOperand(x17, 32, PostIndex));
2111   END();
2112 
2113   if (CAN_RUN()) {
2114     RUN();
2115 
2116     ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
2117     ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
2118     ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
2119     ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
2120     ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
2121     ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
2122     ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
2123     ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
2124     ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
2125     ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
2126     ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
2127     ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
2128     ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
2129     ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
2130     ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
2131     ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
2132     ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
2133     ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
2134     ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
2135     ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
2136     ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
2137     ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
2138     ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
2139     ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
2140     ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
2141     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
2142     ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
2143     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
2144     ASSERT_EQUAL_64(src_base + 64, x17);
2145   }
2146 }
2147 
2148 
// Exercises the single-lane form of St1 for B, H, S and D elements by storing
// the lanes of one register in descending order and reading the result back.
TEST(neon_st1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte holds its own offset. The buffer doubles as the destination:
  // each element size overwrites its own 16-byte quarter.
  uint8_t src[64];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = offset;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // Offset used to re-read the 16 bytes written by each group of stores.
  __ Mov(x18, -16);
  // v0 takes src[0..15] before any store touches the buffer.
  __ Ldr(q0, MemOperand(x17));

  // Byte lanes.
  for (int lane = 15; lane >= 0; --lane) {
    __ St1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }
  __ Ldr(q1, MemOperand(x17, x18));

  // Halfword lanes.
  for (int lane = 7; lane >= 0; --lane) {
    __ St1(v0.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 2);
  }
  __ Ldr(q2, MemOperand(x17, x18));

  // Word lanes.
  for (int lane = 3; lane >= 0; --lane) {
    __ St1(v0.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 4);
  }
  __ Ldr(q3, MemOperand(x17, x18));

  // Doubleword lanes.
  for (int lane = 1; lane >= 0; --lane) {
    __ St1(v0.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 8);
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // Each result is v0 with its elements in reverse order, at the
    // granularity of the element size that was stored.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2198 
2199 
TEST(neon_st2_lane)2200 TEST(neon_st2_lane) {
2201   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2202 
2203   // Struct size * addressing modes * element sizes * vector size.
2204   uint8_t dst[2 * 2 * 4 * 16];
2205   memset(dst, 0, sizeof(dst));
2206   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2207 
2208   START();
2209   __ Mov(x17, dst_base);
2210   __ Mov(x18, dst_base);
2211   __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2212   __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2213 
2214   // Test B stores with and without post index.
2215   for (int i = 15; i >= 0; i--) {
2216     __ St2(v0.B(), v1.B(), i, MemOperand(x18));
2217     __ Add(x18, x18, 2);
2218   }
2219   for (int i = 15; i >= 0; i--) {
2220     __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
2221   }
2222   __ Ldr(q2, MemOperand(x17, 0 * 16));
2223   __ Ldr(q3, MemOperand(x17, 1 * 16));
2224   __ Ldr(q4, MemOperand(x17, 2 * 16));
2225   __ Ldr(q5, MemOperand(x17, 3 * 16));
2226 
2227   // Test H stores with and without post index.
2228   __ Mov(x0, 4);
2229   for (int i = 7; i >= 0; i--) {
2230     __ St2(v0.H(), v1.H(), i, MemOperand(x18));
2231     __ Add(x18, x18, 4);
2232   }
2233   for (int i = 7; i >= 0; i--) {
2234     __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
2235   }
2236   __ Ldr(q6, MemOperand(x17, 4 * 16));
2237   __ Ldr(q7, MemOperand(x17, 5 * 16));
2238   __ Ldr(q16, MemOperand(x17, 6 * 16));
2239   __ Ldr(q17, MemOperand(x17, 7 * 16));
2240 
2241   // Test S stores with and without post index.
2242   for (int i = 3; i >= 0; i--) {
2243     __ St2(v0.S(), v1.S(), i, MemOperand(x18));
2244     __ Add(x18, x18, 8);
2245   }
2246   for (int i = 3; i >= 0; i--) {
2247     __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
2248   }
2249   __ Ldr(q18, MemOperand(x17, 8 * 16));
2250   __ Ldr(q19, MemOperand(x17, 9 * 16));
2251   __ Ldr(q20, MemOperand(x17, 10 * 16));
2252   __ Ldr(q21, MemOperand(x17, 11 * 16));
2253 
2254   // Test D stores with and without post index.
2255   __ Mov(x0, 16);
2256   __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
2257   __ Add(x18, x18, 16);
2258   __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
2259   __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
2260   __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
2261   __ Ldr(q22, MemOperand(x17, 12 * 16));
2262   __ Ldr(q23, MemOperand(x17, 13 * 16));
2263   __ Ldr(q24, MemOperand(x17, 14 * 16));
2264   __ Ldr(q25, MemOperand(x17, 15 * 16));
2265   END();
2266 
2267   if (CAN_RUN()) {
2268     RUN();
2269 
2270     ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
2271     ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
2272     ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
2273     ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);
2274 
2275     ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
2276     ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
2277     ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
2278     ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);
2279 
2280     ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
2281     ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
2282     ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
2283     ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);
2284 
2285     ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
2286     ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
2287     ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
2288     ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
2289   }
2290 }
2291 
2292 
TEST(neon_st3_lane)2293 TEST(neon_st3_lane) {
2294   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2295 
2296   // Struct size * addressing modes * element sizes * vector size.
2297   uint8_t dst[3 * 2 * 4 * 16];
2298   memset(dst, 0, sizeof(dst));
2299   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2300 
2301   START();
2302   __ Mov(x17, dst_base);
2303   __ Mov(x18, dst_base);
2304   __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2305   __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2306   __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2307 
2308   // Test B stores with and without post index.
2309   for (int i = 15; i >= 0; i--) {
2310     __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
2311     __ Add(x18, x18, 3);
2312   }
2313   for (int i = 15; i >= 0; i--) {
2314     __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
2315   }
2316   __ Ldr(q3, MemOperand(x17, 0 * 16));
2317   __ Ldr(q4, MemOperand(x17, 1 * 16));
2318   __ Ldr(q5, MemOperand(x17, 2 * 16));
2319   __ Ldr(q6, MemOperand(x17, 3 * 16));
2320   __ Ldr(q7, MemOperand(x17, 4 * 16));
2321   __ Ldr(q16, MemOperand(x17, 5 * 16));
2322 
2323   // Test H stores with and without post index.
2324   __ Mov(x0, 6);
2325   for (int i = 7; i >= 0; i--) {
2326     __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
2327     __ Add(x18, x18, 6);
2328   }
2329   for (int i = 7; i >= 0; i--) {
2330     __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
2331   }
2332   __ Ldr(q17, MemOperand(x17, 6 * 16));
2333   __ Ldr(q18, MemOperand(x17, 7 * 16));
2334   __ Ldr(q19, MemOperand(x17, 8 * 16));
2335   __ Ldr(q20, MemOperand(x17, 9 * 16));
2336   __ Ldr(q21, MemOperand(x17, 10 * 16));
2337   __ Ldr(q22, MemOperand(x17, 11 * 16));
2338 
2339   // Test S stores with and without post index.
2340   for (int i = 3; i >= 0; i--) {
2341     __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
2342     __ Add(x18, x18, 12);
2343   }
2344   for (int i = 3; i >= 0; i--) {
2345     __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
2346   }
2347   __ Ldr(q23, MemOperand(x17, 12 * 16));
2348   __ Ldr(q24, MemOperand(x17, 13 * 16));
2349   __ Ldr(q25, MemOperand(x17, 14 * 16));
2350   __ Ldr(q26, MemOperand(x17, 15 * 16));
2351   __ Ldr(q27, MemOperand(x17, 16 * 16));
2352   __ Ldr(q28, MemOperand(x17, 17 * 16));
2353 
2354   // Test D stores with and without post index.
2355   __ Mov(x0, 24);
2356   __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
2357   __ Add(x18, x18, 24);
2358   __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
2359   __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
2360   __ Ldr(q29, MemOperand(x17, 18 * 16));
2361   __ Ldr(q30, MemOperand(x17, 19 * 16));
2362   __ Ldr(q31, MemOperand(x17, 20 * 16));
2363   END();
2364 
2365   if (CAN_RUN()) {
2366     RUN();
2367 
2368     ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
2369     ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
2370     ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
2371     ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
2372     ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
2373     ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);
2374 
2375     ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
2376     ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
2377     ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
2378     ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
2379     ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
2380     ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);
2381 
2382     ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
2383     ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
2384     ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
2385     ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
2386     ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
2387     ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);
2388   }
2389 }
2390 
2391 
TEST(neon_st4_lane)2392 TEST(neon_st4_lane) {
2393   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2394 
2395   // Struct size * element sizes * vector size.
2396   uint8_t dst[4 * 4 * 16];
2397   memset(dst, 0, sizeof(dst));
2398   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2399 
2400   START();
2401   __ Mov(x17, dst_base);
2402   __ Mov(x18, dst_base);
2403   __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2404   __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2405   __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2406   __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2407 
2408   // Test B stores without post index.
2409   for (int i = 15; i >= 0; i--) {
2410     __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
2411     __ Add(x18, x18, 4);
2412   }
2413   __ Ldr(q4, MemOperand(x17, 0 * 16));
2414   __ Ldr(q5, MemOperand(x17, 1 * 16));
2415   __ Ldr(q6, MemOperand(x17, 2 * 16));
2416   __ Ldr(q7, MemOperand(x17, 3 * 16));
2417 
2418   // Test H stores with post index.
2419   __ Mov(x0, 8);
2420   for (int i = 7; i >= 0; i--) {
2421     __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
2422   }
2423   __ Ldr(q16, MemOperand(x17, 4 * 16));
2424   __ Ldr(q17, MemOperand(x17, 5 * 16));
2425   __ Ldr(q18, MemOperand(x17, 6 * 16));
2426   __ Ldr(q19, MemOperand(x17, 7 * 16));
2427 
2428   // Test S stores without post index.
2429   for (int i = 3; i >= 0; i--) {
2430     __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
2431     __ Add(x18, x18, 16);
2432   }
2433   __ Ldr(q20, MemOperand(x17, 8 * 16));
2434   __ Ldr(q21, MemOperand(x17, 9 * 16));
2435   __ Ldr(q22, MemOperand(x17, 10 * 16));
2436   __ Ldr(q23, MemOperand(x17, 11 * 16));
2437 
2438   // Test D stores with post index.
2439   __ Mov(x0, 32);
2440   __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
2441   __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));
2442 
2443   __ Ldr(q24, MemOperand(x17, 12 * 16));
2444   __ Ldr(q25, MemOperand(x17, 13 * 16));
2445   __ Ldr(q26, MemOperand(x17, 14 * 16));
2446   __ Ldr(q27, MemOperand(x17, 15 * 16));
2447   END();
2448 
2449   if (CAN_RUN()) {
2450     RUN();
2451 
2452     ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4);
2453     ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5);
2454     ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6);
2455     ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7);
2456 
2457     ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16);
2458     ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17);
2459     ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18);
2460     ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19);
2461 
2462     ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20);
2463     ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21);
2464     ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22);
2465     ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);
2466 
2467     ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
2468     ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25);
2469     ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
2470     ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27);
2471   }
2472 }
2473 
2474 
TEST(neon_ld1_lane_postindex)2475 TEST(neon_ld1_lane_postindex) {
2476   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2477 
2478   uint8_t src[64];
2479   for (unsigned i = 0; i < sizeof(src); i++) {
2480     src[i] = i;
2481   }
2482   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2483 
2484   START();
2485   __ Mov(x17, src_base);
2486   __ Mov(x18, src_base);
2487   __ Mov(x19, src_base);
2488   __ Mov(x20, src_base);
2489   __ Mov(x21, src_base);
2490   __ Mov(x22, src_base);
2491   __ Mov(x23, src_base);
2492   __ Mov(x24, src_base);
2493 
2494   // Test loading whole register by element.
2495   for (int i = 15; i >= 0; i--) {
2496     __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex));
2497   }
2498 
2499   for (int i = 7; i >= 0; i--) {
2500     __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex));
2501   }
2502 
2503   for (int i = 3; i >= 0; i--) {
2504     __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex));
2505   }
2506 
2507   for (int i = 1; i >= 0; i--) {
2508     __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex));
2509   }
2510 
2511   // Test loading a single element into an initialised register.
2512   __ Mov(x25, 1);
2513   __ Ldr(q4, MemOperand(x21));
2514   __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
2515   __ Add(x25, x25, 1);
2516 
2517   __ Ldr(q5, MemOperand(x22));
2518   __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
2519   __ Add(x25, x25, 1);
2520 
2521   __ Ldr(q6, MemOperand(x23));
2522   __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
2523   __ Add(x25, x25, 1);
2524 
2525   __ Ldr(q7, MemOperand(x24));
2526   __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));
2527 
2528   END();
2529 
2530   if (CAN_RUN()) {
2531     RUN();
2532 
2533     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
2534     ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
2535     ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
2536     ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
2537     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
2538     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
2539     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
2540     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
2541     ASSERT_EQUAL_64(src_base + 16, x17);
2542     ASSERT_EQUAL_64(src_base + 16, x18);
2543     ASSERT_EQUAL_64(src_base + 16, x19);
2544     ASSERT_EQUAL_64(src_base + 16, x20);
2545     ASSERT_EQUAL_64(src_base + 1, x21);
2546     ASSERT_EQUAL_64(src_base + 2, x22);
2547     ASSERT_EQUAL_64(src_base + 3, x23);
2548     ASSERT_EQUAL_64(src_base + 4, x24);
2549   }
2550 }
2551 
2552 
// Checks the single-lane form of St1 with immediate post-index addressing for
// B, H, S and D lanes. Every lane of v0 is stored, highest lane index first,
// and the 16 bytes just written are read back and compared.
TEST(neon_st1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Four phases of 16 bytes each fill the 64-byte buffer.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // x18 is a register offset of -16, used to reload the 16 bytes that were
  // stored immediately before x17 was post-incremented past them.
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  for (int i = 15; i >= 0; i--) {
    __ St1(v0.B(), i, MemOperand(x17, 1, PostIndex));
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int i = 7; i >= 0; i--) {
    __ St1(v0.H(), i, MemOperand(x17, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int i = 3; i >= 0; i--) {
    __ St1(v0.S(), i, MemOperand(x17, 4, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int i = 1; i >= 0; i--) {
    __ St1(v0.D(), i, MemOperand(x17, 8, PostIndex));
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // Lanes were stored from the highest index down, so each read-back holds
    // the elements of v0 in reverse order.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2598 
2599 
// Checks Ld1r (load one element and replicate it to all lanes) without
// post-index addressing, once for each of the eight vector arrangements. The
// base address is advanced by one byte between loads with an explicit Add.
TEST(neon_ld1_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes are 0x00, 0x01, ... so each byte's value is its own offset.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // The first load reads from src_base + 1; each later load reads one byte
  // further on, whatever the element size.
  __ Mov(x17, src_base + 1);
  __ Ld1r(v0.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v1.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v2.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v3.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v4.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v5.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v6.V1D(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v7.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // For the 64-bit arrangements the upper half of the Q register is
    // expected to be zero.
    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
    ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);
  }
}
2641 
2642 
// Checks Ld1r (load one element and replicate it to all lanes) with
// post-index addressing, mixing immediate and register post-index forms. The
// replicated values and the final base address are both verified.
TEST(neon_ld1_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes are 0x00, 0x01, ... so each byte's value is its own offset.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  // x18 is the register post-index; it advances the base by a single byte
  // regardless of the element size being loaded.
  __ Mov(x18, 1);
  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
  __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
  __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // For the 64-bit arrangements the upper half of the Q register is
    // expected to be zero.
    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
    ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
    // 1 (initial offset) + 1 + 1 + 1 + 2 + 1 + 4 + 8 = 19.
    ASSERT_EQUAL_64(src_base + 19, x17);
  }
}
2677 
2678 
// Checks the multiple-structure form of St1 with one to four D-sized
// registers, without post-index addressing on the store. After each store the
// written memory is read back with post-indexed Ldr instructions, which also
// advance x17 to the next store location.
TEST(neon_st1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The stores below write 1 + 2 + 3 + 4 + 4 = 14 D-sized registers in total.
  uint8_t src[14 * kDRegSizeInBytes];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Capture the first 64 bytes in q0-q3 before any store modifies the buffer,
  // then rewind x17 to the start.
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

  __ St1(v0.V8B(), MemOperand(x17));
  __ Ldr(d16, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
  __ Ldr(d18, MemOperand(x17, 8, PostIndex));
  __ Ldr(d19, MemOperand(x17, 8, PostIndex));
  __ Ldr(d20, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The source registers must still hold the original buffer contents.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
    // Each store writes only the low 64 bits of every listed register.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2734 
2735 
TEST(neon_st1_d_postindex)2736 TEST(neon_st1_d_postindex) {
2737   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2738 
2739   uint8_t src[64 + 14 * kDRegSizeInBytes];
2740   for (unsigned i = 0; i < sizeof(src); i++) {
2741     src[i] = i;
2742   }
2743   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2744 
2745   START();
2746   __ Mov(x17, src_base);
2747   __ Mov(x18, -8);
2748   __ Mov(x19, -16);
2749   __ Mov(x20, -24);
2750   __ Mov(x21, -32);
2751   __ Ldr(q0, MemOperand(x17, 16, PostIndex));
2752   __ Ldr(q1, MemOperand(x17, 16, PostIndex));
2753   __ Ldr(q2, MemOperand(x17, 16, PostIndex));
2754   __ Ldr(q3, MemOperand(x17, 16, PostIndex));
2755   __ Mov(x17, src_base);
2756 
2757   __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
2758   __ Ldr(d16, MemOperand(x17, x18));
2759 
2760   __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
2761   __ Ldr(q17, MemOperand(x17, x19));
2762 
2763   __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
2764   __ Ldr(d18, MemOperand(x17, x20));
2765   __ Ldr(d19, MemOperand(x17, x19));
2766   __ Ldr(d20, MemOperand(x17, x18));
2767 
2768   __ St1(v0.V2S(),
2769          v1.V2S(),
2770          v2.V2S(),
2771          v3.V2S(),
2772          MemOperand(x17, 32, PostIndex));
2773   __ Ldr(q21, MemOperand(x17, x21));
2774   __ Ldr(q22, MemOperand(x17, x19));
2775 
2776   __ St1(v0.V1D(),
2777          v1.V1D(),
2778          v2.V1D(),
2779          v3.V1D(),
2780          MemOperand(x17, 32, PostIndex));
2781   __ Ldr(q23, MemOperand(x17, x21));
2782   __ Ldr(q24, MemOperand(x17, x19));
2783   END();
2784 
2785   if (CAN_RUN()) {
2786     RUN();
2787 
2788     ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
2789     ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
2790     ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
2791     ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
2792     ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
2793     ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
2794     ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
2795     ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
2796     ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
2797   }
2798 }
2799 
2800 
// Checks the multiple-structure form of St1 with one to four Q-sized
// registers, without post-index addressing on the store. After each store the
// written memory is read back with post-indexed Ldr instructions, which also
// advance x17 to the next store location.
TEST(neon_st1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first 64 bytes provide the source data for q0-q3; the remaining 160
  // bytes receive the 1 + 2 + 3 + 4 = 10 stored Q registers.
  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 is not rewound after these loads, so the stores start at
  // src_base + 64.
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V16B(), MemOperand(x17));
  __ Ldr(q16, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));
  __ Ldr(q18, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
  __ Ldr(q19, MemOperand(x17, 16, PostIndex));
  __ Ldr(q20, MemOperand(x17, 16, PostIndex));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17, 16, PostIndex));
  __ Ldr(q25, MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Every read-back must equal the corresponding source register.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2851 
2852 
// Checks the multiple-structure form of St1 with one to four Q-sized
// registers, using immediate post-index addressing. After each store x17 has
// moved past the written data, so the read-backs use negative register
// offsets.
TEST(neon_st1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first 64 bytes provide the source data for q0-q3; the remaining 160
  // bytes receive the 1 + 2 + 3 + 4 = 10 stored Q registers.
  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // Negative register offsets for reading back data behind x17.
  __ Mov(x18, -16);
  __ Mov(x19, -32);
  __ Mov(x20, -48);
  __ Mov(x21, -64);
  // x17 is not rewound after these loads, so the stores start at
  // src_base + 64.
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q16, MemOperand(x17, x18));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));
  __ Ldr(q18, MemOperand(x17, x18));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
  __ Ldr(q19, MemOperand(x17, x20));
  __ Ldr(q20, MemOperand(x17, x19));
  __ Ldr(q21, MemOperand(x17, x18));

  __ St1(v0.V2D(),
         v1.V2D(),
         v2.V2D(),
         v3.V2D(),
         MemOperand(x17, 64, PostIndex));
  __ Ldr(q22, MemOperand(x17, x21));
  __ Ldr(q23, MemOperand(x17, x20));
  __ Ldr(q24, MemOperand(x17, x19));
  __ Ldr(q25, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // Every read-back must equal the corresponding source register.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2912 
2913 
// Checks the multiple-structure form of St2 on D-sized registers (8B, 4H and
// 2S arrangements) without post-index addressing. The three stores go to
// byte offsets 0, 22 and 33 of the buffer, which is then reloaded in full and
// compared against the expected interleaved contents.
TEST(neon_st2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
  __ Add(x18, x18, 11);
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  // Reload the whole buffer; q0 and q1 are reused now that the stores are
  // done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
    ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);
  }
}
2952 
2953 
// Checks the multiple-structure form of St2 on D-sized registers with
// post-index addressing: a register post-index (x22 = 5), then an immediate
// post-index, then a final store with no write-back. The stores go to byte
// offsets 0, 5 and 21 of the buffer, which is then reloaded and compared.
TEST(neon_st2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));


  // Reload the start of the buffer; q0 and q1 are reused now that the stores
  // are done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);
  }
}
2990 
2991 
// Checks the multiple-structure form of St2 on Q-sized registers (16B, 8H,
// 4S and 2D arrangements) without post-index addressing. The four stores go
// to byte offsets 0, 8, 30 and 32 of the buffer, which is then reloaded and
// compared against the expected interleaved contents.
TEST(neon_st2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
  __ Add(x18, x18, 8);
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Reload the first 64 bytes; q0 and q1 are reused now that the stores are
  // done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);
  }
}
3032 
3033 
// Checks the multiple-structure form of St2 on Q-sized registers with
// post-index addressing, alternating between a register post-index
// (x22 = 5) and an immediate post-index, and ending with a store that has no
// write-back. The stores go to byte offsets 0, 5, 37 and 42 of the buffer,
// which is then reloaded in full and compared.
TEST(neon_st2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Reload the whole buffer; q0 and q1 are reused now that the stores are
  // done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
    ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);
  }
}
3074 
3075 
// Checks the multiple-structure form of St3 on D-sized registers (8B, 4H and
// 2S arrangements) without post-index addressing. The three stores go to
// byte offsets 0, 3 and 5 of the buffer; its first 32 bytes are then reloaded
// and compared against the expected interleaved contents.
TEST(neon_st3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[3 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
  __ Add(x18, x18, 3);
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));


  // Reload the start of the buffer; q0 and q1 are reused now that the stores
  // are done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
    ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);
  }
}
3112 
3113 
// Checks the multiple-structure form of St3 on D-sized registers with
// post-index addressing: a register post-index (x22 = 5), then an immediate
// post-index, then a final store with no write-back. The stores go to byte
// offsets 0, 5 and 29 of the buffer, which is then reloaded in full and
// compared.
TEST(neon_st3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));


  // Reload the whole buffer; q0-q2 are reused now that the stores are done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);
  }
}
3153 
3154 
// Checks the multiple-structure form of St3 on Q-sized registers (16B, 8H,
// 4S and 2D arrangements) without post-index addressing. The four stores go
// to byte offsets 0, 5, 17 and 39 of the buffer, which is then reloaded in
// full and compared against the expected interleaved contents.
TEST(neon_st3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[6 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Reload the whole buffer; q0-q2 are reused now that the stores are done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
    ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);
  }
}
3200 
3201 
// Checks the multiple-structure form of St3 on Q-sized registers with
// post-index addressing, alternating between a register post-index
// (x22 = 5) and an immediate post-index, and ending with a store that has no
// write-back. The stores go to byte offsets 0, 5, 53 and 58 of the buffer,
// which is then reloaded in full and compared.
TEST(neon_st3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[7 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Reload the whole buffer; q0-q2 are reused now that the stores are done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
    ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
    ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
  }
}
3247 
3248 
// Checks the multiple-structure form of St4 on D-sized registers (8B, 4H and
// 2S arrangements) without post-index addressing. The three stores go to
// byte offsets 0, 12 and 27 of the buffer, which is then reloaded in full and
// compared against the expected interleaved contents.
TEST(neon_st4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
  __ Add(x18, x18, 15);
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));


  // Reload the whole buffer; q0-q3 are reused now that the stores are done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1110010032221202, 0x3121110130201000, q0);
    ASSERT_EQUAL_128(0x1003020100322322, 0x1312030231302120, q1);
    ASSERT_EQUAL_128(0x1407060504333231, 0x3023222120131211, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);
  }
}
3290 
3291 
// Checks the multiple-structure form of St4 on D-sized registers with
// post-index addressing: a register post-index (x22 = 5), then an immediate
// post-index, then a final store with no write-back. The stores go to byte
// offsets 0, 5 and 37 of the buffer, which is then reloaded in full and
// compared.
TEST(neon_st4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  // x17 loads the source registers; x18 is the store pointer.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(),
         v1.V8B(),
         v2.V8B(),
         v3.V8B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V4H(),
         v1.V4H(),
         v2.V4H(),
         v3.V4H(),
         MemOperand(x18, 32, PostIndex));
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));


  // Reload the whole buffer; q0-q3 are reused now that the stores are done.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
    ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
    ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);
  }
}
3342 
3343 
// St4 with Q-sized (128-bit) source registers and plain base-register
// addressing, covering the B, H, S and D lane sizes.
TEST(neon_st4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Fill the scratch buffer so that the byte at offset n holds the value n.
  uint8_t st4_mem[7 * 16];
  unsigned next_byte = 0;
  for (uint8_t& slot : st4_mem) {
    slot = next_byte++;
  }
  const uintptr_t st4_mem_addr = reinterpret_cast<uintptr_t>(st4_mem);

  START();
  // x17 walks the buffer for the initial loads; x18 is the store pointer.
  __ Mov(x17, st4_mem_addr);
  __ Mov(x18, st4_mem_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // One store per lane size, with the store pointer advanced manually by a
  // small amount in between so that consecutive stores overlap.
  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
  // x18 is not read again in this test after this final adjustment.
  __ Add(x18, x18, 10);

  // Reload the whole buffer into q0-q6 for checking.
  __ Mov(x19, st4_mem_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Expected buffer contents, 16 bytes per register.
    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
    ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
    ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);
  }
}
3393 
3394 
TEST(neon_st4_q_postindex)3395 TEST(neon_st4_q_postindex) {
3396   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3397 
3398   uint8_t src[9 * 16];
3399   for (unsigned i = 0; i < sizeof(src); i++) {
3400     src[i] = i;
3401   }
3402   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3403 
3404   START();
3405   __ Mov(x22, 5);
3406   __ Mov(x17, src_base);
3407   __ Mov(x18, src_base);
3408   __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3409   __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3410   __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3411   __ Ldr(q3, MemOperand(x17, 16, PostIndex));
3412 
3413   __ St4(v0.V16B(),
3414          v1.V16B(),
3415          v2.V16B(),
3416          v3.V16B(),
3417          MemOperand(x18, x22, PostIndex));
3418   __ St4(v0.V8H(),
3419          v1.V8H(),
3420          v2.V8H(),
3421          v3.V8H(),
3422          MemOperand(x18, 64, PostIndex));
3423   __ St4(v0.V4S(),
3424          v1.V4S(),
3425          v2.V4S(),
3426          v3.V4S(),
3427          MemOperand(x18, x22, PostIndex));
3428   __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
3429 
3430   __ Mov(x19, src_base);
3431   __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3432   __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3433   __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3434   __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3435   __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3436   __ Ldr(q5, MemOperand(x19, 16, PostIndex));
3437   __ Ldr(q6, MemOperand(x19, 16, PostIndex));
3438   __ Ldr(q7, MemOperand(x19, 16, PostIndex));
3439   __ Ldr(q8, MemOperand(x19, 16, PostIndex));
3440 
3441   END();
3442 
3443   if (CAN_RUN()) {
3444     RUN();
3445 
3446     ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
3447     ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
3448     ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
3449     ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
3450     ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
3451     ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
3452     ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
3453     ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
3454     ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);
3455   }
3456 }
3457 
3458 
TEST(neon_destructive_minmaxp)3459 TEST(neon_destructive_minmaxp) {
3460   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3461 
3462   START();
3463   __ Movi(v0.V2D(), 0, 0x2222222233333333);
3464   __ Movi(v1.V2D(), 0, 0x0000000011111111);
3465 
3466   __ Sminp(v16.V2S(), v0.V2S(), v1.V2S());
3467   __ Mov(v17, v0);
3468   __ Sminp(v17.V2S(), v17.V2S(), v1.V2S());
3469   __ Mov(v18, v1);
3470   __ Sminp(v18.V2S(), v0.V2S(), v18.V2S());
3471   __ Mov(v19, v0);
3472   __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());
3473 
3474   __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
3475   __ Mov(v21, v0);
3476   __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S());
3477   __ Mov(v22, v1);
3478   __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S());
3479   __ Mov(v23, v0);
3480   __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());
3481 
3482   __ Uminp(v24.V2S(), v0.V2S(), v1.V2S());
3483   __ Mov(v25, v0);
3484   __ Uminp(v25.V2S(), v25.V2S(), v1.V2S());
3485   __ Mov(v26, v1);
3486   __ Uminp(v26.V2S(), v0.V2S(), v26.V2S());
3487   __ Mov(v27, v0);
3488   __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());
3489 
3490   __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S());
3491   __ Mov(v29, v0);
3492   __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S());
3493   __ Mov(v30, v1);
3494   __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S());
3495   __ Mov(v31, v0);
3496   __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
3497   END();
3498 
3499   if (CAN_RUN()) {
3500     RUN();
3501 
3502     ASSERT_EQUAL_128(0, 0x0000000022222222, q16);
3503     ASSERT_EQUAL_128(0, 0x0000000022222222, q17);
3504     ASSERT_EQUAL_128(0, 0x0000000022222222, q18);
3505     ASSERT_EQUAL_128(0, 0x2222222222222222, q19);
3506 
3507     ASSERT_EQUAL_128(0, 0x1111111133333333, q20);
3508     ASSERT_EQUAL_128(0, 0x1111111133333333, q21);
3509     ASSERT_EQUAL_128(0, 0x1111111133333333, q22);
3510     ASSERT_EQUAL_128(0, 0x3333333333333333, q23);
3511 
3512     ASSERT_EQUAL_128(0, 0x0000000022222222, q24);
3513     ASSERT_EQUAL_128(0, 0x0000000022222222, q25);
3514     ASSERT_EQUAL_128(0, 0x0000000022222222, q26);
3515     ASSERT_EQUAL_128(0, 0x2222222222222222, q27);
3516 
3517     ASSERT_EQUAL_128(0, 0x1111111133333333, q28);
3518     ASSERT_EQUAL_128(0, 0x1111111133333333, q29);
3519     ASSERT_EQUAL_128(0, 0x1111111133333333, q30);
3520     ASSERT_EQUAL_128(0, 0x3333333333333333, q31);
3521   }
3522 }
3523 
3524 
// Table lookup (Tbl) when the destination register is also the index
// register, a table register, or both. One-register and four-register tables
// are covered.
TEST(neon_destructive_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Value preloaded into destinations that are otherwise uninitialised. It
  // does not appear in any expected Tbl result below.
  const uint64_t dst_filler = 0x5555555555555555;

  START();
  // v0 holds the lookup indices; v1-v4 hold the table data.
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // Single-register table: reference, dst == indices, dst == table, and
  // dst == table == indices.
  __ Movi(v16.V2D(), dst_filler, dst_filler);
  __ Tbl(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbl(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbl(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-register table: reference, dst == indices, dst == first table
  // register, and dst == first table register == indices.
  __ Movi(v20.V2D(), dst_filler, dst_filler);
  __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbl(v26.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v29.V16B(),
         v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    // The aliased forms that use the same table and indices as the reference
    // must produce the reference result.
    const uint64_t one_reg_hi = 0xa000000000000000;
    const uint64_t one_reg_lo = 0x0000000000adaeaf;
    ASSERT_EQUAL_128(one_reg_hi, one_reg_lo, q16);
    ASSERT_EQUAL_128(one_reg_hi, one_reg_lo, q17);
    ASSERT_EQUAL_128(one_reg_hi, one_reg_lo, q18);
    ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19);

    const uint64_t four_reg_hi = 0xa0000000d4d5d6c7;
    const uint64_t four_reg_lo = 0xc8c9babbbcadaeaf;
    ASSERT_EQUAL_128(four_reg_hi, four_reg_lo, q20);
    ASSERT_EQUAL_128(four_reg_hi, four_reg_lo, q21);
    ASSERT_EQUAL_128(four_reg_hi, four_reg_lo, q22);
    ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3579 
3580 
// Table lookup extension (Tbx) when the destination register is also the
// index register, a table register, or both. Unlike the Tbl test, the initial
// destination contents show through in the expected results.
TEST(neon_destructive_tbx) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Initial contents for destinations that alias neither table nor indices.
  const uint64_t dst_filler = 0x5555555555555555;

  START();
  // v0 holds the lookup indices; v1-v4 hold the table data.
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // Single-register table: reference, dst == indices, dst == table, and
  // dst == table == indices.
  __ Movi(v16.V2D(), dst_filler, dst_filler);
  __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-register table: reference, dst == indices, dst == first table
  // register, and dst == first table register == indices.
  __ Movi(v20.V2D(), dst_filler, dst_filler);
  __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbx(v26.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v29.V16B(),
         v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Single-register table results.
    ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16);
    ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17);
    ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18);
    ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19);

    // Four-register table results.
    ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
    ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
    ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22);
    ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3635 
3636 
// Fcvtl / Fcvtl2 when the destination register is the same as the source
// register, for both the single-to-double and half-to-single forms.
TEST(neon_destructive_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single precision to double precision. v16/v17 are the non-aliased
  // references; v18/v19 start as copies of the input and convert in place.
  __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
  __ Fcvtl(v16.V2D(), v0.V2S());
  __ Fcvtl2(v17.V2D(), v0.V4S());
  __ Mov(v18, v0);
  __ Mov(v19, v0);
  __ Fcvtl(v18.V2D(), v18.V2S());
  __ Fcvtl2(v19.V2D(), v19.V4S());

  // Half precision to single precision, following the same pattern with
  // v20/v21 as references and v22/v23 converted in place.
  __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
  __ Fcvtl(v20.V4S(), v1.V4H());
  __ Fcvtl2(v21.V4S(), v1.V8H());
  __ Mov(v22, v1);
  __ Mov(v23, v1);
  __ Fcvtl(v22.V4S(), v22.V4H());
  __ Fcvtl2(v23.V4S(), v23.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    // In-place conversions must match the non-aliased references.
    const uint64_t d_from_low_hi = 0xbff0000000000000;
    const uint64_t d_from_low_lo = 0xc000000000000000;
    const uint64_t d_from_high_hi = 0x4000000000000000;
    const uint64_t d_from_high_lo = 0x3ff0000000000000;
    ASSERT_EQUAL_128(d_from_low_hi, d_from_low_lo, q16);
    ASSERT_EQUAL_128(d_from_high_hi, d_from_high_lo, q17);
    ASSERT_EQUAL_128(d_from_low_hi, d_from_low_lo, q18);
    ASSERT_EQUAL_128(d_from_high_hi, d_from_high_lo, q19);

    const uint64_t s_from_low_hi = 0xc0000000bf800000;
    const uint64_t s_from_low_lo = 0xbf800000c0000000;
    const uint64_t s_from_high_hi = 0x400000003f800000;
    const uint64_t s_from_high_lo = 0x3f80000040000000;
    ASSERT_EQUAL_128(s_from_low_hi, s_from_low_lo, q20);
    ASSERT_EQUAL_128(s_from_high_hi, s_from_high_lo, q21);
    ASSERT_EQUAL_128(s_from_low_hi, s_from_low_lo, q22);
    ASSERT_EQUAL_128(s_from_high_hi, s_from_high_lo, q23);
  }
}
3673 
// Vector half-precision Fadd: ordinary values, infinity and NaN inputs.
TEST(fadd_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Operands. v6 is loaded with raw bits 0x7c2f and v7 with raw bits 0xfe0f;
  // both are NaN encodings.
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V8H(), kFP16PositiveInfinity);
  __ Fmov(v5.V8H(), kFP16NegativeInfinity);
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c2f));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe0f));

  // Finite and infinite sums.
  __ Fadd(v8.V4H(), v1.V4H(), v0.V4H());
  __ Fadd(v9.V8H(), v3.V8H(), v2.V8H());
  __ Fadd(v10.V4H(), v4.V4H(), v3.V4H());

  // Sums with NaN operands.
  __ Fadd(v11.V4H(), v6.V4H(), v1.V4H());
  __ Fadd(v12.V4H(), v7.V4H(), v7.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // 1024.0 + 24.0 = 1048.0 (0x6418) in each of the four low lanes.
    ASSERT_EQUAL_128(0, 0x6418641864186418, q8);
    // 2048.0 + 5.5 = 2053.5 is not representable in FP16; 0x6803 (2054.0) is
    // expected in all eight lanes.
    ASSERT_EQUAL_128(0x6803680368036803, 0x6803680368036803, q9);

    // NaNs are checked here because vectors aren't covered by
    // process_nans_half and there are no traces from hardware with
    // half-precision support.

    // +infinity + 2048.0 stays +infinity (0x7c00).
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, q10);
    // Signalling NaN input 0x7c2f comes back as quiet NaN 0x7e2f.
    ASSERT_EQUAL_128(0, 0x7e2f7e2f7e2f7e2f, q11);
    // Quiet NaN input 0xfe0f comes back unchanged.
    ASSERT_EQUAL_128(0, 0xfe0ffe0ffe0ffe0f, q12);
  }
}
3715 
// Vector half-precision Fsub: ordinary values, infinity and NaN inputs.
TEST(fsub_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Operands. v6 is loaded with raw bits 0x7c22 and v7 with raw bits 0xfe02;
  // both are NaN encodings.
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c22));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe02));

  // The first subtraction overwrites v0, and the fourth one consumes that
  // new value of v0.
  __ Fsub(v0.V4H(), v1.V4H(), v0.V4H());
  __ Fsub(v8.V8H(), v3.V8H(), v2.V8H());
  __ Fsub(v9.V4H(), v4.V4H(), v3.V4H());
  __ Fsub(v10.V4H(), v0.V4H(), v1.V4H());

  // Differences with NaN operands.
  __ Fsub(v11.V4H(), v6.V4H(), v2.V4H());
  __ Fsub(v12.V4H(), v7.V4H(), v7.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 1024.0 - 24.0 = 1000.0 (0x63d0) in each of the four low lanes.
    ASSERT_EQUAL_128(0, 0x63d063d063d063d0, q0);
    // 2048.0 - 5.5 = 2042.5 is not representable in FP16; 0x67fa (2042.0) is
    // expected in all eight lanes.
    ASSERT_EQUAL_128(0x67fa67fa67fa67fa, 0x67fa67fa67fa67fa, q8);

    // NaNs are checked here because vectors aren't covered by
    // process_nans_half and there are no traces from hardware with
    // half-precision support.

    // +infinity - 2048.0 stays +infinity (0x7c00).
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, q9);
    // 1000.0 - 1024.0 = -24.0 (0xce00).
    ASSERT_EQUAL_128(0, 0xce00ce00ce00ce00, q10);
    // Signalling NaN input 0x7c22 comes back as quiet NaN 0x7e22.
    ASSERT_EQUAL_128(0, 0x7e227e227e227e22, q11);
    // Quiet NaN input 0xfe02 comes back unchanged.
    ASSERT_EQUAL_128(0, 0xfe02fe02fe02fe02, q12);
  }
}
3758 
// Vector half-precision Fmul: ordinary values and infinities.
TEST(fmul_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Operands.
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  // v6 and v9 multiply the same pair of values in opposite operand order.
  __ Fmul(v6.V4H(), v1.V4H(), v0.V4H());
  __ Fmul(v7.V8H(), v3.V8H(), v2.V8H());
  __ Fmul(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fmul(v9.V4H(), v0.V4H(), v1.V4H());
  __ Fmul(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // -2.0 * 24.0 = -48.0 (0xd200), whichever way round the operands are.
    const uint64_t minus_48_x4 = 0xd200d200d200d200;

    ASSERT_EQUAL_128(0, minus_48_x4, q6);
    // 0.5 * 5.5 = 2.75 (0x4180) in all eight lanes.
    ASSERT_EQUAL_128(0x4180418041804180, 0x4180418041804180, q7);
    // +infinity * 0.5 = +infinity (0x7c00).
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, q8);
    ASSERT_EQUAL_128(0, minus_48_x4, q9);
    // -infinity * 24.0 = -infinity (0xfc00).
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, q10);
  }
}
3789 
// Vector half-precision Fdiv: ordinary values and infinities.
TEST(fdiv_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Operands.
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  // v6 and v9 divide the same pair of values in opposite directions.
  __ Fdiv(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fdiv(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fdiv(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fdiv(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fdiv(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 24.0 / -2.0 = -12.0 (0xca00).
    ASSERT_EQUAL_128(0, 0xca00ca00ca00ca00, q6);
    // 5.5 / 0.5 = 11.0 (0x4980) in all eight lanes.
    ASSERT_EQUAL_128(0x4980498049804980, 0x4980498049804980, q7);
    // +infinity / 0.5 = +infinity (0x7c00).
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, q8);
    // -2.0 / 24.0 = -0.083333... is not representable in FP16; 0xad55 is the
    // expected rounded result.
    ASSERT_EQUAL_128(0, 0xad55ad55ad55ad55, q9);
    // -infinity / 24.0 = -infinity (0xfc00).
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, q10);
  }
}
3821 
// Fcvtl / Fcvtl2 widening conversions (half to single, single to double) on
// raw bit patterns that include zeroes, infinities and NaN encodings.
TEST(neon_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v0 and v1 are consumed as half-precision lanes; v2, v3 and v4 are
  // consumed as single-precision lanes.
  __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
  __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
  __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);

  // Half to single: Fcvtl takes the V4H view, Fcvtl2 the V8H view.
  __ Fcvtl(v16.V4S(), v0.V4H());
  __ Fcvtl2(v17.V4S(), v0.V8H());
  __ Fcvtl(v18.V4S(), v1.V4H());
  __ Fcvtl2(v19.V4S(), v1.V8H());

  // Single to double: Fcvtl takes the V2S view, Fcvtl2 the V4S view.
  __ Fcvtl(v20.V2D(), v2.V2S());
  __ Fcvtl2(v21.V2D(), v2.V4S());
  __ Fcvtl(v22.V2D(), v3.V2S());
  __ Fcvtl2(v23.V2D(), v3.V4S());
  __ Fcvtl(v24.V2D(), v4.V2S());
  __ Fcvtl2(v25.V2D(), v4.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Half-to-single results.
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
    ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
    ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);

    // Single-to-double results.
    ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
    ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
    ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
  }
}
3860 
3861 
// Fcvtn / Fcvtn2 narrowing conversions (single to half, double to single) on
// raw bit patterns that include zeroes, infinities and NaN encodings.
TEST(neon_fcvtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v0-v2 are consumed as single-precision lanes; v3-v8 are consumed as
  // double-precision lanes.
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);

  // Single to half. v16 is written by both Fcvtn and Fcvtn2; v17 is written
  // by Fcvtn only and only its low 64 bits are checked.
  __ Fcvtn(v16.V4H(), v0.V4S());
  __ Fcvtn2(v16.V8H(), v1.V4S());
  __ Fcvtn(v17.V4H(), v2.V4S());

  // Double to single. Each destination is written by an Fcvtn / Fcvtn2 pair.
  __ Fcvtn(v18.V2S(), v3.V2D());
  __ Fcvtn2(v18.V4S(), v4.V2D());
  __ Fcvtn(v19.V2S(), v5.V2D());
  __ Fcvtn2(v19.V4S(), v6.V2D());
  __ Fcvtn(v20.V2S(), v7.V2D());
  __ Fcvtn2(v20.V4S(), v8.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // Single-to-half results.
    ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
    ASSERT_EQUAL_64(0x7e7ffe7f00008000, d17);

    // Double-to-single results.
    ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
  }
}
3897 
// Regression test: Fcvtn and Fcvtxn must give the same result when the
// destination register is also the source register, and must leave the upper
// 64 bits of the destination zero.
TEST(neon_fcvtn_fcvtxn_regression_test) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // v0 and v2 hold the same pair of doubles; v1 holds four singles.
  __ Movi(v0.V2D(), 0x3ff0000000000000, 0xbff0000000000000);
  __ Movi(v1.V2D(), 0x3f800000bf800000, 0x40000000c0000000);
  __ Movi(v2.V2D(), 0x3ff0000000000000, 0xbff0000000000000);

  // Reference conversions into separate destination registers.
  __ Fcvtn(v16.V2S(), v0.V2D());
  __ Fcvtn(v17.V4H(), v1.V4S());
  // The same conversions performed in place.
  __ Fcvtn(v0.V2S(), v0.V2D());
  __ Fcvtn(v1.V4H(), v1.V4S());
  __ Fcvtxn(v2.V2S(), v2.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected low 64 bits; the upper 64 bits are always expected to be zero.
    const uint64_t narrowed_doubles = 0x3f800000bf800000;
    const uint64_t narrowed_singles = 0x3c00bc004000c000;

    ASSERT_EQUAL_128(0, narrowed_doubles, q16);
    ASSERT_EQUAL_128(0, narrowed_singles, q17);
    ASSERT_EQUAL_128(0, narrowed_doubles, q0);
    ASSERT_EQUAL_128(0, narrowed_singles, q1);
    ASSERT_EQUAL_128(0, narrowed_doubles, q2);
  }
}
3922 
// Fcvtxn / Fcvtxn2 (double to single narrowing) in vector form, plus one
// scalar Fcvtxn, on a range of raw bit patterns.
TEST(neon_fcvtxn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Inputs. Every source register is consumed as double-precision lanes.
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
  __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);

  // Each destination is written by an Fcvtxn / Fcvtxn2 pair.
  __ Fcvtxn(v16.V2S(), v0.V2D());
  __ Fcvtxn2(v16.V4S(), v1.V2D());
  __ Fcvtxn(v17.V2S(), v2.V2D());
  __ Fcvtxn2(v17.V4S(), v3.V2D());
  __ Fcvtxn(v18.V2S(), v4.V2D());
  __ Fcvtxn2(v18.V4S(), v5.V2D());
  __ Fcvtxn(v19.V2S(), v6.V2D());
  __ Fcvtxn2(v19.V4S(), v7.V2D());
  __ Fcvtxn(v20.V2S(), v8.V2D());
  __ Fcvtxn2(v20.V4S(), v9.V2D());

  // Scalar form.
  __ Fcvtxn(s21, d0);
  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results.
    ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
    ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);

    // Scalar result: everything above the low 32 bits is expected to be zero.
    ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
  }
}
3960 
// Pairwise byte addition (Addp) across two 128-bit source vectors.
TEST(neon_3same_addp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Both halves of the first operand hold this pattern. Each adjacent byte
  // pair in it adds up to 0xff.
  const uint64_t first_operand_half = 0xff00aa5500ff55aa;

  START();

  __ Movi(v0.V2D(), first_operand_half, first_operand_half);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Addp(v16.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // The low 64 bits are all-ones, matching the byte-pair sums of v0.
    ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
  }
}
3977 
// Sqdmulh and Sqrdmulh in their three-register vector and scalar forms, with
// inputs that include the most negative H and S values.
TEST(neon_3same_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 are the H-lane operands; v2/v3 are the S-lane operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Non-rounding form: 4H vector, 4S vector, H scalar, S scalar.
  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqdmulh(h18, h0, h1);
  __ Sqdmulh(s19, s2, s3);

  // Rounding form, with the same operands and shapes.
  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmulh(h22, h0, h1);
  __ Sqrdmulh(s23, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();

    // The scalar forms multiply 0x8000 by 0x8000 (H) and 0x80000000 by
    // 0x80000000 (S); the expected results are the largest positive values.
    const uint64_t max_positive_h = 0x7fff;
    const uint64_t max_positive_s = 0x7fffffff;

    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
    ASSERT_EQUAL_128(0, max_positive_h, q18);
    ASSERT_EQUAL_128(0, max_positive_s, q19);

    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
    ASSERT_EQUAL_128(0, max_positive_h, q22);
    ASSERT_EQUAL_128(0, max_positive_s, q23);
  }
}
4012 
// Sqdmulh and Sqrdmulh in their by-element forms: the second operand is a
// single indexed lane of a vector register.
TEST(neon_byelement_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 are the H-lane operands; v2/v3 are the S-lane operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Non-rounding form: the vector variants use lane 1 of the second operand,
  // the scalar variants use lane 0.
  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqdmulh(h18, h0, v1.H(), 0);
  __ Sqdmulh(s19, s2, v3.S(), 0);

  // Rounding form, with the same operands and lane choices.
  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmulh(h22, h0, v1.H(), 0);
  __ Sqrdmulh(s23, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The scalar forms multiply 0x8000 by 0x8000 (H) and 0x80000000 by
    // 0x80000000 (S); the expected results are the largest positive values.
    const uint64_t max_positive_h = 0x7fff;
    const uint64_t max_positive_s = 0x7fffffff;

    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
    ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
    ASSERT_EQUAL_128(0, max_positive_h, q18);
    ASSERT_EQUAL_128(0, max_positive_s, q19);

    ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
    ASSERT_EQUAL_128(0, max_positive_h, q22);
    ASSERT_EQUAL_128(0, max_positive_s, q23);
  }
}
4047 
// Sqrdmlah in its three-register vector and scalar forms. The destination
// registers are preloaded because the instruction accumulates into them.
TEST(neon_3same_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // Multiplicands: v0/v1 for the H-lane forms, v2/v3 for the S-lane forms.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator contents.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  // 4H vector, 4S vector, H scalar, S scalar.
  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlah(h18, h0, h1);
  __ Sqrdmlah(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector forms.
    ASSERT_EQUAL_128(0, 0x0000040104010000, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000217fffffff, q17);
    // Scalar forms: all bits above the scalar lane are expected to be zero.
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4078 
// Sqrdmlah in its by-element forms: the second multiplicand is a single
// indexed lane. The destination registers are preloaded because the
// instruction accumulates into them.
TEST(neon_byelement_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // Multiplicands: v0/v1 for the H-lane forms, v2/v3 for the S-lane forms.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator contents.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  // The vector variants use lane 1 of the second multiplicand, the scalar
  // variants use lane 0.
  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlah(h18, h0, v1.H(), 0);
  __ Sqrdmlah(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector forms.
    ASSERT_EQUAL_128(0, 0x0000040104018000, q16);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x0000002100107ff0, q17);
    // Scalar forms: all bits above the scalar lane are expected to be zero.
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4109 
// Sqrdmlsh in its three-register vector and scalar forms. The destination
// registers are preloaded because the instruction accumulates into them.
TEST(neon_3same_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  // Every accumulator starts with this pattern in both 64-bit halves.
  const uint64_t acc_init = 0x4000400040004000;

  START();

  // Multiplicands: v0/v1 for the H-lane forms, v2/v3 for the S-lane forms.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004000500);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000100080);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), acc_init, acc_init);
  __ Movi(v17.V2D(), acc_init, acc_init);
  __ Movi(v18.V2D(), acc_init, acc_init);
  __ Movi(v19.V2D(), acc_init, acc_init);

  // 4H vector, 4S vector, H scalar, S scalar.
  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlsh(h18, h0, h1);
  __ Sqrdmlsh(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector forms.
    ASSERT_EQUAL_128(0, 0x40003fff40003ffb, q16);
    ASSERT_EQUAL_128(0x40003fffc0004000, 0x40004000c0004000, q17);
    // Scalar forms: all bits above the scalar lane are expected to be zero.
    ASSERT_EQUAL_128(0, 0x3ffb, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4140 
// Sqrdmlsh in its by-element forms: the second multiplicand is a single
// indexed lane. The destination registers are preloaded because the
// instruction accumulates into them.
TEST(neon_byelement_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  // Every accumulator starts with this pattern in both 64-bit halves.
  const uint64_t acc_init = 0x4000400040004000;

  START();

  // Multiplicands: v0/v1 for the H-lane forms, v2/v3 for the S-lane forms.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), acc_init, acc_init);
  __ Movi(v17.V2D(), acc_init, acc_init);
  __ Movi(v18.V2D(), acc_init, acc_init);
  __ Movi(v19.V2D(), acc_init, acc_init);

  // The vector variants use lane 1 of the second multiplicand, the scalar
  // variants use lane 0.
  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlsh(h18, h0, v1.H(), 0);
  __ Sqrdmlsh(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector forms.
    ASSERT_EQUAL_128(0, 0x4000400040004010, q16);
    ASSERT_EQUAL_128(0x4000400040004010, 0x4000400040004010, q17);
    // Scalar forms: all bits above the scalar lane are expected to be zero.
    ASSERT_EQUAL_128(0, 0xc000, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4171 
// Exercises the three-register forms of Sdot and Udot with 128-bit (4S/16B)
// and 64-bit (2S/8B) arrangements. Requires the NEON and DotProduct CPU
// features.
TEST(neon_3same_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte-vector source operands.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // The dot-product instructions accumulate into the destination, so each
  // destination starts with 0x00004000 in every word.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.V16B());
  __ Sdot(v17.V2S(), v1.V8B(), v2.V8B());

  // Same operands as above, using the unsigned variant.
  __ Udot(v18.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v19.V2S(), v1.V8B(), v2.V8B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000037d8000045f8, 0x000037d8000045f8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e00004000, q17);
    ASSERT_EQUAL_128(0x000119d8000127f8, 0x000119d8000127f8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e00004000, q19);
  }
}
4202 
// Exercises the by-element forms of Sdot and Udot, where the second operand is
// a four-byte group (S4B) selected by index. Requires the NEON and DotProduct
// CPU features.
TEST(neon_byelement_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte-vector source operands.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // The dot-product instructions accumulate into the destination, so each
  // destination starts with 0x00004000 in every word.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  // All four instructions select element 1 of the indexed operand.
  __ Sdot(v16.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Sdot(v17.V2S(), v1.V8B(), v2.S4B(), 1);

  __ Udot(v18.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Udot(v19.V2S(), v1.V8B(), v2.S4B(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000037d8000037d8, 0x000037d8000037d8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e0000587e, q17);
    ASSERT_EQUAL_128(0x000119d8000119d8, 0x000119d8000119d8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e0000ca7e, q19);
  }
}
4233 
4234 
// Exercises Saddlp for every source/destination arrangement pair:
// B->H, H->S and S->D, each in 128-bit and 64-bit variants.
TEST(neon_2regmisc_saddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Single source register shared by all six instructions.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Saddlp(v16.V8H(), v0.V16B());
  __ Saddlp(v17.V4H(), v0.V8B());

  __ Saddlp(v18.V4S(), v0.V8H());
  __ Saddlp(v19.V2S(), v0.V4H());

  __ Saddlp(v20.V2D(), v0.V4S());
  __ Saddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit variant is expected to match the low half of its 128-bit
    // counterpart, with the upper 64 bits cleared.
    ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
    ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
    ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
  }
}
4263 
// Exercises Uaddlp for every source/destination arrangement pair:
// B->H, H->S and S->D, each in 128-bit and 64-bit variants.
TEST(neon_2regmisc_uaddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Single source register shared by all six instructions.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Uaddlp(v16.V8H(), v0.V16B());
  __ Uaddlp(v17.V4H(), v0.V8B());

  __ Uaddlp(v18.V4S(), v0.V8H());
  __ Uaddlp(v19.V2S(), v0.V4H());

  __ Uaddlp(v20.V2D(), v0.V4S());
  __ Uaddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit variant is expected to match the low half of its 128-bit
    // counterpart, with the upper 64 bits cleared.
    ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
    ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
    ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
  }
}
4292 
// Exercises Sadalp for every source/destination arrangement pair:
// B->H, H->S and S->D, each in 128-bit and 64-bit variants. Each destination
// is seeded with an accumulator value before the instruction runs.
TEST(neon_2regmisc_sadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 is the byte-sized source; v1..v4 serve as accumulator seeds, and v1/v2
  // are also reused as the source for the next wider form.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // B -> H, accumulating onto a copy of v1.
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Sadalp(v16.V8H(), v0.V16B());
  __ Sadalp(v17.V4H(), v0.V8B());

  // H -> S, accumulating onto a copy of v2.
  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Sadalp(v18.V4S(), v1.V8H());
  __ Sadalp(v19.V2S(), v1.V4H());

  // S -> D, accumulating onto copies of v3 and v4 respectively.
  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Sadalp(v20.V2D(), v2.V4S());
  __ Sadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit variants are expected to clear the upper 64 bits, even though
    // the seed values copied in had non-zero upper halves.
    ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
    ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4331 
// Exercises Uadalp for every source/destination arrangement pair:
// B->H, H->S and S->D, each in 128-bit and 64-bit variants. Each destination
// is seeded with an accumulator value before the instruction runs.
TEST(neon_2regmisc_uadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 is the byte-sized source; v1..v4 serve as accumulator seeds, and v1/v2
  // are also reused as the source for the next wider form.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // B -> H, accumulating onto a copy of v1.
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Uadalp(v16.V8H(), v0.V16B());
  __ Uadalp(v17.V4H(), v0.V8B());

  // H -> S, accumulating onto a copy of v2.
  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Uadalp(v18.V4S(), v1.V8H());
  __ Uadalp(v19.V2S(), v1.V4H());

  // S -> D, accumulating onto copies of v3 and v4 respectively.
  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Uadalp(v20.V2D(), v2.V4S());
  __ Uadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit variants are expected to clear the upper 64 bits, even though
    // the seed values copied in had non-zero upper halves.
    ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
    ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
    ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4370 
// Exercises the three-register byte-vector (16B) forms of Mla, Mls and Mul
// using one shared pair of source operands.
TEST(neon_3same_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the multiplicands; v16 and v17 hold identical accumulator
  // seeds for Mla and Mls.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
  __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
  __ Mul(v18.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
    ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
    ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
  }
}
4394 
4395 
// Exercises the three-register byte-vector (16B) forms of the absolute
// difference instructions: Saba/Uaba (accumulating) and Sabd/Uabd.
TEST(neon_3same_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the operands; v16 and v17 hold identical accumulator seeds
  // for Saba and Uaba.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
  __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
  __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
  __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
    ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
    ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
    ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
  }
}
4421 
4422 
// Exercises the by-element forms of Mul, Mla and Mls for the 4H, 8H, 2S and 4S
// arrangements, selecting both the lowest and the highest lane of v1.
TEST(neon_byelement_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  // Plain multiplies; these results are the baseline for the Mla/Mls checks.
  __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
  __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
  __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
  __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);

  // Multiply-accumulate onto small per-lane seeds (halfword lanes).
  __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
  __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
  __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
  __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);

  // Multiply-accumulate onto small per-lane seeds (word lanes).
  __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
  __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
  __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
  __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);

  // The Mls seeds equal the values asserted below for the matching Mul
  // (q16..q19), so each multiply-subtract is expected to produce zero.
  __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
  __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
  __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
  __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
  __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
  __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);

  END();

  if (CAN_RUN()) {
    RUN();
    // Mul results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
    ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);

    // Mla results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
    ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
    ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);

    // Mls results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4477 
4478 
// Exercises the by-element widening multiplies (H -> S): Smull/Umull,
// Smlal/Umlal and Smlsl/Umlsl, together with their "2" variants.
TEST(neon_byelement_mull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  // Plain widening multiplies. The base forms use lane 7 of v1 and the "2"
  // forms use lane 0; the same pairing is kept for every group below.
  __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
  __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);

  // Identical small seeds for the multiply-accumulate forms.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);

  // The multiply-subtract seeds equal the values asserted below for the
  // matching plain multiply (q16..q19), so each result is expected to be zero.
  __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
  __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
  __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
  __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);

  __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Multiply results.
    ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
    ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
    ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
    ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);

    // Multiply-accumulate results.
    ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
    ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
    ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
    ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);

    // Multiply-subtract results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4534 
4535 
// Exercises the by-element forms of Sqdmull, Sqdmlal and Sqdmlsl (H -> S):
// the vector form, the "2" vector form, and the scalar form of each.
TEST(neon_byelement_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  // Plain multiplies. The base and scalar forms use lane 7 of v1 and the "2"
  // form uses lane 0; the same pairing is kept for every group below.
  __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmull(s18, h0, v1.H(), 7);

  // Identical small seeds for the multiply-accumulate forms.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlal(s22, h0, v1.H(), 7);

  // The multiply-subtract seeds equal the values asserted below for the
  // matching plain multiply (q16..q18), so each result is expected to be zero.
  __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
  __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
  __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);

  __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlsl(s26, h0, v1.H(), 7);

  END();

  if (CAN_RUN()) {
    RUN();

    // Multiply results.
    ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
    ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
    ASSERT_EQUAL_128(0, 0x0000ab54, q18);

    // Multiply-accumulate results. The scalar form is expected to clear the
    // bits above the S-sized result even though v22 was seeded in full.
    ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
    ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
    ASSERT_EQUAL_128(0, 0x0000ab55, q22);

    // Multiply-subtract results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x00000000, q26);
  }
}
4582 
4583 
// Exercises the widening absolute-difference-accumulate instructions (B -> H):
// Sabal/Uabal on 8B sources and Sabal2/Uabal2 on 16B sources.
TEST(neon_3diff_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the operands; v16..v19 hold identical accumulator seeds.
  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
    ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
    ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
  }
}
4611 
4612 
// Exercises the three-register forms of Sqdmull and Sqdmull2 (H -> S and
// S -> D) plus the scalar forms, using operands built from the most positive
// and most negative element values.
TEST(neon_3diff_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 hold halfword extremes (0x7fff, 0x8000); v2/v3 hold word extremes.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmull(s20, h0, h1);
  __ Sqdmull(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
    ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
    // Scalar forms multiply the most negative value by itself and are expected
    // to give the most positive result.
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4642 
4643 
// Exercises the three-register forms of Sqdmlal and Sqdmlal2 (H -> S and
// S -> D) plus the scalar forms, using extreme-valued operands and a variety
// of accumulator seeds.
TEST(neon_3diff_sqdmlal) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 hold halfword extremes (0x7fff, 0x8000); v2/v3 hold word extremes.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Accumulator seeds, one per instruction below.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
  __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlal(s20, h0, h1);
  __ Sqdmlal(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
    ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4680 
4681 
// Exercises the three-register forms of Sqdmlsl and Sqdmlsl2 (H -> S and
// S -> D) plus the scalar forms, using extreme-valued operands and a variety
// of accumulator seeds.
TEST(neon_3diff_sqdmlsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 hold halfword extremes (0x7fff, 0x8000); v2/v3 hold word extremes.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Accumulator seeds, one per instruction below.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
  __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlsl(s20, h0, h1);
  __ Sqdmlsl(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
    ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
    ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x80000002, q20);
    ASSERT_EQUAL_128(0, 0x8000000000000002, q21);
  }
}
4718 
4719 
// Exercises the widening multiply-accumulate instructions (B -> H):
// Smlal/Umlal on 8B sources and Smlal2/Umlal2 on 16B sources.
TEST(neon_3diff_mla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the multiplicands; v16..v19 hold identical accumulator
  // seeds.
  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
    ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
    ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
    ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
  }
}
4747 
4748 
// Exercises the widening multiply-subtract instructions (B -> H):
// Smlsl/Umlsl on 8B sources and Smlsl2/Umlsl2 on 16B sources.
TEST(neon_3diff_mls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the multiplicands; v16..v19 hold identical accumulator
  // seeds.
  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
    ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
    ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
    ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
  }
}
4776 
4777 
// Exercises the byte-vector (16B) integer comparisons Cmeq, Cmge, Cmgt, Cmhi
// and Cmhs. Each is run twice: once comparing v0 with itself and once
// comparing v0 with v1.
TEST(neon_3same_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
  __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
  __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
  __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
  __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
  __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
  __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
  __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
  __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
  __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each result byte is expected to be either 0xff or 0x00. The
    // self-comparisons give all-ones for Cmeq/Cmge/Cmhs and all-zeros for
    // Cmgt/Cmhi.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
    ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
  }
}
4813 
4814 
// Exercises the scalar (D-register) integer comparisons Cmeq, Cmge, Cmgt, Cmhi
// and Cmhs, covering equal operands and both operand orders of d0 and d1.
TEST(neon_3same_scalar_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of v0 and v1 are read by the D-register forms.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(d16, d0, d0);
  __ Cmeq(d17, d0, d1);
  __ Cmeq(d18, d1, d0);
  __ Cmge(d19, d0, d0);
  __ Cmge(d20, d0, d1);
  __ Cmge(d21, d1, d0);
  __ Cmgt(d22, d0, d0);
  __ Cmgt(d23, d0, d1);
  __ Cmhi(d24, d0, d0);
  __ Cmhi(d25, d0, d1);
  __ Cmhs(d26, d0, d0);
  __ Cmhs(d27, d0, d1);
  __ Cmhs(d28, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Each result is expected to be all-ones or all-zeros in the low 64 bits,
    // with the upper 64 bits of the Q register cleared.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q28);
  }
}
4857 
// Exercises the half-precision vector forms (8H and 4H) of Fcmeq by comparing
// 0, NaN, -1.0 and 1.0 against 0. Requires the NEON, FP and NEONHalf CPU
// features.
TEST(neon_fcmeq_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // The second operand is always v0 (zero).
  __ Fcmeq(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmeq(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmeq(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmeq(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmeq(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmeq(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmeq(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmeq(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the 0 == 0 comparison is expected to set the result lanes.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v11);
  }
}
4894 
// Exercises the half-precision scalar form of Fcmeq by comparing 0, a value
// with raw bits 0xffff, -1.0 and 1.0 against 0. Requires the NEON, FP,
// NEONHalf and FPHalf CPU features.
TEST(neon_fcmeq_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  // The second operand is always h0 (zero).
  __ Fcmeq(h4, h0, h0);
  __ Fcmeq(h5, h1, h0);
  __ Fcmeq(h6, h2, h0);
  __ Fcmeq(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Results are compared as raw bit patterns: 0xffff for a true comparison
    // and 0x0000 for a false one.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h7);
  }
}
4923 
// Exercises the half-precision vector forms (8H and 4H) of Fcmge by comparing
// 0, NaN, -1.0 and 1.0 against 0. Requires the NEON, FP and NEONHalf CPU
// features.
TEST(neon_fcmge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // The second operand is always v0 (zero).
  __ Fcmge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // The 0 >= 0 and 1.0 >= 0 comparisons are expected to set the result
    // lanes; the NaN and -1.0 comparisons are not.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
4960 
// Exercises the half-precision scalar form of Fcmge by comparing 0, a value
// with raw bits 0xffff, -1.0 and 1.0 against 0. Requires the NEON, FP,
// NEONHalf and FPHalf CPU features.
TEST(neon_fcmge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  // The second operand is always h0 (zero).
  __ Fcmge(h4, h0, h0);
  __ Fcmge(h5, h1, h0);
  __ Fcmge(h6, h2, h0);
  __ Fcmge(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Results are compared as raw bit patterns: 0xffff for a true comparison
    // and 0x0000 for a false one.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
4989 
// Exercises the half-precision vector forms (8H and 4H) of Fcmgt by comparing
// 0, NaN, -1.0 and 1.0 against 0. Requires the NEON, FP and NEONHalf CPU
// features.
TEST(neon_fcmgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // The second operand is always v0 (zero).
  __ Fcmgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the 1.0 > 0 comparison is expected to set the result lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5026 
// Scalar half-precision Fcmgt (register form).
//
// Compares h0-h3 against h0 (0.0) and checks that the destination holds an
// all-ones half (0xffff) only when the first operand is > 0.0.
TEST(neon_fcmgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  // Inputs: zero, an all-ones bit pattern (a NaN encoding), -1.0 and 1.0.
  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  // Each input is compared against h0, i.e. against 0.0.
  __ Fcmgt(h4, h0, h0);
  __ Fcmgt(h5, h1, h0);
  __ Fcmgt(h6, h2, h0);
  __ Fcmgt(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only 1.0 is strictly greater than 0.0.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5055 
// Vector half-precision Facge (absolute compare, greater than or equal), in
// both 8H and 4H arrangements.
//
// Every lane of v0-v3 is compared against the corresponding lane of v0 (0.0).
// The expected results show the magnitude-based comparison: -1.0 passes
// against 0.0 just as 1.0 does, while the NaN input yields zero.
TEST(neon_facge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  // Each register holds the same half-precision value in all eight lanes.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // 128-bit (8H) forms, then 64-bit (4H) forms, all compared against v0.
  __ Facge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0.0, -1.0 and 1.0 all pass; only the NaN input fails.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // The 4H results are expected to have their upper 64 bits clear.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5092 
// Scalar half-precision Facge (absolute compare, greater than or equal).
//
// Compares h0-h3 against h0 (0.0). The expected results show the
// magnitude-based comparison: 0.0, -1.0 and 1.0 all produce 0xffff, while the
// NaN input produces zero.
TEST(neon_facge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  // Inputs: zero, an all-ones bit pattern (a NaN encoding), -1.0 and 1.0.
  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  // Each input is compared against h0, i.e. against 0.0.
  __ Facge(h4, h0, h0);
  __ Facge(h5, h1, h0);
  __ Facge(h6, h2, h0);
  __ Facge(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the NaN input fails the comparison.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5121 
// Vector half-precision Facgt (absolute compare, greater than), in both 8H and
// 4H arrangements.
//
// Every lane of v0-v3 is compared against the corresponding lane of v0 (0.0).
// The expected results show the magnitude-based comparison: both -1.0 and 1.0
// pass, while 0.0 and the NaN input yield zero.
TEST(neon_facgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  // Each register holds the same half-precision value in all eight lanes.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // 128-bit (8H) forms, then 64-bit (4H) forms, all compared against v0.
  __ Facgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // -1.0 and 1.0 pass; 0.0 and the NaN input fail.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // The 4H results are expected to have their upper 64 bits clear.
    ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5158 
// Scalar half-precision Facgt (absolute compare, greater than).
//
// Compares h0-h3 against h0 (0.0). The expected results show the
// magnitude-based comparison: -1.0 and 1.0 produce 0xffff, while 0.0 and the
// NaN input produce zero.
TEST(neon_facgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  // Inputs: zero, an all-ones bit pattern (a NaN encoding), -1.0 and 1.0.
  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  // Each input is compared against h0, i.e. against 0.0.
  __ Facgt(h4, h0, h0);
  __ Facgt(h5, h1, h0);
  __ Facgt(h6, h2, h0);
  __ Facgt(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // -1.0 and 1.0 pass; 0.0 and the NaN input fail.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5187 
// Fcmeq against an immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms.
//
// A result lane is expected to be all ones when the input lane equals zero and
// all zeros otherwise (including for the NaN input).
TEST(neon_2regmisc_fcmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // Note: v3 is initialized here but is not read by any Fcmeq below.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar single-precision, scalar double-precision, then vector forms.
  __ Fcmeq(s16, s0, 0.0);
  __ Fcmeq(s17, s1, 0.0);
  __ Fcmeq(s18, s2, 0.0);
  __ Fcmeq(d19, d0, 0.0);
  __ Fcmeq(d20, d1, 0.0);
  __ Fcmeq(d21, d2, 0.0);
  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Scalar and 64-bit vector results are checked as full Q registers, so the
    // bits above the written element(s) are expected to be zero.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  }
}
5225 
// Fcmge against an immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms.
//
// A result lane is expected to be all ones when the input lane is >= 0.0 and
// all zeros otherwise (including for the NaN input).
TEST(neon_2regmisc_fcmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar single-precision, scalar double-precision, then vector forms.
  __ Fcmge(s16, s0, 0.0);
  __ Fcmge(s17, s1, 0.0);
  __ Fcmge(s18, s2, 0.0);
  __ Fcmge(d19, d0, 0.0);
  __ Fcmge(d20, d1, 0.0);
  __ Fcmge(d21, d3, 0.0);
  __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmge(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Scalar and 64-bit vector results are checked as full Q registers, so the
    // bits above the written element(s) are expected to be zero.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5263 
5264 
// Fcmgt against an immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms.
//
// A result lane is expected to be all ones only when the input lane is
// strictly greater than 0.0; zero, negative and NaN inputs give all zeros.
TEST(neon_2regmisc_fcmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar single-precision, scalar double-precision, then vector forms.
  __ Fcmgt(s16, s0, 0.0);
  __ Fcmgt(s17, s1, 0.0);
  __ Fcmgt(s18, s2, 0.0);
  __ Fcmgt(d19, d0, 0.0);
  __ Fcmgt(d20, d1, 0.0);
  __ Fcmgt(d21, d3, 0.0);
  __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Only the results computed from v3 (> 0), i.e. q21 and q25, are non-zero.
    ASSERT_EQUAL_128(0, 0x00000000, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5302 
// Fcmle against an immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms.
//
// A result lane is expected to be all ones when the input lane is <= 0.0 and
// all zeros otherwise (including for the NaN input).
TEST(neon_2regmisc_fcmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar single-precision, scalar double-precision, then vector forms.
  __ Fcmle(s16, s0, 0.0);
  __ Fcmle(s17, s1, 0.0);
  __ Fcmle(s18, s3, 0.0);
  __ Fcmle(d19, d0, 0.0);
  __ Fcmle(d20, d1, 0.0);
  __ Fcmle(d21, d2, 0.0);
  __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmle(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Scalar and 64-bit vector results are checked as full Q registers, so the
    // bits above the written element(s) are expected to be zero.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5340 
5341 
// Fcmlt against an immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms.
//
// A result lane is expected to be all ones only when the input lane is
// strictly less than 0.0; zero, positive and NaN inputs give all zeros.
TEST(neon_2regmisc_fcmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar single-precision, scalar double-precision, then vector forms.
  __ Fcmlt(s16, s0, 0.0);
  __ Fcmlt(s17, s1, 0.0);
  __ Fcmlt(s18, s3, 0.0);
  __ Fcmlt(d19, d0, 0.0);
  __ Fcmlt(d20, d1, 0.0);
  __ Fcmlt(d21, d2, 0.0);
  __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Only the results computed from v2 (< 0), i.e. q21 and q25, are non-zero.
    ASSERT_EQUAL_128(0, 0x00000000, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5379 
// Integer Cmeq against an immediate zero, for every vector arrangement from 8B
// to 2D plus the scalar D form.
//
// Each result lane is expected to be all ones when the input lane is zero and
// all zeros otherwise.
TEST(neon_2regmisc_cmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: non-zero upper 64 bits, zero lower 64 bits (used for S and D lanes).
  // v1: a mix of zero and non-zero bytes/halfwords (used for B and H lanes).
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmeq(v16.V8B(), v1.V8B(), 0);
  __ Cmeq(v17.V16B(), v1.V16B(), 0);
  __ Cmeq(v18.V4H(), v1.V4H(), 0);
  __ Cmeq(v19.V8H(), v1.V8H(), 0);
  __ Cmeq(v20.V2S(), v0.V2S(), 0);
  __ Cmeq(v21.V4S(), v0.V4S(), 0);
  __ Cmeq(d22, d0, 0);
  __ Cmeq(d23, d1, 0);
  __ Cmeq(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // 64-bit and scalar forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
    ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
    ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5413 
5414 
// Integer Cmge against an immediate zero, for every vector arrangement from 8B
// to 2D plus the scalar D form.
//
// Lanes are treated as signed: each result lane is expected to be all ones
// when the input lane is >= 0 and all zeros when its top bit is set.
TEST(neon_2regmisc_cmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: negative upper 64 bits (top byte 0xff), zero lower 64 bits.
  // v1: a mix of zero, positive and negative bytes/halfwords.
  __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmge(v16.V8B(), v1.V8B(), 0);
  __ Cmge(v17.V16B(), v1.V16B(), 0);
  __ Cmge(v18.V4H(), v1.V4H(), 0);
  __ Cmge(v19.V8H(), v1.V8H(), 0);
  __ Cmge(v20.V2S(), v0.V2S(), 0);
  __ Cmge(v21.V4S(), v0.V4S(), 0);
  __ Cmge(d22, d0, 0);
  __ Cmge(d23, d1, 0);
  __ Cmge(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // 64-bit and scalar forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
    ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5448 
5449 
// Integer Cmlt against an immediate zero, for every vector arrangement from 8B
// to 2D plus the scalar D form.
//
// Lanes are treated as signed: each result lane is expected to be all ones
// when the input lane's top bit is set (negative) and all zeros otherwise.
TEST(neon_2regmisc_cmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: positive upper 64 bits, negative lower 64 bits (top byte 0xff).
  // v1: a mix of zero, positive and negative bytes/halfwords/words.
  __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmlt(v16.V8B(), v1.V8B(), 0);
  __ Cmlt(v17.V16B(), v1.V16B(), 0);
  __ Cmlt(v18.V4H(), v1.V4H(), 0);
  __ Cmlt(v19.V8H(), v1.V8H(), 0);
  __ Cmlt(v20.V2S(), v1.V2S(), 0);
  __ Cmlt(v21.V4S(), v1.V4S(), 0);
  __ Cmlt(d22, d0, 0);
  __ Cmlt(d23, d1, 0);
  __ Cmlt(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // 64-bit and scalar forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
    ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5483 
5484 
// Integer Cmle against an immediate zero, for every vector arrangement from 8B
// to 2D plus the scalar D form.
//
// Lanes are treated as signed: each result lane is expected to be all ones
// when the input lane is zero or negative and all zeros when it is positive.
TEST(neon_2regmisc_cmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: positive upper 64 bits, zero lower 64 bits.
  // v1: a mix of zero, positive and negative bytes/halfwords/words.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmle(v16.V8B(), v1.V8B(), 0);
  __ Cmle(v17.V16B(), v1.V16B(), 0);
  __ Cmle(v18.V4H(), v1.V4H(), 0);
  __ Cmle(v19.V8H(), v1.V8H(), 0);
  __ Cmle(v20.V2S(), v1.V2S(), 0);
  __ Cmle(v21.V4S(), v1.V4S(), 0);
  __ Cmle(d22, d0, 0);
  __ Cmle(d23, d1, 0);
  __ Cmle(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // 64-bit and scalar forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
    ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
    ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5518 
5519 
// Integer Cmgt against an immediate zero, for every vector arrangement from 8B
// to 2D plus the scalar D form.
//
// Lanes are treated as signed: each result lane is expected to be all ones
// only when the input lane is strictly positive.
TEST(neon_2regmisc_cmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: positive upper 64 bits, zero lower 64 bits (used for S and D lanes).
  // v1: a mix of zero, positive and negative bytes/halfwords.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmgt(v16.V8B(), v1.V8B(), 0);
  __ Cmgt(v17.V16B(), v1.V16B(), 0);
  __ Cmgt(v18.V4H(), v1.V4H(), 0);
  __ Cmgt(v19.V8H(), v1.V8H(), 0);
  __ Cmgt(v20.V2S(), v0.V2S(), 0);
  __ Cmgt(v21.V4S(), v0.V4S(), 0);
  __ Cmgt(d22, d0, 0);
  __ Cmgt(d23, d1, 0);
  __ Cmgt(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // 64-bit and scalar forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
    ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
  }
}
5553 
5554 
// Integer Neg for every vector arrangement from 8B to 2D plus the scalar D
// form.
//
// The inputs include the most negative value of each lane size (0x80,
// 0x8000, ...). The expected results show that Neg does not saturate: those
// lanes come back unchanged.
TEST(neon_2regmisc_neg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 feeds the byte forms, v1 the halfword forms, v2 the word forms, and
  // v3/v4 the doubleword forms.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Neg(v16.V8B(), v0.V8B());
  __ Neg(v17.V16B(), v0.V16B());
  __ Neg(v18.V4H(), v1.V4H());
  __ Neg(v19.V8H(), v1.V8H());
  __ Neg(v20.V2S(), v2.V2S());
  __ Neg(v21.V4S(), v2.V4S());
  __ Neg(d22, d3);
  __ Neg(v23.V2D(), v3.V2D());
  __ Neg(v24.V2D(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form shares its low half with the following 128-bit form and
    // is expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
    ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
    ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
    ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
    ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
  }
}
5591 
5592 
// Saturating Sqneg for every vector arrangement from 8B to 2D plus the scalar
// B, H, S and D forms.
//
// Uses the same inputs as the plain Neg test. The expected results differ
// only where the input lane is the most negative value of its size: those
// lanes saturate to the most positive value (0x7f, 0x7fff, ...) instead of
// being left unchanged.
TEST(neon_2regmisc_sqneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 feeds the byte forms, v1 the halfword forms, v2 the word forms, and
  // v3/v4 the doubleword forms.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqneg(v16.V8B(), v0.V8B());
  __ Sqneg(v17.V16B(), v0.V16B());
  __ Sqneg(v18.V4H(), v1.V4H());
  __ Sqneg(v19.V8H(), v1.V8H());
  __ Sqneg(v20.V2S(), v2.V2S());
  __ Sqneg(v21.V4S(), v2.V4S());
  __ Sqneg(v22.V2D(), v3.V2D());
  __ Sqneg(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each input.
  __ Sqneg(b24, b0);
  __ Sqneg(h25, h1);
  __ Sqneg(s26, s2);
  __ Sqneg(d27, d3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
    ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
    ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
    ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

    // Scalar results are checked as full Q registers, so everything above the
    // written element is expected to be zero.
    ASSERT_EQUAL_128(0, 0x7f, q24);
    ASSERT_EQUAL_128(0, 0x8001, q25);
    ASSERT_EQUAL_128(0, 0x80000001, q26);
    ASSERT_EQUAL_128(0, 0x8000000000000001, q27);
  }
}
5637 
5638 
// Integer Abs for every vector arrangement from 8B to 2D plus the scalar D
// form.
//
// The inputs include the most negative value of each lane size (0x80,
// 0x8000, ...). The expected results show that Abs does not saturate: those
// lanes come back unchanged.
TEST(neon_2regmisc_abs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 feeds the byte forms, v1 the halfword forms, v2 the word forms, and
  // v3/v4 the doubleword forms.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Abs(v16.V8B(), v0.V8B());
  __ Abs(v17.V16B(), v0.V16B());
  __ Abs(v18.V4H(), v1.V4H());
  __ Abs(v19.V8H(), v1.V8H());
  __ Abs(v20.V2S(), v2.V2S());
  __ Abs(v21.V4S(), v2.V4S());
  __ Abs(d22, d3);
  __ Abs(v23.V2D(), v3.V2D());
  __ Abs(v24.V2D(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form shares its low half with the following 128-bit form and
    // is expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
    ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
    ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
    ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
  }
}
5675 
5676 
// Saturating Sqabs for every vector arrangement from 8B to 2D plus the scalar
// B, H, S and D forms.
//
// Uses the same inputs as the plain Abs test. The expected results differ
// only where the input lane is the most negative value of its size: those
// lanes saturate to the most positive value (0x7f, 0x7fff, ...) instead of
// being left unchanged.
TEST(neon_2regmisc_sqabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 feeds the byte forms, v1 the halfword forms, v2 the word forms, and
  // v3/v4 the doubleword forms.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqabs(v16.V8B(), v0.V8B());
  __ Sqabs(v17.V16B(), v0.V16B());
  __ Sqabs(v18.V4H(), v1.V4H());
  __ Sqabs(v19.V8H(), v1.V8H());
  __ Sqabs(v20.V2S(), v2.V2S());
  __ Sqabs(v21.V4S(), v2.V4S());
  __ Sqabs(v22.V2D(), v3.V2D());
  __ Sqabs(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each input.
  __ Sqabs(b24, b0);
  __ Sqabs(h25, h1);
  __ Sqabs(s26, s2);
  __ Sqabs(d27, d3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
    ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
    ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

    // Scalar results are checked as full Q registers, so everything above the
    // written element is expected to be zero.
    ASSERT_EQUAL_128(0, 0x7f, q24);
    ASSERT_EQUAL_128(0, 0x7fff, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
5721 
// Suqadd (signed saturating accumulate of an unsigned value) for every vector
// arrangement from 8B to 2D plus the scalar B, H, S and D forms.
//
// Suqadd is used here in its two-operand form: the destination register is
// preloaded with the accumulator value and the second operand is added to it.
TEST(neon_2regmisc_suqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Inputs come in pairs: the even register is the accumulator and the odd
  // register is the value added to it, one pair per lane size (B, H, S, D).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);

  __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);

  __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);

  // Preload the vector-form destinations with their accumulator values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  // Preload the scalar-form destinations with their accumulator values.
  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  // Vector forms.
  __ Suqadd(v16.V8B(), v1.V8B());
  __ Suqadd(v17.V16B(), v1.V16B());
  __ Suqadd(v18.V4H(), v3.V4H());
  __ Suqadd(v19.V8H(), v3.V8H());
  __ Suqadd(v20.V2S(), v5.V2S());
  __ Suqadd(v21.V4S(), v5.V4S());
  __ Suqadd(v22.V2D(), v7.V2D());

  // Scalar forms, operating on the lowest lane of each pair.
  __ Suqadd(b23, b1);
  __ Suqadd(h24, h3);
  __ Suqadd(s25, s5);
  __ Suqadd(d26, d7);

  END();

  if (CAN_RUN()) {
    RUN();
    // 64-bit vector forms are expected to clear the upper 64 bits even though
    // the destination was preloaded with a full 128-bit value.
    ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
    ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
    ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);

    // Scalar forms are likewise expected to clear everything above the written
    // element.
    ASSERT_EQUAL_128(0, 0x7f, q23);
    ASSERT_EQUAL_128(0, 0x7ffe, q24);
    ASSERT_EQUAL_128(0, 0x7fffffff, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
  }
}
5783 
// Usqadd (unsigned saturating accumulate of a signed value) for every vector
// arrangement from 8B to 2D plus the scalar B, H, S and D forms.
//
// Usqadd is used here in its two-operand form: the destination register is
// preloaded with the accumulator value and the second operand is added to it.
// The expected values cover saturation at both ends of the unsigned range
// (for example 0xff for b23 and 0 for d26).
TEST(neon_2regmisc_usqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Inputs come in pairs: the even register is the accumulator and the odd
  // register is the value added to it, one pair per lane size (B, H, S, D).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);

  __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);

  __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);

  // Preload the vector-form destinations with their accumulator values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  // Preload the scalar-form destinations with their accumulator values.
  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  // Vector forms.
  __ Usqadd(v16.V8B(), v1.V8B());
  __ Usqadd(v17.V16B(), v1.V16B());
  __ Usqadd(v18.V4H(), v3.V4H());
  __ Usqadd(v19.V8H(), v3.V8H());
  __ Usqadd(v20.V2S(), v5.V2S());
  __ Usqadd(v21.V4S(), v5.V4S());
  __ Usqadd(v22.V2D(), v7.V2D());

  // Scalar forms, operating on the lowest lane of each pair.
  __ Usqadd(b23, b1);
  __ Usqadd(h24, h3);
  __ Usqadd(s25, s5);
  __ Usqadd(d26, d7);

  END();

  if (CAN_RUN()) {
    RUN();
    // 64-bit vector forms are expected to clear the upper 64 bits even though
    // the destination was preloaded with a full 128-bit value.
    ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
    ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
    ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);

    // Scalar forms are likewise expected to clear everything above the written
    // element.
    ASSERT_EQUAL_128(0, 0xff, q23);
    ASSERT_EQUAL_128(0, 0x7ffe, q24);
    ASSERT_EQUAL_128(0, 0xffffffff, q25);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  }
}
5845 
// Exercises Xtn/Xtn2 for the 8H->8B/16B, 4S->4H/8H and 2D->2S/4S arrangements.
// Each destination is assembled in two steps: Xtn produces the low 64 bits and
// Xtn2 then fills the high 64 bits from a second source register.
TEST(neon_2regmisc_xtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. As the expected results below show, the first immediate is
  // the high 64 bits of the register and the second is the low 64 bits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Keep each Xtn before its Xtn2: the pair builds one 128-bit result.
  __ Xtn(v16.V8B(), v0.V8H());
  __ Xtn2(v16.V16B(), v1.V8H());
  __ Xtn(v17.V4H(), v1.V4S());
  __ Xtn2(v17.V8H(), v2.V4S());
  __ Xtn(v18.V2S(), v3.V2D());
  __ Xtn2(v18.V4S(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    // Every result lane is the low half of the matching source lane (plain
    // truncation, no saturation), e.g. 0x8081 -> 0x81 and 0x017f -> 0x7f.
    ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
  }
}
5873 
5874 
// Exercises Sqxtn/Sqxtn2 in the vector arrangements (low half written by
// Sqxtn, high half by Sqxtn2) and Sqxtn in its scalar b/h/s forms.
TEST(neon_2regmisc_sqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors, given as (high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: each Sqxtn/Sqxtn2 pair builds one 128-bit result.
  __ Sqxtn(v16.V8B(), v0.V8H());
  __ Sqxtn2(v16.V16B(), v1.V8H());
  __ Sqxtn(v17.V4H(), v1.V4S());
  __ Sqxtn2(v17.V8H(), v2.V4S());
  __ Sqxtn(v18.V2S(), v3.V2D());
  __ Sqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the lowest element of v0.
  __ Sqxtn(b19, h0);
  __ Sqxtn(h20, s0);
  __ Sqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Lanes that do not fit the narrower signed type are clamped to its
    // minimum or maximum, e.g. 0x8081 -> 0x80, 0x017f -> 0x7f, 0x007a -> 0x7a.
    ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
    // Scalar results: only the narrowed element is expected to be non-zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
5908 
5909 
// Exercises Uqxtn/Uqxtn2 in the vector arrangements (low half written by
// Uqxtn, high half by Uqxtn2) and Uqxtn in its scalar b/h/s forms.
TEST(neon_2regmisc_uqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors, given as (high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: each Uqxtn/Uqxtn2 pair builds one 128-bit result.
  __ Uqxtn(v16.V8B(), v0.V8H());
  __ Uqxtn2(v16.V16B(), v1.V8H());
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the lowest element of v0.
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Source lanes are treated as unsigned: anything larger than the narrow
    // type's maximum becomes all ones (0x8081 -> 0xff), while values that fit
    // pass through unchanged (0x007a -> 0x7a, 0x0000 -> 0x00).
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
    // Scalar results: the lowest element of v0 saturates at every width.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  }
}
5943 
5944 
// Exercises Sqxtun/Sqxtun2 in the vector arrangements (low half written by
// Sqxtun, high half by Sqxtun2) and Sqxtun in its scalar b/h/s forms.
TEST(neon_2regmisc_sqxtun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors, given as (high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: each Sqxtun/Sqxtun2 pair builds one 128-bit result.
  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the lowest element of v0.
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Source lanes are signed, results are unsigned: negative inputs become
    // zero (0x8081 -> 0x00) and positive inputs above the narrow maximum
    // become all ones (0x017f -> 0xff).
    ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    // Scalar results: h0 and d0 are negative (-> 0), s0 is a large positive
    // value (-> 0xffff).
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  }
}
5978 
// Runs every narrowing instruction (Xtn, Sqxtn, Uqxtn, Sqxtun and their "2"
// variants) with the destination register aliasing the source register.
// NOTE(review): presumably guards against an implementation overwriting the
// source before all of its lanes have been read - confirm against the
// original bug report.
TEST(neon_2regmisc_xtn_regression_test) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Even registers hold 0x55.. (positive as signed), odd registers hold
  // 0xaa.. (negative as signed).
  __ Movi(v0.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Movi(v1.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
  __ Movi(v2.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
  __ Movi(v4.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Movi(v5.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
  __ Movi(v6.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Movi(v7.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);

  // In every call the destination and the source are the same register.
  __ Xtn(v0.V2S(), v0.V2D());
  __ Xtn2(v1.V4S(), v1.V2D());
  __ Sqxtn(v2.V2S(), v2.V2D());
  __ Sqxtn2(v3.V4S(), v3.V2D());
  __ Uqxtn(v4.V2S(), v4.V2D());
  __ Uqxtn2(v5.V4S(), v5.V2D());
  __ Sqxtun(v6.V2S(), v6.V2D());
  __ Sqxtun2(v7.V4S(), v7.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    // Non-"2" forms: narrowed lanes in the low half, high half expected zero.
    // "2" forms: narrowed lanes in the high half, low half keeps its original
    // 0xaa.. contents.
    ASSERT_EQUAL_128(0x0000000000000000, 0x5555555555555555, q0);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff7fffffff, q2);
    ASSERT_EQUAL_128(0x8000000080000000, 0xaaaaaaaaaaaaaaaa, q3);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q4);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0xaaaaaaaaaaaaaaaa, q7);
  }
}
6016 
// Bitwise AND (vector) in the 16B and 8B arrangements: a register ANDed with
// itself, and two different bit patterns ANDed together.
TEST(neon_3same_and) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves; the second
  // operand is given as {high, low}.
  const uint64_t kPattern = 0xff00aa5500ff55aa;
  const uint64_t kOther[2] = {0x00aa55aaff55ff00, 0xaa55ff00555500ff};
  // kPattern & kOther, per half.
  const uint64_t kAndHigh = 0x0000000000555500;
  const uint64_t kAndLow = 0xaa00aa00005500aa;

  START();
  __ Movi(v0.V2D(), kPattern, kPattern);
  __ Movi(v1.V2D(), kOther[0], kOther[1]);

  // 128-bit forms: operand with itself, then with the other pattern.
  __ And(v16.V16B(), v0.V16B(), v0.V16B());
  __ And(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ And(v24.V8B(), v0.V8B(), v0.V8B());
  __ And(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(kPattern, kPattern, q16);
    ASSERT_EQUAL_128(kAndHigh, kAndLow, q17);
    // The 8B results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, kPattern, q24);
    ASSERT_EQUAL_128(0, kAndLow, q25);
  }
}
6039 
// Bitwise clear (vector) in the 16B and 8B arrangements: a register cleared
// by itself yields zero, and a second pattern clears selected bits.
TEST(neon_3same_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves; the mask is
  // given as {high, low}.
  const uint64_t kPattern = 0xff00aa5500ff55aa;
  const uint64_t kMask[2] = {0x00ffaa00aa55aaff, 0xffff005500ff00ff};
  // kPattern & ~kMask, per half.
  const uint64_t kBicHigh = 0xff00005500aa5500;
  const uint64_t kBicLow = 0x0000aa0000005500;

  START();
  __ Movi(v0.V2D(), kPattern, kPattern);
  __ Movi(v1.V2D(), kMask[0], kMask[1]);

  // 128-bit forms: operand with itself, then with the mask.
  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(kBicHigh, kBicLow, q17);
    // The 8B results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, kBicLow, q25);
  }
}
6062 
// Bitwise OR (vector) in the 16B and 8B arrangements: a register ORed with
// itself, and two different bit patterns ORed together.
TEST(neon_3same_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves; the second
  // operand is given as {high, low}.
  const uint64_t kPattern = 0xff00aa5500ff55aa;
  const uint64_t kOther[2] = {0x00aa55aaff55ff00, 0xaa55ff00555500ff};
  // kPattern | kOther, per half.
  const uint64_t kOrrHigh = 0xffaaffffffffffaa;
  const uint64_t kOrrLow = 0xff55ff5555ff55ff;

  START();
  __ Movi(v0.V2D(), kPattern, kPattern);
  __ Movi(v1.V2D(), kOther[0], kOther[1]);

  // 128-bit forms: operand with itself, then with the other pattern.
  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(kPattern, kPattern, q16);
    ASSERT_EQUAL_128(kOrrHigh, kOrrLow, q17);
    // The 8B results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, kPattern, q24);
    ASSERT_EQUAL_128(0, kOrrLow, q25);
  }
}
6085 
// Vector register move in every arrangement: the 128-bit forms are expected
// to copy the whole register, the 64-bit forms only its low half.
TEST(neon_3same_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Both halves of the source register hold this pattern.
  const uint64_t kPattern = 0xff00aa5500ff55aa;

  START();
  __ Movi(v0.V2D(), kPattern, kPattern);

  // 128-bit arrangements.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  if (CAN_RUN()) {
    RUN();

    // Full-width copies.
    ASSERT_EQUAL_128(kPattern, kPattern, q16);
    ASSERT_EQUAL_128(kPattern, kPattern, q17);
    ASSERT_EQUAL_128(kPattern, kPattern, q18);
    ASSERT_EQUAL_128(kPattern, kPattern, q19);

    // Half-width copies: the upper 64 bits are expected to be zero.
    ASSERT_EQUAL_128(0x0, kPattern, q24);
    ASSERT_EQUAL_128(0x0, kPattern, q25);
    ASSERT_EQUAL_128(0x0, kPattern, q26);
  }
}
6116 
// Bitwise OR-NOT (vector) in the 16B and 8B arrangements: a register combined
// with its own complement yields all ones, and a second pattern is also used.
TEST(neon_3same_orn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves; the second
  // operand is given as {high, low}.
  const uint64_t kPattern = 0xff00aa5500ff55aa;
  const uint64_t kOther[2] = {0x00aa55aaff55ff00, 0xaa55ff00555500ff};
  const uint64_t kAllOnes = 0xffffffffffffffff;
  // kPattern | ~kOther, per half.
  const uint64_t kOrnHigh = 0xff55aa5500ff55ff;
  const uint64_t kOrnLow = 0xffaaaaffaaffffaa;

  START();
  __ Movi(v0.V2D(), kPattern, kPattern);
  __ Movi(v1.V2D(), kOther[0], kOther[1]);

  // 128-bit forms: operand with itself, then with the other pattern.
  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(kAllOnes, kAllOnes, q16);
    ASSERT_EQUAL_128(kOrnHigh, kOrnLow, q17);
    // The 8B results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, kAllOnes, q24);
    ASSERT_EQUAL_128(0, kOrnLow, q25);
  }
}
6139 
// Bitwise exclusive OR (vector) in the 16B and 8B arrangements: a register
// XORed with itself yields zero, and two different patterns are combined.
TEST(neon_3same_eor) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves; the second
  // operand is given as {high, low}.
  const uint64_t kPattern = 0xff00aa5500ff55aa;
  const uint64_t kOther[2] = {0x00ffaa00aa55aaff, 0xffff005500ff00ff};
  // kPattern ^ kOther, per half.
  const uint64_t kEorHigh = 0xffff0055aaaaff55;
  const uint64_t kEorLow = 0x00ffaa0000005555;

  START();
  __ Movi(v0.V2D(), kPattern, kPattern);
  __ Movi(v1.V2D(), kOther[0], kOther[1]);

  // 128-bit forms: operand with itself, then with the other pattern.
  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(kEorHigh, kEorLow, q17);
    // The 8B results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, kEorLow, q25);
  }
}
6162 
// Bitwise insert-if-false. The destination is both an input and the output,
// so each case preloads the destination (v16-v18) before issuing Bif.
TEST(neon_3same_bif) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is: destination (initial value), first source, second source.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  // 64-bit case: only the low halves are populated.
  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected values match (dst & src2) | (src1 & ~src2): bits of the first
    // source replace destination bits wherever the second source bit is 0.
    ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
    ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  }
}
6193 
// Bitwise insert-if-true. The destination is both an input and the output,
// so each case preloads the destination (v16-v18) before issuing Bit.
TEST(neon_3same_bit) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is: destination (initial value), first source, second source.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  // 64-bit case: only the low halves are populated.
  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected values match (dst & ~src2) | (src1 & src2): bits of the first
    // source replace destination bits wherever the second source bit is 1.
    ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
    ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  }
}
6224 
// Bitwise select. The destination's initial value acts as the selection mask
// and is overwritten with the result, so each case preloads v16-v18 first.
TEST(neon_3same_bsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is: destination (initial value), first source, second source.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  // 64-bit case: only the low halves are populated.
  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected values match (src1 & dst) | (src2 & ~dst): a 1 bit in the
    // original destination picks the first source, a 0 bit picks the second.
    ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
    ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
    ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  }
}
6255 
6256 
// Signed lane-wise maximum for byte, halfword and word lanes, in both the
// 64-bit and 128-bit arrangements.
TEST(neon_3same_smax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands as {high, low} 64-bit halves.
  const uint64_t kLhs[2] = {0xaaaaaaaa55555555, 0xffffffff0000aa55};
  const uint64_t kRhs[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};
  // Expected low halves per lane size. The expected high half of the 128-bit
  // results happens to be the same for all three lane sizes.
  const uint64_t kMaxLowB = 0x0000000000005555;
  const uint64_t kMaxLowH = 0x00000000000055ff;
  const uint64_t kMaxLowS = 0x000000000000aa55;
  const uint64_t kMaxHigh = 0x55aa555555555555;

  START();
  __ Movi(v0.V2D(), kLhs[0], kLhs[1]);
  __ Movi(v1.V2D(), kRhs[0], kRhs[1]);

  // 64-bit arrangements (even destination registers).
  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destination registers).
  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit results: upper half expected to be zero.
    ASSERT_EQUAL_128(0x0, kMaxLowB, q16);
    ASSERT_EQUAL_128(0x0, kMaxLowH, q18);
    ASSERT_EQUAL_128(0x0, kMaxLowS, q20);
    // 128-bit results.
    ASSERT_EQUAL_128(kMaxHigh, kMaxLowB, q17);
    ASSERT_EQUAL_128(kMaxHigh, kMaxLowH, q19);
    ASSERT_EQUAL_128(kMaxHigh, kMaxLowS, q21);
  }
}
6285 
6286 
// Signed pairwise maximum for byte, halfword and word lanes, in both the
// 64-bit and 128-bit arrangements. As the expected values show, the lower
// result lanes come from adjacent pairs of the first source and the upper
// result lanes from adjacent pairs of the second source.
TEST(neon_3same_smaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands as {high, low} 64-bit halves.
  const uint64_t kLhs[2] = {0xaaaaaaaa55555555, 0xffffffff0000aa55};
  const uint64_t kRhs[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};

  START();
  __ Movi(v0.V2D(), kLhs[0], kLhs[1]);
  __ Movi(v1.V2D(), kRhs[0], kRhs[1]);

  // 64-bit arrangements (even destination registers).
  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destination registers).
  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit results: upper half expected to be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff55ffff0055, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000055ffffff0000, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000aa55, q20);
    // 128-bit results.
    ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
    ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
    ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  }
}
6315 
6316 
// Scalar pairwise add: the two 64-bit lanes of each source are summed into a
// D register, wrapping modulo 2^64.
TEST(neon_addp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Sources as {high, low} 64-bit halves.
  const uint64_t kSrc0[2] = {0x0011223344aafe80, 0x00112233aabbfc00};
  const uint64_t kSrc1[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};
  const uint64_t kSrc2[2] = {0xaaaaaaaa66555555, 0xffffffff0000aa00};
  // Expected high + low for each source.
  const uint64_t kSum0 = 0x00224466ef66fa80;
  const uint64_t kSum1 = 0x55aa5556aa5500a9;
  const uint64_t kSum2 = 0xaaaaaaa96655ff55;  // The carry out of bit 63 is lost.

  START();
  __ Movi(v0.V2D(), kSrc0[0], kSrc0[1]);
  __ Movi(v1.V2D(), kSrc1[0], kSrc1[1]);
  __ Movi(v2.V2D(), kSrc2[0], kSrc2[1]);

  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of each destination are expected to be zero.
    ASSERT_EQUAL_128(0x0, kSum0, q16);
    ASSERT_EQUAL_128(0x0, kSum1, q17);
    ASSERT_EQUAL_128(0x0, kSum2, q18);
  }
}
6340 
// Across-lanes add: all lanes of the source are summed into one scalar of the
// same lane width, so the sum wraps at that width.
TEST(neon_acrosslanes_addv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Sources as {high, low} 64-bit halves.
  const uint64_t kSrcB[2] = {0x0011223344aafe80, 0x00112233aabbfc00};
  const uint64_t kSrcH[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};
  const uint64_t kSrcS[2] = {0xaaaaaaaa66555555, 0xffffffff0000aa00};

  START();
  __ Movi(v0.V2D(), kSrcB[0], kSrcB[1]);
  __ Movi(v1.V2D(), kSrcH[0], kSrcH[1]);
  __ Movi(v2.V2D(), kSrcS[0], kSrcS[1]);

  // Byte lanes, halfword lanes, then word lanes.
  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());
  __ Addv(s20, v2.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // Everything above the result element is expected to be zero.
    ASSERT_EQUAL_128(0x0, 0x00000000000000c7, q16);
    ASSERT_EQUAL_128(0x0, 0x0000000000000099, q17);
    ASSERT_EQUAL_128(0x0, 0x00000000000055a9, q18);
    ASSERT_EQUAL_128(0x0, 0x00000000000055fc, q19);
    ASSERT_EQUAL_128(0x0, 0x000000001100a9fe, q20);
  }
}
6368 
6369 
// Signed across-lanes long add: all lanes are sign-extended and summed into a
// scalar twice as wide as the source lanes.
TEST(neon_acrosslanes_saddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Sources as {high, low} 64-bit halves.
  const uint64_t kSrcB[2] = {0x0011223344aafe80, 0x00112233aabbfc00};
  const uint64_t kSrcH[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};
  const uint64_t kSrcS[2] = {0xaaaaaaaa66555555, 0xffffffff0000aa00};

  START();
  __ Movi(v0.V2D(), kSrcB[0], kSrcB[1]);
  __ Movi(v1.V2D(), kSrcH[0], kSrcH[1]);
  __ Movi(v2.V2D(), kSrcS[0], kSrcS[1]);

  // Byte -> halfword, halfword -> word, word -> doubleword.
  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());
  __ Saddlv(d20, v2.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // Negative sums appear sign-extended only up to the result width (e.g.
    // 0xffc7 in an H register); the bits above it are expected to be zero.
    ASSERT_EQUAL_128(0x0, 0x000000000000ffc7, q16);
    ASSERT_EQUAL_128(0x0, 0x000000000000ff99, q17);
    ASSERT_EQUAL_128(0x0, 0x00000000000055a9, q18);
    ASSERT_EQUAL_128(0x0, 0x00000000000055fc, q19);
    ASSERT_EQUAL_128(0x0, 0x000000001100a9fe, q20);
  }
}
6397 
6398 
// Unsigned across-lanes long add: all lanes are zero-extended and summed into
// a scalar twice as wide as the source lanes.
TEST(neon_acrosslanes_uaddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Sources as {high, low} 64-bit halves.
  const uint64_t kSrcB[2] = {0x0011223344aafe80, 0x00112233aabbfc00};
  const uint64_t kSrcH[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};
  const uint64_t kSrcS[2] = {0xaaaaaaaa66555555, 0xffffffff0000aa00};

  START();
  __ Movi(v0.V2D(), kSrcB[0], kSrcB[1]);
  __ Movi(v1.V2D(), kSrcH[0], kSrcH[1]);
  __ Movi(v2.V2D(), kSrcS[0], kSrcS[1]);

  // Byte -> halfword, halfword -> word, word -> doubleword.
  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());
  __ Uaddlv(d20, v2.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The wider result keeps the carries that a same-width sum would lose.
    ASSERT_EQUAL_128(0x0, 0x00000000000002c7, q16);
    ASSERT_EQUAL_128(0x0, 0x0000000000000599, q17);
    ASSERT_EQUAL_128(0x0, 0x00000000000155a9, q18);
    ASSERT_EQUAL_128(0x0, 0x00000000000355fc, q19);
    ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
  }
}
6426 
6427 
// Signed across-lanes maximum: the largest lane, compared as signed values,
// is written to a scalar of the same lane width.
TEST(neon_acrosslanes_smaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Sources as {high, low} 64-bit halves.
  const uint64_t kSrcB[2] = {0x0011223344aafe80, 0x00112233aabbfc00};
  const uint64_t kSrcH[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};
  const uint64_t kSrcS[2] = {0xaaaaaaaa66555555, 0xffffffff0000aa00};

  START();
  __ Movi(v0.V2D(), kSrcB[0], kSrcB[1]);
  __ Movi(v1.V2D(), kSrcH[0], kSrcH[1]);
  __ Movi(v2.V2D(), kSrcS[0], kSrcS[1]);

  // Byte lanes, halfword lanes, then word lanes.
  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());
  __ Smaxv(s20, v2.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // Lanes with the top bit set (0xaa, 0xfc, ...) count as negative and so
    // never win.
    ASSERT_EQUAL_128(0x0, 0x0000000000000033, q16);
    ASSERT_EQUAL_128(0x0, 0x0000000000000044, q17);
    ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
    ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q19);
    ASSERT_EQUAL_128(0x0, 0x0000000066555555, q20);
  }
}
6455 
6456 
// Signed across-lanes minimum: the smallest lane, compared as signed values,
// is written to a scalar of the same lane width.
TEST(neon_acrosslanes_sminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Sources as {high, low} 64-bit halves.
  const uint64_t kSrcB[2] = {0x0011223344aafe80, 0x00112233aabbfc00};
  const uint64_t kSrcH[2] = {0xfffa5555aaaaaaaa, 0x00000000ffaa55ff};
  const uint64_t kSrcS[2] = {0xaaaaaaaa66555555, 0xffffffff0000aa00};

  START();
  __ Movi(v0.V2D(), kSrcB[0], kSrcB[1]);
  __ Movi(v1.V2D(), kSrcH[0], kSrcH[1]);
  __ Movi(v2.V2D(), kSrcS[0], kSrcS[1]);

  // Byte lanes, halfword lanes, then word lanes.
  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());
  __ Sminv(s20, v2.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The winners are the most negative lanes; the bits above the result
    // element are expected to be zero (no sign extension).
    ASSERT_EQUAL_128(0x0, 0x00000000000000aa, q16);
    ASSERT_EQUAL_128(0x0, 0x0000000000000080, q17);
    ASSERT_EQUAL_128(0x0, 0x000000000000ffaa, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aaaa, q19);
    ASSERT_EQUAL_128(0x0, 0x00000000aaaaaaaa, q20);
  }
}
6484 
// Unsigned across-lanes maximum: the largest lane, compared as unsigned
// values, is written to a scalar of the same lane width.
TEST(neon_acrosslanes_umaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Sources as {high, low} 64-bit halves.
  const uint64_t kSrcB[2] = {0x0011223344aafe80, 0x00112233aabbfc00};
  const uint64_t kSrcH[2] = {0x55aa5555aaaaffab, 0x00000000ffaa55ff};
  const uint64_t kSrcS[2] = {0xaaaaaaaa66555555, 0xffffffff0000aa00};

  START();
  __ Movi(v0.V2D(), kSrcB[0], kSrcB[1]);
  __ Movi(v1.V2D(), kSrcH[0], kSrcH[1]);
  __ Movi(v2.V2D(), kSrcS[0], kSrcS[1]);

  // Byte lanes, halfword lanes, then word lanes.
  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());
  __ Umaxv(s20, v2.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit forms only see the low half of the source, so they pick
    // 0xfc / 0xffaa, while the 128-bit forms find 0xfe / 0xffab in the high
    // half.
    ASSERT_EQUAL_128(0x0, 0x00000000000000fc, q16);
    ASSERT_EQUAL_128(0x0, 0x00000000000000fe, q17);
    ASSERT_EQUAL_128(0x0, 0x000000000000ffaa, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000ffab, q19);
    ASSERT_EQUAL_128(0x0, 0x00000000ffffffff, q20);
  }
}
6512 
6513 
// Unsigned across-lanes minimum: the smallest lane, compared as unsigned
// values, is written to a scalar of the same lane width.
TEST(neon_acrosslanes_uminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Sources as {high, low} 64-bit halves.
  const uint64_t kSrcB[2] = {0x0011223344aafe80, 0x02112233aabbfc01};
  const uint64_t kSrcH[2] = {0xfffa5555aaaa0000, 0x00010003ffaa55ff};
  const uint64_t kSrcS[2] = {0xaaaaaaaa66555555, 0xffffffff0000aa00};

  START();
  __ Movi(v0.V2D(), kSrcB[0], kSrcB[1]);
  __ Movi(v1.V2D(), kSrcH[0], kSrcH[1]);
  __ Movi(v2.V2D(), kSrcS[0], kSrcS[1]);

  // Byte lanes, halfword lanes, then word lanes.
  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());
  __ Uminv(s20, v2.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low halves contain no zero byte or halfword lane, so the 64-bit
    // forms give 0x01 / 0x0001; the 128-bit forms find the zero lanes in the
    // high half.
    ASSERT_EQUAL_128(0x0, 0x0000000000000001, q16);
    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q17);
    ASSERT_EQUAL_128(0x0, 0x0000000000000001, q18);
    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa00, q20);
  }
}
6541 
6542 
// Signed lane-wise minimum for byte, halfword and word lanes, in both the
// 64-bit and 128-bit arrangements.
TEST(neon_3same_smin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands as {high, low} 64-bit halves.
  const uint64_t kLhs[2] = {0xaaaaaaaa55555555, 0xffffffff0000aa55};
  const uint64_t kRhs[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};
  // Expected low halves per lane size. The expected high half of the 128-bit
  // results happens to be the same for all three lane sizes.
  const uint64_t kMinLowB = 0xffffffffffaaaaff;
  const uint64_t kMinLowH = 0xffffffffffaaaa55;
  const uint64_t kMinLowS = 0xffffffffffaa55ff;
  const uint64_t kMinHigh = 0xaaaaaaaaaaaaaaaa;

  START();
  __ Movi(v0.V2D(), kLhs[0], kLhs[1]);
  __ Movi(v1.V2D(), kRhs[0], kRhs[1]);

  // 64-bit arrangements (even destination registers).
  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destination registers).
  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit results: upper half expected to be zero.
    ASSERT_EQUAL_128(0x0, kMinLowB, q16);
    ASSERT_EQUAL_128(0x0, kMinLowH, q18);
    ASSERT_EQUAL_128(0x0, kMinLowS, q20);
    // 128-bit results.
    ASSERT_EQUAL_128(kMinHigh, kMinLowB, q17);
    ASSERT_EQUAL_128(kMinHigh, kMinLowH, q19);
    ASSERT_EQUAL_128(kMinHigh, kMinLowS, q21);
  }
}
6571 
6572 
// Unsigned lane-wise maximum for byte, halfword and word lanes, in both the
// 64-bit and 128-bit arrangements.
TEST(neon_3same_umax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands as {high, low} 64-bit halves.
  const uint64_t kLhs[2] = {0xaaaaaaaa55555555, 0xffffffff0000aa55};
  const uint64_t kRhs[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};
  // Expected low halves per lane size. The expected high half of the 128-bit
  // results happens to be the same for all three lane sizes.
  const uint64_t kMaxLowB = 0xffffffffffaaaaff;
  const uint64_t kMaxLowH = 0xffffffffffaaaa55;
  const uint64_t kMaxLowS = 0xffffffffffaa55ff;
  const uint64_t kMaxHigh = 0xaaaaaaaaaaaaaaaa;

  START();
  __ Movi(v0.V2D(), kLhs[0], kLhs[1]);
  __ Movi(v1.V2D(), kRhs[0], kRhs[1]);

  // 64-bit arrangements (even destination registers).
  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destination registers).
  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit results: upper half expected to be zero.
    ASSERT_EQUAL_128(0x0, kMaxLowB, q16);
    ASSERT_EQUAL_128(0x0, kMaxLowH, q18);
    ASSERT_EQUAL_128(0x0, kMaxLowS, q20);
    // 128-bit results.
    ASSERT_EQUAL_128(kMaxHigh, kMaxLowB, q17);
    ASSERT_EQUAL_128(kMaxHigh, kMaxLowH, q19);
    ASSERT_EQUAL_128(kMaxHigh, kMaxLowS, q21);
  }
}
6601 
6602 
// Unsigned lane-wise minimum for byte, halfword and word lanes, in both the
// 64-bit and 128-bit arrangements.
TEST(neon_3same_umin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands as {high, low} 64-bit halves.
  const uint64_t kLhs[2] = {0xaaaaaaaa55555555, 0xffffffff0000aa55};
  const uint64_t kRhs[2] = {0x55aa5555aaaaaaaa, 0x00000000ffaa55ff};
  // Expected low halves per lane size. The expected high half of the 128-bit
  // results happens to be the same for all three lane sizes.
  const uint64_t kMinLowB = 0x0000000000005555;
  const uint64_t kMinLowH = 0x00000000000055ff;
  const uint64_t kMinLowS = 0x000000000000aa55;
  const uint64_t kMinHigh = 0x55aa555555555555;

  START();
  __ Movi(v0.V2D(), kLhs[0], kLhs[1]);
  __ Movi(v1.V2D(), kRhs[0], kRhs[1]);

  // 64-bit arrangements (even destination registers).
  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destination registers).
  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit results: upper half expected to be zero.
    ASSERT_EQUAL_128(0x0, kMinLowB, q16);
    ASSERT_EQUAL_128(0x0, kMinLowH, q18);
    ASSERT_EQUAL_128(0x0, kMinLowS, q20);
    // 128-bit results.
    ASSERT_EQUAL_128(kMinHigh, kMinLowB, q17);
    ASSERT_EQUAL_128(kMinHigh, kMinLowH, q19);
    ASSERT_EQUAL_128(kMinHigh, kMinLowS, q21);
  }
}
6631 
6632 
// Floating-point complex add with rotation (Fcadd) in the 2D, 2S and 4S
// arrangements. Complex values are annotated as (imaginary, real), matching
// the register layout: the real part is the lower element of each pair.
// As the q29/q28 expectations show, rotation 90 subtracts the second
// operand's imaginary part from the real lane and adds its real part to the
// imaginary lane; rotation 270 does the opposite.
TEST(neon_3same_extra_fcadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (0i, 5) (d)
  __ Movi(v0.V2D(), 0x0, 0x4014000000000000);
  // (5i, 0) (d)
  __ Movi(v1.V2D(), 0x4014000000000000, 0x0);
  // (10i, 10) (d)
  __ Movi(v2.V2D(), 0x4024000000000000, 0x4024000000000000);
  // (5i, 5), (5i, 5) (f)
  __ Movi(v3.V2D(), 0x40A0000040A00000, 0x40A0000040A00000);
  // (5i, 5), (0i, 0) (f)
  __ Movi(v4.V2D(), 0x40A0000040A00000, 0x0);
  // (324567i, 16000) (f)
  __ Movi(v5.V2D(), 0x0, 0x489E7AE0467A0000);

  // Subtraction (10, 10) - (5, 5) == (5, 5)
  // Done in two steps, one per component; the second step reads the result of
  // the first, so the order must be kept.
  __ Fcadd(v31.V2D(), v2.V2D(), v1.V2D(), 90);
  __ Fcadd(v31.V2D(), v31.V2D(), v0.V2D(), 270);

  // Addition (10, 10) + (5, 5) == (15, 15)
  // Same two-step scheme with the rotations swapped.
  __ Fcadd(v30.V2D(), v2.V2D(), v1.V2D(), 270);
  __ Fcadd(v30.V2D(), v30.V2D(), v0.V2D(), 90);

  // 2S
  __ Fcadd(v29.V2S(), v4.V2S(), v5.V2S(), 90);
  __ Fcadd(v28.V2S(), v4.V2S(), v5.V2S(), 270);

  // 4S
  __ Fcadd(v27.V4S(), v3.V4S(), v4.V4S(), 90);
  __ Fcadd(v26.V4S(), v3.V4S(), v4.V4S(), 270);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x4014000000000000, 0x4014000000000000, q31);  // (5i, 5)
    ASSERT_EQUAL_128(0x402E000000000000, 0x402E000000000000, q30);  // (15i, 15)
    ASSERT_EQUAL_128(0x0, 0x467a0000c89e7ae0, q29);  // (16000i, -324567)
    ASSERT_EQUAL_128(0x0, 0xc67a0000489e7ae0, q28);  // (-16000i, 324567)
    // (10i, 0), (5i, 5)
    ASSERT_EQUAL_128(0x4120000000000000, 0x40A0000040A00000, q27);
    // (0i, 10), (5i, 5)
    ASSERT_EQUAL_128(0x0000000041200000, 0x40A0000040A00000, q26);
  }
}
6679 
6680 
// Floating-point complex multiply-accumulate (Fcmla, vector form) in the 2S,
// 4S and 2D arrangements. Complex values are annotated as (imaginary, real),
// matching the register layout: the real part is the lower element of each
// pair. A full complex product needs two accumulating Fcmla calls (rotations
// 0 and 90) into the same destination; single calls check individual
// rotations.
TEST(neon_3same_extra_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  __ Movi(v1.V2D(), 0x0, 0x40A0000040400000);  // (5i, 3) (f)
  __ Movi(v2.V2D(), 0x0, 0x4040000040A00000);  // (3i, 5) (f)

  __ Movi(v3.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v4.V2D(), 0x0, 0x40E000003F800000);  // (7i, 1) (f)

  __ Movi(v5.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v6.V2D(), 0x0, 0x408000003F800000);  // (4i, 1) (f)

  // (1.5i, 2.5), (31.5i, 1024) (f)
  __ Movi(v7.V2D(), 0x3FC0000040200000, 0x41FC000044800000);
  // (2048i, 412.75), (3645i, 0) (f)
  __ Movi(v8.V2D(), 0x4500000043CE6000, 0x4563D00000000000);
  // (2000i, 450,000) (d)
  __ Movi(v9.V2D(), 0x409F400000000000, 0x411B774000000000);
  // (30,000i, 1250) (d)
  __ Movi(v10.V2D(), 0x40DD4C0000000000, 0x4093880000000000);

  // DST
  // Fcmla accumulates into its destination, so every destination starts at
  // zero.
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculations
  // Each pair (rotations 0 and 90, in either order) accumulates one complete
  // complex product.
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 90);
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 0);

  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 0);
  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 90);

  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 90);
  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 0);

  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 0);
  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 90);

  // Partial checks
  // A single rotation each, so only one partial product per lane.
  __ Fcmla(v27.V2S(), v1.V2S(), v2.V2S(), 0);
  __ Fcmla(v26.V2S(), v2.V2S(), v1.V2S(), 0);

  __ Fcmla(v25.V4S(), v7.V4S(), v8.V4S(), 270);
  __ Fcmla(v24.V4S(), v7.V4S(), v8.V4S(), 180);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x4208000000000000, q31);  // (34i, 0)
    ASSERT_EQUAL_128(0x0, 0x41B80000C1300000, q30);  // (23i, -11)
    ASSERT_EQUAL_128(0x0, 0x41600000C0A00000, q29);  // (14i, -5)

    // (13502500000i, 502500000)
    ASSERT_EQUAL_128(0x4209267E65000000, 0x41BDF38AA0000000, q28);
    ASSERT_EQUAL_128(0x0, 0x4110000041700000, q27);  //  (9i, 15)
    ASSERT_EQUAL_128(0x0, 0x41C8000041700000, q26);  // (25i, 15)
    // (-619.125i, 3072), (0i, 114817.5)
    ASSERT_EQUAL_128(0xc41ac80045400000, 0x0000000047e040c0, q25);
    // (-5120i, -1031.875), (-3732480i, 0)
    ASSERT_EQUAL_128(0xc5a00000c480fc00, 0xca63d00000000000, q24);
  }
}
6753 
6754 
// Checks the by-element form of Fcmla on 4S vectors, where the second
// multiplicand is a single complex pair selected from vm by an index.
//
// Notation used in the value comments: each complex number is written as
// (imaginary i, real). Within a 64-bit pair the imaginary part is the upper
// 32-bit float lane and the real part is the lower one (e.g.
// 0x40A0000040400000 is 5.0f:3.0f, written "(5i, 3)").
TEST(neon_byelement_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // Source operands.
  // (5i, 3), (5i, 3) (f)
  __ Movi(v1.V2D(), 0x40A0000040400000, 0x40A0000040400000);
  // (3i, 5), (3i, 5) (f)
  __ Movi(v2.V2D(), 0x4040000040A00000, 0x4040000040A00000);
  // (7i, 1), (5i, 3) (f)
  __ Movi(v3.V2D(), 0x40E000003F800000, 0x40A0000040400000);
  // (4i, 1), (3i, 5) (f)
  __ Movi(v4.V2D(), 0x408000003F800000, 0x4040000040A00000);
  // (4i, 1), (7i, 1) (f)
  __ Movi(v5.V2D(), 0x408000003F800000, 0x40E000003F800000);
  // (2i, 3), (0, 0) (f)
  __ Movi(v6.V2D(), 0x4000000040400000, 0x0);

  // Destination (accumulator) registers, cleared so that the expected values
  // below depend only on the Fcmla operations.
  __ Movi(v22.V2D(), 0x0, 0x0);
  __ Movi(v23.V2D(), 0x0, 0x0);
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculation (pairs)
  // The 90 and 0 rotations are accumulated into the same destination, which
  // gives the complete complex product: e.g. for v31,
  // (3 + 5i) * (5 + 3i) = 34i. Index 1 selects the upper pair of v6, (2i, 3).
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 90);
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 0);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 90);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 0);

  // Rotations
  // Each rotation (0, 90, 180, 270) is applied once to a zeroed destination,
  // for both pair indices (1 and 0) of v4, so each partial result is checked
  // in isolation.
  __ Fcmla(v29.V4S(), v3.V4S(), v4.S(), 1, 0);
  __ Fcmla(v28.V4S(), v3.V4S(), v4.S(), 1, 90);
  __ Fcmla(v27.V4S(), v3.V4S(), v4.S(), 1, 180);
  __ Fcmla(v26.V4S(), v3.V4S(), v4.S(), 1, 270);
  __ Fcmla(v25.V4S(), v3.V4S(), v4.S(), 0, 270);
  __ Fcmla(v24.V4S(), v3.V4S(), v4.S(), 0, 180);
  __ Fcmla(v23.V4S(), v3.V4S(), v4.S(), 0, 90);
  __ Fcmla(v22.V4S(), v3.V4S(), v4.S(), 0, 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Expected values, using the same (imaginary i, real) notation; the first
    // pair listed is the upper 64 bits of the Q register.
    // (34i, 0), (34i, 0)
    ASSERT_EQUAL_128(0x4208000000000000, 0x4208000000000000, q31);
    // (14i, -5), (23i, -11)
    ASSERT_EQUAL_128(0x41600000C0A00000, 0x41B80000C1300000, q30);
    // (4i, 1), (12i, 3)
    ASSERT_EQUAL_128(0x408000003f800000, 0x4140000040400000, q29);
    // (7i, -28), (5i, -20)
    ASSERT_EQUAL_128(0x40e00000c1e00000, 0x40a00000c1a00000, q28);
    // (-4i, -1), (-12i, -3)
    ASSERT_EQUAL_128(0xc0800000bf800000, 0xc1400000c0400000, q27);
    // (-7i, 28), (-5i, 20)
    ASSERT_EQUAL_128(0xc0e0000041e00000, 0xc0a0000041a00000, q26);
    // (-35i, 21), (-25i, 15)
    ASSERT_EQUAL_128(0xc20c000041a80000, 0xc1c8000041700000, q25);
    // (-3i, -5), (-9i, -15)
    ASSERT_EQUAL_128(0xc0400000c0a00000, 0xc1100000c1700000, q24);
    // (35i, -21), (25i, -15)
    ASSERT_EQUAL_128(0x420c0000c1a80000, 0x41c80000c1700000, q23);
    // (3i, 5), (9i, 15)
    ASSERT_EQUAL_128(0x4040000040a00000, 0x4110000041700000, q22);
  }
}
6827 
6828 
// Checks Mvn (bitwise NOT of a vector) for every vector arrangement. The
// expected bit pattern is the same regardless of lane size; only the width of
// the written result differs between 128-bit and 64-bit arrangements.
TEST(neon_2regmisc_mvn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Single source value shared by all the operations below.
  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  // 128-bit arrangements.
  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Full-width results: every bit of v0 inverted.
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);

    // 64-bit results: lower half inverted, upper half expected to be zero.
    ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q24);
    ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q25);
    ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q26);
  }
}
6860 
6861 
// Checks Not (bitwise NOT of a vector) for the 16B and 8B arrangements.
TEST(neon_2regmisc_not) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
  __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);

  __ Not(v16.V16B(), v0.V16B());
  __ Not(v17.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
    // The 8B form only produces the lower 64 bits; the upper half is expected
    // to be zero rather than the inverse of v1's (zero) upper half.
    ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
  }
}
6881 
6882 
// Checks the per-lane bit-counting instructions Cls, Clz and Cnt across the
// vector arrangements exercised below (B, H and S lanes for Cls/Clz; B lanes
// only for Cnt).
TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: ascending small byte values; v1: mixed-sign bit patterns.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Cls reads v1.
  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  // Clz reads v0.
  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  // Cnt: note the two forms use different sources (v0 for 8B, v1 for 16B).
  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Cls results. 64-bit arrangements are expected to leave the upper half
    // of the Q register zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
    ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
    ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
    ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);

    // Clz results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
    ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
    ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
    ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);

    // Cnt results (per-byte population counts).
    ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
    ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);
  }
}
6931 
// Checks the element-reversal instructions Rev16, Rev32 and Rev64 for each
// arrangement used below, and additionally Rbit (per-byte bit reversal).
TEST(neon_2regmisc_rev) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 holds distinct ascending bytes so that any reordering is visible in
  // the result; v1 is only used by the Rbit checks.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Reverse bytes within each 16-bit container.
  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  // Reverse bytes or halfwords within each 32-bit container.
  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  // Reverse bytes, halfwords or words within each 64-bit container.
  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  // Reverse the bits within each byte.
  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // For every 64-bit arrangement the upper half of the Q register is
    // expected to be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);

    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
    ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);

    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
    ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
    ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);

    // Rbit: e.g. 0xef (1110'1111) becomes 0xf7 (1111'0111).
    ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
    ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);
  }
}
6982 
6983 
// Checks Sli (shift left and insert) for each vector arrangement and for the
// scalar D form. The instruction merges into its destination, so each
// destination is preloaded with a known value first.
TEST(neon_sli) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: initial destination contents; v1: value to be shifted and inserted.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Preload every destination with v0. In the expected values, the low
  // `shift` bits of each lane still come from v0.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  // Shift amounts cover small, mid-range and maximum (lane size - 1) values,
  // plus a shift of 0 (v20), which replaces the lane with the source lane.
  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Sli(v17.V16B(), v1.V16B(), 7);
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Sli(v19.V8H(), v1.V8H(), 15);
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Sli(v21.V4S(), v1.V4S(), 31);
  __ Sli(v22.V2D(), v1.V2D(), 48);

  // Scalar form.
  __ Sli(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements (and the scalar form) are expected to zero the
    // upper half, even though it was preloaded with v0's upper half.
    ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
    ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);

    ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);
  }
}
7027 
7028 
// Checks Sri (shift right and insert) for each vector arrangement and for the
// scalar D form. The instruction merges into its destination, so each
// destination is preloaded with a known value first.
TEST(neon_sri) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: initial destination contents; v1: value to be shifted and inserted.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Preload every destination with v0. In the expected values, the top
  // `shift` bits of each lane still come from v0.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  // Shift amounts range from 1 up to lane size - 1 for the B, H and S lanes.
  __ Sri(v16.V8B(), v1.V8B(), 4);
  __ Sri(v17.V16B(), v1.V16B(), 7);
  __ Sri(v18.V4H(), v1.V4H(), 8);
  __ Sri(v19.V8H(), v1.V8H(), 15);
  __ Sri(v20.V2S(), v1.V2S(), 1);
  __ Sri(v21.V4S(), v1.V4S(), 31);
  __ Sri(v22.V2D(), v1.V2D(), 48);

  // Scalar form.
  __ Sri(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements (and the scalar form) are expected to zero the
    // upper half, even though it was preloaded with v0's upper half.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
    ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
    ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
    ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
    ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);

    ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);
  }
}
7072 
7073 
// Checks Shrn/Shrn2 (shift right and narrow, truncating) for the H->B, S->H
// and D->S lane narrowings.
TEST(neon_shrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source values, chosen around the signed/unsigned boundaries of each lane
  // size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE(review): v4 is initialised but not read by any instruction in this
  // test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is built in two steps: the plain form produces the lower
  // 64 bits (shift = half the source lane size), then the "2" form fills the
  // upper 64 bits (shift = 1) without disturbing the lower half.
  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    // Upper half: result of the "2" form; lower half: result of the plain
    // form.
    ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
  }
}
7101 
7102 
// Checks Rshrn/Rshrn2 (rounding shift right and narrow) for the H->B, S->H
// and D->S lane narrowings.
TEST(neon_rshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source values, chosen around the signed/unsigned boundaries of each lane
  // size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE(review): v4 is initialised but not read by any instruction in this
  // test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is built in two steps: the plain form produces the lower
  // 64 bits (shift = half the source lane size), then the "2" form fills the
  // upper 64 bits (shift = 1) without disturbing the lower half.
  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    // Upper half: result of the "2" form; lower half: result of the plain
    // form. No saturation is expected: rounded values that overflow the
    // narrow lane wrap (e.g. the 0x0000 halfword in q17's upper half).
    ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
  }
}
7130 
7131 
// Checks Uqshrn/Uqshrn2 (unsigned saturating shift right and narrow) in both
// vector and scalar forms.
TEST(neon_uqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source values, chosen around the signed/unsigned boundaries of each lane
  // size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE(review): v4 is initialised but not read by any instruction in this
  // test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the plain form produces the lower 64 bits (shift = half the
  // source lane size), then the "2" form fills the upper 64 bits (shift = 1).
  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms, operating on the lowest lane of v0, v1 and v3.
  __ Uqshrn(b19, h0, 8);
  __ Uqshrn(h20, s1, 16);
  __ Uqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // In the upper halves (shift = 1), lanes whose shifted value does not fit
    // the narrow unsigned lane are expected to saturate to all-ones.
    ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
    // Scalar results: only the lowest lane is set; the rest of the register
    // is expected to be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7166 
7167 
// Checks Uqrshrn/Uqrshrn2 (unsigned saturating rounding shift right and
// narrow) in both vector and scalar forms.
TEST(neon_uqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source values, chosen around the signed/unsigned boundaries of each lane
  // size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE(review): v4 is initialised but not read by any instruction in this
  // test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the plain form produces the lower 64 bits (shift = half the
  // source lane size), then the "2" form fills the upper 64 bits (shift = 1).
  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms, operating on the lowest lane of v0, v1 and v3.
  __ Uqrshrn(b19, h0, 8);
  __ Uqrshrn(h20, s1, 16);
  __ Uqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // In the upper halves (shift = 1), lanes whose rounded, shifted value
    // does not fit the narrow unsigned lane are expected to saturate to
    // all-ones.
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
    // Scalar results: only the lowest lane is set; the rest of the register
    // is expected to be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7202 
7203 
// Checks Sqshrn/Sqshrn2 (signed saturating shift right and narrow) in both
// vector and scalar forms.
TEST(neon_sqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source values, chosen around the signed/unsigned boundaries of each lane
  // size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE(review): v4 is initialised but not read by any instruction in this
  // test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the plain form produces the lower 64 bits (shift = half the
  // source lane size), then the "2" form fills the upper 64 bits (shift = 1).
  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms, operating on the lowest lane of v0, v1 and v3.
  __ Sqshrn(b19, h0, 8);
  __ Sqshrn(h20, s1, 16);
  __ Sqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // In the upper halves (shift = 1), out-of-range lanes are expected to
    // saturate to the signed minimum (0x80...) or maximum (0x7f...) of the
    // narrow lane.
    ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
    // Scalar results: only the lowest lane is set; the rest of the register
    // is expected to be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7238 
7239 
// Checks Sqrshrn/Sqrshrn2 (signed saturating rounding shift right and narrow)
// in both vector and scalar forms.
TEST(neon_sqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source values, chosen around the signed/unsigned boundaries of each lane
  // size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE(review): v4 is initialised but not read by any instruction in this
  // test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the plain form produces the lower 64 bits (shift = half the
  // source lane size), then the "2" form fills the upper 64 bits (shift = 1).
  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms, operating on the lowest lane of v0, v1 and v3.
  __ Sqrshrn(b19, h0, 8);
  __ Sqrshrn(h20, s1, 16);
  __ Sqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // In the upper halves (shift = 1), out-of-range lanes are expected to
    // saturate to the signed minimum (0x80...) or maximum (0x7f...) of the
    // narrow lane.
    ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
    // Scalar results: only the lowest lane is set; the rest of the register
    // is expected to be zero. Note that q21 saturates to 0x7fffffff.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7274 
7275 
// Checks Sqshrun/Sqshrun2 (signed saturating shift right, narrowing to an
// unsigned result) in both vector and scalar forms.
TEST(neon_sqshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source values, chosen around the signed/unsigned boundaries of each lane
  // size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE(review): v4 is initialised but not read by any instruction in this
  // test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the plain form produces the lower 64 bits (shift = half the
  // source lane size), then the "2" form fills the upper 64 bits (shift = 1).
  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms, operating on the lowest lane of v0, v1 and v3.
  __ Sqshrun(b19, h0, 8);
  __ Sqshrun(h20, s1, 16);
  __ Sqshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Source lanes with the sign bit set are expected to produce zero
    // (e.g. h0 = 0x8081 gives 0x00 in q19); lanes too large for the narrow
    // unsigned lane are expected to saturate to all-ones.
    ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
    // Scalar results: only the lowest lane is set; the rest of the register
    // is expected to be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7310 
7311 
// Checks Sqrshrun/Sqrshrun2 (signed saturating rounding shift right,
// narrowing to an unsigned result) in both vector and scalar forms.
TEST(neon_sqrshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source values, chosen around the signed/unsigned boundaries of each lane
  // size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE(review): v4 is initialised but not read by any instruction in this
  // test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the plain form produces the lower 64 bits (shift = half the
  // source lane size), then the "2" form fills the upper 64 bits (shift = 1).
  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms, operating on the lowest lane of v0, v1 and v3.
  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Source lanes with the sign bit set are expected to produce zero
    // (e.g. h0 = 0x8081 gives 0x00 in q19); lanes too large for the narrow
    // unsigned lane are expected to saturate to all-ones.
    ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
    // Scalar results: only the lowest lane is set; the rest of the register
    // is expected to be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7346 
// Checks the immediate form of Bic, which clears the bits of
// (imm8 << shift) in every lane of the destination, for H and S lanes and
// each shift amount used below.
TEST(neon_modimm_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Bic modifies its destination in place, so every destination starts from
  // the same known bit pattern.
  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  // 16-bit lanes: shifts 0 and 8.
  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  // 32-bit lanes, 64-bit vector: shifts 0, 8, 16 and 24.
  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  // 32-bit lanes, 128-bit vector: shifts 0, 8, 16 and 24.
  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A 0x00 immediate leaves the lane data unchanged. The 64-bit
    // arrangements are still expected to zero the upper half of the register.
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
    ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
    ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
    ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

    ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
    ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);
  }
}
7401 
7402 
// Checks the Movi macro with arbitrary 16-bit immediates: the given value is
// expected to be replicated into every H lane of the destination.
TEST(neon_modimm_movi_16bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // The immediates cover several shapes: a repeated byte (0xabab), one byte
  // combined with 0x00 or 0xff in either position, and two unrelated bytes
  // (0xabcd).
  // NOTE(review): presumably each shape exercises a different code path in
  // the macro; the macro's implementation is not visible here.
  __ Movi(v0.V4H(), 0xabab);
  __ Movi(v1.V4H(), 0xab00);
  __ Movi(v2.V4H(), 0xabff);
  __ Movi(v3.V8H(), 0x00ab);
  __ Movi(v4.V8H(), 0xffab);
  __ Movi(v5.V8H(), 0xabcd);

  END();

  if (CAN_RUN()) {
    RUN();

    // 4H destinations are expected to have a zero upper half.
    ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
    ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
    ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);
    ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
    ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);
  }
}
7428 
7429 
// Checks the Movi macro with arbitrary 32-bit immediates: the given value is
// expected to be replicated into every S lane of the destination.
// NOTE(review): the groups below presumably target different code paths in
// the macro; its implementation is not visible here.
TEST(neon_modimm_movi_32bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // A single non-zero byte at each byte position, other bytes zero.
  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  // A single non-0xff byte at each byte position, other bytes 0xff.
  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  // One byte with zeros on one side and ones on the other.
  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  // Repeated byte, repeated halfword, fully arbitrary value, and a value
  // whose bytes are all 0x00 or 0xff.
  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  if (CAN_RUN()) {
    RUN();

    // 2S destinations are expected to have a zero upper half.
    ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
    ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
    ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
    ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

    ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
    ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
    ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
    ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

    ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
    ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
    ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
    ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
    ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  }
}
7481 
7482 
// Checks the Movi macro with arbitrary 64-bit immediates, for both the 1D and
// 2D arrangements. For 2D the value is expected to be replicated into both
// D lanes.
TEST(neon_modimm_movi_64bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Immediates range from one whose bytes are all 0x00 or 0xff, through
  // repeated byte/halfword/word patterns, to a fully arbitrary value.
  __ Movi(v0.V1D(), 0x00ffff0000ffffff);
  __ Movi(v1.V2D(), 0xabababababababab);
  __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
  __ Movi(v3.V2D(), 0xabcdef01abcdef01);
  __ Movi(v4.V1D(), 0xabcdef0123456789);
  __ Movi(v5.V2D(), 0xabcdef0123456789);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 1D results are checked through the D view only, so the upper 64
    // bits of v0 and v4 are not verified here.
    ASSERT_EQUAL_64(0x00ffff0000ffffff, d0);
    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
    ASSERT_EQUAL_64(0xabcdef0123456789, d4);
    ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);
  }
}
7508 
7509 
// Checks Movi with an 8-bit immediate in its byte, 64-bit, LSL-shifted and
// MSL-shifted forms.
TEST(neon_modimm_movi) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Per-byte immediate.
  __ Movi(v0.V8B(), 0xaa);
  __ Movi(v1.V16B(), 0x55);

  // 64-bit immediate, scalar and 2D forms.
  __ Movi(d2, 0x00ffff0000ffffff);
  __ Movi(v3.V2D(), 0x00ffff0000ffffff);

  // 16-bit lanes, LSL by 0 or 8.
  __ Movi(v16.V4H(), 0x00, LSL, 0);
  __ Movi(v17.V4H(), 0xff, LSL, 8);
  __ Movi(v18.V8H(), 0x00, LSL, 0);
  __ Movi(v19.V8H(), 0xff, LSL, 8);

  // 32-bit lanes, 64-bit vector, LSL by 0, 8, 16 or 24.
  __ Movi(v20.V2S(), 0x00, LSL, 0);
  __ Movi(v21.V2S(), 0xff, LSL, 8);
  __ Movi(v22.V2S(), 0x00, LSL, 16);
  __ Movi(v23.V2S(), 0xff, LSL, 24);

  // 32-bit lanes, 128-bit vector, LSL by 0, 8, 16 or 24.
  __ Movi(v24.V4S(), 0xff, LSL, 0);
  __ Movi(v25.V4S(), 0x00, LSL, 8);
  __ Movi(v26.V4S(), 0xff, LSL, 16);
  __ Movi(v27.V4S(), 0x00, LSL, 24);

  // MSL shifts: per the expected values below, the bits shifted in at the
  // bottom are ones rather than zeros.
  __ Movi(v28.V2S(), 0xaa, MSL, 8);
  __ Movi(v29.V2S(), 0x55, MSL, 16);
  __ Movi(v30.V4S(), 0xff, MSL, 8);
  __ Movi(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit destinations (8B, D, 4H, 2S) are expected to have a zero upper
    // half.
    ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
    ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);

    ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
    ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);

    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
    ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);

    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);

    ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

    // MSL results: imm8 followed by 8 or 16 one-bits in each S lane.
    ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);
  }
}
7573 
7574 
// Checks Mvni with an 8-bit immediate in its LSL-shifted and MSL-shifted
// forms. Each expected lane value is the bitwise inverse of the shifted
// immediate.
TEST(neon_modimm_mvni) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // 16-bit lanes, LSL by 0 or 8.
  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  // 32-bit lanes, 64-bit vector, LSL by 0, 8, 16 or 24.
  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  // 32-bit lanes, 128-bit vector, LSL by 0, 8, 16 or 24.
  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  // MSL shifts (ones shifted in at the bottom before the inversion, so the
  // low bits of each expected lane are zero).
  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit destinations (4H, 2S) are expected to have a zero upper half;
    // the inversion applies only to the lanes that are written.
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

    ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

    ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
    ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);
  }
}
7626 
7627 
// Checks the immediate form of Orr, which sets the bits of (imm8 << shift)
// in every lane of the destination, for H and S lanes and each shift amount
// used below.
TEST(neon_modimm_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Orr modifies its destination in place, so every destination starts from
  // the same known bit pattern.
  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  // 16-bit lanes: shifts 0 and 8.
  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  // 32-bit lanes, 64-bit vector: shifts 0, 8, 16 and 24.
  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  // 32-bit lanes, 128-bit vector: shifts 0, 8, 16 and 24.
  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A 0x00 immediate leaves the lane data unchanged. The 64-bit
    // arrangements are still expected to zero the upper half of the register.
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
    ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

    ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
    ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);
  }
}
7682 
// Loads every (high, low) combination of kHalfValues as a 128-bit literal
// into a Q register and checks both halves against reference values held in
// general-purpose registers.
TEST(ldr_literal_values_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit halves that are combined pairwise to form the 128-bit literals.
  static const uint64_t kHalfValues[] = {0x8000000000000000,
                                         0x7fffffffffffffff,
                                         0x0000000000000000,
                                         0xffffffffffffffff,
                                         0x00ff00ff00ff00ff,
                                         0x1234567890abcdef};
  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
  const Register& ref_low64 = x1;
  const Register& ref_high64 = x2;
  const Register& loaded_low64 = x3;
  const Register& loaded_high64 = x4;
  const VRegister& tgt = q0;

  START();
  // x0 counts the literal loads that did not match their reference.
  __ Mov(x0, 0);

  for (int i = 0; i < card; i++) {
    __ Mov(ref_low64, kHalfValues[i]);
    for (int j = 0; j < card; j++) {
      __ Mov(ref_high64, kHalfValues[j]);
      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
      __ Mov(loaded_low64, tgt.V2D(), 0);
      __ Mov(loaded_high64, tgt.V2D(), 1);
      // The flags end up `ne` if either half differs: when the low halves
      // differ, Ccmp skips the high comparison and installs NoFlag instead.
      __ Cmp(loaded_low64, ref_low64);
      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
      // Accumulate instead of overwriting: a plain Cset here would let a
      // later matching load clear the evidence of an earlier mismatch, so
      // only the final combination would actually be tested.
      __ Cinc(x0, x0, ne);
    }
  }
  END();

  if (CAN_RUN()) {
    RUN();

    // If one of the values differs, the trace can be used to identify which
    // one.
    ASSERT_EQUAL_64(0, x0);
  }
}
7724 
// Checks that the vector form of Fmov replicates a floating-point value into
// every single- or half-precision lane of the destination.
TEST(fmov_vec_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  // Single-precision lanes.
  __ Fmov(v0.V2S(), 20.0);
  __ Fmov(v1.V4S(), 1024.0);

  // Half-precision lanes, specified by their raw bit patterns.
  __ Fmov(v2.V4H(), RawbitsToFloat16(0xC500U));
  __ Fmov(v3.V8H(), RawbitsToFloat16(0x4A80U));

  END();

  if (!(CAN_RUN())) return;
  RUN();

  // 0x41A00000 and 0x44800000 are the single-precision encodings of 20.0 and
  // 1024.0; the half-precision results repeat the raw bits used above.
  ASSERT_EQUAL_64(0x41A0000041A00000, d0);
  ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1);
  ASSERT_EQUAL_64(0xC500C500C500C500, d2);
  ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3);
}
7748 
7749 // TODO: add arbitrary values once load literal to Q registers is supported.
// Checks vector Fmov with values that have an immediate encoding, values that
// do not, zero, and positive infinity, for the 2S, 4S and 2D arrangements.
TEST(neon_modimm_fmov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Encodable as an instruction immediate.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Not encodable as an instruction immediate.
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  if (!(CAN_RUN())) return;
  RUN();

  // Raw encodings of each value; the "pair" variants hold the same
  // single-precision encoding in both 32-bit halves of a 64-bit word.
  const uint64_t one_bits = FloatToRawbits(1.0);
  const uint64_t one_pair = (one_bits << 32) | one_bits;
  const uint64_t half_bits = FloatToRawbits(0.5);
  const uint64_t half_pair = (half_bits << 32) | half_bits;
  const uint64_t minus_thirteen_bits = DoubleToRawbits(-13.0);
  const uint64_t non_imm32_bits = FloatToRawbits(kNonImmFP32);
  const uint64_t non_imm32_pair = (non_imm32_bits << 32) | non_imm32_bits;
  const uint64_t non_imm64_bits = DoubleToRawbits(kNonImmFP64);
  const uint64_t inf32_bits = FloatToRawbits(kFP32PositiveInfinity);
  const uint64_t inf32_pair = (inf32_bits << 32) | inf32_bits;
  const uint64_t inf64_bits = DoubleToRawbits(kFP64PositiveInfinity);

  // The 2S results are expected to have a zero upper 64 bits.
  ASSERT_EQUAL_128(0x0, one_pair, q11);
  ASSERT_EQUAL_128(half_pair, half_pair, q12);
  ASSERT_EQUAL_128(minus_thirteen_bits, minus_thirteen_bits, q22);
  ASSERT_EQUAL_128(0x0, non_imm32_pair, q13);
  ASSERT_EQUAL_128(non_imm32_pair, non_imm32_pair, q14);
  ASSERT_EQUAL_128(non_imm64_bits, non_imm64_bits, q23);
  ASSERT_EQUAL_128(0x0, 0x0, q1);
  ASSERT_EQUAL_128(0x0, 0x0, q2);
  ASSERT_EQUAL_128(0x0, 0x0, q3);
  ASSERT_EQUAL_128(0x0, inf32_pair, q4);
  ASSERT_EQUAL_128(inf32_pair, inf32_pair, q5);
  ASSERT_EQUAL_128(inf64_bits, inf64_bits, q6);
}
7805 
7806 
// Checks the byte-lane permutes Trn1/Trn2, Zip1/Zip2 and Uzp1/Uzp2 on two
// sources whose bytes are all distinct, so each result byte identifies the
// source lane it came from.
TEST(neon_perm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 holds bytes 0x00-0x0f and v1 holds bytes 0x10-0x1f.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  // Transpose.
  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  // Interleave.
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  // De-interleave.
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
  ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
  ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
  ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
  ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
  ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);
}
7835 
7836 
// Checks Dup from a vector element: broadcast into 128-bit and 64-bit
// vectors, the scalar destination forms, and the case where the destination
// is also the source register.
TEST(neon_copy_dup_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are read-only sources; v2-v5 are overwritten in place below.
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
  __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);

  // Broadcast into every lane of a 128-bit vector.
  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  // Broadcast into every lane of a 64-bit vector.
  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  // Scalar destinations.
  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  // Destination aliases the source.
  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
  ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  // 64-bit vector destinations: the upper half is expected to be zero.
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
  ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

  // Scalar destinations: everything above the element is expected to be zero.
  ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
  ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
  ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
  ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
  ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
  ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
}
7893 
7894 
// Checks Dup from a general-purpose register into every vector arrangement,
// including broadcasting the zero registers.
TEST(neon_copy_dup_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source value; the W-register forms see only its low 32 bits.
  __ Mov(x0, 0x0011223344556677);

  // 128-bit destinations.
  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  // 64-bit destinations.
  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  // Broadcast of the zero register.
  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
  ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  // 64-bit destinations: the upper half is expected to be zero.
  ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
  ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
  ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

  ASSERT_EQUAL_128(0, 0, q2);
  ASSERT_EQUAL_128(0, 0, q3);
  ASSERT_EQUAL_128(0, 0, q4);
  ASSERT_EQUAL_128(0, 0, q5);
}
7936 
7937 
// Checks Ins (element to element): a single lane of the destination is
// replaced and all other lanes keep their value, both with distinct registers
// and with the destination used as its own source.
TEST(neon_copy_ins_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources (v0, v1) and destinations (v16-v19) for the distinct-register
  // cases.
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  // Registers that act as both source and destination; upper half is zero.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  // One insertion per lane size, between different registers.
  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  // One insertion per lane size, within a single register.
  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0, 0, q5);
}
7981 
7982 
// Checks the element-to-element form of Mov using the same inputs and
// expected results as the equivalent Ins test: one destination lane is
// replaced and the rest are preserved.
TEST(neon_copy_mov_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources (v0, v1) and destinations (v16-v19) for the distinct-register
  // cases.
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  // Registers that act as both source and destination; upper half is zero.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  // One move per lane size, between different registers.
  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
  __ Mov(v17.V8H(), 0, v1.V8H(), 7);
  __ Mov(v18.V4S(), 3, v1.V4S(), 0);
  __ Mov(v19.V2D(), 1, v0.V2D(), 0);

  // One move per lane size, within a single register.
  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
  __ Mov(v3.V8H(), 0, v3.V8H(), 7);
  __ Mov(v4.V4S(), 3, v4.V4S(), 0);
  __ Mov(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0, 0, q5);
}
8026 
8027 
// Checks Smov: a B, H or S element is moved to a W or X register with sign
// extension, using both negative and non-negative elements.
TEST(neon_copy_smov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  // Byte and halfword elements into 32-bit destinations.
  __ Smov(w0, v0.B(), 7);
  __ Smov(w1, v0.B(), 15);

  __ Smov(w2, v0.H(), 0);
  __ Smov(w3, v0.H(), 3);

  // The same elements into 64-bit destinations.
  __ Smov(x4, v0.B(), 7);
  __ Smov(x5, v0.B(), 15);

  __ Smov(x6, v0.H(), 0);
  __ Smov(x7, v0.H(), 3);

  // Word elements into 64-bit destinations.
  __ Smov(x16, v0.S(), 0);
  __ Smov(x17, v0.S(), 1);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_32(0xfffffffe, w0);
  ASSERT_EQUAL_32(0x00000001, w1);
  ASSERT_EQUAL_32(0x00003210, w2);
  ASSERT_EQUAL_32(0xfffffedc, w3);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
  ASSERT_EQUAL_64(0x0000000000000001, x5);
  ASSERT_EQUAL_64(0x0000000000003210, x6);
  ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
  ASSERT_EQUAL_64(0x0000000076543210, x16);
  ASSERT_EQUAL_64(0xfffffffffedcba98, x17);
}
8067 
8068 
// Checks Umov, and the Mov form that takes a vector element as its source,
// when moving an element to a general-purpose register without sign
// extension.
TEST(neon_copy_umov_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  // One Umov per element size.
  __ Umov(w0, v0.B(), 15);
  __ Umov(w1, v0.H(), 0);
  __ Umov(w2, v0.S(), 3);
  __ Umov(x3, v0.D(), 1);

  // Mov on the same S and D elements; the results must match Umov's.
  __ Mov(w4, v0.S(), 3);
  __ Mov(x5, v0.D(), 1);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_32(0x00000001, w0);
  ASSERT_EQUAL_32(0x00003210, w1);
  ASSERT_EQUAL_32(0x01234567, w2);
  ASSERT_EQUAL_64(0x0123456789abcdef, x3);
  ASSERT_EQUAL_32(0x01234567, w4);
  ASSERT_EQUAL_64(0x0123456789abcdef, x5);
}
8097 
8098 
// Checks Ins from a general-purpose register: one lane of the destination is
// replaced by the low bits of the register and the other lanes are preserved.
TEST(neon_copy_ins_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Value inserted by every case below (w0 is its low 32 bits).
  __ Mov(x0, 0x0011223344556677);

  // Destinations with all 128 bits populated.
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  // Destinations whose upper 64 bits start as zero.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
  ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
}
8141 
8142 
// Checks Ext on 16-byte and 8-byte vectors at the smallest and largest byte
// offsets, and with the destination aliasing one or both source registers.
TEST(neon_extract_ext) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Operands for the 128-bit cases.
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);

  // Operands for the 64-bit cases.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);

  // The aliasing cases come last in each group because they overwrite the
  // operands used by the preceding instructions.
  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Destination is the 2nd source.
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // One register for everything.

  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // Destination is the 1st source.
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // One register for everything.

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
  ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
  ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);

  ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
  ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
  ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
  ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
}
8180 
8181 
// Checks Uaddl for each source lane size: the sums are produced in lanes
// twice as wide, so carries out of the source width are kept.
TEST(neon_3different_uaddl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte sources; v0 is the (pre-zeroed) destination.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
  __ Movi(v2.V2D(), 0, 0x0101010101010101);

  // Halfword sources; v3 and v4 are the (pre-zeroed) destinations.
  __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v5.V2D(), 0, 0x0000000180008001);
  __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
  __ Movi(v7.V2D(), 0, 0x0001000100010001);

  // Word sources; v16 and v17 are the (pre-zeroed) destinations.
  __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v18.V2D(), 0, 0x0000000000000001);
  __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
  __ Movi(v20.V2D(), 0, 0x0000000100000001);

  // Each addition adds one to every lane of the first operand.
  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());

  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());

  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
  ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
  ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
  ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
  ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
}
8224 
8225 
// Checks the narrowing high-half add/subtract instructions (plain and
// rounding). Each base form writes the low eight bytes of its destination
// and the matching "2" form then fills the upper eight bytes.
TEST(neon_3different_addhn_subhn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Add, keep the high half of each sum.
  __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
  // Rounding add.
  __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
  // Subtract, keep the high half of each difference.
  __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
  // Rounding subtract.
  __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
  ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
  ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
  ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
}
8257 
// Checks scalar NEON operations on D registers: add, subtract, shifts by
// register and shifts by immediate. The sources have non-zero upper halves
// while every result is expected to have a zero upper half.
TEST(neon_d_only_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Arithmetic operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
  // Register shift amounts: +2 and -2.
  __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
  __ Movi(v4.V2D(), 0xffffffffffffffff, -2);

  __ Add(d16, d0, d0);
  __ Add(d17, d1, d1);
  __ Add(d18, d2, d2);

  __ Sub(d19, d0, d0);
  __ Sub(d20, d0, d1);
  __ Sub(d21, d1, d0);

  // Shift by register, unsigned then signed.
  __ Ushl(d22, d0, d3);
  __ Ushl(d23, d0, d4);
  __ Sshl(d24, d0, d3);
  __ Sshl(d25, d0, d4);

  // Shift by immediate.
  __ Ushr(d26, d0, 1);
  __ Sshr(d27, d0, 3);
  __ Shl(d28, d0, 0);
  __ Shl(d29, d0, 16);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
  ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
  ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
  ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
  ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
  ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
  ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);
}
8305 
8306 
// Checks scalar Sqshl by an immediate of one for B, H, S and D elements.
// Each size is tested with the largest positive value, the most negative
// value, and one; the first two are expected to come back unchanged
// (saturated) and the last to double.
TEST(neon_sqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 are reloaded with size-appropriate inputs before each group.

  // Byte elements.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshl(b16, b0, 1);
  __ Sqshl(b17, b1, 1);
  __ Sqshl(b18, b2, 1);

  // Halfword elements.
  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshl(h19, h0, 1);
  __ Sqshl(h20, h1, 1);
  __ Sqshl(h21, h2, 1);

  // Word elements.
  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshl(s22, s0, 1);
  __ Sqshl(s23, s1, 1);
  __ Sqshl(s24, s2, 1);

  // Doubleword elements.
  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshl(d25, d0, 1);
  __ Sqshl(d26, d1, 1);
  __ Sqshl(d27, d2, 1);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0, 0x7f, q16);
  ASSERT_EQUAL_128(0, 0x80, q17);
  ASSERT_EQUAL_128(0, 0x02, q18);

  ASSERT_EQUAL_128(0, 0x7fff, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0x0002, q21);

  ASSERT_EQUAL_128(0, 0x7fffffff, q22);
  ASSERT_EQUAL_128(0, 0x80000000, q23);
  ASSERT_EQUAL_128(0, 0x00000002, q24);

  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
}
8362 
8363 
// Checks scalar Uqshl by an immediate of one for B, H, S and D elements.
// For each size, an input with the top bit clear doubles exactly, an input
// with only the top bit set is expected to saturate to all ones, and one
// becomes two.
TEST(neon_uqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 are reloaded with size-appropriate inputs before each group.

  // Byte elements.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Uqshl(b16, b0, 1);
  __ Uqshl(b17, b1, 1);
  __ Uqshl(b18, b2, 1);

  // Halfword elements.
  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Uqshl(h19, h0, 1);
  __ Uqshl(h20, h1, 1);
  __ Uqshl(h21, h2, 1);

  // Word elements.
  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Uqshl(s22, s0, 1);
  __ Uqshl(s23, s1, 1);
  __ Uqshl(s24, s2, 1);

  // Doubleword elements.
  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Uqshl(d25, d0, 1);
  __ Uqshl(d26, d1, 1);
  __ Uqshl(d27, d2, 1);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0, 0xfe, q16);
  ASSERT_EQUAL_128(0, 0xff, q17);
  ASSERT_EQUAL_128(0, 0x02, q18);

  ASSERT_EQUAL_128(0, 0xfffe, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x0002, q21);

  ASSERT_EQUAL_128(0, 0xfffffffe, q22);
  ASSERT_EQUAL_128(0, 0xffffffff, q23);
  ASSERT_EQUAL_128(0, 0x00000002, q24);

  ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
}
8419 
8420 
// Checks scalar Sqshlu by an immediate of two for B, H, S and D elements.
// For each size, the largest positive input is expected to saturate to all
// ones, the most negative input to zero, and one becomes four.
TEST(neon_sqshlu_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 are reloaded with size-appropriate inputs before each group.

  // Byte elements.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshlu(b16, b0, 2);
  __ Sqshlu(b17, b1, 2);
  __ Sqshlu(b18, b2, 2);

  // Halfword elements.
  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshlu(h19, h0, 2);
  __ Sqshlu(h20, h1, 2);
  __ Sqshlu(h21, h2, 2);

  // Word elements.
  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshlu(s22, s0, 2);
  __ Sqshlu(s23, s1, 2);
  __ Sqshlu(s24, s2, 2);

  // Doubleword elements.
  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshlu(d25, d0, 2);
  __ Sqshlu(d26, d1, 2);
  __ Sqshlu(d27, d2, 2);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x00, q17);
  ASSERT_EQUAL_128(0, 0x04, q18);

  ASSERT_EQUAL_128(0, 0xffff, q19);
  ASSERT_EQUAL_128(0, 0x0000, q20);
  ASSERT_EQUAL_128(0, 0x0004, q21);

  ASSERT_EQUAL_128(0, 0xffffffff, q22);
  ASSERT_EQUAL_128(0, 0x00000000, q23);
  ASSERT_EQUAL_128(0, 0x00000004, q24);

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000004, q27);
}
8476 
8477 
// Checks Sshll/Sshll2: source lanes are sign-extended to twice their width
// and shifted left. The base form reads the low half of the source vector and
// the "2" form reads the high half.
TEST(neon_sshll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0, v1 and v2 are the byte, halfword and word sources respectively.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords, shifted by 4.
  __ Sshll(v16.V8H(), v0.V8B(), 4);
  __ Sshll2(v17.V8H(), v0.V16B(), 4);

  // Halfwords to words, shifted by 8.
  __ Sshll(v18.V4S(), v1.V4H(), 8);
  __ Sshll2(v19.V4S(), v1.V8H(), 8);

  // Words to doublewords, shifted by 16.
  __ Sshll(v20.V2D(), v2.V2S(), 16);
  __ Sshll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
  ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
  ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
  ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
  ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
}
8509 
// Checks Shll/Shll2: each source lane is widened to twice its size and
// shifted left by the source lane width, so it ends up in the upper half of
// the destination lane. The "2" form reads the high half of the source.
TEST(neon_shll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0, v1 and v2 are the byte, halfword and word sources respectively.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Shll(v16.V8H(), v0.V8B(), 8);
  __ Shll2(v17.V8H(), v0.V16B(), 8);

  // Halfwords to words.
  __ Shll(v18.V4S(), v1.V4H(), 16);
  __ Shll2(v19.V4S(), v1.V8H(), 16);

  // Words to doublewords.
  __ Shll(v20.V2D(), v2.V2S(), 32);
  __ Shll2(v21.V2D(), v2.V4S(), 32);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
  ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
  ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
  ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
  ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
}
8541 
// Checks Ushll/Ushll2: source lanes are zero-extended to twice their width
// and shifted left. The base form reads the low half of the source vector and
// the "2" form reads the high half.
TEST(neon_ushll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0, v1 and v2 are the byte, halfword and word sources respectively.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords, shifted by 4.
  __ Ushll(v16.V8H(), v0.V8B(), 4);
  __ Ushll2(v17.V8H(), v0.V16B(), 4);

  // Halfwords to words, shifted by 8.
  __ Ushll(v18.V4S(), v1.V4H(), 8);
  __ Ushll2(v19.V4S(), v1.V8H(), 8);

  // Words to doublewords, shifted by 16.
  __ Ushll(v20.V2D(), v2.V2S(), 16);
  __ Ushll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (!(CAN_RUN())) return;
  RUN();

  ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
  ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
  ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
  ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
  ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
}
8573 
8574 
// Checks Sxtl/Sxtl2 (signed extend long) for B->H, H->S and S->D. Sxtl
// extends the low 64 bits of the source and Sxtl2 the high 64 bits; negative
// lanes are sign-extended, as the expected values show.
TEST(neon_sxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi(vd.V2D(), high, low): the first immediate is the high 64 bits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Sxtl(v16.V8H(), v0.V8B());
  __ Sxtl2(v17.V8H(), v0.V16B());

  // Halfwords to words.
  __ Sxtl(v18.V4S(), v1.V4H());
  __ Sxtl2(v19.V4S(), v1.V8H());

  // Words to doublewords.
  __ Sxtl(v20.V2D(), v2.V2S());
  __ Sxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
    ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
  }
}
8606 
8607 
// Checks Uxtl/Uxtl2 (unsigned extend long) for B->H, H->S and S->D. Uxtl
// extends the low 64 bits of the source and Uxtl2 the high 64 bits; every
// lane is zero-extended, as the expected values show.
TEST(neon_uxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi(vd.V2D(), high, low): the first immediate is the high 64 bits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Uxtl(v16.V8H(), v0.V8B());
  __ Uxtl2(v17.V8H(), v0.V16B());

  // Halfwords to words.
  __ Uxtl(v18.V4S(), v1.V4H());
  __ Uxtl2(v19.V4S(), v1.V8H());

  // Words to doublewords.
  __ Uxtl(v20.V2D(), v2.V2S());
  __ Uxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
    ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
  }
}
8639 
8640 
// Checks Ssra (signed shift right and accumulate) for every vector
// arrangement plus the scalar D form. Each destination is preloaded with a
// copy of its source, so every lane is expected to hold
// src + (src >> shift) using an arithmetic shift, with wrap-around on
// overflow.
TEST(neon_ssra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi(vd.V2D(), high, low): the first immediate is the high 64 bits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Preload the accumulators with the source values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  // NOTE(review): v25 is initialised here but is not used by any instruction
  // or assertion below.
  __ Mov(v25.V2D(), v4.V2D());

  __ Ssra(v16.V8B(), v0.V8B(), 4);
  __ Ssra(v17.V16B(), v0.V16B(), 4);

  __ Ssra(v18.V4H(), v1.V4H(), 8);
  __ Ssra(v19.V8H(), v1.V8H(), 8);

  __ Ssra(v20.V2S(), v2.V2S(), 16);
  __ Ssra(v21.V4S(), v2.V4S(), 16);

  __ Ssra(v22.V2D(), v3.V2D(), 32);
  __ Ssra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Ssra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the high 64 bits of
    // the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
    ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
    ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8693 
// Checks Srsra (signed rounding shift right and accumulate) for every vector
// arrangement plus the scalar D form. Each destination is preloaded with a
// copy of its source, so every lane is expected to hold the source plus its
// rounded arithmetic right shift, with wrap-around on overflow.
TEST(neon_srsra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi(vd.V2D(), high, low): the first immediate is the high 64 bits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Preload the accumulators with the source values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  // NOTE(review): v25 is initialised here but is not used by any instruction
  // or assertion below.
  __ Mov(v25.V2D(), v4.V2D());

  __ Srsra(v16.V8B(), v0.V8B(), 4);
  __ Srsra(v17.V16B(), v0.V16B(), 4);

  __ Srsra(v18.V4H(), v1.V4H(), 8);
  __ Srsra(v19.V8H(), v1.V8H(), 8);

  __ Srsra(v20.V2S(), v2.V2S(), 16);
  __ Srsra(v21.V4S(), v2.V4S(), 16);

  __ Srsra(v22.V2D(), v3.V2D(), 32);
  __ Srsra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Srsra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the high 64 bits of
    // the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
    ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
    ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8746 
// Checks Usra (unsigned shift right and accumulate) for every vector
// arrangement plus the scalar D form. Each destination is preloaded with a
// copy of its source, so every lane is expected to hold
// src + (src >> shift) using a logical shift, with wrap-around on overflow.
TEST(neon_usra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi(vd.V2D(), high, low): the first immediate is the high 64 bits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Preload the accumulators with the source values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  // NOTE(review): v25 is initialised here but is not used by any instruction
  // or assertion below.
  __ Mov(v25.V2D(), v4.V2D());

  __ Usra(v16.V8B(), v0.V8B(), 4);
  __ Usra(v17.V16B(), v0.V16B(), 4);

  __ Usra(v18.V4H(), v1.V4H(), 8);
  __ Usra(v19.V8H(), v1.V8H(), 8);

  __ Usra(v20.V2S(), v2.V2S(), 16);
  __ Usra(v21.V4S(), v2.V4S(), 16);

  __ Usra(v22.V2D(), v3.V2D(), 32);
  __ Usra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Usra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the high 64 bits of
    // the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
    ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
    ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8799 
// Checks Ursra (unsigned rounding shift right and accumulate) for every
// vector arrangement plus the scalar D form. Each destination is preloaded
// with a copy of its source, so every lane is expected to hold the source
// plus its rounded logical right shift, with wrap-around on overflow.
TEST(neon_ursra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi(vd.V2D(), high, low): the first immediate is the high 64 bits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Preload the accumulators with the source values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  // NOTE(review): v25 is initialised here but is not used by any instruction
  // or assertion below.
  __ Mov(v25.V2D(), v4.V2D());

  __ Ursra(v16.V8B(), v0.V8B(), 4);
  __ Ursra(v17.V16B(), v0.V16B(), 4);

  __ Ursra(v18.V4H(), v1.V4H(), 8);
  __ Ursra(v19.V8H(), v1.V8H(), 8);

  __ Ursra(v20.V2S(), v2.V2S(), 16);
  __ Ursra(v21.V4S(), v2.V4S(), 16);

  __ Ursra(v22.V2D(), v3.V2D(), 32);
  __ Ursra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Ursra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the high 64 bits of
    // the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
    ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
    ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8852 
8853 
// Checks the scalar register-shift form of Uqshl (unsigned saturating shift
// left) at B, H, S and D sizes. v2 supplies a shift of +1 and v3 a shift of
// -1 (a right shift by one), so each size covers a saturating left shift, a
// non-saturating left shift and two right shifts.
TEST(neon_uqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqshl(b16, b0, b2);
  __ Uqshl(b17, b0, b3);
  __ Uqshl(b18, b1, b2);
  __ Uqshl(b19, b1, b3);
  __ Uqshl(h20, h0, h2);
  __ Uqshl(h21, h0, h3);
  __ Uqshl(h22, h1, h2);
  __ Uqshl(h23, h1, h3);
  __ Uqshl(s24, s0, s2);
  __ Uqshl(s25, s0, s3);
  __ Uqshl(s26, s1, s2);
  __ Uqshl(s27, s1, s3);
  __ Uqshl(d28, d0, d2);
  __ Uqshl(d29, d0, d3);
  __ Uqshl(d30, d1, d2);
  __ Uqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // All bits above the scalar result are expected to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x3f, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fbf, q23);
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);
  }
}
8904 
8905 
// Checks the scalar register-shift form of Sqshl (signed saturating shift
// left) at B, H, S and D sizes. v2 supplies a shift of +1 and v3 a shift of
// -1 (a right shift by one); the inputs are chosen so that the left shifts
// saturate to the most negative (v0) and most positive (v1) values.
TEST(neon_sqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqshl(b16, b0, b2);
  __ Sqshl(b17, b0, b3);
  __ Sqshl(b18, b1, b2);
  __ Sqshl(b19, b1, b3);
  __ Sqshl(h20, h0, h2);
  __ Sqshl(h21, h0, h3);
  __ Sqshl(h22, h1, h2);
  __ Sqshl(h23, h1, h3);
  __ Sqshl(s24, s0, s2);
  __ Sqshl(s25, s0, s3);
  __ Sqshl(s26, s1, s2);
  __ Sqshl(s27, s1, s3);
  __ Sqshl(d28, d0, d2);
  __ Sqshl(d29, d0, d3);
  __ Sqshl(d30, d1, d2);
  __ Sqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // All bits above the scalar result are expected to be zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xdf, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfdf, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8956 
8957 
// Checks the scalar D-sized form of Urshl (unsigned rounding shift left by
// register). v2 supplies a shift of +1 and v3 a shift of -1 (a rounding right
// shift by one). The left shifts do not saturate: bits shifted out are lost.
TEST(neon_urshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Urshl(d28, d0, d2);
  __ Urshl(d29, d0, d3);
  __ Urshl(d30, d1, d2);
  __ Urshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // The high 64 bits of each result are expected to be zero.
    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
8984 
8985 
// Checks the scalar D-sized form of Srshl (signed rounding shift left by
// register). v2 supplies a shift of +1 and v3 a shift of -1 (a rounding right
// shift by one). The left shifts do not saturate: bits shifted out are lost.
TEST(neon_srshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Srshl(d28, d0, d2);
  __ Srshl(d29, d0, d3);
  __ Srshl(d30, d1, d2);
  __ Srshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // The high 64 bits of each result are expected to be zero.
    ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
9012 
9013 
// Checks the scalar register-shift form of Uqrshl (unsigned saturating
// rounding shift left) at B, H, S and D sizes. v2 supplies a shift of +1 and
// v3 a shift of -1 (a rounding right shift by one), so each size covers a
// saturating left shift, a non-saturating left shift and two rounded right
// shifts.
TEST(neon_uqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqrshl(b16, b0, b2);
  __ Uqrshl(b17, b0, b3);
  __ Uqrshl(b18, b1, b2);
  __ Uqrshl(b19, b1, b3);
  __ Uqrshl(h20, h0, h2);
  __ Uqrshl(h21, h0, h3);
  __ Uqrshl(h22, h1, h2);
  __ Uqrshl(h23, h1, h3);
  __ Uqrshl(s24, s0, s2);
  __ Uqrshl(s25, s0, s3);
  __ Uqrshl(s26, s1, s2);
  __ Uqrshl(s27, s1, s3);
  __ Uqrshl(d28, d0, d2);
  __ Uqrshl(d29, d0, d3);
  __ Uqrshl(d30, d1, d2);
  __ Uqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // All bits above the scalar result are expected to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x40, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fc0, q23);
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
9064 
9065 
// Checks the scalar register-shift form of Sqrshl (signed saturating rounding
// shift left) at B, H, S and D sizes. v2 supplies a shift of +1 and v3 a
// shift of -1 (a rounding right shift by one); the inputs are chosen so that
// the left shifts saturate to the most negative (v0) and most positive (v1)
// values.
TEST(neon_sqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqrshl(b16, b0, b2);
  __ Sqrshl(b17, b0, b3);
  __ Sqrshl(b18, b1, b2);
  __ Sqrshl(b19, b1, b3);
  __ Sqrshl(h20, h0, h2);
  __ Sqrshl(h21, h0, h3);
  __ Sqrshl(h22, h1, h2);
  __ Sqrshl(h23, h1, h3);
  __ Sqrshl(s24, s0, s2);
  __ Sqrshl(s25, s0, s3);
  __ Sqrshl(s26, s1, s2);
  __ Sqrshl(s27, s1, s3);
  __ Sqrshl(d28, d0, d2);
  __ Sqrshl(d29, d0, d3);
  __ Sqrshl(d30, d1, d2);
  __ Sqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // All bits above the scalar result are expected to be zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xe0, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfe0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
9116 
9117 
// Checks the scalar form of Uqadd (unsigned saturating add) at B, H, S and D
// sizes by adding each input to itself: v0 saturates to the all-ones maximum,
// while v1 and v2 give exact (non-saturated) sums.
TEST(neon_uqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Uqadd(b16, b0, b0);
  __ Uqadd(b17, b1, b1);
  __ Uqadd(b18, b2, b2);
  __ Uqadd(h19, h0, h0);
  __ Uqadd(h20, h1, h1);
  __ Uqadd(h21, h2, h2);
  __ Uqadd(s22, s0, s0);
  __ Uqadd(s23, s1, s1);
  __ Uqadd(s24, s2, s2);
  __ Uqadd(d25, d0, d0);
  __ Uqadd(d26, d1, d1);
  __ Uqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    // All bits above the scalar result are expected to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0xfe, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);
    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0xfefe, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);
    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0xfffefefe, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9159 
9160 
// Checks the scalar form of Sqadd (signed saturating add) at B, H, S and D
// sizes by adding each input to itself: v0 saturates to the most negative
// value, v1 saturates to the most positive value, and v2 gives an exact sum.
TEST(neon_sqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Sqadd(b16, b0, b0);
  __ Sqadd(b17, b1, b1);
  __ Sqadd(b18, b2, b2);
  __ Sqadd(h19, h0, h0);
  __ Sqadd(h20, h1, h1);
  __ Sqadd(h21, h2, h2);
  __ Sqadd(s22, s0, s0);
  __ Sqadd(s23, s1, s1);
  __ Sqadd(s24, s2, s2);
  __ Sqadd(d25, d0, d0);
  __ Sqadd(d26, d1, d1);
  __ Sqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    // All bits above the scalar result are expected to be zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0x7f, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);
    ASSERT_EQUAL_128(0, 0x8000, q19);
    ASSERT_EQUAL_128(0, 0x7fff, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);
    ASSERT_EQUAL_128(0, 0x80000000, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9202 
9203 
// Checks the scalar form of Uqsub (unsigned saturating subtract) at B, H, S
// and D sizes. For each size it computes x - x (zero), larger - smaller (an
// exact difference) and smaller - larger (saturates to zero).
TEST(neon_uqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);

  __ Uqsub(b16, b0, b0);
  __ Uqsub(b17, b0, b1);
  __ Uqsub(b18, b1, b0);
  __ Uqsub(h19, h0, h0);
  __ Uqsub(h20, h0, h1);
  __ Uqsub(h21, h1, h0);
  __ Uqsub(s22, s0, s0);
  __ Uqsub(s23, s0, s1);
  __ Uqsub(s24, s1, s0);
  __ Uqsub(d25, d0, d0);
  __ Uqsub(d26, d0, d1);
  __ Uqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // B-sized results.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x71, q17);
    ASSERT_EQUAL_128(0, 0, q18);

    // H-sized results.
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7171, q20);
    ASSERT_EQUAL_128(0, 0, q21);

    // S-sized results.
    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x70017171, q23);
    ASSERT_EQUAL_128(0, 0, q24);

    // D-sized results.
    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
    ASSERT_EQUAL_128(0, 0, q27);
  }
}
9247 
9248 
// Checks the scalar form of Sqsub (signed saturating subtract) at B, H, S and
// D sizes. For each size it computes x - x (zero), negative - positive
// (saturates to the most negative value) and positive - negative (saturates
// to the most positive value).
TEST(neon_sqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms; the high halves are
  // filler patterns that must not appear in the results.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);

  __ Sqsub(b16, b0, b0);
  __ Sqsub(b17, b0, b1);
  __ Sqsub(b18, b1, b0);
  __ Sqsub(h19, h0, h0);
  __ Sqsub(h20, h0, h1);
  __ Sqsub(h21, h1, h0);
  __ Sqsub(s22, s0, s0);
  __ Sqsub(s23, s0, s1);
  __ Sqsub(s24, s1, s0);
  __ Sqsub(d25, d0, d0);
  __ Sqsub(d26, d0, d1);
  __ Sqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // B-sized results.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);

    // H-sized results.
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x7fff, q21);

    // S-sized results.
    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x7fffffff, q24);

    // D-sized results.
    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
9292 
9293 
// Checks the vector forms of Fmla and Fmls (floating-point fused
// multiply-add/subtract to accumulator) for the 2S, 4S and 2D arrangements.
// Every accumulator starts as a copy of v0, and the same raw bit patterns in
// v1 and v2 are reinterpreted as single- or double-precision lanes depending
// on the arrangement.
TEST(neon_fmla_fmls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Viewed as single-precision lanes, v2 holds -Inf, +Inf, an all-ones NaN
  // pattern and 1.0 (lanes 0 to 3).
  __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
  __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
  __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);
  // Preload the accumulators.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V16B(), v0.V16B());
  __ Mov(v18.V16B(), v0.V16B());
  __ Mov(v19.V16B(), v0.V16B());
  __ Mov(v20.V16B(), v0.V16B());
  __ Mov(v21.V16B(), v0.V16B());

  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 2S forms leave the high 64 bits of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
    ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
    ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
    ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
    ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);
  }
}
9327 
9328 
// Checks the half-precision vector forms of Fmla (8H and 4H arrangements)
// with ordinary values, infinities, zero, and NaN accumulators.
TEST(neon_fmla_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision inputs, replicated across all eight lanes:
  //   v0 = 2.0, v1 = 45.0, v2 = +Inf, v3 = -Inf,
  //   v4 = 0x7e00 (NaN), v5 = 0x7c01 (NaN), v6 = 0.0
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
  __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
  // Preload the accumulators: 2.0 for the arithmetic cases and the two NaN
  // patterns for the NaN-handling cases.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmla(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmla(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmla(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmla(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmla(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmla(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmla(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmla(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 2.0 + (2.0 * 45.0) = 92.0 (0x55c0); 2.0 + (+Inf * -Inf) = -Inf.
    // The 0x7c01 NaN accumulator is expected to come back as 0x7e01.
    ASSERT_EQUAL_128(0x55c055c055c055c0, 0x55c055c055c055c0, v16);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v17);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
    // The 4H forms leave the high 64 bits of the destination zero.
    ASSERT_EQUAL_128(0, 0x55c055c055c055c0, v20);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v21);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
  }
}
9374 
9375 
// Checks the half-precision vector forms of Fmls (8H and 4H arrangements)
// with ordinary values, infinities, zero, and NaN accumulators.
TEST(neon_fmls_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision inputs, replicated across all eight lanes:
  //   v0 = 2.0, v1 = 45.0, v2 = +Inf, v3 = -Inf,
  //   v4 = 0x7e00 (NaN), v5 = 0x7c01 (NaN), v6 = 0.0
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
  __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
  // Preload the accumulators: 2.0 for the arithmetic cases and the two NaN
  // patterns for the NaN-handling cases.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmls(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmls(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmls(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmls(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmls(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmls(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmls(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmls(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 2.0 - (2.0 * 45.0) = -88.0 (0xd580); 2.0 - (+Inf * -Inf) = +Inf.
    // The 0x7c01 NaN accumulator is expected to come back as 0x7e01.
    ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v16);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v17);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
    // The 4H forms leave the high 64 bits of the destination zero.
    ASSERT_EQUAL_128(0, 0xd580d580d580d580, v20);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v21);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
  }
}
9421 
9422 
TEST(neon_fhm) {
  // Test basic operation of fmlal{2} and fmlsl{2}. The simulator tests have
  // more comprehensive input sets.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kNEON,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFHM);

  START();
  // Test multiplications:
  //        v30                               v31
  //  [0]   65504 (max normal)          *     65504 (max normal)
  //  [1]   -1                          *     0
  //  [2]   2^-24 (min subnormal)       *     2^-24 (min subnormal)
  //  [3]   -2^-24 (min subnormal)      *     65504 (max normal)
  //  [4]   6.10e-5 (min normal)        *     0.99...
  //  [5]   0                           *     -0
  //  [6]   -0                          *     0
  //  [7]   -Inf                        *     -Inf
  __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
  __ Movi(v31.V8H(), 0xfc00000080003bff, 0x7bff000100007bff);

  // Accumulators for use with Fmlal{2}:
  // v0.S[0] = 384
  // v0.S[1] = -0
  __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x8000000043c00000);
  // v1.S[0] = -(2^-48 + 2^-71)
  // v1.S[1] = 0
  __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7800001);
  // v2.S[0] = 128
  // v2.S[1] = 0
  // v2.S[2] = 1
  // v2.S[3] = 1
  __ Movi(v2.V4S(), 0x3f8000003f800000, 0x0000000043000000);
  // v3.S[0] = 0
  // v3.S[1] = -0
  // v3.S[2] = -0
  // v3.S[3] = 0
  __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
  // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
  // results are just the negation of the Fmlal{2} results.
  __ Fneg(v4.V4S(), v0.V4S());
  __ Fneg(v5.V4S(), v1.V4S());
  __ Fneg(v6.V4S(), v2.V4S());
  __ Fneg(v7.V4S(), v3.V4S());

  // The 2S forms multiply lanes [0]-[1] (Fmlal) or [2]-[3] (Fmlal2) of the
  // table above; the 4S forms multiply lanes [0]-[3] or [4]-[7].
  __ Fmlal(v0.V2S(), v30.V2H(), v31.V2H());
  __ Fmlal2(v1.V2S(), v30.V2H(), v31.V2H());
  __ Fmlal(v2.V4S(), v30.V4H(), v31.V4H());
  __ Fmlal2(v3.V4S(), v30.V4H(), v31.V4H());

  __ Fmlsl(v4.V2S(), v30.V2H(), v31.V2H());
  __ Fmlsl2(v5.V2S(), v30.V2H(), v31.V2H());
  __ Fmlsl(v6.V4S(), v30.V4H(), v31.V4H());
  __ Fmlsl2(v7.V4S(), v30.V4H(), v31.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Fmlal(2S)
    // v0.S[0] = 384 + (65504 * 65504) = 4290774528 (rounded from 4290774400)
    // v0.S[1] = -0 + (-1 * 0) = -0
    ASSERT_EQUAL_128(0x0000000000000000, 0x800000004f7fc006, v0);
    // Fmlal2(2S)
    // v1.S[0] = -(2^-48 + 2^-71) + (2^-24 * 2^-24) = -2^-71
    // v1.S[1] = 0 + (-2^-24 * 65504) = -0.003904...
    ASSERT_EQUAL_128(0x0000000000000000, 0xbb7fe0009c000000, v1);
    // Fmlal(4S)
    // v2.S[0] = 128 + (65504 * 65504) = 4290774016 (rounded from 4290774144)
    // v2.S[1] = 0 + (-1 * 0) = 0
    // v2.S[2] = 1 + (2^-24 * 2^-24) = 1 (rounded)
    // v2.S[3] = 1 + (-2^-24 * 65504) = 0.996...
    ASSERT_EQUAL_128(0x3f7f00203f800000, 0x000000004f7fc004, v2);
    // Fmlal2(4S)
    // v3.S[0] = 0 + (6.103516e-5 * 0.99...) = 6.100535e-5
    // v3.S[1] = -0 + (0 * -0) = -0
    // v3.S[2] = -0 + (-0 * 0) = -0
    // v3.S[3] = 0 + (-Inf * -Inf) = Inf
    ASSERT_EQUAL_128(0x7f80000080000000, 0x80000000387fe000, v3);

    // Fmlsl results are mostly the same, but negated.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000cf7fc006, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3b7fe0001c000000, v5);
    // In this case: v6.S[1] = -0 - (-1 * 0) = 0, which is not the negation of
    // the Fmlal result (v2.S[1] = 0).
    ASSERT_EQUAL_128(0xbf7f0020bf800000, 0x00000000cf7fc004, v6);
    ASSERT_EQUAL_128(0xff80000000000000, 0x00000000b87fe000, v7);
  }
}
9512 
9513 
TEST(neon_byelement_fhm)9514 TEST(neon_byelement_fhm) {
9515   // Test basic operation of fmlal{2} and fmlsl{2} (by element). The simulator
9516   // tests have more comprehensive input sets.
9517   SETUP_WITH_FEATURES(CPUFeatures::kFP,
9518                       CPUFeatures::kNEON,
9519                       CPUFeatures::kNEONHalf,
9520                       CPUFeatures::kFHM);
9521 
9522   START();
9523   // Set up multiplication inputs.
9524   //
9525   // v30.H[0] = 65504 (max normal)
9526   // v30.H[1] = -1
9527   // v30.H[2] = 2^-24 (min subnormal)
9528   // v30.H[3] = -2^-24 (min subnormal)
9529   // v30.H[4] = 6.10e-5 (min normal)
9530   // v30.H[5] = 0
9531   // v30.H[6] = -0
9532   // v30.H[7] = -Inf
9533   __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
9534 
9535   // Each test instruction should only use one lane of vm, so set up unique
9536   // registers with poison values in other lanes. The poison NaN avoids the
9537   // default NaN (so it shouldn't be encountered accidentally), but is otherwise
9538   // arbitrary.
9539   VRegister poison = v29;
9540   __ Movi(v29.V8H(), 0x7f417f417f417f41, 0x7f417f417f417f41);
9541   // v31.H[0,2,4,...]: 0.9995117 (the value just below 1)
9542   // v31.H[1,3,5,...]: 1.000977 (the value just above 1)
9543   __ Movi(v31.V8H(), 0x3bff3c013bff3c01, 0x3bff3c013bff3c01);
9544   // Set up [v8,v15] as vm inputs.
9545   for (int i = 0; i <= 7; i++) {
9546     VRegister vm(i + 8);
9547     __ Mov(vm, poison);
9548     __ Ins(vm.V8H(), i, v31.V8H(), i);
9549   }
9550 
9551   // Accumulators for use with Fmlal{2}:
9552   // v0.S[0] = 2^-8
9553   // v0.S[1] = 1
9554   __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x3f8000003b800000);
9555   // v1.S[0] = -1.5 * 2^-49
9556   // v1.S[1] = 0
9557   __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7400000);
9558   // v2.S[0] = 0
9559   // v2.S[1] = 2^14
9560   // v2.S[2] = 1.5 * 2^-48
9561   // v2.S[3] = Inf
9562   __ Movi(v2.V4S(), 0x7f80000027c00000, 0xc680000000000000);
9563   // v3.S[0] = 0
9564   // v3.S[1] = -0
9565   // v3.S[2] = -0
9566   // v3.S[3] = 0
9567   __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
9568   // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
9569   // results are just the negation of the Fmlal{2} results.
9570   __ Fneg(v4.V4S(), v0.V4S());
9571   __ Fneg(v5.V4S(), v1.V4S());
9572   __ Fneg(v6.V4S(), v2.V4S());
9573   __ Fneg(v7.V4S(), v3.V4S());
9574 
9575   __ Fmlal(v0.V2S(), v30.V2H(), v8.H(), 0);
9576   __ Fmlal2(v1.V2S(), v30.V2H(), v9.H(), 1);
9577   __ Fmlal(v2.V4S(), v30.V4H(), v10.H(), 2);
9578   __ Fmlal2(v3.V4S(), v30.V4H(), v11.H(), 3);
9579 
9580   __ Fmlsl(v4.V2S(), v30.V2H(), v12.H(), 4);
9581   __ Fmlsl2(v5.V2S(), v30.V2H(), v13.H(), 5);
9582   __ Fmlsl(v6.V4S(), v30.V4H(), v14.H(), 6);
9583   __ Fmlsl2(v7.V4S(), v30.V4H(), v15.H(), 7);
9584   END();
9585 
9586   if (CAN_RUN()) {
9587     RUN();
9588 
9589     // Fmlal(2S)
9590     // v0.S[0] = 2^-8 + (65504 * 1.000977) = 65567.96875 (rounded)
9591     // v0.S[1] = 1 + (-1 * 1.000977) = -0.000976...
9592     ASSERT_EQUAL_128(0x0000000000000000, 0xba80000047800ffc, v0);
9593     // Fmlal2(2S)
9594     // v1.S[0] = (-1.5 * 2^-49) + (2^-24 * 0.9995117) = 5.958e-8 (rounded)
9595     // v1.S[1] = 0 + (-2^-24 * 0.9995117) = -5.958e-8
9596     ASSERT_EQUAL_128(0x0000000000000000, 0xb37fe000337fdfff, v1);
9597     // Fmlal(4S)
9598     // v2.S[0] = 0 + (65504 * 1.000977) = 65566.96875
9599     // v2.S[1] = 2^14 + (-1 * 1.000977) = -16385 (rounded from -16385.5)
9600     // v2.S[2] = (1.5 * 2^-48) + (2^-24 * 1.000977) = 5.966e-8 (rounded up)
9601     // v2.S[3] = Inf + (-2^-24 * 1.000977) = Inf
9602     ASSERT_EQUAL_128(0x7f80000033802001, 0xc680020047800ffc, v2);
9603     // Fmlal2(4S)
9604     // v3.S[0] = 0 + (6.103516e-5 * 0.9995117) = 6.100535e-5
9605     // v3.S[1] = -0 + (0 * 0.9995117) = 0
9606     // v3.S[2] = -0 + (-0 * 0.9995117) = -0
9607     // v3.S[3] = 0 + (-Inf * 0.9995117) = -Inf
9608     ASSERT_EQUAL_128(0xff80000080000000, 0x00000000387fe000, v3);
9609 
9610     // Fmlsl results are mostly the same, but negated.
9611     ASSERT_EQUAL_128(0x0000000000000000, 0x3a800000c7800ffc, v4);
9612     ASSERT_EQUAL_128(0x0000000000000000, 0x337fe000b37fdfff, v5);
9613     ASSERT_EQUAL_128(0xff800000b3802001, 0x46800200c7800ffc, v6);
9614     // In this case: v7.S[2] = 0 - (-0 * 0.9995117) = 0
9615     ASSERT_EQUAL_128(0x7f80000000000000, 0x00000000b87fe000, v7);
9616   }
9617 }
9618 
9619 
// Checks scalar Fmulx on S and D registers: one ordinary product (2.0 * 0.5)
// followed by the four signed (zero * infinity) combinations. For the latter
// the test expects +/-2.0, positive when both operands have the same sign.
TEST(neon_fmulx_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision operands and products.
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmulx(s16, s0, s1);
  __ Fmulx(s17, s2, s4);
  __ Fmulx(s18, s2, s5);
  __ Fmulx(s19, s3, s4);
  __ Fmulx(s20, s3, s5);

  // Double-precision operands and products, mirroring the cases above.
  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fmulx(d27, d21, d22);
  __ Fmulx(d28, d23, d25);
  __ Fmulx(d29, d23, d26);
  __ Fmulx(d30, d24, d25);
  __ Fmulx(d31, d24, d26);
  END();

  if (CAN_RUN()) {
    RUN();

    // Order: 2.0 * 0.5, +0 * +Inf, +0 * -Inf, -0 * +Inf, -0 * -Inf.
    ASSERT_EQUAL_FP32(1.0, s16);
    ASSERT_EQUAL_FP32(2.0, s17);
    ASSERT_EQUAL_FP32(-2.0, s18);
    ASSERT_EQUAL_FP32(-2.0, s19);
    ASSERT_EQUAL_FP32(2.0, s20);
    ASSERT_EQUAL_FP64(1.0, d27);
    ASSERT_EQUAL_FP64(2.0, d28);
    ASSERT_EQUAL_FP64(-2.0, d29);
    ASSERT_EQUAL_FP64(-2.0, d30);
    ASSERT_EQUAL_FP64(2.0, d31);
  }
}
9664 
9665 
// Checks vector Fmulx on half-precision lanes in both the 8H and 4H
// arrangements, using the same cases as the scalar test: 2.0 * 0.5 and the
// four signed (zero * infinity) combinations, which are expected to give
// +/-2.0. The 4H forms are also expected to leave the upper 64 bits zero.
TEST(neon_fmulx_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every H lane of a register holds the same value:
  //   v0 = 2.0 (0x4000), v1 = 0.5 (0x3800), v2 = +0 (0x0000),
  //   v3 = -0 (0x8000), v4 = +Inf (0x7c00), v5 = -Inf (0xfc00).
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Fmulx(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmulx(v7.V8H(), v2.V8H(), v4.V8H());
  __ Fmulx(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fmulx(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fmulx(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fmulx(v11.V4H(), v0.V4H(), v1.V4H());
  __ Fmulx(v12.V4H(), v2.V4H(), v4.V4H());
  __ Fmulx(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fmulx(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fmulx(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();
    // 0x3c00 = 1.0, 0x4000 = 2.0, 0xc000 = -2.0.
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v7);
    ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v8);
    ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v9);
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v10);
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v11);
    ASSERT_EQUAL_128(0, 0x4000400040004000, v12);
    ASSERT_EQUAL_128(0, 0xc000c000c000c000, v13);
    ASSERT_EQUAL_128(0, 0xc000c000c000c000, v14);
    ASSERT_EQUAL_128(0, 0x4000400040004000, v15);
  }
}
9704 
9705 
// Checks scalar half-precision Fmulx: 2.0 * 0.5 and the four signed
// (zero * infinity) combinations, which are expected to give +/-2.0.
TEST(neon_fmulx_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fmulx(h6, h0, h1);
  __ Fmulx(h7, h2, h4);
  __ Fmulx(h8, h2, h5);
  __ Fmulx(h9, h3, h4);
  __ Fmulx(h10, h3, h5);
  END();

  if (CAN_RUN()) {
    RUN();
    // Order: 2.0 * 0.5, +0 * +Inf, +0 * -Inf, -0 * +Inf, -0 * -Inf.
    ASSERT_EQUAL_FP16(Float16(1.0), h6);
    ASSERT_EQUAL_FP16(Float16(2.0), h7);
    ASSERT_EQUAL_FP16(Float16(-2.0), h8);
    ASSERT_EQUAL_FP16(Float16(-2.0), h9);
    ASSERT_EQUAL_FP16(Float16(2.0), h10);
  }
}
9735 
// Checks vector Fabd (absolute difference) on half-precision lanes in the 8H
// and 4H arrangements, covering finite values, signed zeros and infinities.
// The 4H forms are also expected to leave the upper 64 bits zero.
TEST(neon_fabd_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every H lane of a register holds the same value:
  //   v0 = 2.0 (0x4000), v1 = 0.5 (0x3800), v2 = +0 (0x0000),
  //   v3 = -0 (0x8000), v4 = +Inf (0x7c00), v5 = -Inf (0xfc00).
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Fabd(v6.V8H(), v1.V8H(), v0.V8H());
  __ Fabd(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fabd(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fabd(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fabd(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fabd(v11.V4H(), v1.V4H(), v0.V4H());
  __ Fabd(v12.V4H(), v2.V4H(), v3.V4H());
  __ Fabd(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fabd(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fabd(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // |0.5 - 2.0| = 1.5 (0x3e00); |+0 - -0| = +0; any difference involving
    // an infinity is +Inf (0x7c00).
    ASSERT_EQUAL_128(0x3e003e003e003e00, 0x3e003e003e003e00, v6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v9);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v10);
    ASSERT_EQUAL_128(0, 0x3e003e003e003e00, v11);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v12);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v13);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v14);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v15);
  }
}
9776 
9777 
// Checks scalar half-precision Fabd (absolute difference) with finite values,
// signed zeros and infinities.
TEST(neon_fabd_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fabd(h16, h1, h0);
  __ Fabd(h17, h2, h3);
  __ Fabd(h18, h2, h5);
  __ Fabd(h19, h3, h4);
  __ Fabd(h20, h3, h5);
  END();

  if (CAN_RUN()) {
    RUN();
    // |0.5 - 2.0| = 1.5; |+0 - -0| = 0; differences with an infinity are +Inf.
    ASSERT_EQUAL_FP16(Float16(1.5), h16);
    ASSERT_EQUAL_FP16(Float16(0.0), h17);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h18);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h19);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h20);
  }
}
9807 
9808 
// Checks scalar Fabd (absolute difference) on S and D registers with finite
// values, signed zeros and infinities.
TEST(neon_fabd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision cases.
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fabd(s16, s1, s0);
  __ Fabd(s17, s2, s3);
  __ Fabd(s18, s2, s5);
  __ Fabd(s19, s3, s4);
  __ Fabd(s20, s3, s5);

  // Double-precision cases. Note that the first one passes its operands in
  // the opposite order (2.0, 0.5) to the single-precision case above.
  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fabd(d27, d21, d22);
  __ Fabd(d28, d23, d24);
  __ Fabd(d29, d23, d26);
  __ Fabd(d30, d24, d25);
  __ Fabd(d31, d24, d26);
  END();

  if (CAN_RUN()) {
    RUN();

    // |0.5 - 2.0| = |2.0 - 0.5| = 1.5; |+0 - -0| = 0; differences with an
    // infinity are +Inf.
    ASSERT_EQUAL_FP32(1.5, s16);
    ASSERT_EQUAL_FP32(0.0, s17);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
    ASSERT_EQUAL_FP64(1.5, d27);
    ASSERT_EQUAL_FP64(0.0, d28);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);
  }
}
9853 
9854 
// Checks vector Frecps on half-precision lanes in the 8H and 4H arrangements.
// The expected values correspond to 2.0 - (n * m) for the two operands, with
// finite and infinite inputs. The 4H forms are also expected to leave the
// upper 64 bits zero.
TEST(neon_frecps_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every H lane of a register holds the same value:
  //   v0 = 2.0 (0x4000), v1 = -1.0 (0xbc00), v2 = 45.0 (0x51a0),
  //   v3 = +Inf (0x7c00), v4 = -Inf (0xfc00).
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
  __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Frecps(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frecps(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frecps(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frecps(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frecps(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frecps(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frecps(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frecps(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 2 - (2 * 45) = -88 (0xd580); 2 - (-1 * 45) = 47 (0x51e0);
    // 2 - (2 * +Inf) = -Inf; 2 - (2 * -Inf) = +Inf.
    ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v5);
    ASSERT_EQUAL_128(0x51e051e051e051e0, 0x51e051e051e051e0, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
    ASSERT_EQUAL_128(0, 0xd580d580d580d580, v9);
    ASSERT_EQUAL_128(0, 0x51e051e051e051e0, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);
  }
}
9890 
9891 
// Checks scalar half-precision Frecps. The expected values correspond to
// 2.0 - (n * m) for the two operands, with finite and infinite inputs.
TEST(neon_frecps_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frecps(h5, h0, h2);
  __ Frecps(h6, h1, h2);
  __ Frecps(h7, h0, h3);
  __ Frecps(h8, h0, h4);
  END();

  if (CAN_RUN()) {
    RUN();

    // 2 - (2 * 45) = -88; 2 - (-1 * 45) = 47; 2 - (2 * +Inf) = -Inf;
    // 2 - (2 * -Inf) = +Inf.
    ASSERT_EQUAL_FP16(Float16(-88.0), h5);
    ASSERT_EQUAL_FP16(Float16(47.0), h6);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9920 
9921 
// Checks vector Frsqrts on half-precision lanes in the 8H and 4H
// arrangements. The expected values correspond to (3.0 - (n * m)) / 2 for the
// two operands, with finite and infinite inputs. The 4H forms are also
// expected to leave the upper 64 bits zero.
TEST(neon_frsqrts_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every H lane of a register holds the same value:
  //   v0 = 2.0 (0x4000), v1 = -1.0 (0xbc00), v2 = 45.0 (0x51a0),
  //   v3 = +Inf (0x7c00), v4 = -Inf (0xfc00).
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
  __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Frsqrts(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frsqrts(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frsqrts(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frsqrts(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frsqrts(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frsqrts(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frsqrts(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frsqrts(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // (3 - (2 * 45)) / 2 = -43.5 (0xd170); (3 - (-1 * 45)) / 2 = 24 (0x4e00);
    // the +Inf and -Inf operands give -Inf and +Inf respectively.
    ASSERT_EQUAL_128(0xd170d170d170d170, 0xd170d170d170d170, v5);
    ASSERT_EQUAL_128(0x4e004e004e004e00, 0x4e004e004e004e00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
    ASSERT_EQUAL_128(0, 0xd170d170d170d170, v9);
    ASSERT_EQUAL_128(0, 0x4e004e004e004e00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);
  }
}
9957 
9958 
// Checks scalar half-precision Frsqrts. The expected values correspond to
// (3.0 - (n * m)) / 2 for the two operands, with finite and infinite inputs.
TEST(neon_frsqrts_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frsqrts(h5, h0, h2);
  __ Frsqrts(h6, h1, h2);
  __ Frsqrts(h7, h0, h3);
  __ Frsqrts(h8, h0, h4);
  END();

  if (CAN_RUN()) {
    RUN();

    // (3 - (2 * 45)) / 2 = -43.5; (3 - (-1 * 45)) / 2 = 24; the +Inf and -Inf
    // operands give -Inf and +Inf respectively.
    ASSERT_EQUAL_FP16(Float16(-43.5), h5);
    ASSERT_EQUAL_FP16(Float16(24.0), h6);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9987 
9988 
// Checks vector Faddp (pairwise add) on half-precision lanes in the 8H and 4H
// arrangements, including (+Inf + -Inf), signed zeros and NaN inputs.
//
// In the expected values, the low half of each result holds the pair sums of
// the first source operand and the high half those of the second.
TEST(neon_faddp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Adjacent H-lane pairs (even lane, odd lane), repeated across the register:
  //   v0 = (2.0, 1.0), v1 = (+Inf, -Inf), v2 = (-0, +0),
  //   v3 = (0x7c01 signalling NaN, 0x7e00 quiet NaN).
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x0000800000008000, 0x0000800000008000);
  __ Movi(v3.V2D(), 0x7e007c017e007c01, 0x7e007c017e007c01);

  __ Faddp(v4.V8H(), v1.V8H(), v0.V8H());
  __ Faddp(v5.V8H(), v3.V8H(), v2.V8H());
  __ Faddp(v6.V4H(), v1.V4H(), v0.V4H());
  __ Faddp(v7.V4H(), v3.V4H(), v2.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // +Inf + -Inf = 0x7e00 (NaN); 2.0 + 1.0 = 3.0 (0x4200).
    ASSERT_EQUAL_128(0x4200420042004200, 0x7e007e007e007e00, v4);
    // The NaN pair gives 0x7e01 (the signalling NaN input with its quiet bit
    // set); -0 + +0 = +0.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e017e017e017e01, v5);
    ASSERT_EQUAL_128(0, 0x420042007e007e00, v6);
    ASSERT_EQUAL_128(0, 0x000000007e017e01, v7);
  }
}
10015 
10016 
// Checks the scalar (pair-reducing) form of Faddp for 2S and 2D sources:
// ordinary values, opposite infinities or NaNs, and signed zeros.
TEST(neon_faddp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (S[1], S[0]):
  //   v0 = (1.0, 2.0), v1 = (-Inf, +Inf), v2 = (+0, -0).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x0000000080000000);
  __ Faddp(s0, v0.V2S());
  __ Faddp(s1, v1.V2S());
  __ Faddp(s2, v2.V2S());

  // Double-precision pairs (D[1], D[0]):
  //   v3 = (-2.0, 2.0), v4 = (two quiet NaNs of opposite sign),
  //   v5 = (+0, -0).
  __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Faddp(d3, v3.V2D());
  __ Faddp(d4, v4.V2D());
  __ Faddp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(3.0, s0);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
    ASSERT_EQUAL_FP32(0.0, s2);
    ASSERT_EQUAL_FP64(0.0, d3);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
    ASSERT_EQUAL_FP64(0.0, d5);
  }
}
10047 
10048 
// Checks the scalar (pair-reducing) form of Faddp for a 2H source: ordinary
// values, opposite infinities and signed zeros.
TEST(neon_faddp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs (H[1], H[0]):
  //   v0 = (1.0, 2.0), v1 = (-Inf, +Inf), v2 = (+0, -0).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x00008000);
  __ Faddp(h0, v0.V2H());
  __ Faddp(h1, v1.V2H());
  __ Faddp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(3.0), h0);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h1);
    ASSERT_EQUAL_FP16(Float16(0.0), h2);
  }
}
10071 
10072 
// Checks the scalar (pair-reducing) form of Fmaxp for 2S and 2D sources:
// ordinary values, opposite infinities, and a pair containing a quiet NaN
// (for which the default NaN is expected).
TEST(neon_fmaxp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (S[1], S[0]):
  //   v0 = (1.0, 2.0), v1 = (-Inf, +Inf), v2 = (quiet NaN, -Inf).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxp(s0, v0.V2S());
  __ Fmaxp(s1, v1.V2S());
  __ Fmaxp(s2, v2.V2S());

  // Double-precision pairs (D[1], D[0]):
  //   v3 = (1.0, 2.0), v4 = (-Inf, +Inf), v5 = (+Inf, quiet NaN).
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fmaxp(d3, v3.V2D());
  __ Fmaxp(d4, v4.V2D());
  __ Fmaxp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10103 
10104 
// Checks the scalar (pair-reducing) form of Fmaxp for a 2H source: ordinary
// values, opposite infinities, and a pair containing a quiet NaN (for which
// the default NaN is expected).
TEST(neon_fmaxp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs (H[1], H[0]):
  //   v0 = (1.0, 2.0), v1 = (-Inf, +Inf), v2 = (quiet NaN, -Inf).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxp(h0, v0.V2H());
  __ Fmaxp(h1, v1.V2H());
  __ Fmaxp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10127 
10128 
// Checks vector Fmax on half-precision lanes in the 8H and 4H arrangements:
// finite values, opposite infinities, and quiet / signalling NaN operands.
// The 4H forms are also expected to leave the upper 64 bits zero.
TEST(neon_fmax_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every H lane of a register holds the same value:
  //   v0 = 1.0 (0x3c00), v1 = 2.0 (0x4000), v2 = -Inf (0xfc00),
  //   v3 = +Inf (0x7c00), v4 = quiet NaN (0x7e00),
  //   v5 = signalling NaN (0x7c01).
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmax(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmax(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmax(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmax(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmax(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmax(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // max(1, 2) = 2; max(-Inf, +Inf) = +Inf; a quiet NaN operand gives 0x7e00;
    // a signalling NaN operand gives 0x7e01 (the input with its quiet bit
    // set).
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10165 
10166 
// Checks vector Fmaxp (pairwise maximum) on half-precision lanes in the 8H
// and 4H arrangements, including infinities and quiet / signalling NaNs.
//
// In the expected values, the low half of each result holds the pair maxima
// of the first source operand and the high half those of the second.
TEST(neon_fmaxp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Adjacent H-lane pairs (even lane, odd lane), repeated across the register:
  //   v0 = (2.0, 1.0), v1 = (+Inf, -Inf), v2 = (1.0, quiet NaN 0x7e00),
  //   v3 = (2.0, signalling NaN 0x7c01).
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // max(2, 1) = 2 (0x4000); max(+Inf, -Inf) = +Inf (0x7c00).
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
    // The quiet NaN pair gives 0x7e00; the signalling NaN pair gives 0x7e01.
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10193 
10194 
// Checks vector Fmaxnm on half-precision lanes in the 8H and 4H
// arrangements. Unlike the Fmax test, a quiet NaN paired with a number is
// expected to yield the number; a signalling NaN still yields a NaN. The 4H
// forms are also expected to leave the upper 64 bits zero.
TEST(neon_fmaxnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every H lane of a register holds the same value:
  //   v0 = 1.0 (0x3c00), v1 = 2.0 (0x4000), v2 = -Inf (0xfc00),
  //   v3 = +Inf (0x7c00), v4 = quiet NaN (0x7e00),
  //   v5 = signalling NaN (0x7c01).
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmaxnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmaxnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmaxnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmaxnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmaxnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // maxnm(1, 2) = 2; maxnm(-Inf, +Inf) = +Inf; maxnm(quiet NaN, 1) = 1;
    // maxnm(signalling NaN, 2) = 0x7e01 (the input with its quiet bit set).
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10231 
10232 
// Checks vector Fmaxnmp (pairwise maxNum) on half-precision lanes in the 8H
// and 4H arrangements, including infinities and quiet / signalling NaNs.
//
// In the expected values, the low half of each result holds the pair results
// of the first source operand and the high half those of the second.
TEST(neon_fmaxnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Adjacent H-lane pairs (even lane, odd lane), repeated across the register:
  //   v0 = (2.0, 1.0), v1 = (+Inf, -Inf), v2 = (1.0, quiet NaN 0x7e00),
  //   v3 = (2.0, signalling NaN 0x7c01).
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // maxnm(2, 1) = 2 (0x4000); maxnm(+Inf, -Inf) = +Inf (0x7c00).
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
    // The quiet NaN pair gives the number, 1.0 (0x3c00); the signalling NaN
    // pair gives 0x7e01.
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10259 
10260 
// Checks the scalar (pair-reducing) form of Fmaxnmp for 2S and 2D sources:
// ordinary values, opposite infinities, and a quiet NaN paired with -Inf (for
// which -Inf, the numeric operand, is expected).
TEST(neon_fmaxnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (S[1], S[0]):
  //   v0 = (1.0, 2.0), v1 = (-Inf, +Inf), v2 = (quiet NaN, -Inf).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxnmp(s0, v0.V2S());
  __ Fmaxnmp(s1, v1.V2S());
  __ Fmaxnmp(s2, v2.V2S());

  // Double-precision pairs (D[1], D[0]):
  //   v3 = (1.0, 2.0), v4 = (-Inf, +Inf), v5 = (quiet NaN, -Inf).
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fmaxnmp(d3, v3.V2D());
  __ Fmaxnmp(d4, v4.V2D());
  __ Fmaxnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10291 
10292 
// Checks the scalar (pair-reducing) form of Fmaxnmp for a 2H source: ordinary
// values, opposite infinities, and a quiet NaN paired with -Inf (for which
// -Inf, the numeric operand, is expected).
TEST(neon_fmaxnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs (H[1], H[0]):
  //   v0 = (1.0, 2.0), v1 = (-Inf, +Inf), v2 = (quiet NaN, -Inf).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxnmp(h0, v0.V2H());
  __ Fmaxnmp(h1, v1.V2H());
  __ Fmaxnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10315 
10316 
// Checks the scalar (pair-reducing) form of Fminp for 2S and 2D sources:
// ordinary values, opposite infinities, and a pair containing a quiet NaN
// (for which the default NaN is expected).
TEST(neon_fminp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (S[1], S[0]):
  //   v0 = (1.0, 2.0), v1 = (-Inf, +Inf), v2 = (quiet NaN, -Inf).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminp(s0, v0.V2S());
  __ Fminp(s1, v1.V2S());
  __ Fminp(s2, v2.V2S());

  // Double-precision pairs (D[1], D[0]):
  //   v3 = (1.0, 2.0), v4 = (-Inf, +Inf), v5 = (+Inf, quiet NaN).
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fminp(d3, v3.V2D());
  __ Fminp(d4, v4.V2D());
  __ Fminp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10347 
10348 
// Checks the scalar (pair-reducing) form of Fminp for a 2H source: ordinary
// values, opposite infinities, and a pair containing a quiet NaN (for which
// the default NaN is expected).
TEST(neon_fminp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs (H[1], H[0]):
  //   v0 = (1.0, 2.0), v1 = (-Inf, +Inf), v2 = (quiet NaN, -Inf).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminp(h0, v0.V2H());
  __ Fminp(h1, v1.V2H());
  __ Fminp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10371 
10372 
// Checks the half-precision vector form of Fmin, in both the 8H and 4H
// arrangements, on ordinary values, infinities and NaNs. Every source register
// holds the same value in all eight lanes.
TEST(neon_fmin_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);  // 2.0
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);  // -infinity
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);  // +infinity
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);  // quiet NaN
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);  // signalling NaN

  // The same four operand pairs are used for the 8H and then the 4H forms.
  __ Fmin(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmin(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmin(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmin(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmin(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmin(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmin(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected lanes: min(1.0, 2.0) is 1.0, min(-inf, +inf) is -inf, a quiet
    // NaN operand is returned unchanged (0x7e00), and the signalling NaN comes
    // back with bit 9 set (0x7c01 becomes 0x7e01).
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // For the 4H forms the upper 64 bits of the destination are expected to be
    // zero.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10409 
10410 
// Checks the half-precision vector pairwise form of Fminp in the 8H and 4H
// arrangements. Each source register repeats one pair of values, so every
// pairwise result taken from a given source is expected to be the same.
TEST(neon_fminp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Repeated pairs:
  //   v0: 1.0 (0x3c00) and 2.0 (0x4000)
  //   v1: -infinity (0xfc00) and +infinity (0x7c00)
  //   v2: quiet NaN (0x7e00) and 1.0 (0x3c00)
  //   v3: signalling NaN (0x7c01) and 2.0 (0x4000)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result is expected to hold the pairwise minima of
    // the first source and the high half those of the second source. Pairs
    // with a quiet NaN give 0x7e00; pairs with the signalling NaN give 0x7e01.
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
    // 4H forms: the upper 64 bits of the destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10437 
10438 
// Checks the half-precision vector form of Fminnm, in both the 8H and 4H
// arrangements. The inputs match those of the Fmin test; the expectations
// differ only where one operand is a quiet NaN.
TEST(neon_fminnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);  // 2.0
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);  // -infinity
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);  // +infinity
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);  // quiet NaN
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);  // signalling NaN

  __ Fminnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fminnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fminnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fminnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fminnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // With a quiet NaN and 1.0 as operands (v8, v12) the numeric operand is
    // expected. The signalling NaN still yields a NaN, with bit 9 set
    // (0x7c01 becomes 0x7e01).
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // 4H forms: the upper 64 bits of the destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10475 
10476 
// Checks the half-precision vector pairwise form of Fminnmp in the 8H and 4H
// arrangements. Each source register repeats one pair of values.
TEST(neon_fminnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Repeated pairs:
  //   v0: 1.0 (0x3c00) and 2.0 (0x4000)
  //   v1: -infinity (0xfc00) and +infinity (0x7c00)
  //   v2: quiet NaN (0x7e00) and 1.0 (0x3c00)
  //   v3: signalling NaN (0x7c01) and 2.0 (0x4000)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result is expected to come from the first source
    // and the high half from the second. The quiet NaN paired with 1.0 is
    // expected to give 1.0; the signalling NaN pair gives 0x7e01.
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
    // 4H forms: the upper 64 bits of the destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10503 
10504 
// Exercises the scalar pairwise form of Fminnmp on single- and
// double-precision inputs. Each instruction takes a two-lane source vector
// and writes a single scalar.
TEST(neon_fminnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs, packed two to a D register.
  __ Movi(d0, 0x3f80000040000000);  // 1.0 and 2.0
  __ Movi(d1, 0xff8000007f800000);  // -infinity and +infinity
  __ Movi(d2, 0x7fc00000ff800000);  // NaN (0x7fc00000) and -infinity
  __ Fminnmp(s0, v0.V2S());
  __ Fminnmp(s1, v1.V2S());
  __ Fminnmp(s2, v2.V2S());

  // Double-precision pairs:
  //   v3: 1.0 and 2.0
  //   v4: -infinity and +infinity
  //   v5: NaN (0x7ff8000000000000) and -infinity
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fminnmp(d3, v3.V2D());
  __ Fminnmp(d4, v4.V2D());
  __ Fminnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // For the pairs made of a NaN and -infinity, the numeric operand
    // (-infinity) is expected rather than a NaN.
    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10535 
10536 
// Half-precision counterpart of the scalar pairwise Fminnmp test: each
// instruction reads a pair of half-precision lanes and writes one H register.
TEST(neon_fminnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs, packed two to an S register.
  __ Movi(s0, 0x3c004000);  // 1.0 and 2.0
  __ Movi(s1, 0xfc007c00);  // -infinity and +infinity
  __ Movi(s2, 0x7e00fc00);  // NaN (0x7e00) and -infinity
  __ Fminnmp(h0, v0.V2H());
  __ Fminnmp(h1, v1.V2H());
  __ Fminnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // For the NaN and -infinity pair, -infinity is expected rather than a NaN.
    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10559 
Float16ToV4H(Float16 f)10560 static uint64_t Float16ToV4H(Float16 f) {
10561   uint64_t bits = static_cast<uint64_t>(Float16ToRawbits(f));
10562   return (bits << 48) | (bits << 32) | (bits << 16) | bits;
10563 }
10564 
10565 
FminFmaxFloat16Helper(Float16 n, Float16 m, Float16 min, Float16 max, Float16 minnm, Float16 maxnm)10566 static void FminFmaxFloat16Helper(Float16 n,
10567                                   Float16 m,
10568                                   Float16 min,
10569                                   Float16 max,
10570                                   Float16 minnm,
10571                                   Float16 maxnm) {
10572   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
10573                       CPUFeatures::kFP,
10574                       CPUFeatures::kNEONHalf,
10575                       CPUFeatures::kFPHalf);
10576 
10577   START();
10578   __ Fmov(h0, n);
10579   __ Fmov(h1, m);
10580   __ Fmov(v0.V8H(), n);
10581   __ Fmov(v1.V8H(), m);
10582   __ Fmin(h28, h0, h1);
10583   __ Fmin(v2.V4H(), v0.V4H(), v1.V4H());
10584   __ Fmin(v3.V8H(), v0.V8H(), v1.V8H());
10585   __ Fmax(h29, h0, h1);
10586   __ Fmax(v4.V4H(), v0.V4H(), v1.V4H());
10587   __ Fmax(v5.V8H(), v0.V8H(), v1.V8H());
10588   __ Fminnm(h30, h0, h1);
10589   __ Fminnm(v6.V4H(), v0.V4H(), v1.V4H());
10590   __ Fminnm(v7.V8H(), v0.V8H(), v1.V8H());
10591   __ Fmaxnm(h31, h0, h1);
10592   __ Fmaxnm(v8.V4H(), v0.V4H(), v1.V4H());
10593   __ Fmaxnm(v9.V8H(), v0.V8H(), v1.V8H());
10594   END();
10595 
10596   uint64_t min_vec = Float16ToV4H(min);
10597   uint64_t max_vec = Float16ToV4H(max);
10598   uint64_t minnm_vec = Float16ToV4H(minnm);
10599   uint64_t maxnm_vec = Float16ToV4H(maxnm);
10600 
10601   if (CAN_RUN()) {
10602     RUN();
10603 
10604     ASSERT_EQUAL_FP16(min, h28);
10605     ASSERT_EQUAL_FP16(max, h29);
10606     ASSERT_EQUAL_FP16(minnm, h30);
10607     ASSERT_EQUAL_FP16(maxnm, h31);
10608 
10609 
10610     ASSERT_EQUAL_128(0, min_vec, v2);
10611     ASSERT_EQUAL_128(min_vec, min_vec, v3);
10612     ASSERT_EQUAL_128(0, max_vec, v4);
10613     ASSERT_EQUAL_128(max_vec, max_vec, v5);
10614     ASSERT_EQUAL_128(0, minnm_vec, v6);
10615     ASSERT_EQUAL_128(minnm_vec, minnm_vec, v7);
10616     ASSERT_EQUAL_128(0, maxnm_vec, v8);
10617     ASSERT_EQUAL_128(maxnm_vec, maxnm_vec, v9);
10618   }
10619 }
10620 
MinMaxHelper(Float16 n, Float16 m, bool min, Float16 quiet_nan_substitute = Float16(0.0))10621 static Float16 MinMaxHelper(Float16 n,
10622                             Float16 m,
10623                             bool min,
10624                             Float16 quiet_nan_substitute = Float16(0.0)) {
10625   const uint64_t kFP16QuietNaNMask = 0x0200;
10626   uint16_t raw_n = Float16ToRawbits(n);
10627   uint16_t raw_m = Float16ToRawbits(m);
10628 
10629   if (IsSignallingNaN(n)) {
10630     // n is signalling NaN.
10631     return RawbitsToFloat16(raw_n | kFP16QuietNaNMask);
10632   } else if (IsSignallingNaN(m)) {
10633     // m is signalling NaN.
10634     return RawbitsToFloat16(raw_m | kFP16QuietNaNMask);
10635   } else if (IsZero(quiet_nan_substitute)) {
10636     if (IsNaN(n)) {
10637       // n is quiet NaN.
10638       return n;
10639     } else if (IsNaN(m)) {
10640       // m is quiet NaN.
10641       return m;
10642     }
10643   } else {
10644     // Substitute n or m if one is quiet, but not both.
10645     if (IsNaN(n) && !IsNaN(m)) {
10646       // n is quiet NaN: replace with substitute.
10647       n = quiet_nan_substitute;
10648     } else if (!IsNaN(n) && IsNaN(m)) {
10649       // m is quiet NaN: replace with substitute.
10650       m = quiet_nan_substitute;
10651     }
10652   }
10653 
10654   uint16_t sign_mask = 0x8000;
10655   if (IsZero(n) && IsZero(m) && ((raw_n & sign_mask) != (raw_m & sign_mask))) {
10656     return min ? Float16(-0.0) : Float16(0.0);
10657   }
10658 
10659   if (FPToDouble(n, kIgnoreDefaultNaN) < FPToDouble(m, kIgnoreDefaultNaN)) {
10660     return min ? n : m;
10661   }
10662   return min ? m : n;
10663 }
10664 
TEST(fmax_fmin_h)10665 TEST(fmax_fmin_h) {
10666   // Use non-standard NaNs to check that the payload bits are preserved.
10667   Float16 snan = RawbitsToFloat16(0x7c12);
10668   Float16 qnan = RawbitsToFloat16(0x7e34);
10669 
10670   Float16 snan_processed = RawbitsToFloat16(0x7e12);
10671   Float16 qnan_processed = qnan;
10672 
10673   VIXL_ASSERT(IsSignallingNaN(snan));
10674   VIXL_ASSERT(IsQuietNaN(qnan));
10675   VIXL_ASSERT(IsQuietNaN(snan_processed));
10676   VIXL_ASSERT(IsQuietNaN(qnan_processed));
10677 
10678   // Bootstrap tests.
10679   FminFmaxFloat16Helper(Float16(0),
10680                         Float16(0),
10681                         Float16(0),
10682                         Float16(0),
10683                         Float16(0),
10684                         Float16(0));
10685   FminFmaxFloat16Helper(Float16(0),
10686                         Float16(1),
10687                         Float16(0),
10688                         Float16(1),
10689                         Float16(0),
10690                         Float16(1));
10691   FminFmaxFloat16Helper(kFP16PositiveInfinity,
10692                         kFP16NegativeInfinity,
10693                         kFP16NegativeInfinity,
10694                         kFP16PositiveInfinity,
10695                         kFP16NegativeInfinity,
10696                         kFP16PositiveInfinity);
10697   FminFmaxFloat16Helper(snan,
10698                         Float16(0),
10699                         snan_processed,
10700                         snan_processed,
10701                         snan_processed,
10702                         snan_processed);
10703   FminFmaxFloat16Helper(Float16(0),
10704                         snan,
10705                         snan_processed,
10706                         snan_processed,
10707                         snan_processed,
10708                         snan_processed);
10709   FminFmaxFloat16Helper(qnan,
10710                         Float16(0),
10711                         qnan_processed,
10712                         qnan_processed,
10713                         Float16(0),
10714                         Float16(0));
10715   FminFmaxFloat16Helper(Float16(0),
10716                         qnan,
10717                         qnan_processed,
10718                         qnan_processed,
10719                         Float16(0),
10720                         Float16(0));
10721   FminFmaxFloat16Helper(qnan,
10722                         snan,
10723                         snan_processed,
10724                         snan_processed,
10725                         snan_processed,
10726                         snan_processed);
10727   FminFmaxFloat16Helper(snan,
10728                         qnan,
10729                         snan_processed,
10730                         snan_processed,
10731                         snan_processed,
10732                         snan_processed);
10733 
10734   // Iterate over all combinations of inputs.
10735   Float16 inputs[] = {RawbitsToFloat16(0x7bff),
10736                       RawbitsToFloat16(0x0400),
10737                       Float16(1.0),
10738                       Float16(0.0),
10739                       RawbitsToFloat16(0xfbff),
10740                       RawbitsToFloat16(0x8400),
10741                       Float16(-1.0),
10742                       Float16(-0.0),
10743                       kFP16PositiveInfinity,
10744                       kFP16NegativeInfinity,
10745                       kFP16QuietNaN,
10746                       kFP16SignallingNaN};
10747 
10748   const int count = sizeof(inputs) / sizeof(inputs[0]);
10749 
10750   for (int in = 0; in < count; in++) {
10751     Float16 n = inputs[in];
10752     for (int im = 0; im < count; im++) {
10753       Float16 m = inputs[im];
10754       FminFmaxFloat16Helper(n,
10755                             m,
10756                             MinMaxHelper(n, m, true),
10757                             MinMaxHelper(n, m, false),
10758                             MinMaxHelper(n, m, true, kFP16PositiveInfinity),
10759                             MinMaxHelper(n, m, false, kFP16NegativeInfinity));
10760     }
10761   }
10762 }
10763 
// Checks the vector forms of Frint32x, Frint64x, Frint32z and Frint64z in the
// 2S, 4S and 2D arrangements, on small values and on infinities.
TEST(neon_frint_saturating) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kFrintToFixedSizedInt);

  START();

  // Inputs:
  //   v0 (single precision): the 2S forms read the low 64 bits, which hold
  //     1.5 (0x3fc00000) and 1.9 (0x3ff33333).
  //   v1 (single precision): 0.15625, -0.15625, +infinity and -infinity.
  //   v2 (double precision): -infinity and +infinity.
  __ Movi(v0.V2D(), 0x3f8000003f8ccccd, 0x3fc000003ff33333);
  __ Movi(v1.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v2.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Frint32x(v16.V2S(), v0.V2S());
  __ Frint32x(v17.V4S(), v1.V4S());
  __ Frint32x(v18.V2D(), v2.V2D());
  __ Frint64x(v19.V2S(), v0.V2S());
  __ Frint64x(v20.V4S(), v1.V4S());
  __ Frint64x(v21.V2D(), v2.V2D());
  __ Frint32z(v22.V2S(), v0.V2S());
  __ Frint32z(v23.V4S(), v1.V4S());
  __ Frint32z(v24.V2D(), v2.V2D());
  __ Frint64z(v25.V2S(), v0.V2S());
  __ Frint64z(v26.V4S(), v1.V4S());
  __ Frint64z(v27.V2D(), v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // Expected results:
    //   - 1.5 and 1.9 become 2.0 (0x40000000) for the x forms and 1.0
    //     (0x3f800000) for the z forms.
    //   - +/-0.15625 become +/-0.0.
    //   - Infinities of either sign become -2^31 for the 32 forms
    //     (0xcf000000 single, 0xc1e0000000000000 double) and -2^63 for the 64
    //     forms (0xdf000000 single, 0xc3e0000000000000 double).
    ASSERT_EQUAL_128(0x0000000000000000, 0x4000000040000000, q16);
    ASSERT_EQUAL_128(0x0000000080000000, 0xcf000000cf000000, q17);
    ASSERT_EQUAL_128(0xc1e0000000000000, 0xc1e0000000000000, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x4000000040000000, q19);
    ASSERT_EQUAL_128(0x0000000080000000, 0xdf000000df000000, q20);
    ASSERT_EQUAL_128(0xc3e0000000000000, 0xc3e0000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f8000003f800000, q22);
    ASSERT_EQUAL_128(0x0000000080000000, 0xcf000000cf000000, q23);
    ASSERT_EQUAL_128(0xc1e0000000000000, 0xc1e0000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f8000003f800000, q25);
    ASSERT_EQUAL_128(0x0000000080000000, 0xdf000000df000000, q26);
    ASSERT_EQUAL_128(0xc3e0000000000000, 0xc3e0000000000000, q27);
  }
}
10806 
10807 
// Checks Tbl and Tbx with tables of one to four registers, for both 16B and
// 8B destinations. The same tables and index vectors are used for both
// instructions; only the expected handling of out-of-range indices differs.
TEST(neon_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // Table contents. The multi-register tables are built from the register
  // lists {v0, v1}, {v31, v0, v1} and {v30, v31, v0, v1}, so they wrap around
  // from v31 to v0.
  __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
  __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
  __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
  __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);

  // Index vectors, one per table size (v4 for one register up to v7 for
  // four). Each mixes in-range and out-of-range byte indices.
  __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
  __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
  __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
  __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);

  // Pre-fill the Tbl destinations with non-zero data so that bytes written by
  // the instruction can be told apart from bytes left alone.
  __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
  __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
  __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());

  // The Tbx destinations (v16-v23) start with the same data as the
  // corresponding Tbl destinations (v8-v15).
  __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
  __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
  __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Tbl: bytes selected by an out-of-range index are expected to be zero.
    // The 8B forms are expected to match the low half of the 16B results,
    // with the upper 64 bits zero.
    ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
    ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
    ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
    ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);

    // Tbx: bytes selected by an out-of-range index are expected to keep the
    // value the destination held beforehand. The 8B forms are still expected
    // to clear the upper 64 bits.
    ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
    ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
    ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
    ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);
  }
}
10881 
// Checks the vector form of Usdot. It is first compared against Udot and Sdot
// on inputs where the results should agree, then run on a hand-built pattern
// whose dot product is zero only if the signedness of each operand is
// interpreted correctly.
TEST(neon_usdot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kDotProduct,
                      CPUFeatures::kI8MM);

  START();
  // Source bytes: 0xff, 0x7f and 0x80 in every lane.
  __ Movi(v0.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);
  __ Movi(v1.V2D(), 0x7f7f7f7f7f7f7f7f, 0x7f7f7f7f7f7f7f7f);
  __ Movi(v2.V2D(), 0x8080808080808080, 0x8080808080808080);
  // Zero every accumulator (v3-v11) so that each result is just the dot
  // product.
  __ Movi(v3.V2D(), 0, 0);
  __ Mov(q4, q3);
  __ Mov(q5, q3);
  __ Mov(q6, q3);
  __ Mov(q7, q3);
  __ Mov(q8, q3);
  __ Mov(q9, q3);
  __ Mov(q10, q3);
  __ Mov(q11, q3);

  // Test Usdot against Udot/Sdot over the range of inputs where they should be
  // equal.
  // In each comparison v1 (0x7f bytes, the same value whether read as signed
  // or unsigned) is the operand that the two instructions interpret
  // differently. Cmeq leaves all ones in every lane where the results match.
  __ Usdot(v3.V2S(), v0.V8B(), v1.V8B());
  __ Udot(v4.V2S(), v0.V8B(), v1.V8B());
  __ Cmeq(v3.V4S(), v3.V4S(), v4.V4S());
  __ Usdot(v5.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v6.V4S(), v0.V16B(), v1.V16B());
  __ Cmeq(v5.V4S(), v5.V4S(), v6.V4S());

  __ Usdot(v7.V2S(), v1.V8B(), v2.V8B());
  __ Sdot(v8.V2S(), v1.V8B(), v2.V8B());
  __ Cmeq(v7.V4S(), v7.V4S(), v8.V4S());
  __ Usdot(v9.V4S(), v1.V16B(), v2.V16B());
  __ Sdot(v10.V4S(), v1.V16B(), v2.V16B());
  __ Cmeq(v9.V4S(), v9.V4S(), v10.V4S());

  // Construct values which, when interpreted correctly as signed/unsigned,
  // should give a zero result for dot product.
  // (-127 * 2) + (1 * 254) + (-1 * 128) + (64 * 2) = 0.
  __ Mov(w0, 0x8101ff40);  // [-127, 1, -1, 64] as signed bytes.
  __ Mov(w1, 0x02fe8002);  // [2, 254, 128, 2] as unsigned bytes.
  __ Dup(v0.V4S(), w0);
  __ Dup(v1.V4S(), w1);
  // The unsigned bytes (v1) go in the first source and the signed bytes (v0)
  // in the second.
  __ Usdot(v11.V4S(), v1.V16B(), v0.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // All-ones means that the compared results were equal across the whole
    // register.
    ASSERT_EQUAL_128(-1, -1, q3);
    ASSERT_EQUAL_128(-1, -1, q5);
    ASSERT_EQUAL_128(-1, -1, q7);
    ASSERT_EQUAL_128(-1, -1, q9);
    ASSERT_EQUAL_128(0, 0, q11);
  }
}
10937 
// Checks the by-element forms of Usdot and Sudot against the vector form of
// Usdot applied to a register in which the selected element has been
// duplicated into every lane.
TEST(neon_usdot_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kI8MM);

  START();
  __ Movi(v0.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v1.V2D(), 0x4242424242424242, 0x5555aaaaaaaa5555);

  // Test element Usdot against vector variant.
  // v2, v3 and v4 hold elements 0, 1 and 3 of v1, each copied to all S lanes.
  __ Dup(v2.V4S(), v1.V4S(), 0);
  __ Dup(v3.V4S(), v1.V4S(), 1);
  __ Dup(v4.V4S(), v1.V4S(), 3);

  // Every accumulator starts as a copy of v1, so the reference and the
  // by-element result accumulate onto the same initial value. Cmeq then leaves
  // all ones in each lane where the two agree.

  // 2S form, element 0.
  __ Mov(q10, q1);
  __ Usdot(v10.V2S(), v0.V8B(), v2.V8B());
  __ Mov(q11, q1);
  __ Usdot(v11.V2S(), v0.V8B(), v1.S4B(), 0);
  __ Cmeq(v11.V4S(), v11.V4S(), v10.V4S());

  // 4S form, element 1.
  __ Mov(q12, q1);
  __ Usdot(v12.V4S(), v0.V16B(), v3.V16B());
  __ Mov(q13, q1);
  __ Usdot(v13.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Cmeq(v13.V4S(), v13.V4S(), v12.V4S());

  // Sudot by element 3 is compared with vector Usdot with its two sources
  // exchanged.
  __ Mov(q14, q1);
  __ Usdot(v14.V4S(), v4.V16B(), v0.V16B());
  __ Mov(q15, q1);
  __ Sudot(v15.V4S(), v0.V16B(), v1.S4B(), 3);
  __ Cmeq(v15.V4S(), v15.V4S(), v14.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // All-ones means that the by-element result matched the reference across
    // the whole register.
    ASSERT_EQUAL_128(-1, -1, q11);
    ASSERT_EQUAL_128(-1, -1, q13);
    ASSERT_EQUAL_128(-1, -1, q15);
  }
}
10977 
TEST(zero_high_b)10978 TEST(zero_high_b) {
10979   SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kRDM);
10980   START();
10981 
10982   __ Mov(x0, 0x55aa42ffaa42ff55);
10983   __ Mov(x1, 4);
10984   __ Movi(q30.V16B(), 0);
10985 
10986   // Iterate over the SISD instructions using different input values on each
10987   // loop.
10988   Label loop;
10989   __ Bind(&loop);
10990 
10991   __ Dup(q0.V16B(), w0);
10992   __ Ror(x0, x0, 8);
10993   __ Dup(q1.V16B(), w0);
10994   __ Ror(x0, x0, 8);
10995   __ Dup(q2.V16B(), w0);
10996   __ Ror(x0, x0, 8);
10997 
10998   {
10999     ExactAssemblyScope scope(&masm, 81 * kInstructionSize);
11000     __ movi(q9.V16B(), 0x55);
11001     __ dci(0x5e010409);  // mov b9, v0.b[0]
11002     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11003 
11004     __ movi(q9.V16B(), 0x55);
11005     __ dci(0x5e207809);  // sqabs b9, b0
11006     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11007 
11008     __ movi(q9.V16B(), 0x55);
11009     __ dci(0x5e200c29);  // sqadd b9, b1, b0
11010     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11011 
11012     __ movi(q9.V16B(), 0x55);
11013     __ dci(0x7e207809);  // sqneg b9, b0
11014     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11015 
11016     __ movi(q9.V16B(), 0x55);
11017     __ dci(0x7e008429);  // sqrdmlah b9, b1, b0
11018     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11019 
11020     __ movi(q9.V16B(), 0x55);
11021     __ dci(0x7e008c29);  // sqrdmlsh b9, b1, b0
11022     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11023 
11024     __ movi(q9.V16B(), 0x55);
11025     __ dci(0x5e205c29);  // sqrshl b9, b1, b0
11026     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11027 
11028     __ movi(q9.V16B(), 0x55);
11029     __ dci(0x5f089c09);  // sqrshrn b9, h0, #8
11030     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11031 
11032     __ movi(q9.V16B(), 0x55);
11033     __ dci(0x7f088c09);  // sqrshrun b9, h0, #8
11034     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11035 
11036     __ movi(q9.V16B(), 0x55);
11037     __ dci(0x5e204c29);  // sqshl b9, b1, b0
11038     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11039 
11040     __ movi(q9.V16B(), 0x55);
11041     __ dci(0x5f087409);  // sqshl b9, b0, #0
11042     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11043 
11044     __ movi(q9.V16B(), 0x55);
11045     __ dci(0x7f086409);  // sqshlu b9, b0, #0
11046     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11047 
11048     __ movi(q9.V16B(), 0x55);
11049     __ dci(0x5f089409);  // sqshrn b9, h0, #8
11050     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11051 
11052     __ movi(q9.V16B(), 0x55);
11053     __ dci(0x7f088409);  // sqshrun b9, h0, #8
11054     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11055 
11056     __ movi(q9.V16B(), 0x55);
11057     __ dci(0x5e202c29);  // sqsub b9, b1, b0
11058     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11059 
11060     __ movi(q9.V16B(), 0x55);
11061     __ dci(0x5e214809);  // sqxtn b9, h0
11062     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11063 
11064     __ movi(q9.V16B(), 0x55);
11065     __ dci(0x7e212809);  // sqxtun b9, h0
11066     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11067 
11068     __ movi(q9.V16B(), 0x55);
11069     __ dci(0x5e203809);  // suqadd b9, b0
11070     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11071 
11072     __ movi(q9.V16B(), 0x55);
11073     __ dci(0x7e200c29);  // uqadd b9, b1, b0
11074     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11075 
11076     __ movi(q9.V16B(), 0x55);
11077     __ dci(0x7e205c29);  // uqrshl b9, b1, b0
11078     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11079 
11080     __ movi(q9.V16B(), 0x55);
11081     __ dci(0x7f089c09);  // uqrshrn b9, h0, #8
11082     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11083 
11084     __ movi(q9.V16B(), 0x55);
11085     __ dci(0x7e204c29);  // uqshl b9, b1, b0
11086     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11087 
11088     __ movi(q9.V16B(), 0x55);
11089     __ dci(0x7f087409);  // uqshl b9, b0, #0
11090     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11091 
11092     __ movi(q9.V16B(), 0x55);
11093     __ dci(0x7f089409);  // uqshrn b9, h0, #8
11094     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11095 
11096     __ movi(q9.V16B(), 0x55);
11097     __ dci(0x7e202c29);  // uqsub b9, b1, b0
11098     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11099 
11100     __ movi(q9.V16B(), 0x55);
11101     __ dci(0x7e214809);  // uqxtn b9, h0
11102     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11103 
11104     __ movi(q9.V16B(), 0x55);
11105     __ dci(0x7e203809);  // usqadd b9, b0
11106     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11107   }
11108   __ Sub(x1, x1, 1);
11109   __ Cbnz(x1, &loop);
11110 
11111   __ Ins(q30.V16B(), 0, wzr);
11112 
11113   END();
11114   if (CAN_RUN()) {
11115     RUN();
11116     ASSERT_EQUAL_128(0, 0, q30);
11117   }
11118 }
11119 
TEST(zero_high_h)11120 TEST(zero_high_h) {
11121   SETUP_WITH_FEATURES(CPUFeatures::kSVE,
11122                       CPUFeatures::kNEON,
11123                       CPUFeatures::kFP,
11124                       CPUFeatures::kNEONHalf,
11125                       CPUFeatures::kRDM);
11126   START();
11127 
11128   __ Mov(x0, 0x55aa42ffaa42ff55);
11129   __ Mov(x1, 4);
11130   __ Movi(q30.V16B(), 0);
11131 
11132   // Iterate over the SISD instructions using different input values on each
11133   // loop.
11134   Label loop;
11135   __ Bind(&loop);
11136 
11137   __ Dup(q0.V8H(), w0);
11138   __ Ror(x0, x0, 8);
11139   __ Dup(q1.V8H(), w0);
11140   __ Ror(x0, x0, 8);
11141   __ Dup(q2.V8H(), w0);
11142   __ Ror(x0, x0, 8);
11143 
11144   {
11145     ExactAssemblyScope scope(&masm, 225 * kInstructionSize);
11146     __ movi(q9.V16B(), 0x55);
11147     __ dci(0x5e020409);  // mov h9, v0.h[0]
11148     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11149 
11150     __ movi(q9.V16B(), 0x55);
11151     __ dci(0x7ec01429);  // fabd h9, h1, h0
11152     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11153 
11154     __ movi(q9.V16B(), 0x55);
11155     __ dci(0x7e402c29);  // facge h9, h1, h0
11156     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11157 
11158     __ movi(q9.V16B(), 0x55);
11159     __ dci(0x7ec02c29);  // facgt h9, h1, h0
11160     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11161 
11162     __ movi(q9.V16B(), 0x55);
11163     __ dci(0x5e30d809);  // faddp h9, v0.2h
11164     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11165 
11166     __ movi(q9.V16B(), 0x55);
11167     __ dci(0x5ef8d809);  // fcmeq h9, h0, #0.0
11168     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11169 
11170     __ movi(q9.V16B(), 0x55);
11171     __ dci(0x5e402429);  // fcmeq h9, h1, h0
11172     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11173 
11174     __ movi(q9.V16B(), 0x55);
11175     __ dci(0x7ef8c809);  // fcmge h9, h0, #0.0
11176     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11177 
11178     __ movi(q9.V16B(), 0x55);
11179     __ dci(0x7e402429);  // fcmge h9, h1, h0
11180     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11181 
11182     __ movi(q9.V16B(), 0x55);
11183     __ dci(0x5ef8c809);  // fcmgt h9, h0, #0.0
11184     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11185 
11186     __ movi(q9.V16B(), 0x55);
11187     __ dci(0x7ec02429);  // fcmgt h9, h1, h0
11188     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11189 
11190     __ movi(q9.V16B(), 0x55);
11191     __ dci(0x7ef8d809);  // fcmle h9, h0, #0.0
11192     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11193 
11194     __ movi(q9.V16B(), 0x55);
11195     __ dci(0x5ef8e809);  // fcmlt h9, h0, #0.0
11196     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11197 
11198     __ movi(q9.V16B(), 0x55);
11199     __ dci(0x5e79c809);  // fcvtas h9, h0
11200     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11201 
11202     __ movi(q9.V16B(), 0x55);
11203     __ dci(0x7e79c809);  // fcvtau h9, h0
11204     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11205 
11206     __ movi(q9.V16B(), 0x55);
11207     __ dci(0x5e79b809);  // fcvtms h9, h0
11208     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11209 
11210     __ movi(q9.V16B(), 0x55);
11211     __ dci(0x7e79b809);  // fcvtmu h9, h0
11212     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11213 
11214     __ movi(q9.V16B(), 0x55);
11215     __ dci(0x5e79a809);  // fcvtns h9, h0
11216     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11217 
11218     __ movi(q9.V16B(), 0x55);
11219     __ dci(0x7e79a809);  // fcvtnu h9, h0
11220     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11221 
11222     __ movi(q9.V16B(), 0x55);
11223     __ dci(0x5ef9a809);  // fcvtps h9, h0
11224     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11225 
11226     __ movi(q9.V16B(), 0x55);
11227     __ dci(0x7ef9a809);  // fcvtpu h9, h0
11228     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11229 
11230     __ movi(q9.V16B(), 0x55);
11231     __ dci(0x5ef9b809);  // fcvtzs h9, h0
11232     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11233 
11234     __ movi(q9.V16B(), 0x55);
11235     __ dci(0x5f10fc09);  // fcvtzs h9, h0, #16
11236     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11237 
11238     __ movi(q9.V16B(), 0x55);
11239     __ dci(0x7ef9b809);  // fcvtzu h9, h0
11240     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11241 
11242     __ movi(q9.V16B(), 0x55);
11243     __ dci(0x7f10fc09);  // fcvtzu h9, h0, #16
11244     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11245 
11246     __ movi(q9.V16B(), 0x55);
11247     __ dci(0x5e30c809);  // fmaxnmp h9, v0.2h
11248     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11249 
11250     __ movi(q9.V16B(), 0x55);
11251     __ dci(0x5e30f809);  // fmaxp h9, v0.2h
11252     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11253 
11254     __ movi(q9.V16B(), 0x55);
11255     __ dci(0x5eb0c809);  // fminnmp h9, v0.2h
11256     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11257 
11258     __ movi(q9.V16B(), 0x55);
11259     __ dci(0x5eb0f809);  // fminp h9, v0.2h
11260     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11261 
11262     __ movi(q9.V16B(), 0x55);
11263     __ dci(0x5f001029);  // fmla h9, h1, v0.h[0]
11264     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11265 
11266     __ movi(q9.V16B(), 0x55);
11267     __ dci(0x5f005029);  // fmls h9, h1, v0.h[0]
11268     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11269 
11270     __ movi(q9.V16B(), 0x55);
11271     __ dci(0x5f009029);  // fmul h9, h1, v0.h[0]
11272     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11273 
11274     __ movi(q9.V16B(), 0x55);
11275     __ dci(0x7f009029);  // fmulx h9, h1, v0.h[0]
11276     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11277 
11278     __ movi(q9.V16B(), 0x55);
11279     __ dci(0x5e401c29);  // fmulx h9, h1, h0
11280     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11281 
11282     __ movi(q9.V16B(), 0x55);
11283     __ dci(0x5ef9d809);  // frecpe h9, h0
11284     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11285 
11286     __ movi(q9.V16B(), 0x55);
11287     __ dci(0x5e403c29);  // frecps h9, h1, h0
11288     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11289 
11290     __ movi(q9.V16B(), 0x55);
11291     __ dci(0x5ef9f809);  // frecpx h9, h0
11292     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11293 
11294     __ movi(q9.V16B(), 0x55);
11295     __ dci(0x7ef9d809);  // frsqrte h9, h0
11296     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11297 
11298     __ movi(q9.V16B(), 0x55);
11299     __ dci(0x5ec03c29);  // frsqrts h9, h1, h0
11300     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11301 
11302     __ movi(q9.V16B(), 0x55);
11303     __ dci(0x5e79d809);  // scvtf h9, h0
11304     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11305 
11306     __ movi(q9.V16B(), 0x55);
11307     __ dci(0x5f10e409);  // scvtf h9, h0, #16
11308     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11309 
11310     __ movi(q9.V16B(), 0x55);
11311     __ dci(0x5e607809);  // sqabs h9, h0
11312     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11313 
11314     __ movi(q9.V16B(), 0x55);
11315     __ dci(0x5e600c29);  // sqadd h9, h1, h0
11316     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11317 
11318     __ movi(q9.V16B(), 0x55);
11319     __ dci(0x5f40c029);  // sqdmulh h9, h1, v0.h[0]
11320     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11321 
11322     __ movi(q9.V16B(), 0x55);
11323     __ dci(0x5e60b429);  // sqdmulh h9, h1, h0
11324     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11325 
11326     __ movi(q9.V16B(), 0x55);
11327     __ dci(0x7e607809);  // sqneg h9, h0
11328     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11329 
11330     __ movi(q9.V16B(), 0x55);
11331     __ dci(0x7f40d029);  // sqrdmlah h9, h1, v0.h[0]
11332     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11333 
11334     __ movi(q9.V16B(), 0x55);
11335     __ dci(0x7e408429);  // sqrdmlah h9, h1, h0
11336     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11337 
11338     __ movi(q9.V16B(), 0x55);
11339     __ dci(0x7f40f029);  // sqrdmlsh h9, h1, v0.h[0]
11340     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11341 
11342     __ movi(q9.V16B(), 0x55);
11343     __ dci(0x7e408c29);  // sqrdmlsh h9, h1, h0
11344     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11345 
11346     __ movi(q9.V16B(), 0x55);
11347     __ dci(0x5f40d029);  // sqrdmulh h9, h1, v0.h[0]
11348     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11349 
11350     __ movi(q9.V16B(), 0x55);
11351     __ dci(0x7e60b429);  // sqrdmulh h9, h1, h0
11352     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11353 
11354     __ movi(q9.V16B(), 0x55);
11355     __ dci(0x5e605c29);  // sqrshl h9, h1, h0
11356     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11357 
11358     __ movi(q9.V16B(), 0x55);
11359     __ dci(0x5f109c09);  // sqrshrn h9, s0, #16
11360     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11361 
11362     __ movi(q9.V16B(), 0x55);
11363     __ dci(0x7f108c09);  // sqrshrun h9, s0, #16
11364     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11365 
11366     __ movi(q9.V16B(), 0x55);
11367     __ dci(0x5e604c29);  // sqshl h9, h1, h0
11368     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11369 
11370     __ movi(q9.V16B(), 0x55);
11371     __ dci(0x5f107409);  // sqshl h9, h0, #0
11372     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11373 
11374     __ movi(q9.V16B(), 0x55);
11375     __ dci(0x7f106409);  // sqshlu h9, h0, #0
11376     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11377 
11378     __ movi(q9.V16B(), 0x55);
11379     __ dci(0x5f109409);  // sqshrn h9, s0, #16
11380     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11381 
11382     __ movi(q9.V16B(), 0x55);
11383     __ dci(0x7f108409);  // sqshrun h9, s0, #16
11384     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11385 
11386     __ movi(q9.V16B(), 0x55);
11387     __ dci(0x5e602c29);  // sqsub h9, h1, h0
11388     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11389 
11390     __ movi(q9.V16B(), 0x55);
11391     __ dci(0x5e614809);  // sqxtn h9, s0
11392     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11393 
11394     __ movi(q9.V16B(), 0x55);
11395     __ dci(0x7e612809);  // sqxtun h9, s0
11396     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11397 
11398     __ movi(q9.V16B(), 0x55);
11399     __ dci(0x5e603809);  // suqadd h9, h0
11400     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11401 
11402     __ movi(q9.V16B(), 0x55);
11403     __ dci(0x7e79d809);  // ucvtf h9, h0
11404     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11405 
11406     __ movi(q9.V16B(), 0x55);
11407     __ dci(0x7f10e409);  // ucvtf h9, h0, #16
11408     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11409 
11410     __ movi(q9.V16B(), 0x55);
11411     __ dci(0x7e600c29);  // uqadd h9, h1, h0
11412     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11413 
11414     __ movi(q9.V16B(), 0x55);
11415     __ dci(0x7e605c29);  // uqrshl h9, h1, h0
11416     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11417 
11418     __ movi(q9.V16B(), 0x55);
11419     __ dci(0x7f109c09);  // uqrshrn h9, s0, #16
11420     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11421 
11422     __ movi(q9.V16B(), 0x55);
11423     __ dci(0x7e604c29);  // uqshl h9, h1, h0
11424     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11425 
11426     __ movi(q9.V16B(), 0x55);
11427     __ dci(0x7f107409);  // uqshl h9, h0, #0
11428     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11429 
11430     __ movi(q9.V16B(), 0x55);
11431     __ dci(0x7f109409);  // uqshrn h9, s0, #16
11432     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11433 
11434     __ movi(q9.V16B(), 0x55);
11435     __ dci(0x7e602c29);  // uqsub h9, h1, h0
11436     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11437 
11438     __ movi(q9.V16B(), 0x55);
11439     __ dci(0x7e614809);  // uqxtn h9, s0
11440     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11441 
11442     __ movi(q9.V16B(), 0x55);
11443     __ dci(0x7e603809);  // usqadd h9, h0
11444     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11445   }
11446   __ Sub(x1, x1, 1);
11447   __ Cbnz(x1, &loop);
11448 
11449   __ Ins(q30.V8H(), 0, wzr);
11450 
11451   END();
11452   if (CAN_RUN()) {
11453     RUN();
11454     ASSERT_EQUAL_128(0, 0, q30);
11455   }
11456 }
11457 
TEST(zero_high_s)11458 TEST(zero_high_s) {
11459   SETUP_WITH_FEATURES(CPUFeatures::kSVE,
11460                       CPUFeatures::kNEON,
11461                       CPUFeatures::kFP,
11462                       CPUFeatures::kRDM);
11463   START();
11464 
11465   __ Mov(x0, 0x55aa42ffaa42ff55);
11466   __ Mov(x1, 4);
11467   __ Movi(q30.V16B(), 0);
11468 
11469   // Iterate over the SISD instructions using different input values on each
11470   // loop.
11471   Label loop;
11472   __ Bind(&loop);
11473 
11474   __ Dup(q0.V4S(), w0);
11475   __ Ror(x0, x0, 8);
11476   __ Dup(q1.V4S(), w0);
11477   __ Ror(x0, x0, 8);
11478   __ Dup(q2.V4S(), w0);
11479   __ Ror(x0, x0, 8);
11480 
11481   {
11482     ExactAssemblyScope scope(&masm, 246 * kInstructionSize);
11483     __ movi(q9.V16B(), 0x55);
11484     __ dci(0x5e040409);  // mov s9, v0.s[0]
11485     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11486 
11487     __ movi(q9.V16B(), 0x55);
11488     __ dci(0x7ea0d429);  // fabd s9, s1, s0
11489     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11490 
11491     __ movi(q9.V16B(), 0x55);
11492     __ dci(0x7e20ec29);  // facge s9, s1, s0
11493     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11494 
11495     __ movi(q9.V16B(), 0x55);
11496     __ dci(0x7ea0ec29);  // facgt s9, s1, s0
11497     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11498 
11499     __ movi(q9.V16B(), 0x55);
11500     __ dci(0x7e30d809);  // faddp s9, v0.2s
11501     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11502 
11503     __ movi(q9.V16B(), 0x55);
11504     __ dci(0x5ea0d809);  // fcmeq s9, s0, #0.0
11505     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11506 
11507     __ movi(q9.V16B(), 0x55);
11508     __ dci(0x5e20e429);  // fcmeq s9, s1, s0
11509     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11510 
11511     __ movi(q9.V16B(), 0x55);
11512     __ dci(0x7ea0c809);  // fcmge s9, s0, #0.0
11513     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11514 
11515     __ movi(q9.V16B(), 0x55);
11516     __ dci(0x7e20e429);  // fcmge s9, s1, s0
11517     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11518 
11519     __ movi(q9.V16B(), 0x55);
11520     __ dci(0x5ea0c809);  // fcmgt s9, s0, #0.0
11521     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11522 
11523     __ movi(q9.V16B(), 0x55);
11524     __ dci(0x7ea0e429);  // fcmgt s9, s1, s0
11525     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11526 
11527     __ movi(q9.V16B(), 0x55);
11528     __ dci(0x7ea0d809);  // fcmle s9, s0, #0.0
11529     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11530 
11531     __ movi(q9.V16B(), 0x55);
11532     __ dci(0x5ea0e809);  // fcmlt s9, s0, #0.0
11533     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11534 
11535     __ movi(q9.V16B(), 0x55);
11536     __ dci(0x5e21c809);  // fcvtas s9, s0
11537     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11538 
11539     __ movi(q9.V16B(), 0x55);
11540     __ dci(0x7e21c809);  // fcvtau s9, s0
11541     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11542 
11543     __ movi(q9.V16B(), 0x55);
11544     __ dci(0x5e21b809);  // fcvtms s9, s0
11545     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11546 
11547     __ movi(q9.V16B(), 0x55);
11548     __ dci(0x7e21b809);  // fcvtmu s9, s0
11549     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11550 
11551     __ movi(q9.V16B(), 0x55);
11552     __ dci(0x5e21a809);  // fcvtns s9, s0
11553     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11554 
11555     __ movi(q9.V16B(), 0x55);
11556     __ dci(0x7e21a809);  // fcvtnu s9, s0
11557     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11558 
11559     __ movi(q9.V16B(), 0x55);
11560     __ dci(0x5ea1a809);  // fcvtps s9, s0
11561     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11562 
11563     __ movi(q9.V16B(), 0x55);
11564     __ dci(0x7ea1a809);  // fcvtpu s9, s0
11565     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11566 
11567     __ movi(q9.V16B(), 0x55);
11568     __ dci(0x7e616809);  // fcvtxn s9, d0
11569     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11570 
11571     __ movi(q9.V16B(), 0x55);
11572     __ dci(0x5ea1b809);  // fcvtzs s9, s0
11573     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11574 
11575     __ movi(q9.V16B(), 0x55);
11576     __ dci(0x5f20fc09);  // fcvtzs s9, s0, #32
11577     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11578 
11579     __ movi(q9.V16B(), 0x55);
11580     __ dci(0x7ea1b809);  // fcvtzu s9, s0
11581     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11582 
11583     __ movi(q9.V16B(), 0x55);
11584     __ dci(0x7f20fc09);  // fcvtzu s9, s0, #32
11585     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11586 
11587     __ movi(q9.V16B(), 0x55);
11588     __ dci(0x7e30c809);  // fmaxnmp s9, v0.2s
11589     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11590 
11591     __ movi(q9.V16B(), 0x55);
11592     __ dci(0x7e30f809);  // fmaxp s9, v0.2s
11593     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11594 
11595     __ movi(q9.V16B(), 0x55);
11596     __ dci(0x7eb0c809);  // fminnmp s9, v0.2s
11597     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11598 
11599     __ movi(q9.V16B(), 0x55);
11600     __ dci(0x7eb0f809);  // fminp s9, v0.2s
11601     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11602 
11603     __ movi(q9.V16B(), 0x55);
11604     __ dci(0x5f801029);  // fmla s9, s1, v0.s[0]
11605     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11606 
11607     __ movi(q9.V16B(), 0x55);
11608     __ dci(0x5f805029);  // fmls s9, s1, v0.s[0]
11609     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11610 
11611     __ movi(q9.V16B(), 0x55);
11612     __ dci(0x5f809029);  // fmul s9, s1, v0.s[0]
11613     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11614 
11615     __ movi(q9.V16B(), 0x55);
11616     __ dci(0x7f809029);  // fmulx s9, s1, v0.s[0]
11617     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11618 
11619     __ movi(q9.V16B(), 0x55);
11620     __ dci(0x5e20dc29);  // fmulx s9, s1, s0
11621     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11622 
11623     __ movi(q9.V16B(), 0x55);
11624     __ dci(0x5ea1d809);  // frecpe s9, s0
11625     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11626 
11627     __ movi(q9.V16B(), 0x55);
11628     __ dci(0x5e20fc29);  // frecps s9, s1, s0
11629     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11630 
11631     __ movi(q9.V16B(), 0x55);
11632     __ dci(0x5ea1f809);  // frecpx s9, s0
11633     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11634 
11635     __ movi(q9.V16B(), 0x55);
11636     __ dci(0x7ea1d809);  // frsqrte s9, s0
11637     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11638 
11639     __ movi(q9.V16B(), 0x55);
11640     __ dci(0x5ea0fc29);  // frsqrts s9, s1, s0
11641     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11642 
11643     __ movi(q9.V16B(), 0x55);
11644     __ dci(0x5e21d809);  // scvtf s9, s0
11645     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11646 
11647     __ movi(q9.V16B(), 0x55);
11648     __ dci(0x5f20e409);  // scvtf s9, s0, #32
11649     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11650 
11651     __ movi(q9.V16B(), 0x55);
11652     __ dci(0x5ea07809);  // sqabs s9, s0
11653     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11654 
11655     __ movi(q9.V16B(), 0x55);
11656     __ dci(0x5ea00c29);  // sqadd s9, s1, s0
11657     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11658 
11659     __ movi(q9.V16B(), 0x55);
11660     __ dci(0x5e609029);  // sqdmlal s9, h1, h0
11661     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11662 
11663     __ movi(q9.V16B(), 0x55);
11664     __ dci(0x5f403029);  // sqdmlal s9, h1, v0.h[0]
11665     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11666 
11667     __ movi(q9.V16B(), 0x55);
11668     __ dci(0x5e60b029);  // sqdmlsl s9, h1, h0
11669     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11670 
11671     __ movi(q9.V16B(), 0x55);
11672     __ dci(0x5f407029);  // sqdmlsl s9, h1, v0.h[0]
11673     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11674 
11675     __ movi(q9.V16B(), 0x55);
11676     __ dci(0x5f80c029);  // sqdmulh s9, s1, v0.s[0]
11677     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11678 
11679     __ movi(q9.V16B(), 0x55);
11680     __ dci(0x5ea0b429);  // sqdmulh s9, s1, s0
11681     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11682 
11683     __ movi(q9.V16B(), 0x55);
11684     __ dci(0x5e60d029);  // sqdmull s9, h1, h0
11685     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11686 
11687     __ movi(q9.V16B(), 0x55);
11688     __ dci(0x5f40b029);  // sqdmull s9, h1, v0.h[0]
11689     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11690 
11691     __ movi(q9.V16B(), 0x55);
11692     __ dci(0x7ea07809);  // sqneg s9, s0
11693     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11694 
11695     __ movi(q9.V16B(), 0x55);
11696     __ dci(0x7f80d029);  // sqrdmlah s9, s1, v0.s[0]
11697     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11698 
11699     __ movi(q9.V16B(), 0x55);
11700     __ dci(0x7e808429);  // sqrdmlah s9, s1, s0
11701     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11702 
11703     __ movi(q9.V16B(), 0x55);
11704     __ dci(0x7f80f029);  // sqrdmlsh s9, s1, v0.s[0]
11705     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11706 
11707     __ movi(q9.V16B(), 0x55);
11708     __ dci(0x7e808c29);  // sqrdmlsh s9, s1, s0
11709     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11710 
11711     __ movi(q9.V16B(), 0x55);
11712     __ dci(0x5f80d029);  // sqrdmulh s9, s1, v0.s[0]
11713     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11714 
11715     __ movi(q9.V16B(), 0x55);
11716     __ dci(0x7ea0b429);  // sqrdmulh s9, s1, s0
11717     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11718 
11719     __ movi(q9.V16B(), 0x55);
11720     __ dci(0x5ea05c29);  // sqrshl s9, s1, s0
11721     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11722 
11723     __ movi(q9.V16B(), 0x55);
11724     __ dci(0x5f209c09);  // sqrshrn s9, d0, #32
11725     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11726 
11727     __ movi(q9.V16B(), 0x55);
11728     __ dci(0x7f208c09);  // sqrshrun s9, d0, #32
11729     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11730 
11731     __ movi(q9.V16B(), 0x55);
11732     __ dci(0x5ea04c29);  // sqshl s9, s1, s0
11733     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11734 
11735     __ movi(q9.V16B(), 0x55);
11736     __ dci(0x5f207409);  // sqshl s9, s0, #0
11737     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11738 
11739     __ movi(q9.V16B(), 0x55);
11740     __ dci(0x7f206409);  // sqshlu s9, s0, #0
11741     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11742 
11743     __ movi(q9.V16B(), 0x55);
11744     __ dci(0x5f209409);  // sqshrn s9, d0, #32
11745     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11746 
11747     __ movi(q9.V16B(), 0x55);
11748     __ dci(0x7f208409);  // sqshrun s9, d0, #32
11749     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11750 
11751     __ movi(q9.V16B(), 0x55);
11752     __ dci(0x5ea02c29);  // sqsub s9, s1, s0
11753     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11754 
11755     __ movi(q9.V16B(), 0x55);
11756     __ dci(0x5ea14809);  // sqxtn s9, d0
11757     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11758 
11759     __ movi(q9.V16B(), 0x55);
11760     __ dci(0x7ea12809);  // sqxtun s9, d0
11761     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11762 
11763     __ movi(q9.V16B(), 0x55);
11764     __ dci(0x5ea03809);  // suqadd s9, s0
11765     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11766 
11767     __ movi(q9.V16B(), 0x55);
11768     __ dci(0x7e21d809);  // ucvtf s9, s0
11769     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11770 
11771     __ movi(q9.V16B(), 0x55);
11772     __ dci(0x7f20e409);  // ucvtf s9, s0, #32
11773     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11774 
11775     __ movi(q9.V16B(), 0x55);
11776     __ dci(0x7ea00c29);  // uqadd s9, s1, s0
11777     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11778 
11779     __ movi(q9.V16B(), 0x55);
11780     __ dci(0x7ea05c29);  // uqrshl s9, s1, s0
11781     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11782 
11783     __ movi(q9.V16B(), 0x55);
11784     __ dci(0x7f209c09);  // uqrshrn s9, d0, #32
11785     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11786 
11787     __ movi(q9.V16B(), 0x55);
11788     __ dci(0x7ea04c29);  // uqshl s9, s1, s0
11789     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11790 
11791     __ movi(q9.V16B(), 0x55);
11792     __ dci(0x7f207409);  // uqshl s9, s0, #0
11793     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11794 
11795     __ movi(q9.V16B(), 0x55);
11796     __ dci(0x7f209409);  // uqshrn s9, d0, #32
11797     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11798 
11799     __ movi(q9.V16B(), 0x55);
11800     __ dci(0x7ea02c29);  // uqsub s9, s1, s0
11801     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11802 
11803     __ movi(q9.V16B(), 0x55);
11804     __ dci(0x7ea14809);  // uqxtn s9, d0
11805     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11806 
11807     __ movi(q9.V16B(), 0x55);
11808     __ dci(0x7ea03809);  // usqadd s9, s0
11809     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11810   }
11811   __ Sub(x1, x1, 1);
11812   __ Cbnz(x1, &loop);
11813 
11814   __ Ins(q30.V4S(), 0, wzr);
11815 
11816   END();
11817   if (CAN_RUN()) {
11818     RUN();
11819     ASSERT_EQUAL_128(0, 0, q30);
11820   }
11821 }
11822 
TEST(zero_high_d)11823 TEST(zero_high_d) {
11824   SETUP_WITH_FEATURES(CPUFeatures::kSVE,
11825                       CPUFeatures::kNEON,
11826                       CPUFeatures::kFP,
11827                       CPUFeatures::kRDM);
11828   START();
11829 
11830   __ Mov(x0, 0x55aa42ffaa42ff55);
11831   __ Mov(x1, 4);
11832   __ Movi(q30.V16B(), 0);
11833 
11834   // Iterate over the SISD instructions using different input values on each
11835   // loop.
11836   Label loop;
11837   __ Bind(&loop);
11838 
11839   __ Dup(q0.V2D(), x0);
11840   __ Ror(x0, x0, 8);
11841   __ Dup(q1.V2D(), x0);
11842   __ Ror(x0, x0, 8);
11843   __ Dup(q2.V2D(), x0);
11844   __ Ror(x0, x0, 8);
11845 
11846   {
11847     ExactAssemblyScope scope(&masm, 291 * kInstructionSize);
11848     __ movi(q9.V16B(), 0x55);
11849     __ dci(0x5ee0b809);  // abs d9, d0
11850     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11851 
11852     __ movi(q9.V16B(), 0x55);
11853     __ dci(0x5ee08429);  // add d9, d1, d0
11854     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11855 
11856     __ movi(q9.V16B(), 0x55);
11857     __ dci(0x5ef1b809);  // addp d9, v0.2d
11858     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11859 
11860     __ movi(q9.V16B(), 0x55);
11861     __ dci(0x5ee09809);  // cmeq d9, d0, #0
11862     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11863 
11864     __ movi(q9.V16B(), 0x55);
11865     __ dci(0x7ee08c29);  // cmeq d9, d1, d0
11866     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11867 
11868     __ movi(q9.V16B(), 0x55);
11869     __ dci(0x7ee08809);  // cmge d9, d0, #0
11870     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11871 
11872     __ movi(q9.V16B(), 0x55);
11873     __ dci(0x5ee03c29);  // cmge d9, d1, d0
11874     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11875 
11876     __ movi(q9.V16B(), 0x55);
11877     __ dci(0x5ee08809);  // cmgt d9, d0, #0
11878     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11879 
11880     __ movi(q9.V16B(), 0x55);
11881     __ dci(0x5ee03429);  // cmgt d9, d1, d0
11882     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11883 
11884     __ movi(q9.V16B(), 0x55);
11885     __ dci(0x7ee03429);  // cmhi d9, d1, d0
11886     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11887 
11888     __ movi(q9.V16B(), 0x55);
11889     __ dci(0x7ee03c29);  // cmhs d9, d1, d0
11890     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11891 
11892     __ movi(q9.V16B(), 0x55);
11893     __ dci(0x7ee09809);  // cmle d9, d0, #0
11894     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11895 
11896     __ movi(q9.V16B(), 0x55);
11897     __ dci(0x5ee0a809);  // cmlt d9, d0, #0
11898     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11899 
11900     __ movi(q9.V16B(), 0x55);
11901     __ dci(0x5ee08c29);  // cmtst d9, d1, d0
11902     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11903 
11904     __ movi(q9.V16B(), 0x55);
11905     __ dci(0x5e080409);  // mov d9, v0.d[0]
11906     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11907 
11908     __ movi(q9.V16B(), 0x55);
11909     __ dci(0x7ee0d429);  // fabd d9, d1, d0
11910     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11911 
11912     __ movi(q9.V16B(), 0x55);
11913     __ dci(0x7e60ec29);  // facge d9, d1, d0
11914     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11915 
11916     __ movi(q9.V16B(), 0x55);
11917     __ dci(0x7ee0ec29);  // facgt d9, d1, d0
11918     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11919 
11920     __ movi(q9.V16B(), 0x55);
11921     __ dci(0x7e70d809);  // faddp d9, v0.2d
11922     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11923 
11924     __ movi(q9.V16B(), 0x55);
11925     __ dci(0x5ee0d809);  // fcmeq d9, d0, #0.0
11926     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11927 
11928     __ movi(q9.V16B(), 0x55);
11929     __ dci(0x5e60e429);  // fcmeq d9, d1, d0
11930     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11931 
11932     __ movi(q9.V16B(), 0x55);
11933     __ dci(0x7ee0c809);  // fcmge d9, d0, #0.0
11934     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11935 
11936     __ movi(q9.V16B(), 0x55);
11937     __ dci(0x7e60e429);  // fcmge d9, d1, d0
11938     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11939 
11940     __ movi(q9.V16B(), 0x55);
11941     __ dci(0x5ee0c809);  // fcmgt d9, d0, #0.0
11942     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11943 
11944     __ movi(q9.V16B(), 0x55);
11945     __ dci(0x7ee0e429);  // fcmgt d9, d1, d0
11946     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11947 
11948     __ movi(q9.V16B(), 0x55);
11949     __ dci(0x7ee0d809);  // fcmle d9, d0, #0.0
11950     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11951 
11952     __ movi(q9.V16B(), 0x55);
11953     __ dci(0x5ee0e809);  // fcmlt d9, d0, #0.0
11954     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11955 
11956     __ movi(q9.V16B(), 0x55);
11957     __ dci(0x5e61c809);  // fcvtas d9, d0
11958     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11959 
11960     __ movi(q9.V16B(), 0x55);
11961     __ dci(0x7e61c809);  // fcvtau d9, d0
11962     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11963 
11964     __ movi(q9.V16B(), 0x55);
11965     __ dci(0x5e61b809);  // fcvtms d9, d0
11966     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11967 
11968     __ movi(q9.V16B(), 0x55);
11969     __ dci(0x7e61b809);  // fcvtmu d9, d0
11970     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11971 
11972     __ movi(q9.V16B(), 0x55);
11973     __ dci(0x5e61a809);  // fcvtns d9, d0
11974     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11975 
11976     __ movi(q9.V16B(), 0x55);
11977     __ dci(0x7e61a809);  // fcvtnu d9, d0
11978     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11979 
11980     __ movi(q9.V16B(), 0x55);
11981     __ dci(0x5ee1a809);  // fcvtps d9, d0
11982     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11983 
11984     __ movi(q9.V16B(), 0x55);
11985     __ dci(0x7ee1a809);  // fcvtpu d9, d0
11986     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11987 
11988     __ movi(q9.V16B(), 0x55);
11989     __ dci(0x5ee1b809);  // fcvtzs d9, d0
11990     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11991 
11992     __ movi(q9.V16B(), 0x55);
11993     __ dci(0x5f40fc09);  // fcvtzs d9, d0, #64
11994     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11995 
11996     __ movi(q9.V16B(), 0x55);
11997     __ dci(0x7ee1b809);  // fcvtzu d9, d0
11998     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11999 
12000     __ movi(q9.V16B(), 0x55);
12001     __ dci(0x7f40fc09);  // fcvtzu d9, d0, #64
12002     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12003 
12004     __ movi(q9.V16B(), 0x55);
12005     __ dci(0x7e70c809);  // fmaxnmp d9, v0.2d
12006     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12007 
12008     __ movi(q9.V16B(), 0x55);
12009     __ dci(0x7e70f809);  // fmaxp d9, v0.2d
12010     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12011 
12012     __ movi(q9.V16B(), 0x55);
12013     __ dci(0x7ef0c809);  // fminnmp d9, v0.2d
12014     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12015 
12016     __ movi(q9.V16B(), 0x55);
12017     __ dci(0x7ef0f809);  // fminp d9, v0.2d
12018     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12019 
12020     __ movi(q9.V16B(), 0x55);
12021     __ dci(0x5fc01029);  // fmla d9, d1, v0.d[0]
12022     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12023 
12024     __ movi(q9.V16B(), 0x55);
12025     __ dci(0x5fc05029);  // fmls d9, d1, v0.d[0]
12026     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12027 
12028     __ movi(q9.V16B(), 0x55);
12029     __ dci(0x5fc09029);  // fmul d9, d1, v0.d[0]
12030     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12031 
12032     __ movi(q9.V16B(), 0x55);
12033     __ dci(0x7fc09029);  // fmulx d9, d1, v0.d[0]
12034     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12035 
12036     __ movi(q9.V16B(), 0x55);
12037     __ dci(0x5e60dc29);  // fmulx d9, d1, d0
12038     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12039 
12040     __ movi(q9.V16B(), 0x55);
12041     __ dci(0x5ee1d809);  // frecpe d9, d0
12042     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12043 
12044     __ movi(q9.V16B(), 0x55);
12045     __ dci(0x5e60fc29);  // frecps d9, d1, d0
12046     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12047 
12048     __ movi(q9.V16B(), 0x55);
12049     __ dci(0x5ee1f809);  // frecpx d9, d0
12050     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12051 
12052     __ movi(q9.V16B(), 0x55);
12053     __ dci(0x7ee1d809);  // frsqrte d9, d0
12054     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12055 
12056     __ movi(q9.V16B(), 0x55);
12057     __ dci(0x5ee0fc29);  // frsqrts d9, d1, d0
12058     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12059 
12060     __ movi(q9.V16B(), 0x55);
12061     __ dci(0x7ee0b809);  // neg d9, d0
12062     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12063 
12064     __ movi(q9.V16B(), 0x55);
12065     __ dci(0x5e61d809);  // scvtf d9, d0
12066     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12067 
12068     __ movi(q9.V16B(), 0x55);
12069     __ dci(0x5f40e409);  // scvtf d9, d0, #64
12070     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12071 
12072     __ movi(q9.V16B(), 0x55);
12073     __ dci(0x5f405409);  // shl d9, d0, #0
12074     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12075 
12076     __ movi(q9.V16B(), 0x55);
12077     __ dci(0x7f405409);  // sli d9, d0, #0
12078     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12079 
12080     __ movi(q9.V16B(), 0x55);
12081     __ dci(0x5ee07809);  // sqabs d9, d0
12082     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12083 
12084     __ movi(q9.V16B(), 0x55);
12085     __ dci(0x5ee00c29);  // sqadd d9, d1, d0
12086     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12087 
12088     __ movi(q9.V16B(), 0x55);
12089     __ dci(0x5ea09029);  // sqdmlal d9, s1, s0
12090     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12091 
12092     __ movi(q9.V16B(), 0x55);
12093     __ dci(0x5f803029);  // sqdmlal d9, s1, v0.s[0]
12094     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12095 
12096     __ movi(q9.V16B(), 0x55);
12097     __ dci(0x5ea0b029);  // sqdmlsl d9, s1, s0
12098     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12099 
12100     __ movi(q9.V16B(), 0x55);
12101     __ dci(0x5f807029);  // sqdmlsl d9, s1, v0.s[0]
12102     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12103 
12104     __ movi(q9.V16B(), 0x55);
12105     __ dci(0x5ea0d029);  // sqdmull d9, s1, s0
12106     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12107 
12108     __ movi(q9.V16B(), 0x55);
12109     __ dci(0x5f80b029);  // sqdmull d9, s1, v0.s[0]
12110     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12111 
12112     __ movi(q9.V16B(), 0x55);
12113     __ dci(0x7ee07809);  // sqneg d9, d0
12114     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12115 
12116     __ movi(q9.V16B(), 0x55);
12117     __ dci(0x7ec08429);  // sqrdmlah d9, d1, d0
12118     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12119 
12120     __ movi(q9.V16B(), 0x55);
12121     __ dci(0x7ec08c29);  // sqrdmlsh d9, d1, d0
12122     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12123 
12124     __ movi(q9.V16B(), 0x55);
12125     __ dci(0x5ee05c29);  // sqrshl d9, d1, d0
12126     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12127 
12128     __ movi(q9.V16B(), 0x55);
12129     __ dci(0x5ee04c29);  // sqshl d9, d1, d0
12130     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12131 
12132     __ movi(q9.V16B(), 0x55);
12133     __ dci(0x5f407409);  // sqshl d9, d0, #0
12134     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12135 
12136     __ movi(q9.V16B(), 0x55);
12137     __ dci(0x7f406409);  // sqshlu d9, d0, #0
12138     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12139 
12140     __ movi(q9.V16B(), 0x55);
12141     __ dci(0x5ee02c29);  // sqsub d9, d1, d0
12142     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12143 
12144     __ movi(q9.V16B(), 0x55);
12145     __ dci(0x7f404409);  // sri d9, d0, #64
12146     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12147 
12148     __ movi(q9.V16B(), 0x55);
12149     __ dci(0x5ee05429);  // srshl d9, d1, d0
12150     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12151 
12152     __ movi(q9.V16B(), 0x55);
12153     __ dci(0x5f402409);  // srshr d9, d0, #64
12154     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12155 
12156     __ movi(q9.V16B(), 0x55);
12157     __ dci(0x5f403409);  // srsra d9, d0, #64
12158     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12159 
12160     __ movi(q9.V16B(), 0x55);
12161     __ dci(0x5ee04429);  // sshl d9, d1, d0
12162     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12163 
12164     __ movi(q9.V16B(), 0x55);
12165     __ dci(0x5f400409);  // sshr d9, d0, #64
12166     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12167 
12168     __ movi(q9.V16B(), 0x55);
12169     __ dci(0x5f401409);  // ssra d9, d0, #64
12170     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12171 
12172     __ movi(q9.V16B(), 0x55);
12173     __ dci(0x7ee08429);  // sub d9, d1, d0
12174     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12175 
12176     __ movi(q9.V16B(), 0x55);
12177     __ dci(0x5ee03809);  // suqadd d9, d0
12178     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12179 
12180     __ movi(q9.V16B(), 0x55);
12181     __ dci(0x7e61d809);  // ucvtf d9, d0
12182     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12183 
12184     __ movi(q9.V16B(), 0x55);
12185     __ dci(0x7f40e409);  // ucvtf d9, d0, #64
12186     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12187 
12188     __ movi(q9.V16B(), 0x55);
12189     __ dci(0x7ee00c29);  // uqadd d9, d1, d0
12190     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12191 
12192     __ movi(q9.V16B(), 0x55);
12193     __ dci(0x7ee05c29);  // uqrshl d9, d1, d0
12194     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12195 
12196     __ movi(q9.V16B(), 0x55);
12197     __ dci(0x7ee04c29);  // uqshl d9, d1, d0
12198     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12199 
12200     __ movi(q9.V16B(), 0x55);
12201     __ dci(0x7f407409);  // uqshl d9, d0, #0
12202     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12203 
12204     __ movi(q9.V16B(), 0x55);
12205     __ dci(0x7ee02c29);  // uqsub d9, d1, d0
12206     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12207 
12208     __ movi(q9.V16B(), 0x55);
12209     __ dci(0x7ee05429);  // urshl d9, d1, d0
12210     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12211 
12212     __ movi(q9.V16B(), 0x55);
12213     __ dci(0x7f402409);  // urshr d9, d0, #64
12214     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12215 
12216     __ movi(q9.V16B(), 0x55);
12217     __ dci(0x7f403409);  // ursra d9, d0, #64
12218     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12219 
12220     __ movi(q9.V16B(), 0x55);
12221     __ dci(0x7ee04429);  // ushl d9, d1, d0
12222     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12223 
12224     __ movi(q9.V16B(), 0x55);
12225     __ dci(0x7f400409);  // ushr d9, d0, #64
12226     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12227 
12228     __ movi(q9.V16B(), 0x55);
12229     __ dci(0x7ee03809);  // usqadd d9, d0
12230     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12231 
12232     __ movi(q9.V16B(), 0x55);
12233     __ dci(0x7f401409);  // usra d9, d0, #64
12234     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12235   }
12236   __ Sub(x1, x1, 1);
12237   __ Cbnz(x1, &loop);
12238 
12239   __ Ins(q30.V2D(), 0, xzr);
12240 
12241   END();
12242   if (CAN_RUN()) {
12243     RUN();
12244     ASSERT_EQUAL_128(0, 0, q30);
12245   }
12246 }
12247 
12248 }  // namespace aarch64
12249 }  // namespace vixl
12250