1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <cfloat>
28 #include <cmath>
29 #include <cstdio>
30 #include <cstdlib>
31 #include <cstring>
32 #include <sys/mman.h>
33
34 #include "test-runner.h"
35 #include "test-utils.h"
36
37 #include "aarch64/cpu-aarch64.h"
38 #include "aarch64/disasm-aarch64.h"
39 #include "aarch64/macro-assembler-aarch64.h"
40 #include "aarch64/simulator-aarch64.h"
41 #include "aarch64/test-utils-aarch64.h"
42 #include "test-assembler-aarch64.h"
43
44 namespace vixl {
45 namespace aarch64 {
46
// Byte-sized (B register) Ldr/Str using immediate-offset, post-index and
// pre-index addressing. Checks the loaded values, the bytes written to memory
// and the final value of every base register.
TEST(load_store_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t input[3] = {0x12, 0x23, 0x34};
  uint8_t output[3] = {};
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  const uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // One private copy of each base address per load/store, so that write-back
  // on one access cannot affect another.
  __ Mov(x17, input_addr);
  __ Mov(x18, output_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, output_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, output_addr);
  // Immediate-offset load paired with a post-index store.
  __ Ldr(b0, MemOperand(x17, sizeof(input[0])));
  __ Str(b0, MemOperand(x18, sizeof(output[0]), PostIndex));
  // Post-index load paired with a pre-index store.
  __ Ldr(b1, MemOperand(x19, sizeof(input[0]), PostIndex));
  __ Str(b1, MemOperand(x20, 2 * sizeof(output[0]), PreIndex));
  // Pre-index load paired with an immediate-offset store.
  __ Ldr(b2, MemOperand(x21, 2 * sizeof(input[0]), PreIndex));
  __ Str(b2, MemOperand(x22, sizeof(output[0])));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values; everything above the low byte is expected to be zero.
    ASSERT_EQUAL_128(0, 0x23, q0);
    ASSERT_EQUAL_128(0, 0x12, q1);
    ASSERT_EQUAL_128(0, 0x34, q2);

    // Bytes written to the output buffer.
    ASSERT_EQUAL_64(0x23, output[0]);
    ASSERT_EQUAL_64(0x34, output[1]);
    ASSERT_EQUAL_64(0x12, output[2]);

    // Base registers: only pre-/post-indexed accesses write back.
    ASSERT_EQUAL_64(input_addr, x17);
    ASSERT_EQUAL_64(output_addr + sizeof(output[0]), x18);
    ASSERT_EQUAL_64(input_addr + sizeof(input[0]), x19);
    ASSERT_EQUAL_64(output_addr + 2 * sizeof(output[0]), x20);
    ASSERT_EQUAL_64(input_addr + 2 * sizeof(input[0]), x21);
    ASSERT_EQUAL_64(output_addr, x22);
  }
}
87
88
// Half-word (H register) Ldr/Str using immediate-offset, post-index and
// pre-index addressing. Checks the loaded values, the half-words written to
// memory and the final value of every base register.
TEST(load_store_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint16_t input[3] = {0x1234, 0x2345, 0x3456};
  uint16_t output[3] = {};
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  const uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // One private copy of each base address per load/store, so that write-back
  // on one access cannot affect another.
  __ Mov(x17, input_addr);
  __ Mov(x18, output_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, output_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, output_addr);
  // Immediate-offset load paired with a post-index store.
  __ Ldr(h0, MemOperand(x17, sizeof(input[0])));
  __ Str(h0, MemOperand(x18, sizeof(output[0]), PostIndex));
  // Post-index load paired with a pre-index store.
  __ Ldr(h1, MemOperand(x19, sizeof(input[0]), PostIndex));
  __ Str(h1, MemOperand(x20, 2 * sizeof(output[0]), PreIndex));
  // Pre-index load paired with an immediate-offset store.
  __ Ldr(h2, MemOperand(x21, 2 * sizeof(input[0]), PreIndex));
  __ Str(h2, MemOperand(x22, sizeof(output[0])));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values; everything above the low half-word is expected to be
    // zero.
    ASSERT_EQUAL_128(0, 0x2345, q0);
    ASSERT_EQUAL_128(0, 0x1234, q1);
    ASSERT_EQUAL_128(0, 0x3456, q2);

    // Half-words written to the output buffer.
    ASSERT_EQUAL_64(0x2345, output[0]);
    ASSERT_EQUAL_64(0x3456, output[1]);
    ASSERT_EQUAL_64(0x1234, output[2]);

    // Base registers: only pre-/post-indexed accesses write back.
    ASSERT_EQUAL_64(input_addr, x17);
    ASSERT_EQUAL_64(output_addr + sizeof(output[0]), x18);
    ASSERT_EQUAL_64(input_addr + sizeof(input[0]), x19);
    ASSERT_EQUAL_64(output_addr + 2 * sizeof(output[0]), x20);
    ASSERT_EQUAL_64(input_addr + 2 * sizeof(input[0]), x21);
    ASSERT_EQUAL_64(output_addr, x22);
  }
}
129
130
// Quad-word (Q register) Ldr/Str using immediate-offset, post-index and
// pre-index addressing. Checks the loaded values, the data written to memory
// and the final value of every base register.
TEST(load_store_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 48 bytes of input, laid out here as six rows of eight bytes; each pair of
  // rows is one 16-byte quad-word.
  uint8_t input[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe,
                       0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
                       0x21, 0x43, 0x65, 0x87, 0xa9, 0xcb, 0xed, 0x0f,
                       0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
                       0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
                       0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};

  uint64_t output[6] = {};
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  const uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // One private copy of each base address per load/store, so that write-back
  // on one access cannot affect another.
  __ Mov(x17, input_addr);
  __ Mov(x18, output_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, output_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, output_addr);
  // Immediate-offset load paired with a post-index store.
  __ Ldr(q0, MemOperand(x17, 16));
  __ Str(q0, MemOperand(x18, 16, PostIndex));
  // Post-index load paired with a pre-index store.
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Str(q1, MemOperand(x20, 32, PreIndex));
  // Pre-index load paired with an immediate-offset store.
  __ Ldr(q2, MemOperand(x21, 32, PreIndex));
  __ Str(q2, MemOperand(x22, 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values.
    ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
    ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
    ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);

    // Data written to the output buffer, in 64-bit halves.
    ASSERT_EQUAL_64(0x0fedcba987654321, output[0]);
    ASSERT_EQUAL_64(0xf0debc9a78563412, output[1]);
    ASSERT_EQUAL_64(0x02e0ceac8a684624, output[2]);
    ASSERT_EQUAL_64(0x200eeccaa8866442, output[3]);
    ASSERT_EQUAL_64(0xfedcba9876543210, output[4]);
    ASSERT_EQUAL_64(0xefcdab8967452301, output[5]);

    // Base registers: only pre-/post-indexed accesses write back.
    ASSERT_EQUAL_64(input_addr, x17);
    ASSERT_EQUAL_64(output_addr + 16, x18);
    ASSERT_EQUAL_64(input_addr + 16, x19);
    ASSERT_EQUAL_64(output_addr + 32, x20);
    ASSERT_EQUAL_64(input_addr + 32, x21);
    ASSERT_EQUAL_64(output_addr, x22);
  }
}
179
180
// Scalar FP/SIMD Ldr/Str with register-offset addressing: plain register
// offsets plus UXTW, SXTW and LSL forms, for B, H, S, D and Q sized accesses.
TEST(load_store_v_regoffset) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[64];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  uint8_t output[64] = {};

  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  const uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  __ Mov(x17, input_addr + 16);  // Load base: &input[16].
  __ Mov(x18, 1);                // Positive offset register.
  __ Mov(w19, -1);               // Negative offset; used via SXTW below.
  __ Mov(x20, output_addr - 1);  // Store base: one byte before output[0].

  // Byte loads.
  __ Ldr(b0, MemOperand(x17, x18));
  __ Ldr(b1, MemOperand(x17, x19, SXTW));

  // Half-word loads.
  __ Ldr(h2, MemOperand(x17, x18));
  __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
  __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
  __ Ldr(h5, MemOperand(x17, x18, LSL, 1));

  // Single-word loads.
  __ Ldr(s16, MemOperand(x17, x18));
  __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
  __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
  __ Ldr(s19, MemOperand(x17, x18, LSL, 2));

  // Double-word loads.
  __ Ldr(d20, MemOperand(x17, x18));
  __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
  __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
  __ Ldr(d23, MemOperand(x17, x18, LSL, 3));

  // Quad-word loads.
  __ Ldr(q24, MemOperand(x17, x18));
  __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
  __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
  __ Ldr(q27, MemOperand(x17, x18, LSL, 4));

  // Write the B, H, S, D and Q views of register 27 to consecutive output
  // locations. x20 is adjusted around the SXTW stores so that the negative
  // offset still lands on the next free location.
  __ Str(b27, MemOperand(x20, x18));
  __ Str(h27, MemOperand(x20, x18, UXTW, 1));
  __ Add(x20, x20, 8);
  __ Str(s27, MemOperand(x20, x19, SXTW, 2));
  __ Sub(x20, x20, 8);
  __ Str(d27, MemOperand(x20, x18, LSL, 3));
  __ Add(x20, x20, 32);
  __ Str(q27, MemOperand(x20, x19, SXTW, 4));

  // Read the stored data back into q6 and q7 for checking.
  __ Sub(x20, x20, 32);
  __ Ldr(q6, MemOperand(x20, x18));
  __ Ldr(q7, MemOperand(x20, x18, LSL, 4));

  END();

  if (CAN_RUN()) {
    RUN();

    // Byte loads.
    ASSERT_EQUAL_128(0, 0x11, q0);
    ASSERT_EQUAL_128(0, 0x0f, q1);

    // Half-word loads.
    ASSERT_EQUAL_128(0, 0x1211, q2);
    ASSERT_EQUAL_128(0, 0x1312, q3);
    ASSERT_EQUAL_128(0, 0x0f0e, q4);
    ASSERT_EQUAL_128(0, 0x1312, q5);

    // Single-word loads.
    ASSERT_EQUAL_128(0, 0x14131211, q16);
    ASSERT_EQUAL_128(0, 0x17161514, q17);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
    ASSERT_EQUAL_128(0, 0x17161514, q19);

    // Double-word loads.
    ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
    ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
    ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);

    // Quad-word loads.
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);

    // Data read back after the stores.
    ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);
  }
}
264
// Ldp/Stp on pairs of Q registers: a post-index pair load followed by a
// pre-index pair store with the two registers swapped.
TEST(ldp_stp_quad) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint64_t input[4] = {0x0123456789abcdef,
                       0xaaaaaaaa55555555,
                       0xfedcba9876543210,
                       0x55555555aaaaaaaa};
  uint64_t output[6] = {};
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  const uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  __ Mov(x16, input_addr);
  __ Mov(x17, output_addr);
  __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(input[0]), PostIndex));
  __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(output[1]), PreIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded pair.
    ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
    ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);

    // The pre-index store skips the first two output elements and then writes
    // q0 followed by q31.
    ASSERT_EQUAL_64(0, output[0]);
    ASSERT_EQUAL_64(0, output[1]);
    ASSERT_EQUAL_64(0xfedcba9876543210, output[2]);
    ASSERT_EQUAL_64(0x55555555aaaaaaaa, output[3]);
    ASSERT_EQUAL_64(0x0123456789abcdef, output[4]);
    ASSERT_EQUAL_64(0xaaaaaaaa55555555, output[5]);

    // Both base registers are written back.
    ASSERT_EQUAL_64(input_addr + 4 * sizeof(input[0]), x16);
    ASSERT_EQUAL_64(output_addr + 2 * sizeof(output[1]), x17);
  }
}
298
// Ld1 (multiple structures) into 64-bit vectors, with one to four registers
// per instruction and no write-back.
TEST(neon_ld1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[32 + 5];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, input_addr);
  // Fill all 128 bits of q2 first; the check on q2 below then shows that the
  // 64-bit Ld1 leaves the upper half zero.
  __ Ldr(q2, MemOperand(x17));
  __ Ld1(v2.V8B(), MemOperand(x17));
  // Each subsequent load starts one byte further into the input.
  __ Add(x17, x17, 1);
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps from v31 round to v0.
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // One register, from input + 0.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q2);

    // Two registers, from input + 1.
    ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);

    // Three registers, from input + 2.
    ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
    ASSERT_EQUAL_128(0, 0x1918171615141312, q7);

    // Four registers, from input + 3.
    ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
    ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
    ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);

    // Four registers (wrapping list), from input + 4.
    ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);

    // Four registers, from input + 5.
    ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
    ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
    ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
    ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
  }
}
347
348
// Ld1 (multiple structures) into 64-bit vectors with post-index write-back,
// using both a register post-index and immediate post-indexes.
TEST(neon_ld1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[32 + 5];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, input_addr);
  __ Mov(x18, input_addr + 1);
  __ Mov(x19, input_addr + 2);
  __ Mov(x20, input_addr + 3);
  __ Mov(x21, input_addr + 4);
  __ Mov(x22, input_addr + 5);
  __ Mov(x23, 1);  // Register post-index amount for the first Ld1.
  // Fill all 128 bits of q2 first; the check on q2 below then shows that the
  // 64-bit Ld1 leaves the upper half zero.
  __ Ldr(q2, MemOperand(x17));
  __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(),
         MemOperand(x20, 32, PostIndex));
  // Register list that wraps from v31 round to v0.
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(),
         MemOperand(x22, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // One register, from input + 0.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q2);

    // Two registers, from input + 1.
    ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);

    // Three registers, from input + 2.
    ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
    ASSERT_EQUAL_128(0, 0x1918171615141312, q7);

    // Four registers, from input + 3.
    ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
    ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
    ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);

    // Four registers (wrapping list), from input + 4.
    ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);

    // Four registers, from input + 5.
    ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
    ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
    ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
    ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);

    // Write-back: initial offset plus the post-index amount.
    ASSERT_EQUAL_64(input_addr + 1, x17);
    ASSERT_EQUAL_64(input_addr + 1 + 16, x18);
    ASSERT_EQUAL_64(input_addr + 2 + 24, x19);
    ASSERT_EQUAL_64(input_addr + 3 + 32, x20);
    ASSERT_EQUAL_64(input_addr + 4 + 32, x21);
    ASSERT_EQUAL_64(input_addr + 5 + 32, x22);
  }
}
416
417
// Ld1 (multiple structures) into 128-bit vectors, with one to four registers
// per instruction and no write-back.
TEST(neon_ld1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, input_addr);
  __ Ld1(v2.V16B(), MemOperand(x17));
  // Each subsequent load starts one byte further into the input.
  __ Add(x17, x17, 1);
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps from v31 round to v0.
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // One register, from input + 0.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);

    // Two registers, from input + 1.
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);

    // Three registers, from input + 2.
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);

    // Four registers, from input + 3.
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);

    // Four registers (wrapping list), from input + 4.
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
  }
}
459
460
// Ld1 (multiple structures) into 128-bit vectors with post-index write-back,
// using both a register post-index and immediate post-indexes.
TEST(neon_ld1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, input_addr);
  __ Mov(x18, input_addr + 1);
  __ Mov(x19, input_addr + 2);
  __ Mov(x20, input_addr + 3);
  __ Mov(x21, input_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount for the first Ld1.
  __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(),
         MemOperand(x20, 64, PostIndex));
  // Register list that wraps from v31 round to v0.
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // One register, from input + 0.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);

    // Two registers, from input + 1.
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);

    // Three registers, from input + 2.
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);

    // Four registers, from input + 3.
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);

    // Four registers (wrapping list), from input + 4.
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);

    // Write-back: initial offset plus the post-index amount.
    ASSERT_EQUAL_64(input_addr + 1, x17);
    ASSERT_EQUAL_64(input_addr + 1 + 32, x18);
    ASSERT_EQUAL_64(input_addr + 2 + 48, x19);
    ASSERT_EQUAL_64(input_addr + 3 + 64, x20);
    ASSERT_EQUAL_64(input_addr + 4 + 64, x21);
  }
}
516
517
// Ld1 (single structure) to one lane, for B, H, S and D lanes: first filling
// whole registers lane by lane, then replacing a single lane of a register
// that already holds data.
TEST(neon_ld1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[64];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();

  // Fill each register one lane at a time, from the highest lane down, moving
  // the address up by one byte after every load.
  __ Mov(x17, input_addr);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, input_addr);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld1(v1.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, input_addr);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld1(v2.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, input_addr);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld1(v3.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Load a full Q register, then overwrite exactly one lane of it.
  __ Mov(x17, input_addr);
  __ Ldr(q4, MemOperand(x17));
  __ Ld1(v4.B(), 4, MemOperand(x17));
  __ Ldr(q5, MemOperand(x17));
  __ Ld1(v5.H(), 3, MemOperand(x17));
  __ Ldr(q6, MemOperand(x17));
  __ Ld1(v6.S(), 2, MemOperand(x17));
  __ Ldr(q7, MemOperand(x17));
  __ Ld1(v7.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Registers filled lane by lane.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);

    // Registers with a single lane replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
  }
}
580
// Ld2 (multiple structures) into pairs of 64-bit vectors, without write-back.
TEST(neon_ld2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, input_addr);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
  // Each subsequent load starts one byte further into the input.
  __ Add(x17, x17, 1);
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps from v31 round to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // V8B pair, from input + 0.
    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);

    // V8B pair, from input + 1.
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);

    // V4H pair, from input + 2.
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);

    // V2S pair (wrapping list), from input + 3.
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);
  }
}
614
// Ld2 (multiple structures) into pairs of 64-bit vectors with post-index
// write-back, using both a register post-index and immediate post-indexes.
TEST(neon_ld2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[32 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, input_addr);
  __ Mov(x18, input_addr + 1);
  __ Mov(x19, input_addr + 2);
  __ Mov(x20, input_addr + 3);
  __ Mov(x21, input_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount for the first Ld2.
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
  // v5 is written by this load and again by the next one, so the q5 check
  // below sees the value produced by the later (V4H) load.
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
  // Register list that wraps from v31 round to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // V8B pair, from input + 0.
    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);

    // First register of the V8B pair loaded from input + 1.
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);

    // V4H pair, from input + 2.
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6);

    // V2S pair, from input + 3.
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);

    // V2S pair (wrapping list), from input + 4.
    ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
    ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);

    // Write-back: initial offset plus the post-index amount.
    ASSERT_EQUAL_64(input_addr + 1, x17);
    ASSERT_EQUAL_64(input_addr + 1 + 16, x18);
    ASSERT_EQUAL_64(input_addr + 2 + 16, x19);
    ASSERT_EQUAL_64(input_addr + 3 + 16, x20);
    ASSERT_EQUAL_64(input_addr + 4 + 16, x21);
  }
}
658
659
// Ld2 (multiple structures) into pairs of 128-bit vectors, without
// write-back.
TEST(neon_ld2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, input_addr);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
  // Each subsequent load starts one byte further into the input.
  __ Add(x17, x17, 1);
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps from v31 round to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // V16B pair, from input + 0.
    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);

    // V16B pair, from input + 1.
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);

    // V8H pair, from input + 2.
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);

    // V4S pair, from input + 3.
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);

    // V2D pair (wrapping list), from input + 4.
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
  }
}
697
698
// Ld2 (multiple structures) into pairs of 128-bit vectors with post-index
// write-back, using both a register post-index and immediate post-indexes.
TEST(neon_ld2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, input_addr);
  __ Mov(x18, input_addr + 1);
  __ Mov(x19, input_addr + 2);
  __ Mov(x20, input_addr + 3);
  __ Mov(x21, input_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount for the first Ld2.
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
  // Register list that wraps from v31 round to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // V16B pair, from input + 0.
    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);

    // V16B pair, from input + 1.
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);

    // V8H pair, from input + 2.
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);

    // V4S pair, from input + 3.
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);

    // V2D pair (wrapping list), from input + 4.
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

    // Write-back: initial offset plus the post-index amount.
    ASSERT_EQUAL_64(input_addr + 1, x17);
    ASSERT_EQUAL_64(input_addr + 1 + 32, x18);
    ASSERT_EQUAL_64(input_addr + 2 + 32, x19);
    ASSERT_EQUAL_64(input_addr + 3 + 32, x20);
    ASSERT_EQUAL_64(input_addr + 4 + 32, x21);
  }
}
744
745
// Ld2 (single structure) to one lane of a register pair, for B, H, S and D
// lanes: first filling whole registers lane by lane, then replacing a single
// lane in registers that already hold data.
TEST(neon_ld2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[64];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();

  // Fill each register pair one lane at a time, from the highest lane down,
  // moving the address up by one byte after every load.
  __ Mov(x17, input_addr);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld2(v0.B(), v1.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, input_addr);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld2(v2.H(), v3.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, input_addr);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld2(v4.S(), v5.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, input_addr);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld2(v6.D(), v7.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Pre-load a register pair with the first 32 input bytes (through a scratch
  // copy of the address), then overwrite exactly one lane in each register.
  __ Mov(x17, input_addr);
  __ Mov(x4, x17);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Register pairs filled lane by lane.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
    ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
    ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);

    // Register pairs with a single lane replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
  }
}
824
825
// Ld2 (single structure) to one lane of a register pair with post-index
// write-back: immediate post-index while filling whole registers, register
// post-index while replacing a single lane.
TEST(neon_ld2_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[k] == k, so every expected value spells out the bytes it came from.
  uint8_t input[64];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // A separate base register for each group of loads; all start at input[0].
  __ Mov(x17, input_addr);
  __ Mov(x18, input_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, input_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, input_addr);
  __ Mov(x23, input_addr);
  __ Mov(x24, input_addr);

  // Fill each register pair one lane at a time, from the highest lane down.
  // The immediate post-index steps the base past the two elements just read.
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld2(v0.B(), v1.B(), lane, MemOperand(x17, 2, PostIndex));
  }

  for (int lane = 7; lane >= 0; --lane) {
    __ Ld2(v2.H(), v3.H(), lane, MemOperand(x18, 4, PostIndex));
  }

  for (int lane = 3; lane >= 0; --lane) {
    __ Ld2(v4.S(), v5.S(), lane, MemOperand(x19, 8, PostIndex));
  }

  for (int lane = 1; lane >= 0; --lane) {
    __ Ld2(v6.D(), v7.D(), lane, MemOperand(x20, 16, PostIndex));
  }

  // Pre-load a register pair with the first 32 input bytes (through a scratch
  // copy of the address), then overwrite exactly one lane in each register.
  // x25 is the register post-index amount; it is 1, 2, 3 and 4 for the B, H,
  // S and D cases respectively.
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Register pairs filled lane by lane.
    ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
    ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
    ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
    ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
    ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
    ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
    ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);

    // Register pairs with a single lane replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);

    // Write-back after the immediate post-index loops.
    ASSERT_EQUAL_64(input_addr + 32, x17);
    ASSERT_EQUAL_64(input_addr + 32, x18);
    ASSERT_EQUAL_64(input_addr + 32, x19);
    ASSERT_EQUAL_64(input_addr + 32, x20);

    // Write-back after the register post-index loads.
    ASSERT_EQUAL_64(input_addr + 1, x21);
    ASSERT_EQUAL_64(input_addr + 2, x22);
    ASSERT_EQUAL_64(input_addr + 3, x23);
    ASSERT_EQUAL_64(input_addr + 4, x24);
  }
}
920
921
// LD2R with a plain base address: each load reads one two-element structure
// and, per the expected values, every lane of the two destination registers
// ends up holding the corresponding element.
TEST(neon_ld2_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k, so each expected lane value spells
  // out the buffer offsets it was read from.
  uint8_t bytes[64];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr + 1);
  // x18 is written here, but none of the loads in this test reads it.
  __ Mov(x18, 1);

  // There is no writeback in this test, so the base register is advanced
  // explicitly between loads.
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Add(x17, x17, 2);
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Byte lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    // Halfword lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    // Word lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    // Doubleword lanes.
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
  }
}
968
969
// LD2R with post-index writeback: the same loads as the non-writeback variant,
// but x17 is advanced by the load itself, alternating between immediate and
// register (x18) post-index amounts.
TEST(neon_ld2_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[64];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr + 1);
  __ Mov(x18, 1);  // Register post-index amount.
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Byte lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    // Halfword lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    // Word lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    // Doubleword lanes.
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);

    // Start offset 1 plus the post-index amounts 2 + 1 + 1 + 4 + 1 + 8 + 16.
    ASSERT_EQUAL_64(bytes_addr + 34, x17);
  }
}
1011
1012
// LD3 (multiple structures) into 64-bit registers with a plain base address:
// three-way de-interleaving for the 8B, 4H and 2S arrangements, including a
// register list that wraps from v31 to v0.
TEST(neon_ld3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[64 + 4];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
  // Each following load starts one byte further into the buffer.
  __ Add(x17, x17, 1);
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 8B from offset 0.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    // 8B from offset 1.
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    // 4H from offset 2.
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    // 2S from offset 3.
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);
  }
}
1050
1051
// LD3 (multiple structures) into 64-bit registers with post-index writeback.
// The first load uses a register post-index (x22); the others use the
// immediate form. Each load has its own base register so that every
// written-back address can be checked afterwards.
TEST(neon_ld3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[32 + 4];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  // Immediate post-index amount used by the loads below.
  const int kImmStep = 24;

  START();
  __ Mov(x17, bytes_addr);
  __ Mov(x18, bytes_addr + 1);
  __ Mov(x19, bytes_addr + 2);
  __ Mov(x20, bytes_addr + 3);
  __ Mov(x21, bytes_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, kImmStep, PostIndex));
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, kImmStep, PostIndex));
  __ Ld3(v11.V2S(),
         v12.V2S(),
         v13.V2S(),
         MemOperand(x20, kImmStep, PostIndex));
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, kImmStep, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 8B from offset 0.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    // 8B from offset 1.
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    // 4H from offset 2.
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    // 2S from offset 3.
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
    // 2S from offset 4, register list wrapping from v31 to v0.
    ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
    ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
    ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);

    // Written-back base addresses.
    ASSERT_EQUAL_64(bytes_addr + 1, x17);
    ASSERT_EQUAL_64(bytes_addr + 1 + kImmStep, x18);
    ASSERT_EQUAL_64(bytes_addr + 2 + kImmStep, x19);
    ASSERT_EQUAL_64(bytes_addr + 3 + kImmStep, x20);
    ASSERT_EQUAL_64(bytes_addr + 4 + kImmStep, x21);
  }
}
1101
1102
// LD3 (multiple structures) into 128-bit registers with a plain base address:
// three-way de-interleaving for the 16B, 8H, 4S and 2D arrangements, including
// a register list that wraps from v31 to v0.
TEST(neon_ld3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[64 + 4];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr);
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
  // Each following load starts one byte further into the buffer.
  __ Add(x17, x17, 1);
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 16B from offset 0.
    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    // 16B from offset 1.
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    // 8H from offset 2.
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    // 4S from offset 3.
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    // 2D from offset 4.
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
  }
}
1145
1146
// LD3 (multiple structures) into 128-bit registers with post-index writeback.
// The first load uses a register post-index (x22); the others use the
// immediate form. Each load has its own base register so that every
// written-back address can be checked afterwards.
TEST(neon_ld3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[64 + 4];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  // Immediate post-index amount used by the loads below.
  const int kImmStep = 48;

  START();
  __ Mov(x17, bytes_addr);
  __ Mov(x18, bytes_addr + 1);
  __ Mov(x19, bytes_addr + 2);
  __ Mov(x20, bytes_addr + 3);
  __ Mov(x21, bytes_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.

  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V16B(),
         v6.V16B(),
         v7.V16B(),
         MemOperand(x18, kImmStep, PostIndex));
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, kImmStep, PostIndex));
  __ Ld3(v11.V4S(),
         v12.V4S(),
         v13.V4S(),
         MemOperand(x20, kImmStep, PostIndex));
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, kImmStep, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 16B from offset 0.
    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    // 16B from offset 1.
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    // 8H from offset 2.
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    // 4S from offset 3.
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    // 2D from offset 4, register list wrapping from v31 to v0.
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

    // Written-back base addresses.
    ASSERT_EQUAL_64(bytes_addr + 1, x17);
    ASSERT_EQUAL_64(bytes_addr + 1 + kImmStep, x18);
    ASSERT_EQUAL_64(bytes_addr + 2 + kImmStep, x19);
    ASSERT_EQUAL_64(bytes_addr + 3 + kImmStep, x20);
    ASSERT_EQUAL_64(bytes_addr + 4 + kImmStep, x21);
  }
}
1197
1198
// LD3 (single structure) with a plain base address.
//
// The first half fills whole registers one lane at a time, from the highest
// lane down, while the base advances by one byte per load. The second half
// preloads three registers from the buffer and then overwrites a single lane
// in each, to check that the other lanes are preserved.
TEST(neon_ld3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte i of the buffer holds the value i.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  // x17 stays at the start of the buffer for all four cases; x4-x7 are
  // scratch pointers used only to preload the destination registers.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Whole registers filled lane by lane.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
    ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
    ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
    ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
    ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
    ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
    ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);

    // Single lane inserted into preloaded registers: B lane 4, H lane 3.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
    // S lane 2 and D lane 1. These loads read from the start of the buffer,
    // exactly as in neon_ld3_lane_postindex, so the expected values match.
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);
  }
}
1283
1284
// LD3 (single structure) with post-index writeback.
//
// The first half fills whole registers one lane at a time, from the highest
// lane down, using immediate post-index amounts. The second half preloads
// three registers from the buffer and overwrites a single lane in each, using
// a register post-index (x25) that grows by one for each case.
TEST(neon_ld3_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[64];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();

  // Every case gets its own base register, all starting at the buffer start,
  // so that each written-back address can be checked independently.
  __ Mov(x17, bytes_addr);
  __ Mov(x18, bytes_addr);
  __ Mov(x19, bytes_addr);
  __ Mov(x20, bytes_addr);
  __ Mov(x21, bytes_addr);
  __ Mov(x22, bytes_addr);
  __ Mov(x23, bytes_addr);
  __ Mov(x24, bytes_addr);

  // Fill whole registers lane by lane.
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld3(v0.B(), v1.B(), v2.B(), lane, MemOperand(x17, 3, PostIndex));
  }

  for (int lane = 7; lane >= 0; --lane) {
    __ Ld3(v3.H(), v4.H(), v5.H(), lane, MemOperand(x18, 6, PostIndex));
  }

  for (int lane = 3; lane >= 0; --lane) {
    __ Ld3(v6.S(), v7.S(), v8.S(), lane, MemOperand(x19, 12, PostIndex));
  }

  for (int lane = 1; lane >= 0; --lane) {
    __ Ld3(v9.D(), v10.D(), v11.D(), lane, MemOperand(x20, 24, PostIndex));
  }

  // Insert a single lane into preloaded registers. x4-x7 are scratch pointers
  // used only for the preloads.
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Whole registers filled lane by lane.
    ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
    ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
    ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
    ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
    ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
    ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
    ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
    ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
    ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
    ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);

    // Single lane inserted: B lane 4, H lane 3, S lane 2, D lane 1.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);

    // Written-back bases: 16 * 3, 8 * 6, 4 * 12 and 2 * 24 for the loops,
    // then the register amounts 1 to 4 for the single-lane cases.
    ASSERT_EQUAL_64(bytes_addr + 48, x17);
    ASSERT_EQUAL_64(bytes_addr + 48, x18);
    ASSERT_EQUAL_64(bytes_addr + 48, x19);
    ASSERT_EQUAL_64(bytes_addr + 48, x20);
    ASSERT_EQUAL_64(bytes_addr + 1, x21);
    ASSERT_EQUAL_64(bytes_addr + 2, x22);
    ASSERT_EQUAL_64(bytes_addr + 3, x23);
    ASSERT_EQUAL_64(bytes_addr + 4, x24);
  }
}
1391
1392
// LD3R with a plain base address: each load reads one three-element structure
// and, per the expected values, every lane of the three destination registers
// ends up holding the corresponding element.
TEST(neon_ld3_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[64];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr + 1);
  // x18 is written here, but none of the loads in this test reads it.
  __ Mov(x18, 1);

  // There is no writeback in this test, so the base register is advanced
  // explicitly between loads.
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 3);
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Byte lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    // Halfword lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    // Word lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    // Doubleword lanes.
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
  }
}
1446
1447
// LD3R with post-index writeback: each load reads one three-element structure
// into every lane of three registers, and x17 is advanced by the load itself,
// alternating between immediate and register (x18) post-index amounts.
TEST(neon_ld3_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte i of the buffer holds the value i.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // All code generation happens between START() and END(); the register setup
  // belongs inside that region only.
  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);  // Register post-index amount.
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

    // Check the accumulated writeback: start offset 1 plus the post-index
    // amounts 3 + 1 + 1 + 6 + 1 + 12 + 24.
    ASSERT_EQUAL_64(src_base + 49, x17);
  }
}
1497
1498
// LD4 (multiple structures) into 64-bit registers with a plain base address:
// four-way de-interleaving for the 8B, 4H and 2S arrangements, including a
// register list that wraps from v31 to v0.
TEST(neon_ld4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[64 + 4];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr);
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
  // Each following load starts one byte further into the buffer.
  __ Add(x17, x17, 1);
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 8B from offset 0.
    ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
    // 8B from offset 1.
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
    // 4H from offset 2.
    ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
    // 2S from offset 3.
    ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
    ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
    ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
    ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);
  }
}
1540
1541
// LD4 (multiple structures) into 64-bit registers with post-index writeback.
// The first load uses a register post-index (x22); the others use the
// immediate form. Each load has its own base register so that every
// written-back address can be checked afterwards.
TEST(neon_ld4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[32 + 4];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  // Immediate post-index amount used by the loads below.
  const int kImmStep = 32;

  START();
  __ Mov(x17, bytes_addr);
  __ Mov(x18, bytes_addr + 1);
  __ Mov(x19, bytes_addr + 2);
  __ Mov(x20, bytes_addr + 3);
  __ Mov(x21, bytes_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(),
         MemOperand(x18, kImmStep, PostIndex));
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(),
         MemOperand(x19, kImmStep, PostIndex));
  __ Ld4(v14.V2S(), v15.V2S(), v16.V2S(), v17.V2S(),
         MemOperand(x20, kImmStep, PostIndex));
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(),
         MemOperand(x21, kImmStep, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 8B from offset 0.
    ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
    // 8B from offset 1.
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
    // 4H from offset 2.
    ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
    // 2S from offset 3.
    ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
    // 2S from offset 4, register list wrapping from v31 to v0.
    ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
    ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
    ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
    ASSERT_EQUAL_128(0, 0x2322212013121110, q1);

    // Written-back base addresses.
    ASSERT_EQUAL_64(bytes_addr + 1, x17);
    ASSERT_EQUAL_64(bytes_addr + 1 + kImmStep, x18);
    ASSERT_EQUAL_64(bytes_addr + 2 + kImmStep, x19);
    ASSERT_EQUAL_64(bytes_addr + 3 + kImmStep, x20);
    ASSERT_EQUAL_64(bytes_addr + 4 + kImmStep, x21);
  }
}
1617
1618
// LD4 (multiple structures) into 128-bit registers with a plain base address:
// four-way de-interleaving for the 16B, 8H, 4S and 2D arrangements.
TEST(neon_ld4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[64 + 4];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr);
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  // Each following load starts one byte further into the buffer.
  __ Add(x17, x17, 1);
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 16B from offset 0.
    ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
    // 16B from offset 1.
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
    // 8H from offset 2.
    ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
    // 4S from offset 3.
    ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
    // 2D from offset 4.
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
    ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);
  }
}
1666
1667
// LD4 (multiple structures) into 128-bit registers with post-index writeback.
// The first load uses a register post-index (x22); the others use the
// immediate form. Each load has its own base register so that every
// written-back address can be checked afterwards.
TEST(neon_ld4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer holds the value k.
  uint8_t bytes[64 + 4];
  for (unsigned k = 0; k < sizeof(bytes); ++k) {
    bytes[k] = static_cast<uint8_t>(k);
  }
  const uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  // Immediate post-index amount used by the loads below.
  const int kImmStep = 64;

  START();
  __ Mov(x17, bytes_addr);
  __ Mov(x18, bytes_addr + 1);
  __ Mov(x19, bytes_addr + 2);
  __ Mov(x20, bytes_addr + 3);
  __ Mov(x21, bytes_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.

  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(),
         MemOperand(x18, kImmStep, PostIndex));
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(),
         MemOperand(x19, kImmStep, PostIndex));
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(),
         MemOperand(x20, kImmStep, PostIndex));
  __ Ld4(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(),
         MemOperand(x21, kImmStep, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 16B from offset 0.
    ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
    // 16B from offset 1.
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
    // 8H from offset 2.
    ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
    // 4S from offset 3.
    ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
    // 2D from offset 4, register list wrapping from v31 to v0.
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);

    // Written-back base addresses.
    ASSERT_EQUAL_64(bytes_addr + 1, x17);
    ASSERT_EQUAL_64(bytes_addr + 1 + kImmStep, x18);
    ASSERT_EQUAL_64(bytes_addr + 2 + kImmStep, x19);
    ASSERT_EQUAL_64(bytes_addr + 3 + kImmStep, x20);
    ASSERT_EQUAL_64(bytes_addr + 4 + kImmStep, x21);
  }
}
1744
1745
// Exercises the lane form of Ld4 with a plain base-register address for B, H,
// S and D sized elements.
TEST(neon_ld4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every source byte holds its own offset, so a loaded value identifies the
  // address it came from.
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Part 1: populate whole registers lane by lane, highest lane first. The
  // base register moves on by one byte per load whatever the element size is.
  __ Mov(x17, src_base);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Part 2: overwrite a single lane of registers that already hold data. Each
  // group of four registers is first filled with the 64 source bytes, then one
  // lane is reloaded from src_base.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));

  __ Mov(x5, x17);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));

  __ Mov(x6, x17);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));

  __ Mov(x7, x17);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Part 1 results: B lanes.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
    ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
    ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
    // H lanes.
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
    ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
    ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
    ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
    // S lanes.
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
    ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
    ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
    ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
    // D lanes.
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
    ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);

    // Part 2 results: only the reloaded lane differs from the initial fill.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
  }
}
1851
1852
// Exercises the lane form of Ld4 with post-index addressing, using both
// immediate and register post-index amounts.
TEST(neon_ld4_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every source byte holds its own offset from src_base.
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Part 1: populate whole registers lane by lane, highest lane first. The
  // immediate post-index equals the size of one four-element structure, so
  // each pass walks the full 64-byte buffer.
  __ Mov(x17, src_base);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld4(v0.B(),
           v1.B(),
           v2.B(),
           v3.B(),
           lane,
           MemOperand(x17, 4, PostIndex));
  }

  __ Mov(x18, src_base);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld4(v4.H(),
           v5.H(),
           v6.H(),
           v7.H(),
           lane,
           MemOperand(x18, 8, PostIndex));
  }

  __ Mov(x19, src_base);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld4(v8.S(),
           v9.S(),
           v10.S(),
           v11.S(),
           lane,
           MemOperand(x19, 16, PostIndex));
  }

  __ Mov(x20, src_base);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld4(v12.D(),
           v13.D(),
           v14.D(),
           v15.D(),
           lane,
           MemOperand(x20, 32, PostIndex));
  }

  // Part 2: overwrite a single lane of registers that already hold data,
  // post-indexing by register x25. x25 is bumped after each case, so x21..x24
  // end up advanced by 1, 2, 3 and 4 bytes respectively.
  __ Mov(x25, 1);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  __ Mov(x4, x21);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(),
         v17.B(),
         v18.B(),
         v19.B(),
         4,
         MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(),
         v21.H(),
         v22.H(),
         v23.H(),
         3,
         MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(),
         v25.S(),
         v26.S(),
         v27.S(),
         2,
         MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(),
         v29.D(),
         v30.D(),
         v31.D(),
         1,
         MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Part 1 results: B lanes.
    ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
    ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
    ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
    ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
    // H lanes.
    ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
    ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
    ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
    ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
    // S lanes.
    ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
    ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
    ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
    ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
    // D lanes.
    ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
    ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);

    // Part 2 results: only the reloaded lane differs from the initial fill.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);

    // Base registers after write-back.
    ASSERT_EQUAL_64(src_base + 64, x17);
    ASSERT_EQUAL_64(src_base + 64, x18);
    ASSERT_EQUAL_64(src_base + 64, x19);
    ASSERT_EQUAL_64(src_base + 64, x20);
    ASSERT_EQUAL_64(src_base + 1, x21);
    ASSERT_EQUAL_64(src_base + 2, x22);
    ASSERT_EQUAL_64(src_base + 3, x23);
    ASSERT_EQUAL_64(src_base + 4, x24);
  }
}
1996
1997
// Exercises Ld4r (load one four-element structure and replicate it to all
// lanes) with a plain base-register address, for every vector arrangement used
// below.
TEST(neon_ld4_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every source byte holds its own offset from src_base.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 is the only address register this test needs; it is advanced manually
  // between loads, deliberately leaving it unaligned for some element sizes.
  __ Mov(x17, src_base + 1);
  __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
  __ Add(x17, x17, 16);
  __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements leave the upper half of the Q register zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
    ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
    ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
    ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
    ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
    ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
    ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
    ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
    ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
    ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
  }
}
2060
2061
// Exercises Ld4r (load one four-element structure and replicate it to all
// lanes) with post-index addressing, alternating between immediate and
// register (x18) post-index amounts.
TEST(neon_ld4_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every source byte holds its own offset from src_base.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // All code generation happens between START() and END(), as in the other
  // tests in this file.
  START();
  __ Mov(x17, src_base + 1);
  // x18 supplies the register post-index amount (one byte).
  __ Mov(x18, 1);
  __ Ld4r(v0.V8B(),
          v1.V8B(),
          v2.V8B(),
          v3.V8B(),
          MemOperand(x17, 4, PostIndex));
  __ Ld4r(v4.V16B(),
          v5.V16B(),
          v6.V16B(),
          v7.V16B(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v8.V4H(),
          v9.V4H(),
          v10.V4H(),
          v11.V4H(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v12.V8H(),
          v13.V8H(),
          v14.V8H(),
          v15.V8H(),
          MemOperand(x17, 8, PostIndex));
  __ Ld4r(v16.V2S(),
          v17.V2S(),
          v18.V2S(),
          v19.V2S(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v20.V4S(),
          v21.V4S(),
          v22.V4S(),
          v23.V4S(),
          MemOperand(x17, 16, PostIndex));
  __ Ld4r(v24.V2D(),
          v25.V2D(),
          v26.V2D(),
          v27.V2D(),
          MemOperand(x17, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements leave the upper half of the Q register zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
    ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
    ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
    ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
    ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
    ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
    ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
    ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
    ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
    ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
    // 1 + 4 + 1 + 1 + 8 + 1 + 16 + 32 bytes of write-back in total.
    ASSERT_EQUAL_64(src_base + 64, x17);
  }
}
2147
2148
// Exercises the lane form of St1 with a plain base-register address for B, H,
// S and D sized elements.
TEST(neon_st1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The buffer doubles as data source (its first 16 bytes seed q0) and as the
  // store destination.
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // x18 = -16 lets each read-back address the 16 bytes just written.
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  // For every element size, store q0 lane by lane starting with the highest
  // lane, then read the resulting 16-byte window back into its own register.
  for (int lane = 15; lane >= 0; --lane) {
    __ St1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int lane = 7; lane >= 0; --lane) {
    __ St1(v0.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 2);
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int lane = 3; lane >= 0; --lane) {
    __ St1(v0.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 4);
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int lane = 1; lane >= 0; --lane) {
    __ St1(v0.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 8);
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // Each window holds q0's elements in reversed lane order.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2198
2199
// Exercises the lane form of St2 for B, H, S and D sized elements, with a
// plain base-register address as well as immediate and register post-index
// addressing.
TEST(neon_st2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[2 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // x17 stays at the start of dst and is used for the read-backs; x18 is the
  // moving store cursor.
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18));
    __ Add(x18, x18, 2);
  }
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, 0 * 16));
  __ Ldr(q3, MemOperand(x17, 1 * 16));
  __ Ldr(q4, MemOperand(x17, 2 * 16));
  __ Ldr(q5, MemOperand(x17, 3 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 4);
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q6, MemOperand(x17, 4 * 16));
  __ Ldr(q7, MemOperand(x17, 5 * 16));
  __ Ldr(q16, MemOperand(x17, 6 * 16));
  __ Ldr(q17, MemOperand(x17, 7 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18));
    __ Add(x18, x18, 8);
  }
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
  }
  __ Ldr(q18, MemOperand(x17, 8 * 16));
  __ Ldr(q19, MemOperand(x17, 9 * 16));
  __ Ldr(q20, MemOperand(x17, 10 * 16));
  __ Ldr(q21, MemOperand(x17, 11 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 16);
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 16);
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
  __ Ldr(q22, MemOperand(x17, 12 * 16));
  __ Ldr(q23, MemOperand(x17, 13 * 16));
  __ Ldr(q24, MemOperand(x17, 14 * 16));
  __ Ldr(q25, MemOperand(x17, 15 * 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // For each element size the second pair of registers repeats the first:
    // the same lanes are stored again through a different addressing mode.
    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
    ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
    ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);

    ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
    ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
    ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
    ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);

    ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
    ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
    ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
    ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);

    // q22/q23 cover the base-register and immediate post-index D stores;
    // q24/q25 cover the two register post-index D stores.
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
    ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q24);
    ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q25);
  }
}
2291
2292
// Exercises the lane form of St3 for B, H, S and D sized elements, with a
// plain base-register address as well as immediate and register post-index
// addressing.
TEST(neon_st3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[3 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // x17 stays at the start of dst and is used for the read-backs; x18 is the
  // moving store cursor.
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
    __ Add(x18, x18, 3);
  }
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, 0 * 16));
  __ Ldr(q4, MemOperand(x17, 1 * 16));
  __ Ldr(q5, MemOperand(x17, 2 * 16));
  __ Ldr(q6, MemOperand(x17, 3 * 16));
  __ Ldr(q7, MemOperand(x17, 4 * 16));
  __ Ldr(q16, MemOperand(x17, 5 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 6);
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
    __ Add(x18, x18, 6);
  }
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q17, MemOperand(x17, 6 * 16));
  __ Ldr(q18, MemOperand(x17, 7 * 16));
  __ Ldr(q19, MemOperand(x17, 8 * 16));
  __ Ldr(q20, MemOperand(x17, 9 * 16));
  __ Ldr(q21, MemOperand(x17, 10 * 16));
  __ Ldr(q22, MemOperand(x17, 11 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
    __ Add(x18, x18, 12);
  }
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
  }
  __ Ldr(q23, MemOperand(x17, 12 * 16));
  __ Ldr(q24, MemOperand(x17, 13 * 16));
  __ Ldr(q25, MemOperand(x17, 14 * 16));
  __ Ldr(q26, MemOperand(x17, 15 * 16));
  __ Ldr(q27, MemOperand(x17, 16 * 16));
  __ Ldr(q28, MemOperand(x17, 17 * 16));

  // Test D stores with and without post index.
  // The three loads below read back the first two 24-byte structures (lane 1
  // via the base register, lane 0 via the immediate post-index). The third
  // store, which uses the register post-index, lands beyond the loaded range
  // and is not read back.
  __ Mov(x0, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
  __ Ldr(q29, MemOperand(x17, 18 * 16));
  __ Ldr(q30, MemOperand(x17, 19 * 16));
  __ Ldr(q31, MemOperand(x17, 20 * 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // For B, H and S, the second group of three registers repeats the first:
    // the same lanes are stored again through post-index addressing.
    ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
    ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
    ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
    ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
    ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
    ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);

    ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
    ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
    ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
    ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
    ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
    ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);

    ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
    ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
    ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
    ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
    ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
    ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);

    // D stores. Memory layout, in ascending address order:
    //   v0.D[1], v1.D[1], v2.D[1], v0.D[0], v1.D[0], v2.D[0], ...
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q29);
    ASSERT_EQUAL_128(0x08090a0b0c0d0e0f, 0x2021222324252627, q30);
    ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x18191a1b1c1d1e1f, q31);
  }
}
2390
2391
// Exercises the lane form of St4 for B, H, S and D sized elements, alternating
// between a plain base-register address and post-index addressing.
TEST(neon_st4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * element sizes * vector size.
  uint8_t dst[4 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // x17 stays at the start of dst and is used for the read-backs; x18 is the
  // moving store cursor.
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  // Each source register carries a distinct high nibble (0x0_, 0x1_, 0x2_,
  // 0x3_) so that the expected values pin down which register every stored
  // element came from.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
  __ Movi(v3.V2D(), 0x3031323334353637, 0x38393a3b3c3d3e3f);

  // Test B stores without post index.
  for (int i = 15; i >= 0; i--) {
    __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  __ Ldr(q4, MemOperand(x17, 0 * 16));
  __ Ldr(q5, MemOperand(x17, 1 * 16));
  __ Ldr(q6, MemOperand(x17, 2 * 16));
  __ Ldr(q7, MemOperand(x17, 3 * 16));

  // Test H stores with post index.
  __ Mov(x0, 8);
  for (int i = 7; i >= 0; i--) {
    __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q16, MemOperand(x17, 4 * 16));
  __ Ldr(q17, MemOperand(x17, 5 * 16));
  __ Ldr(q18, MemOperand(x17, 6 * 16));
  __ Ldr(q19, MemOperand(x17, 7 * 16));

  // Test S stores without post index.
  for (int i = 3; i >= 0; i--) {
    __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
    __ Add(x18, x18, 16);
  }
  __ Ldr(q20, MemOperand(x17, 8 * 16));
  __ Ldr(q21, MemOperand(x17, 9 * 16));
  __ Ldr(q22, MemOperand(x17, 10 * 16));
  __ Ldr(q23, MemOperand(x17, 11 * 16));

  // Test D stores with post index.
  __ Mov(x0, 32);
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));

  __ Ldr(q24, MemOperand(x17, 12 * 16));
  __ Ldr(q25, MemOperand(x17, 13 * 16));
  __ Ldr(q26, MemOperand(x17, 14 * 16));
  __ Ldr(q27, MemOperand(x17, 15 * 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // B: each 4-byte group is {v0, v1, v2, v3}.B[lane], lanes 15 down to 0.
    ASSERT_EQUAL_128(0x3323130332221202, 0x3121110130201000, q4);
    ASSERT_EQUAL_128(0x3727170736261606, 0x3525150534241404, q5);
    ASSERT_EQUAL_128(0x3b2b1b0b3a2a1a0a, 0x3929190938281808, q6);
    ASSERT_EQUAL_128(0x3f2f1f0f3e2e1e0e, 0x3d2d1d0d3c2c1c0c, q7);

    // H: each 8-byte group is {v0, v1, v2, v3}.H[lane], lanes 7 down to 0.
    ASSERT_EQUAL_128(0x3233222312130203, 0x3031202110110001, q16);
    ASSERT_EQUAL_128(0x3637262716170607, 0x3435242514150405, q17);
    ASSERT_EQUAL_128(0x3a3b2a2b1a1b0a0b, 0x3839282918190809, q18);
    ASSERT_EQUAL_128(0x3e3f2e2f1e1f0e0f, 0x3c3d2c2d1c1d0c0d, q19);

    // S: each register holds {v0, v1, v2, v3}.S[lane], lanes 3 down to 0.
    ASSERT_EQUAL_128(0x3031323320212223, 0x1011121300010203, q20);
    ASSERT_EQUAL_128(0x3435363724252627, 0x1415161704050607, q21);
    ASSERT_EQUAL_128(0x38393a3b28292a2b, 0x18191a1b08090a0b, q22);
    ASSERT_EQUAL_128(0x3c3d3e3f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);

    // D: lane 0 of all four registers, followed by lane 1.
    ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
    ASSERT_EQUAL_128(0x38393a3b3c3d3e3f, 0x28292a2b2c2d2e2f, q25);
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
    ASSERT_EQUAL_128(0x3031323334353637, 0x2021222324252627, q27);
  }
}
2473
2474
// Exercises the lane form of Ld1 with post-index addressing, using both
// immediate and register post-index amounts.
TEST(neon_ld1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every source byte holds its own offset from src_base.
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One independent base register per sub-test, all starting at src_base.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  // Part 1: populate whole registers lane by lane, highest lane first, with an
  // immediate post-index equal to the element size.
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld1(v0.B(), lane, MemOperand(x17, 1, PostIndex));
  }

  for (int lane = 7; lane >= 0; --lane) {
    __ Ld1(v1.H(), lane, MemOperand(x18, 2, PostIndex));
  }

  for (int lane = 3; lane >= 0; --lane) {
    __ Ld1(v2.S(), lane, MemOperand(x19, 4, PostIndex));
  }

  for (int lane = 1; lane >= 0; --lane) {
    __ Ld1(v3.D(), lane, MemOperand(x20, 8, PostIndex));
  }

  // Part 2: overwrite a single lane of a register that already holds data,
  // post-indexing by register x25 (1, 2, 3, then 4 bytes).
  __ Mov(x25, 1);
  __ Ldr(q4, MemOperand(x21));
  __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q5, MemOperand(x22));
  __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q6, MemOperand(x23));
  __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q7, MemOperand(x24));
  __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Part 1 results.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
    // Part 2 results: only the reloaded lane differs from the initial fill.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
    // Base registers after write-back.
    ASSERT_EQUAL_64(src_base + 16, x17);
    ASSERT_EQUAL_64(src_base + 16, x18);
    ASSERT_EQUAL_64(src_base + 16, x19);
    ASSERT_EQUAL_64(src_base + 16, x20);
    ASSERT_EQUAL_64(src_base + 1, x21);
    ASSERT_EQUAL_64(src_base + 2, x22);
    ASSERT_EQUAL_64(src_base + 3, x23);
    ASSERT_EQUAL_64(src_base + 4, x24);
  }
}
2551
2552
// Exercises the lane form of St1 with immediate post-index addressing for B,
// H, S and D sized elements.
TEST(neon_st1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The buffer doubles as data source (its first 16 bytes seed q0) and as the
  // store destination.
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // x18 = -16 lets each read-back address the 16 bytes just written.
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  // For every element size, store q0 lane by lane starting with the highest
  // lane, then read the resulting 16-byte window back into its own register.
  for (int lane = 15; lane >= 0; --lane) {
    __ St1(v0.B(), lane, MemOperand(x17, 1, PostIndex));
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int lane = 7; lane >= 0; --lane) {
    __ St1(v0.H(), lane, MemOperand(x17, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int lane = 3; lane >= 0; --lane) {
    __ St1(v0.S(), lane, MemOperand(x17, 4, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int lane = 1; lane >= 0; --lane) {
    __ St1(v0.D(), lane, MemOperand(x17, 8, PostIndex));
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // Each window holds q0's elements in reversed lane order.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2598
2599
// Exercises Ld1r (load one element and replicate it to all lanes) with a
// plain base-register address, for every vector arrangement used below.
TEST(neon_ld1_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every source byte holds its own offset from src_base.
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // The address starts one byte into the buffer and moves on by a single byte
  // before each subsequent load, whatever the element size.
  __ Mov(x17, src_base + 1);
  __ Ld1r(v0.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v1.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v2.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v3.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v4.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v5.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v6.V1D(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v7.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements leave the upper half of the Q register zero.
    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
    ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);
  }
}
2641
2642
// Ld1r with post-index addressing, mixing immediate and register (x18)
// increments, and checking the final value of the base register.
TEST(neon_ld1_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[64];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  // Register post-index increment of one byte.
  __ Mov(x18, 1);
  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
  __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
  __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
    ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
    // 1 (initial) + 1 + 1 + 1 + 2 + 1 + 4 + 8 = 19.
    ASSERT_EQUAL_64(src_base + 19, x17);
  }
}
2677
2678
// St1 (multiple structures) with one to four D-sized registers and a plain
// base address. Each store is read back with Ldr.
TEST(neon_st1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[14 * kDRegSizeInBytes];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Load the first 64 bytes into q0-q3, then rewind the base register.
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

  // One register.
  __ St1(v0.V8B(), MemOperand(x17));
  __ Ldr(d16, MemOperand(x17, 8, PostIndex));

  // Two registers.
  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));

  // Three registers.
  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
  __ Ldr(d18, MemOperand(x17, 8, PostIndex));
  __ Ldr(d19, MemOperand(x17, 8, PostIndex));
  __ Ldr(d20, MemOperand(x17, 8, PostIndex));

  // Four registers, as V2S and then as V1D.
  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The source registers still hold the original buffer contents.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
    // Only the low 64 bits of each source register were stored.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2734
2735
// St1 (multiple structures) with one to four D-sized registers and immediate
// post-index addressing. Data is read back using negative register offsets.
TEST(neon_st1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[64 + 14 * kDRegSizeInBytes];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // Negative offsets used to look back over the bytes just written.
  __ Mov(x18, -8);
  __ Mov(x19, -16);
  __ Mov(x20, -24);
  __ Mov(x21, -32);
  // Load the first 64 bytes into q0-q3, then rewind the base register.
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

  // One register.
  __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
  __ Ldr(d16, MemOperand(x17, x18));

  // Two registers.
  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));

  // Three registers.
  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
  __ Ldr(d18, MemOperand(x17, x20));
  __ Ldr(d19, MemOperand(x17, x19));
  __ Ldr(d20, MemOperand(x17, x18));

  // Four registers, as V2S and then as V1D.
  __ St1(v0.V2S(),
         v1.V2S(),
         v2.V2S(),
         v3.V2S(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q21, MemOperand(x17, x21));
  __ Ldr(q22, MemOperand(x17, x19));

  __ St1(v0.V1D(),
         v1.V1D(),
         v2.V1D(),
         v3.V1D(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q23, MemOperand(x17, x21));
  __ Ldr(q24, MemOperand(x17, x19));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2799
2800
// St1 (multiple structures) with one to four Q-sized registers and a plain
// base address. Each store is read back with Ldr.
TEST(neon_st1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[64 + 160];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Load the first 64 bytes into q0-q3; x17 is left just past them.
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // One register.
  __ St1(v0.V16B(), MemOperand(x17));
  __ Ldr(q16, MemOperand(x17, 16, PostIndex));

  // Two registers.
  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));
  __ Ldr(q18, MemOperand(x17, 16, PostIndex));

  // Three registers.
  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
  __ Ldr(q19, MemOperand(x17, 16, PostIndex));
  __ Ldr(q20, MemOperand(x17, 16, PostIndex));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));

  // Four registers.
  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17, 16, PostIndex));
  __ Ldr(q25, MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2851
2852
// St1 (multiple structures) with one to four Q-sized registers and immediate
// post-index addressing. Data is read back using negative register offsets.
TEST(neon_st1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[64 + 160];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // Negative offsets used to look back over the bytes just written.
  __ Mov(x18, -16);
  __ Mov(x19, -32);
  __ Mov(x20, -48);
  __ Mov(x21, -64);
  // Load the first 64 bytes into q0-q3; x17 is left just past them.
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // One register.
  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q16, MemOperand(x17, x18));

  // Two registers.
  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));
  __ Ldr(q18, MemOperand(x17, x18));

  // Three registers.
  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
  __ Ldr(q19, MemOperand(x17, x20));
  __ Ldr(q20, MemOperand(x17, x19));
  __ Ldr(q21, MemOperand(x17, x18));

  // Four registers.
  __ St1(v0.V2D(),
         v1.V2D(),
         v2.V2D(),
         v3.V2D(),
         MemOperand(x17, 64, PostIndex));
  __ Ldr(q22, MemOperand(x17, x21));
  __ Ldr(q23, MemOperand(x17, x20));
  __ Ldr(q24, MemOperand(x17, x19));
  __ Ldr(q25, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2912
2913
// St2 with D-sized registers and a plain base address. The stores overlap,
// so the whole buffer is reloaded and checked at the end.
TEST(neon_st2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[4 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 walks the source data, x18 is the store address.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 22 and 33.
  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
  __ Add(x18, x18, 11);
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  // Reload the whole buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
    ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);
  }
}
2952
2953
// St2 with D-sized registers, using register and immediate post-index
// addressing. The stores overlap, so the buffer is reloaded and checked.
TEST(neon_st2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[4 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x22 is the register post-index increment.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 5 and 21.
  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  // Reload the affected part of the buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);
  }
}
2990
2991
// St2 with Q-sized registers and a plain base address. The stores overlap,
// so the buffer is reloaded and checked at the end.
TEST(neon_st2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[5 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 walks the source data, x18 is the store address.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 8, 30 and 32.
  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
  __ Add(x18, x18, 8);
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Reload the first 64 bytes for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);
  }
}
3032
3033
// St2 with Q-sized registers, using register and immediate post-index
// addressing. The stores overlap, so the buffer is reloaded and checked.
TEST(neon_st2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[5 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x22 is the register post-index increment.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 5, 37 and 42.
  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Reload the whole buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
    ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);
  }
}
3074
3075
// St3 with D-sized registers and a plain base address. The stores overlap,
// so the start of the buffer is reloaded and checked at the end.
TEST(neon_st3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[3 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 walks the source data, x18 is the store address.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 3 and 5.
  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
  __ Add(x18, x18, 3);
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));

  // Reload the first 32 bytes for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
    ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);
  }
}
3112
3113
// St3 with D-sized registers, using register and immediate post-index
// addressing. The stores overlap, so the buffer is reloaded and checked.
TEST(neon_st3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[4 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x22 is the register post-index increment.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 5 and 29.
  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));

  // Reload the whole buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);
  }
}
3153
3154
// St3 with Q-sized registers and a plain base address. The stores overlap,
// so the whole buffer is reloaded and checked at the end.
TEST(neon_st3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[6 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 walks the source data, x18 is the store address.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 5, 17 and 39.
  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Reload the whole buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
    ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);
  }
}
3200
3201
// St3 with Q-sized registers, using register and immediate post-index
// addressing. The stores overlap, so the buffer is reloaded and checked.
TEST(neon_st3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[7 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x22 is the register post-index increment.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 5, 53 and 58.
  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Reload the whole buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
    ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
    ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
  }
}
3247
3248
// St4 with D-sized registers and a plain base address. The stores overlap,
// so the whole buffer is reloaded and checked at the end.
TEST(neon_st4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[4 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 walks the source data, x18 is the store address.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 12 and 27.
  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
  __ Add(x18, x18, 15);
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));

  // Reload the whole buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1110010032221202, 0x3121110130201000, q0);
    ASSERT_EQUAL_128(0x1003020100322322, 0x1312030231302120, q1);
    ASSERT_EQUAL_128(0x1407060504333231, 0x3023222120131211, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);
  }
}
3290
3291
// St4 with D-sized registers, using register and immediate post-index
// addressing. The stores overlap, so the buffer is reloaded and checked.
TEST(neon_st4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[5 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x22 is the register post-index increment.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 5 and 37.
  __ St4(v0.V8B(),
         v1.V8B(),
         v2.V8B(),
         v3.V8B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V4H(),
         v1.V4H(),
         v2.V4H(),
         v3.V4H(),
         MemOperand(x18, 32, PostIndex));
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));

  // Reload the whole buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
    ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
    ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);
  }
}
3342
3343
// St4 with Q-sized registers and a plain base address. The stores overlap,
// so the whole buffer is reloaded and checked at the end.
TEST(neon_st4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[7 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 walks the source data, x18 is the store address.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 5, 17 and 39.
  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
  // x18 is not used again in this test after this update.
  __ Add(x18, x18, 10);

  // Reload the whole buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
    ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
    ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);
  }
}
3393
3394
// St4 with Q-sized registers, using register and immediate post-index
// addressing. The stores overlap, so the buffer is reloaded and checked.
TEST(neon_st4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the buffer holds its own offset.
  uint8_t src[9 * 16];
  for (unsigned offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x22 is the register post-index increment.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // Stores land at byte offsets 0, 5, 69 and 74.
  __ St4(v0.V16B(),
         v1.V16B(),
         v2.V16B(),
         v3.V16B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V8H(),
         v1.V8H(),
         v2.V8H(),
         v3.V8H(),
         MemOperand(x18, 64, PostIndex));
  __ St4(v0.V4S(),
         v1.V4S(),
         v2.V4S(),
         v3.V4S(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));

  // Reload the whole buffer for checking.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));
  __ Ldr(q7, MemOperand(x19, 16, PostIndex));
  __ Ldr(q8, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
    ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
    ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
    ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
    ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
    ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
    ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);
  }
}
3457
3458
// Pairwise min/max (Sminp, Smaxp, Uminp, Umaxp) where the destination may
// alias a source. Each instruction is run four ways: with a distinct
// destination, with the destination aliasing the first source, aliasing the
// second source, and aliasing both.
TEST(neon_destructive_minmaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // Source operands.
  __ Movi(v0.V2D(), 0, 0x2222222233333333);
  __ Movi(v1.V2D(), 0, 0x0000000011111111);

  // Signed pairwise minimum.
  __ Sminp(v16.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v17, v0);
  __ Sminp(v17.V2S(), v17.V2S(), v1.V2S());
  __ Mov(v18, v1);
  __ Sminp(v18.V2S(), v0.V2S(), v18.V2S());
  __ Mov(v19, v0);
  __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());

  // Signed pairwise maximum.
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v21, v0);
  __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S());
  __ Mov(v22, v1);
  __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S());
  __ Mov(v23, v0);
  __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());

  // Unsigned pairwise minimum.
  __ Uminp(v24.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v25, v0);
  __ Uminp(v25.V2S(), v25.V2S(), v1.V2S());
  __ Mov(v26, v1);
  __ Uminp(v26.V2S(), v0.V2S(), v26.V2S());
  __ Mov(v27, v0);
  __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());

  // Unsigned pairwise maximum.
  __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v29, v0);
  __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S());
  __ Mov(v30, v1);
  __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S());
  __ Mov(v31, v0);
  __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
  END();

  if (CAN_RUN()) {
    RUN();

    // Within each group the first three results must agree; the fourth uses
    // v0 for both sources.
    ASSERT_EQUAL_128(0, 0x0000000022222222, q16);
    ASSERT_EQUAL_128(0, 0x0000000022222222, q17);
    ASSERT_EQUAL_128(0, 0x0000000022222222, q18);
    ASSERT_EQUAL_128(0, 0x2222222222222222, q19);

    ASSERT_EQUAL_128(0, 0x1111111133333333, q20);
    ASSERT_EQUAL_128(0, 0x1111111133333333, q21);
    ASSERT_EQUAL_128(0, 0x1111111133333333, q22);
    ASSERT_EQUAL_128(0, 0x3333333333333333, q23);

    ASSERT_EQUAL_128(0, 0x0000000022222222, q24);
    ASSERT_EQUAL_128(0, 0x0000000022222222, q25);
    ASSERT_EQUAL_128(0, 0x0000000022222222, q26);
    ASSERT_EQUAL_128(0, 0x2222222222222222, q27);

    ASSERT_EQUAL_128(0, 0x1111111133333333, q28);
    ASSERT_EQUAL_128(0, 0x1111111133333333, q29);
    ASSERT_EQUAL_128(0, 0x1111111133333333, q30);
    ASSERT_EQUAL_128(0, 0x3333333333333333, q31);
  }
}
3523
3524
// Tbl with one and four table registers where the destination may alias the
// index register, a table register, or both.
TEST(neon_destructive_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // v0 holds the indices; v1-v4 hold the table data.
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // Single-register table: distinct destination, then destination aliasing
  // the indices, the table, and both.
  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbl(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbl(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-register table: the same aliasing combinations.
  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbl(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    // The expected values contain zero bytes, not the 0x55 that v16 and v20
    // held before the lookup.
    ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q16);
    ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q17);
    ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q18);
    ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19);

    ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
    ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
    ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q22);
    ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3579
3580
// Exercises Tbx with one- and four-register tables when the destination
// register is also used as a source operand (index register, table register,
// or both). The expected values show that lanes with an out-of-range index
// keep the value the destination held before the instruction.
TEST(neon_destructive_tbx) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // v0 holds the lookup indices; v1-v4 hold the table bytes.
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // Single-register table. v16 is the non-aliased case, pre-filled with 0x55.
  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
  // Destination is also the index register.
  __ Mov(v17, v0);
  __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
  // Destination is also the table register.
  __ Mov(v18, v1);
  __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
  // Destination, table and index are all the same register (a copy of v0).
  __ Mov(v19, v0);
  __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-register table. v20 is the non-aliased case, pre-filled with 0x55.
  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  // Destination is also the index register.
  __ Mov(v21, v0);
  __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  // Destination is also the first table register (v22-v25 copy v1-v4).
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  // Destination is the first table register and the index register. Note that
  // the table here is {v0, v1, v2, v3} (copies), not {v1, v2, v3, v4}.
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbx(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    // In-range lanes match the Tbl results; the remaining lanes hold the
    // destination's previous contents (0x55, v0's bytes or v1's bytes).
    ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16);
    ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17);
    ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18);
    ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19);

    ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
    ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
    ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22);
    ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3635
3636
// Exercises Fcvtl and Fcvtl2 when the destination register is the same as the
// source register. Each aliased result is expected to equal the result of the
// same conversion performed into a separate destination register.
TEST(neon_destructive_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single precision to double precision. v16/v17 are the non-aliased
  // references; v18/v19 convert in place.
  __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
  __ Fcvtl(v16.V2D(), v0.V2S());
  __ Fcvtl2(v17.V2D(), v0.V4S());
  __ Mov(v18, v0);
  __ Mov(v19, v0);
  __ Fcvtl(v18.V2D(), v18.V2S());
  __ Fcvtl2(v19.V2D(), v19.V4S());

  // Half precision to single precision. v20/v21 are the non-aliased
  // references; v22/v23 convert in place.
  __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
  __ Fcvtl(v20.V4S(), v1.V4H());
  __ Fcvtl2(v21.V4S(), v1.V8H());
  __ Mov(v22, v1);
  __ Mov(v23, v1);
  __ Fcvtl(v22.V4S(), v22.V4H());
  __ Fcvtl2(v23.V4S(), v23.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Fcvtl results come from the low 64 bits of the source, Fcvtl2 results
    // from the high 64 bits.
    ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q16);
    ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q17);
    ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q18);
    ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q19);

    ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q20);
    ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q21);
    ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q22);
    ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q23);
  }
}
3673
// Checks the half-precision vector form of Fadd (4H and 8H arrangements),
// including an inexact sum, an infinite operand and NaN operands.
TEST(fadd_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V8H(), kFP16PositiveInfinity);
  // NOTE(review): v5 is initialised but not read by any Fadd below.
  __ Fmov(v5.V8H(), kFP16NegativeInfinity);
  // 0x7c2f and 0xfe0f have an all-ones exponent and a non-zero fraction, so
  // both are NaN encodings (see the expectations on q11 and q12).
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c2f));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe0f));

  __ Fadd(v8.V4H(), v1.V4H(), v0.V4H());
  __ Fadd(v9.V8H(), v3.V8H(), v2.V8H());
  __ Fadd(v10.V4H(), v4.V4H(), v3.V4H());

  __ Fadd(v11.V4H(), v6.V4H(), v1.V4H());
  __ Fadd(v12.V4H(), v7.V4H(), v7.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // The 4H forms are expected to leave the upper 64 bits of the destination
    // clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x6418641864186418, q8);
    // 2053.5 is unrepresentable in FP16.
    ASSERT_EQUAL_128(0x6803680368036803, 0x6803680368036803, q9);

    // +Infinity + 2048.0: the expected lanes are 0x7c00, the FP16 encoding of
    // +infinity (not a NaN).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q10);
    // Note: we test NaNs here as vectors aren't covered by process_nans_half
    // and we don't have traces for half-precision enabled hardware.
    // Quiet NaN from Signalling.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e2f7e2f7e2f7e2f, q11);
    // Quiet NaN.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfe0ffe0ffe0ffe0f, q12);
  }
}
3715
// Checks the half-precision vector form of Fsub (4H and 8H arrangements),
// including an in-place subtraction, an inexact difference, an infinite
// operand and NaN operands.
TEST(fsub_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  // NOTE(review): v5 is initialised but not read by any Fsub below.
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);
  // 0x7c22 and 0xfe02 have an all-ones exponent and a non-zero fraction, so
  // both are NaN encodings (see the expectations on q11 and q12).
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c22));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe02));

  // The first Fsub overwrites v0 (destination == second operand), so the
  // later Fsub into v10 reads the updated v0, not the original 24.0.
  __ Fsub(v0.V4H(), v1.V4H(), v0.V4H());
  __ Fsub(v8.V8H(), v3.V8H(), v2.V8H());
  __ Fsub(v9.V4H(), v4.V4H(), v3.V4H());
  __ Fsub(v10.V4H(), v0.V4H(), v1.V4H());

  __ Fsub(v11.V4H(), v6.V4H(), v2.V4H());
  __ Fsub(v12.V4H(), v7.V4H(), v7.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x63d063d063d063d0, q0);
    // 2042.5 is unrepresentable in FP16:
    ASSERT_EQUAL_128(0x67fa67fa67fa67fa, 0x67fa67fa67fa67fa, q8);

    // +Infinity - 2048.0: the expected lanes are 0x7c00, the FP16 encoding of
    // +infinity (not a NaN).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q9);
    // (1024.0 - 24.0) - 1024.0, using the v0 produced by the first Fsub.
    ASSERT_EQUAL_128(0x0000000000000000, 0xce00ce00ce00ce00, q10);
    // Note: we test NaNs here as vectors aren't covered by process_nans_half
    // and we don't have traces for half-precision enabled hardware.
    // Quiet NaN from Signalling.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e227e227e227e22, q11);
    // Quiet NaN.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfe02fe02fe02fe02, q12);
  }
}
3758
// Checks the half-precision vector form of Fmul (4H and 8H arrangements) with
// finite operands, both operand orders, and infinite operands.
TEST(fmul_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  // v6 and v9 multiply the same two values with the operands swapped.
  __ Fmul(v6.V4H(), v1.V4H(), v0.V4H());
  __ Fmul(v7.V8H(), v3.V8H(), v2.V8H());
  __ Fmul(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fmul(v9.V4H(), v0.V4H(), v1.V4H());
  __ Fmul(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // -2.0 * 24.0
    ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q6);
    // 0.5 * 5.5
    ASSERT_EQUAL_128(0x4180418041804180, 0x4180418041804180, q7);
    // +Infinity * 0.5 (0x7c00 encodes +infinity).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
    // 24.0 * -2.0
    ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q9);
    // -Infinity * 24.0 (0xfc00 encodes -infinity).
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
  }
}
3789
// Checks the half-precision vector form of Fdiv (4H and 8H arrangements) with
// exact and inexact quotients and with infinite dividends.
TEST(fdiv_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fdiv(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fdiv(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fdiv(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fdiv(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fdiv(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 24.0 / -2.0
    ASSERT_EQUAL_128(0x0000000000000000, 0xca00ca00ca00ca00, q6);
    // 5.5 / 0.5
    ASSERT_EQUAL_128(0x4980498049804980, 0x4980498049804980, q7);
    // +Infinity / 0.5 (0x7c00 encodes +infinity).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
    // -0.083333... is unrepresentable in FP16:
    ASSERT_EQUAL_128(0x0000000000000000, 0xad55ad55ad55ad55, q9);
    // -Infinity / 24.0 (0xfc00 encodes -infinity).
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
  }
}
3821
// Checks Fcvtl/Fcvtl2 for half->single (v0, v1) and single->double (v2-v4)
// conversions. The input bit patterns cover infinities, NaNs, signed zeros
// and subnormal encodings as well as ordinary values.
TEST(neon_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
  __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
  __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  // Half precision to single precision.
  __ Fcvtl(v16.V4S(), v0.V4H());
  __ Fcvtl2(v17.V4S(), v0.V8H());
  __ Fcvtl(v18.V4S(), v1.V4H());
  __ Fcvtl2(v19.V4S(), v1.V8H());

  // Single precision to double precision.
  __ Fcvtl(v20.V2D(), v2.V2S());
  __ Fcvtl2(v21.V2D(), v2.V4S());
  __ Fcvtl(v22.V2D(), v3.V2S());
  __ Fcvtl2(v23.V2D(), v3.V4S());
  __ Fcvtl(v24.V2D(), v4.V2S());
  __ Fcvtl2(v25.V2D(), v4.V4S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Even-numbered results come from the low half of the source (Fcvtl),
    // odd-numbered results from the high half (Fcvtl2).
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
    ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
    ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
    ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
    ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
    ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
  }
}
3860
3861
// Checks Fcvtn/Fcvtn2 for single->half (v0-v2) and double->single (v3-v8)
// narrowing conversions. Fcvtn fills the low half of the destination and
// Fcvtn2 is then used on the same destination to fill the high half.
TEST(neon_fcvtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);

  // Single precision to half precision.
  __ Fcvtn(v16.V4H(), v0.V4S());
  __ Fcvtn2(v16.V8H(), v1.V4S());
  __ Fcvtn(v17.V4H(), v2.V4S());
  // Double precision to single precision.
  __ Fcvtn(v18.V2S(), v3.V2D());
  __ Fcvtn2(v18.V4S(), v4.V2D());
  __ Fcvtn(v19.V2S(), v5.V2D());
  __ Fcvtn2(v19.V4S(), v6.V2D());
  __ Fcvtn(v20.V2S(), v7.V2D());
  __ Fcvtn2(v20.V4S(), v8.V2D());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
    // Only the low 64 bits of v17 are checked; no Fcvtn2 was applied to it.
    ASSERT_EQUAL_64(0x7e7ffe7f00008000, d17);
    ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
  }
}
3897
// Regression test for Fcvtn and Fcvtxn when the destination register is the
// same as the source register. The in-place results (v0, v1, v2) are expected
// to equal those written to separate destinations (v16, v17), with the upper
// 64 bits of every result clear.
TEST(neon_fcvtn_fcvtxn_regression_test) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(v0.V2D(), 0x3ff0000000000000, 0xbff0000000000000);
  __ Movi(v1.V2D(), 0x3f800000bf800000, 0x40000000c0000000);
  __ Movi(v2.V2D(), 0x3ff0000000000000, 0xbff0000000000000);

  // Reference conversions into separate destination registers. These must be
  // emitted before the in-place conversions below overwrite v0 and v1.
  __ Fcvtn(v16.V2S(), v0.V2D());
  __ Fcvtn(v17.V4H(), v1.V4S());
  // In-place conversions: destination == source.
  __ Fcvtn(v0.V2S(), v0.V2D());
  __ Fcvtn(v1.V4H(), v1.V4S());
  __ Fcvtxn(v2.V2S(), v2.V2D());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q2);
  }
}
3922
// Checks Fcvtxn/Fcvtxn2 (double->single narrowing) in vector form over a
// range of input bit patterns, plus one scalar-form conversion (s21 <- d0).
// Fcvtxn fills the low half of each destination and Fcvtxn2 the high half.
TEST(neon_fcvtxn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
  __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
  __ Fcvtxn(v16.V2S(), v0.V2D());
  __ Fcvtxn2(v16.V4S(), v1.V2D());
  __ Fcvtxn(v17.V2S(), v2.V2D());
  __ Fcvtxn2(v17.V4S(), v3.V2D());
  __ Fcvtxn(v18.V2S(), v4.V2D());
  __ Fcvtxn2(v18.V4S(), v5.V2D());
  __ Fcvtxn(v19.V2S(), v6.V2D());
  __ Fcvtxn2(v19.V4S(), v7.V2D());
  __ Fcvtxn(v20.V2S(), v8.V2D());
  __ Fcvtxn2(v20.V4S(), v9.V2D());
  // Scalar form, converting the low double of v0.
  __ Fcvtxn(s21, d0);
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
    ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
    // The scalar result matches lane 0 of q16; the rest of q21 is expected to
    // be zero.
    ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
  }
}
3960
// Checks the three-register vector form of Addp on 16 byte lanes, using
// operands whose byte sums wrap around.
TEST(neon_3same_addp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Addp(v16.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // The low 64 bits hold the pairwise sums of v0's bytes (each pair sums to
    // 0xff); the high 64 bits hold the pairwise sums of v1's bytes.
    ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
  }
}
3977
// Checks Sqdmulh and Sqrdmulh in their three-register vector forms (4H, 4S)
// and scalar forms (H, S). The inputs include 0x8000 * 0x8000 and
// 0x80000000 * 0x80000000 lanes, for which the saturated maximum is expected.
TEST(neon_3same_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 are the 16-bit operands, v2/v3 the 32-bit operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqdmulh(h18, h0, h1);
  __ Sqdmulh(s19, s2, s3);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmulh(h22, h0, h1);
  __ Sqrdmulh(s23, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    // Sqdmulh results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0x7fffffff, q19);
    // Sqrdmulh results; these differ from the Sqdmulh results in some lanes
    // (compare q20 with q16 and q21 with q17).
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  }
}
4012
// Checks the by-element forms of Sqdmulh and Sqrdmulh, where the second
// operand is a single indexed lane of a vector register. Vector destinations
// use lane 1 of the multiplier and scalar destinations use lane 0.
TEST(neon_byelement_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 are the 16-bit operands, v2/v3 the 32-bit operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqdmulh(h18, h0, v1.H(), 0);
  __ Sqdmulh(s19, s2, v3.S(), 0);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmulh(h22, h0, v1.H(), 0);
  __ Sqrdmulh(s23, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Sqdmulh results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
    ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0x7fffffff, q19);
    // Sqrdmulh results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  }
}
4047
// Checks Sqrdmlah in its three-register vector forms (4H, 4S) and scalar
// forms (H, S). Requires the kRDM feature in addition to kNEON.
TEST(neon_3same_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0/v1 are the 16-bit multiplicands, v2/v3 the 32-bit multiplicands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // v16-v19 are the accumulators; they are both read and written by Sqrdmlah,
  // so they are given initial values here.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlah(h18, h0, h1);
  __ Sqrdmlah(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    // The 4H and scalar forms are expected to clear the destination bits above
    // the written lanes, even though the accumulators had them set.
    ASSERT_EQUAL_128(0, 0x0000040104010000, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000217fffffff, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4078
// Checks the by-element forms of Sqrdmlah, where the second multiplicand is a
// single indexed lane. Vector destinations use lane 1 and scalar destinations
// use lane 0. Requires the kRDM feature in addition to kNEON.
TEST(neon_byelement_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0/v1 are the 16-bit multiplicands, v2/v3 the 32-bit multiplicands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // v16-v19 are the accumulators; they are both read and written by Sqrdmlah,
  // so they are given initial values here.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlah(h18, h0, v1.H(), 0);
  __ Sqrdmlah(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x0000040104018000, q16);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x0000002100107ff0, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4109
// Checks Sqrdmlsh in its three-register vector forms (4H, 4S) and scalar
// forms (H, S). Requires the kRDM feature in addition to kNEON.
TEST(neon_3same_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0/v1 are the 16-bit multiplicands, v2/v3 the 32-bit multiplicands. Note
  // that the 16-bit inputs differ from those used by the other
  // sqrdmlah/sqrdmlsh tests (lane 0 is 0x0500 * 0x0080 here).
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004000500);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000100080);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // v16-v19 are the accumulators, each lane pre-loaded with 0x4000.
  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlsh(h18, h0, h1);
  __ Sqrdmlsh(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x40003fff40003ffb, q16);
    ASSERT_EQUAL_128(0x40003fffc0004000, 0x40004000c0004000, q17);
    ASSERT_EQUAL_128(0, 0x3ffb, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4140
// Checks the by-element forms of Sqrdmlsh, where the second multiplicand is a
// single indexed lane. Vector destinations use lane 1 and scalar destinations
// use lane 0. Requires the kRDM feature in addition to kNEON.
TEST(neon_byelement_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0/v1 are the 16-bit multiplicands, v2/v3 the 32-bit multiplicands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // v16-v19 are the accumulators, each lane pre-loaded with 0x4000.
  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlsh(h18, h0, v1.H(), 0);
  __ Sqrdmlsh(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x4000400040004010, q16);
    ASSERT_EQUAL_128(0x4000400040004010, 0x4000400040004010, q17);
    ASSERT_EQUAL_128(0, 0xc000, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4171
// Checks the three-register vector forms of Sdot and Udot with 4S/16B and
// 2S/8B arrangements. Requires the kDotProduct feature in addition to kNEON.
TEST(neon_3same_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte operands. v1 contains bytes with the top bit set, so the Sdot and
  // Udot expectations differ.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // v16-v19 are the accumulators, each 32-bit lane pre-loaded with 0x4000.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.V16B());
  __ Sdot(v17.V2S(), v1.V8B(), v2.V8B());

  __ Udot(v18.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v19.V2S(), v1.V8B(), v2.V8B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000037d8000045f8, 0x000037d8000045f8, q16);
    // Lane 0 of v2 is all zero, so lane 0 keeps the initial 0x4000. The 2S
    // form is expected to clear the upper 64 bits.
    ASSERT_EQUAL_128(0, 0x0000515e00004000, q17);
    ASSERT_EQUAL_128(0x000119d8000127f8, 0x000119d8000127f8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e00004000, q19);
  }
}
4202
// Checks the by-element forms of Sdot and Udot, where the second operand is
// the four-byte group selected by index 1 of the S4B view of a register.
// Requires the kDotProduct feature in addition to kNEON.
TEST(neon_byelement_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte operands. v1 contains bytes with the top bit set, so the Sdot and
  // Udot expectations differ.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // v16-v19 are the accumulators, each 32-bit lane pre-loaded with 0x4000.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Sdot(v17.V2S(), v1.V8B(), v2.S4B(), 1);

  __ Udot(v18.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Udot(v19.V2S(), v1.V8B(), v2.S4B(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    // Every lane of v0 is multiplied by the same selected element, so all
    // result lanes of q16 (and of q18) are equal.
    ASSERT_EQUAL_128(0x000037d8000037d8, 0x000037d8000037d8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e0000587e, q17);
    ASSERT_EQUAL_128(0x000119d8000119d8, 0x000119d8000119d8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e0000ca7e, q19);
  }
}
4233
4234
// Checks Saddlp for every source/destination arrangement pair: 16B->8H,
// 8B->4H, 8H->4S, 4H->2S, 4S->2D and 2S->1D.
TEST(neon_2regmisc_saddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Saddlp(v16.V8H(), v0.V16B());
  __ Saddlp(v17.V4H(), v0.V8B());

  __ Saddlp(v18.V4S(), v0.V8H());
  __ Saddlp(v19.V2S(), v0.V4H());

  __ Saddlp(v20.V2D(), v0.V4S());
  __ Saddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form is expected to match the low half of the corresponding
    // 128-bit form, with the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
    ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
    ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
  }
}
4263
// Checks Uaddlp for every source/destination arrangement pair: 16B->8H,
// 8B->4H, 8H->4S, 4H->2S, 4S->2D and 2S->1D. The input is the same as in the
// Saddlp test, so the two sets of expectations can be compared directly.
TEST(neon_2regmisc_uaddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Uaddlp(v16.V8H(), v0.V16B());
  __ Uaddlp(v17.V4H(), v0.V8B());

  __ Uaddlp(v18.V4S(), v0.V8H());
  __ Uaddlp(v19.V2S(), v0.V4H());

  __ Uaddlp(v20.V2D(), v0.V4S());
  __ Uaddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form is expected to match the low half of the corresponding
    // 128-bit form, with the upper 64 bits clear.
    ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
    ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
    ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
  }
}
4292
// Checks Sadalp for every source/destination arrangement pair. Each
// destination is first loaded with an accumulator value (via Mov), because
// Sadalp both reads and writes its destination.
TEST(neon_2regmisc_sadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 serve as source operands; v1-v4 also provide the initial
  // accumulator values for the wider lane sizes.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Bytes accumulated into halfwords (accumulator starts as v1).
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Sadalp(v16.V8H(), v0.V16B());
  __ Sadalp(v17.V4H(), v0.V8B());

  // Halfwords accumulated into words (accumulator starts as v2).
  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Sadalp(v18.V4S(), v1.V8H());
  __ Sadalp(v19.V2S(), v1.V4H());

  // Words accumulated into doublewords (accumulators start as v3 and v4).
  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Sadalp(v20.V2D(), v2.V4S());
  __ Sadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit forms are expected to clear the upper 64 bits, even where the
    // initial accumulator had them set.
    ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
    ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4331
// Checks Uadalp for every source/destination arrangement pair. Each
// destination is first loaded with an accumulator value (via Mov), because
// Uadalp both reads and writes its destination. Inputs and accumulators are
// the same as in the Sadalp test.
TEST(neon_2regmisc_uadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 serve as source operands; v1-v4 also provide the initial
  // accumulator values for the wider lane sizes.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Bytes accumulated into halfwords (accumulator starts as v1).
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Uadalp(v16.V8H(), v0.V16B());
  __ Uadalp(v17.V4H(), v0.V8B());

  // Halfwords accumulated into words (accumulator starts as v2).
  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Uadalp(v18.V4S(), v1.V8H());
  __ Uadalp(v19.V2S(), v1.V4H());

  // Words accumulated into doublewords (accumulators start as v3 and v4).
  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Uadalp(v20.V2D(), v2.V4S());
  __ Uadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit forms are expected to clear the upper 64 bits, even where the
    // initial accumulator had them set.
    ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
    ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
    ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4370
// Checks the three-register vector forms of Mla, Mls and Mul on 16 byte
// lanes, all using the same pair of multiplicands (v0, v1).
TEST(neon_3same_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // v16 and v17 are the accumulators for Mla and Mls; both start with the
  // same value.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
  __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
  __ Mul(v18.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // q16 is the accumulator plus the per-byte product held in q18; q17 is
    // the accumulator minus that product (both modulo 256 per lane).
    ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
    ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
    ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
  }
}
4394
4395
// Checks the three-register vector forms of Saba, Uaba, Sabd and Uabd on 16
// byte lanes, all using the same pair of operands (v0, v1).
TEST(neon_3same_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // v16 and v17 are the accumulators for Saba and Uaba; both start with the
  // same value.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
  __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
  __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
  __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // q16/q17 are the accumulator plus the differences held in q18/q19
    // respectively (modulo 256 per lane).
    ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
    ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
    ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
    ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
  }
}
4421
4422
// Checks the by-element forms of Mul, Mla and Mls for 4H, 8H, 2S and 4S
// arrangements. The multiplier is a single indexed lane of v1: lane 0 for the
// 64-bit forms and the highest lane (H lane 7 or S lane 3) for the 128-bit
// forms.
TEST(neon_byelement_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  // Plain products.
  __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
  __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
  __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
  __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);

  // Multiply-accumulate into small non-zero accumulators.
  __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
  __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
  __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
  __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
  __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
  __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
  __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);

  // Multiply-subtract. The accumulators v24-v27 are initialised to the values
  // expected in q16-q19, so subtracting the same products should give zero.
  __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
  __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
  __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
  __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
  __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
  __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);

  END();

  if (CAN_RUN()) {
    RUN();
    // Mul results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
    ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);

    // Mla results: the Mul results plus the initial accumulator values.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
    ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
    ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);

    // Mls results: accumulator minus product, expected to cancel exactly.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4477
4478
// Checks the by-element widening multiplies (16-bit sources, 32-bit results):
// SMULL/UMULL, SMLAL/UMLAL and SMLSL/UMLSL, plus their "2" variants which read
// the upper half of the first source vector.
TEST(neon_byelement_mull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  // H lane 7 of v1 is 0x0001, so the non-"2" forms simply sign- or zero-extend
  // the low four halfwords of v0. H lane 0 of v1 is 0x00ff.
  __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
  __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);

  // Accumulate onto small, distinct per-lane constants.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);

  // The subtract accumulators are preloaded with the products asserted for
  // q16-q19, so every result is expected to be zero.
  __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
  __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
  __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
  __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);

  __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Widening products.
    ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
    ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
    ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
    ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);

    // Products plus the per-lane constants.
    ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
    ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
    ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
    ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);

    // Accumulator minus an identical product.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4534
4535
// Checks the by-element signed saturating doubling widening multiplies:
// SQDMULL, SQDMLAL and SQDMLSL, in vector, "2" (upper-half) and scalar forms.
// The inputs used here do not trigger saturation; the expected values are
// simply twice the signed products (plus/minus the accumulator).
TEST(neon_byelement_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  // H lane 7 of v1 is 0x0001 and H lane 0 is 0x00ff.
  __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmull(s18, h0, v1.H(), 7);

  // Accumulate onto small per-lane constants.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlal(s22, h0, v1.H(), 7);

  // The subtract accumulators are preloaded with the values asserted for
  // q16-q18, so every result is expected to be zero.
  __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
  __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
  __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);

  __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlsl(s26, h0, v1.H(), 7);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
    ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
    // Scalar forms: only the low S lane is set; the rest of the register is
    // expected to be zero.
    ASSERT_EQUAL_128(0, 0x0000ab54, q18);

    ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
    ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
    ASSERT_EQUAL_128(0, 0x0000ab55, q22);

    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x00000000, q26);
  }
}
4582
4583
// Checks the widening absolute-difference-and-accumulate instructions
// (SABAL, UABAL, SABAL2, UABAL2) with byte sources and halfword accumulators.
TEST(neon_3diff_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // All four accumulators start from the same value so that the results can
  // be compared against each other directly.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  // The non-"2" forms read the low eight bytes of each source; the "2" forms
  // read the high eight bytes.
  __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // Signed and unsigned results differ only in lanes where the two byte
    // interpretations give a different absolute difference.
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
    ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
    ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
  }
}
4611
4612
// Checks SQDMULL/SQDMULL2 (three-register, different-size form) and the scalar
// SQDMULL, using only the extreme signed values as inputs so that both the
// saturating and the non-saturating paths are covered.
TEST(neon_3diff_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Halfword inputs: every lane is either 0x7fff or 0x8000.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  // Word inputs: every lane is either 0x7fffffff or 0x80000000.
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmull(s20, h0, h1);
  __ Sqdmull(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    // Lanes where both operands are the most negative value cannot hold the
    // doubled product and saturate to the largest positive result.
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
    ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4642
4643
// Checks SQDMLAL/SQDMLAL2 (three-register, different-size form) and the scalar
// SQDMLAL. The sources are extreme signed values and the accumulators are
// chosen so that some lanes saturate, in either direction, while others do not.
TEST(neon_3diff_sqdmlal) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Same multiplicands as neon_3diff_sqdmull.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Accumulators.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
  __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlal(s20, h0, h1);
  __ Sqdmlal(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
    ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
    // Scalar forms: the saturated product plus one stays saturated.
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4680
4681
// Checks SQDMLSL/SQDMLSL2 (three-register, different-size form) and the scalar
// SQDMLSL. The sources are extreme signed values and the accumulators are
// chosen so that some lanes saturate, in either direction, while others do not.
TEST(neon_3diff_sqdmlsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Same multiplicands as neon_3diff_sqdmull.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Accumulators.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
  __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlsl(s20, h0, h1);
  __ Sqdmlsl(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
    ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
    ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
    // Scalar forms: one minus the saturated (maximum positive) product.
    ASSERT_EQUAL_128(0, 0x80000002, q20);
    ASSERT_EQUAL_128(0, 0x8000000000000002, q21);
  }
}
4718
4719
// Checks the widening multiply-accumulate instructions (SMLAL, UMLAL, SMLAL2,
// UMLAL2) with byte sources and halfword accumulators.
TEST(neon_3diff_mla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // All four accumulators start from the same value.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  // The non-"2" forms read the low eight bytes of each source; the "2" forms
  // read the high eight bytes.
  __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // Lanes where one source byte is zero keep their initial accumulator
    // value.
    ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
    ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
    ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
    ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
  }
}
4747
4748
// Checks the widening multiply-subtract instructions (SMLSL, UMLSL, SMLSL2,
// UMLSL2) with byte sources and halfword accumulators. Inputs match
// neon_3diff_mla, so the two tests mirror each other.
TEST(neon_3diff_mls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // All four accumulators start from the same value.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  // The non-"2" forms read the low eight bytes of each source; the "2" forms
  // read the high eight bytes.
  __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // Lanes where one source byte is zero keep their initial accumulator
    // value.
    ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
    ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
    ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
    ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
  }
}
4776
4777
// Checks the register-register integer vector compares on byte lanes: CMEQ,
// CMGE and CMGT (signed), CMHI and CMHS (unsigned). Each instruction is run
// once with a register compared against itself and once against a different
// register.
TEST(neon_3same_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
  __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
  __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
  __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
  __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
  __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
  __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
  __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
  __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
  __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each lane is 0xff when the condition holds and 0x00 otherwise. The
    // self-compares are all-true for the inclusive conditions (eq, ge, hs)
    // and all-false for the strict ones (gt, hi).
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
    ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
  }
}
4813
4814
// Checks the scalar (D-register) forms of the register-register integer
// compares: CMEQ, CMGE, CMGT (signed), CMHI and CMHS (unsigned).
TEST(neon_3same_scalar_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits are read by the scalar forms:
  //   d0 = 0xff00aa5500ff55aa, d1 = 0xaa55ff55555500ff.
  // d0 is greater than d1 under both the signed and unsigned interpretation.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(d16, d0, d0);
  __ Cmeq(d17, d0, d1);
  __ Cmeq(d18, d1, d0);
  __ Cmge(d19, d0, d0);
  __ Cmge(d20, d0, d1);
  __ Cmge(d21, d1, d0);
  __ Cmgt(d22, d0, d0);
  __ Cmgt(d23, d0, d1);
  __ Cmhi(d24, d0, d0);
  __ Cmhi(d25, d0, d1);
  __ Cmhs(d26, d0, d0);
  __ Cmhs(d27, d0, d1);
  __ Cmhs(d28, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is all-ones or zero in the low 64 bits; the upper 64 bits of
    // the Q register are expected to be zero.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q28);
  }
}
4857
// Checks the half-precision vector FCMEQ (register form) in the 8H and 4H
// arrangements, comparing 0, NaN, -1.0 and 1.0 against 0.
TEST(neon_fcmeq_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmeq(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmeq(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmeq(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmeq(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmeq(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmeq(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmeq(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmeq(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only "0 == 0" is true; the NaN compare is false.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
    // 4H forms: the upper 64 bits are expected to be zero.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v11);
  }
}
4894
// Checks the half-precision scalar FCMEQ (register form), comparing 0, the
// 0xffff bit pattern (a NaN), -1.0 and 1.0 against 0.
TEST(neon_fcmeq_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmeq(h4, h0, h0);
  __ Fcmeq(h5, h1, h0);
  __ Fcmeq(h6, h2, h0);
  __ Fcmeq(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask (0xffff for true, 0x0000 for false), so it is
    // checked through its raw bits.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h7);
  }
}
4923
// Checks the half-precision vector FCMGE (register form) in the 8H and 4H
// arrangements, comparing 0, NaN, -1.0 and 1.0 against 0.
TEST(neon_fcmge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // True for "0 >= 0" and "1.0 >= 0"; false for NaN and -1.0.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // 4H forms: the upper 64 bits are expected to be zero.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
4960
// Checks the half-precision scalar FCMGE (register form), comparing 0, the
// 0xffff bit pattern (a NaN), -1.0 and 1.0 against 0.
TEST(neon_fcmge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmge(h4, h0, h0);
  __ Fcmge(h5, h1, h0);
  __ Fcmge(h6, h2, h0);
  __ Fcmge(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask (0xffff for true, 0x0000 for false), so it is
    // checked through its raw bits.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
4989
// Checks the half-precision vector FCMGT (register form) in the 8H and 4H
// arrangements, comparing 0, NaN, -1.0 and 1.0 against 0.
TEST(neon_fcmgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only "1.0 > 0" is true.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // 4H forms: the upper 64 bits are expected to be zero.
    ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5026
// Checks the half-precision scalar FCMGT (register form), comparing 0, the
// 0xffff bit pattern (a NaN), -1.0 and 1.0 against 0.
TEST(neon_fcmgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmgt(h4, h0, h0);
  __ Fcmgt(h5, h1, h0);
  __ Fcmgt(h6, h2, h0);
  __ Fcmgt(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask (0xffff for true, 0x0000 for false), so it is
    // checked through its raw bits.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5055
// Checks the half-precision vector FACGE (absolute compare greater-or-equal)
// in the 8H and 4H arrangements, comparing 0, NaN, -1.0 and 1.0 against 0.
TEST(neon_facge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Facge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Magnitudes are compared, so -1.0 behaves like 1.0 here; only the NaN
    // compare is false.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // 4H forms: the upper 64 bits are expected to be zero.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5092
// Checks the half-precision scalar FACGE (absolute compare greater-or-equal),
// comparing 0, the 0xffff bit pattern (a NaN), -1.0 and 1.0 against 0.
TEST(neon_facge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Facge(h4, h0, h0);
  __ Facge(h5, h1, h0);
  __ Facge(h6, h2, h0);
  __ Facge(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask (0xffff for true, 0x0000 for false). Magnitudes
    // are compared, so -1.0 gives the same answer as 1.0.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5121
// Checks the half-precision vector FACGT (absolute compare greater-than) in
// the 8H and 4H arrangements, comparing 0, NaN, -1.0 and 1.0 against 0.
TEST(neon_facgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Facgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Magnitudes are compared, so both -1.0 and 1.0 are "greater than" 0;
    // the 0 and NaN compares are false.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // 4H forms: the upper 64 bits are expected to be zero.
    ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5158
// Checks the half-precision scalar FACGT (absolute compare greater-than),
// comparing 0, the 0xffff bit pattern (a NaN), -1.0 and 1.0 against 0.
TEST(neon_facgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Facgt(h4, h0, h0);
  __ Facgt(h5, h1, h0);
  __ Facgt(h6, h2, h0);
  __ Facgt(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask (0xffff for true, 0x0000 for false). Magnitudes
    // are compared, so -1.0 gives the same answer as 1.0.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5187
// Checks FCMEQ against immediate zero in the scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v2 and v3 hold single-precision -1.0 and 1.0 in every S lane. They are
  // also read as doubles below, where the bit patterns are still negative and
  // positive respectively, which is all a compare against zero depends on.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmeq(s16, s0, 0.0);
  __ Fcmeq(s17, s1, 0.0);
  __ Fcmeq(s18, s2, 0.0);
  __ Fcmeq(d19, d0, 0.0);
  __ Fcmeq(d20, d1, 0.0);
  __ Fcmeq(d21, d2, 0.0);
  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Only the zero inputs compare equal; NaN and -1.0 give zero.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  }
}
5225
// Checks FCMGE against immediate zero in the scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v2 and v3 hold single-precision -1.0 and 1.0 in every S lane. v3 is also
  // read as doubles below, where the bit pattern is still positive, which is
  // all a compare against zero depends on.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmge(s16, s0, 0.0);
  __ Fcmge(s17, s1, 0.0);
  __ Fcmge(s18, s2, 0.0);
  __ Fcmge(d19, d0, 0.0);
  __ Fcmge(d20, d1, 0.0);
  __ Fcmge(d21, d3, 0.0);
  __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmge(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // True for zero and positive inputs; false for NaN and negative inputs.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5263
5264
// Checks FCMGT against immediate zero in the scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v2 and v3 hold single-precision -1.0 and 1.0 in every S lane. v3 is also
  // read as doubles below, where the bit pattern is still positive, which is
  // all a compare against zero depends on.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmgt(s16, s0, 0.0);
  __ Fcmgt(s17, s1, 0.0);
  __ Fcmgt(s18, s2, 0.0);
  __ Fcmgt(d19, d0, 0.0);
  __ Fcmgt(d20, d1, 0.0);
  __ Fcmgt(d21, d3, 0.0);
  __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // True only for the positive inputs; zero, NaN and negative give zero.
    ASSERT_EQUAL_128(0, 0x00000000, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5302
// Checks FCMLE against immediate zero in the scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v2 and v3 hold single-precision -1.0 and 1.0 in every S lane. v2 is also
  // read as doubles below, where the bit pattern is still negative, which is
  // all a compare against zero depends on.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmle(s16, s0, 0.0);
  __ Fcmle(s17, s1, 0.0);
  __ Fcmle(s18, s3, 0.0);
  __ Fcmle(d19, d0, 0.0);
  __ Fcmle(d20, d1, 0.0);
  __ Fcmle(d21, d2, 0.0);
  __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmle(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // True for zero and negative inputs; false for NaN and positive inputs.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5340
5341
// Checks FCMLT against immediate zero in the scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v2 and v3 hold single-precision -1.0 and 1.0 in every S lane. v2 is also
  // read as doubles below, where the bit pattern is still negative, which is
  // all a compare against zero depends on.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmlt(s16, s0, 0.0);
  __ Fcmlt(s17, s1, 0.0);
  __ Fcmlt(s18, s3, 0.0);
  __ Fcmlt(d19, d0, 0.0);
  __ Fcmlt(d20, d1, 0.0);
  __ Fcmlt(d21, d2, 0.0);
  __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // True only for the negative inputs; zero, NaN and positive give zero.
    ASSERT_EQUAL_128(0, 0x00000000, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5379
// Exercises Cmeq(vd, vn, 0) (integer compare equal to zero) across the 8B,
// 16B, 4H, 8H, 2S, 4S, 2D vector arrangements and the scalar D form. Each
// result lane is expected to be all ones where the input lane is zero and all
// zeros otherwise; 64-bit forms are expected to leave the upper half clear.
TEST(neon_2regmisc_cmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte and halfword lanes are read from v1, wider lanes from v0.
  __ Cmeq(v16.V8B(), v1.V8B(), 0);
  __ Cmeq(v17.V16B(), v1.V16B(), 0);
  __ Cmeq(v18.V4H(), v1.V4H(), 0);
  __ Cmeq(v19.V8H(), v1.V8H(), 0);
  __ Cmeq(v20.V2S(), v0.V2S(), 0);
  __ Cmeq(v21.V4S(), v0.V4S(), 0);
  __ Cmeq(d22, d0, 0);
  __ Cmeq(d23, d1, 0);
  __ Cmeq(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
    ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
    ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5413
5414
// Exercises Cmge(vd, vn, 0) (signed integer compare greater than or equal to
// zero) across the vector arrangements and the scalar D form. Each result
// lane is expected to be all ones where the signed input lane is >= 0 and all
// zeros otherwise; 64-bit forms are expected to leave the upper half clear.
TEST(neon_2regmisc_cmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // The top byte of v0 is 0xff so that its upper S and D lanes are negative.
  __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte and halfword lanes are read from v1, wider lanes from v0.
  __ Cmge(v16.V8B(), v1.V8B(), 0);
  __ Cmge(v17.V16B(), v1.V16B(), 0);
  __ Cmge(v18.V4H(), v1.V4H(), 0);
  __ Cmge(v19.V8H(), v1.V8H(), 0);
  __ Cmge(v20.V2S(), v0.V2S(), 0);
  __ Cmge(v21.V4S(), v0.V4S(), 0);
  __ Cmge(d22, d0, 0);
  __ Cmge(d23, d1, 0);
  __ Cmge(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
    ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5448
5449
// Exercises Cmlt(vd, vn, 0) (signed integer compare less than zero) across
// the vector arrangements and the scalar D form. Each result lane is expected
// to be all ones where the signed input lane is negative and all zeros
// otherwise; 64-bit forms are expected to leave the upper half clear.
TEST(neon_2regmisc_cmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // The low D lane of v0 has its sign bit set so the D-sized cases see a
  // negative input.
  __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // B, H and S lanes are read from v1; D lanes from v0 (d23 reads d1).
  __ Cmlt(v16.V8B(), v1.V8B(), 0);
  __ Cmlt(v17.V16B(), v1.V16B(), 0);
  __ Cmlt(v18.V4H(), v1.V4H(), 0);
  __ Cmlt(v19.V8H(), v1.V8H(), 0);
  __ Cmlt(v20.V2S(), v1.V2S(), 0);
  __ Cmlt(v21.V4S(), v1.V4S(), 0);
  __ Cmlt(d22, d0, 0);
  __ Cmlt(d23, d1, 0);
  __ Cmlt(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
    ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5483
5484
// Exercises Cmle(vd, vn, 0) (signed integer compare less than or equal to
// zero) across the vector arrangements and the scalar D form. Each result
// lane is expected to be all ones where the signed input lane is <= 0 and all
// zeros otherwise; 64-bit forms are expected to leave the upper half clear.
TEST(neon_2regmisc_cmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // B, H and S lanes are read from v1; D lanes from v0 (d23 reads d1).
  __ Cmle(v16.V8B(), v1.V8B(), 0);
  __ Cmle(v17.V16B(), v1.V16B(), 0);
  __ Cmle(v18.V4H(), v1.V4H(), 0);
  __ Cmle(v19.V8H(), v1.V8H(), 0);
  __ Cmle(v20.V2S(), v1.V2S(), 0);
  __ Cmle(v21.V4S(), v1.V4S(), 0);
  __ Cmle(d22, d0, 0);
  __ Cmle(d23, d1, 0);
  __ Cmle(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
    ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
    ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5518
5519
// Exercises Cmgt(vd, vn, 0) (signed integer compare greater than zero) across
// the vector arrangements and the scalar D form. Each result lane is expected
// to be all ones where the signed input lane is strictly positive and all
// zeros otherwise; 64-bit forms are expected to leave the upper half clear.
TEST(neon_2regmisc_cmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte and halfword lanes are read from v1, wider lanes from v0.
  __ Cmgt(v16.V8B(), v1.V8B(), 0);
  __ Cmgt(v17.V16B(), v1.V16B(), 0);
  __ Cmgt(v18.V4H(), v1.V4H(), 0);
  __ Cmgt(v19.V8H(), v1.V8H(), 0);
  __ Cmgt(v20.V2S(), v0.V2S(), 0);
  __ Cmgt(v21.V4S(), v0.V4S(), 0);
  __ Cmgt(d22, d0, 0);
  __ Cmgt(d23, d1, 0);
  __ Cmgt(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
    ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
  }
}
5553
5554
// Exercises Neg(vd, vn) (integer negate) across the vector arrangements and
// the scalar D form. The inputs include the most negative value of each lane
// size, which the expected results show wrapping back to itself (no
// saturation).
TEST(neon_2regmisc_neg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One input per lane size: v0 for B, v1 for H, v2 for S, v3 and v4 for D.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Neg(v16.V8B(), v0.V8B());
  __ Neg(v17.V16B(), v0.V16B());
  __ Neg(v18.V4H(), v1.V4H());
  __ Neg(v19.V8H(), v1.V8H());
  __ Neg(v20.V2S(), v2.V2S());
  __ Neg(v21.V4S(), v2.V4S());
  __ Neg(d22, d3);
  __ Neg(v23.V2D(), v3.V2D());
  __ Neg(v24.V2D(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
    ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
    ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
    ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
    ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
  }
}
5591
5592
// Exercises Sqneg(vd, vn) (signed saturating negate) across the vector
// arrangements and the scalar B, H, S and D forms. It uses the same inputs as
// the plain negate test; here the most negative value of each lane size is
// expected to saturate to the most positive value.
TEST(neon_2regmisc_sqneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One input per lane size: v0 for B, v1 for H, v2 for S, v3 and v4 for D.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqneg(v16.V8B(), v0.V8B());
  __ Sqneg(v17.V16B(), v0.V16B());
  __ Sqneg(v18.V4H(), v1.V4H());
  __ Sqneg(v19.V8H(), v1.V8H());
  __ Sqneg(v20.V2S(), v2.V2S());
  __ Sqneg(v21.V4S(), v2.V4S());
  __ Sqneg(v22.V2D(), v3.V2D());
  __ Sqneg(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each input.
  __ Sqneg(b24, b0);
  __ Sqneg(h25, h1);
  __ Sqneg(s26, s2);
  __ Sqneg(d27, d3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
    ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
    ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
    ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

    ASSERT_EQUAL_128(0, 0x7f, q24);
    ASSERT_EQUAL_128(0, 0x8001, q25);
    ASSERT_EQUAL_128(0, 0x80000001, q26);
    ASSERT_EQUAL_128(0, 0x8000000000000001, q27);
  }
}
5637
5638
// Exercises Abs(vd, vn) (integer absolute value) across the vector
// arrangements and the scalar D form. The inputs include the most negative
// value of each lane size, which the expected results show being left
// unchanged (no saturation).
TEST(neon_2regmisc_abs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One input per lane size: v0 for B, v1 for H, v2 for S, v3 and v4 for D.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Abs(v16.V8B(), v0.V8B());
  __ Abs(v17.V16B(), v0.V16B());
  __ Abs(v18.V4H(), v1.V4H());
  __ Abs(v19.V8H(), v1.V8H());
  __ Abs(v20.V2S(), v2.V2S());
  __ Abs(v21.V4S(), v2.V4S());
  __ Abs(d22, d3);
  __ Abs(v23.V2D(), v3.V2D());
  __ Abs(v24.V2D(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
    ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
    ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
    ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
  }
}
5675
5676
// Exercises Sqabs(vd, vn) (signed saturating absolute value) across the
// vector arrangements and the scalar B, H, S and D forms. It uses the same
// inputs as the plain absolute-value test; here the most negative value of
// each lane size is expected to saturate to the most positive value.
TEST(neon_2regmisc_sqabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One input per lane size: v0 for B, v1 for H, v2 for S, v3 and v4 for D.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqabs(v16.V8B(), v0.V8B());
  __ Sqabs(v17.V16B(), v0.V16B());
  __ Sqabs(v18.V4H(), v1.V4H());
  __ Sqabs(v19.V8H(), v1.V8H());
  __ Sqabs(v20.V2S(), v2.V2S());
  __ Sqabs(v21.V4S(), v2.V4S());
  __ Sqabs(v22.V2D(), v3.V2D());
  __ Sqabs(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each input.
  __ Sqabs(b24, b0);
  __ Sqabs(h25, h1);
  __ Sqabs(s26, s2);
  __ Sqabs(d27, d3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
    ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
    ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

    ASSERT_EQUAL_128(0, 0x7f, q24);
    ASSERT_EQUAL_128(0, 0x7fff, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
5721
// Exercises Suqadd(vd, vn) (signed saturating accumulate of an unsigned
// value) across the vector arrangements and the scalar B, H, S and D forms.
// The instruction accumulates into its destination, so every destination is
// preloaded with the signed operand before the unsigned addend is applied.
// The expected values show sums saturating at the signed maximum of the lane.
TEST(neon_2regmisc_suqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Operand pairs per lane size: (accumulator, addend).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);

  __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);

  __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);

  // Preload the accumulators for the vector forms.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  // Preload the accumulators for the scalar forms.
  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Suqadd(v16.V8B(), v1.V8B());
  __ Suqadd(v17.V16B(), v1.V16B());
  __ Suqadd(v18.V4H(), v3.V4H());
  __ Suqadd(v19.V8H(), v3.V8H());
  __ Suqadd(v20.V2S(), v5.V2S());
  __ Suqadd(v21.V4S(), v5.V4S());
  __ Suqadd(v22.V2D(), v7.V2D());

  __ Suqadd(b23, b1);
  __ Suqadd(h24, h3);
  __ Suqadd(s25, s5);
  __ Suqadd(d26, d7);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
    ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
    ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);

    // Scalar results: the bits above the written lane are expected to be
    // cleared even though the destinations were preloaded with 128-bit
    // values.
    ASSERT_EQUAL_128(0, 0x7f, q23);
    ASSERT_EQUAL_128(0, 0x7ffe, q24);
    ASSERT_EQUAL_128(0, 0x7fffffff, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
  }
}
5783
// Exercises Usqadd(vd, vn) (unsigned saturating accumulate of a signed value)
// across the vector arrangements and the scalar B, H, S and D forms. The
// instruction accumulates into its destination, so every destination is
// preloaded with the unsigned operand before the signed addend is applied.
// The expected values show results saturating at both zero and the unsigned
// maximum of the lane.
TEST(neon_2regmisc_usqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Operand pairs per lane size: (accumulator, addend).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);

  __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);

  __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);

  // Preload the accumulators for the vector forms.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  // Preload the accumulators for the scalar forms.
  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Usqadd(v16.V8B(), v1.V8B());
  __ Usqadd(v17.V16B(), v1.V16B());
  __ Usqadd(v18.V4H(), v3.V4H());
  __ Usqadd(v19.V8H(), v3.V8H());
  __ Usqadd(v20.V2S(), v5.V2S());
  __ Usqadd(v21.V4S(), v5.V4S());
  __ Usqadd(v22.V2D(), v7.V2D());

  __ Usqadd(b23, b1);
  __ Usqadd(h24, h3);
  __ Usqadd(s25, s5);
  __ Usqadd(d26, d7);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
    ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
    ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);

    // Scalar results: the bits above the written lane are expected to be
    // cleared even though the destinations were preloaded with 128-bit
    // values.
    ASSERT_EQUAL_128(0, 0xff, q23);
    ASSERT_EQUAL_128(0, 0x7ffe, q24);
    ASSERT_EQUAL_128(0, 0xffffffff, q25);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  }
}
5845
// Exercises Xtn/Xtn2 (extract narrow): each source lane is truncated to half
// its width. Xtn writes the lower 64 bits of the destination and Xtn2 then
// fills the upper 64 bits of the same register, so each assertion checks the
// combined result of one Xtn/Xtn2 pair.
TEST(neon_2regmisc_xtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // H -> B, S -> H and D -> S narrowing, lower half then upper half.
  __ Xtn(v16.V8B(), v0.V8H());
  __ Xtn2(v16.V16B(), v1.V8H());
  __ Xtn(v17.V4H(), v1.V4S());
  __ Xtn2(v17.V8H(), v2.V4S());
  __ Xtn(v18.V2S(), v3.V2D());
  __ Xtn2(v18.V4S(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
  }
}
5873
5874
// Exercises Sqxtn/Sqxtn2 (signed saturating extract narrow) in vector and
// scalar forms. Each signed source lane is narrowed to half its width; the
// expected values show out-of-range inputs saturating to the signed minimum
// or maximum of the narrower lane. Sqxtn writes the lower 64 bits of the
// destination and Sqxtn2 then fills the upper 64 bits.
TEST(neon_2regmisc_sqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: lower half, then upper half of the same destination.
  __ Sqxtn(v16.V8B(), v0.V8H());
  __ Sqxtn2(v16.V16B(), v1.V8H());
  __ Sqxtn(v17.V4H(), v1.V4S());
  __ Sqxtn2(v17.V8H(), v2.V4S());
  __ Sqxtn(v18.V2S(), v3.V2D());
  __ Sqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the lowest lane of v0.
  __ Sqxtn(b19, h0);
  __ Sqxtn(h20, s0);
  __ Sqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
5908
5909
// Exercises Uqxtn/Uqxtn2 (unsigned saturating extract narrow) in vector and
// scalar forms. Each unsigned source lane is narrowed to half its width; the
// expected values show inputs that do not fit saturating to the all-ones
// maximum of the narrower lane. Uqxtn writes the lower 64 bits of the
// destination and Uqxtn2 then fills the upper 64 bits.
TEST(neon_2regmisc_uqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: lower half, then upper half of the same destination.
  __ Uqxtn(v16.V8B(), v0.V8H());
  __ Uqxtn2(v16.V16B(), v1.V8H());
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the lowest lane of v0.
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  }
}
5943
5944
// Exercises Sqxtun/Sqxtun2 (signed saturating extract unsigned narrow) in
// vector and scalar forms. Each signed source lane is narrowed to an unsigned
// lane of half the width; the expected values show negative inputs clamping
// to zero and too-large inputs clamping to the all-ones maximum. Sqxtun
// writes the lower 64 bits of the destination and Sqxtun2 then fills the
// upper 64 bits.
TEST(neon_2regmisc_sqxtun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: lower half, then upper half of the same destination.
  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the lowest lane of v0.
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  }
}
5978
// Regression test for the narrowing instructions when the source and the
// destination are the same register. The base forms are expected to produce
// the narrowed value in the lower 64 bits with the upper 64 bits cleared; the
// "2" forms are expected to write the upper 64 bits while leaving the lower
// 64 bits of the register untouched.
TEST(neon_2regmisc_xtn_regression_test) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Even registers feed the base forms, odd registers the "2" forms.
  __ Movi(v0.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Movi(v1.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
  __ Movi(v2.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
  __ Movi(v4.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Movi(v5.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
  __ Movi(v6.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Movi(v7.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);

  // Each instruction narrows a register into itself.
  __ Xtn(v0.V2S(), v0.V2D());
  __ Xtn2(v1.V4S(), v1.V2D());
  __ Sqxtn(v2.V2S(), v2.V2D());
  __ Sqxtn2(v3.V4S(), v3.V2D());
  __ Uqxtn(v4.V2S(), v4.V2D());
  __ Uqxtn2(v5.V4S(), v5.V2D());
  __ Sqxtun(v6.V2S(), v6.V2D());
  __ Sqxtun2(v7.V4S(), v7.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x5555555555555555, q0);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff7fffffff, q2);
    ASSERT_EQUAL_128(0x8000000080000000, 0xaaaaaaaaaaaaaaaa, q3);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q4);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0xaaaaaaaaaaaaaaaa, q7);
  }
}
6016
// Exercises And(vd, vn, vm) (bitwise AND) in the 16B and 8B arrangements,
// both with a register ANDed with itself and with two different operands.
// The 8B forms are expected to leave the upper 64 bits of the result clear.
TEST(neon_3same_and) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ And(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ And(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ And(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ And(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
    ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
  }
}
6039
// Exercises Bic(vd, vn, vm) (bitwise clear: vn AND NOT vm) in the 16B and 8B
// arrangements, both with a register cleared by itself (expected to give
// zero) and with two different operands. The 8B forms are expected to leave
// the upper 64 bits of the result clear.
TEST(neon_3same_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
  }
}
6062
// Exercises Orr(vd, vn, vm) (bitwise OR) in the 16B and 8B arrangements, both
// with a register ORed with itself and with two different operands. The 8B
// forms are expected to leave the upper 64 bits of the result clear.
TEST(neon_3same_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
    ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
  }
}
6085
// Exercises the vector register Mov(vd, vn) macro with every arrangement
// (16B, 8H, 4S, 2D, 8B, 4H, 2S). The 128-bit forms are expected to copy the
// whole register; the 64-bit forms are expected to copy the lower half and
// leave the upper 64 bits clear.
TEST(neon_3same_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);

  // 128-bit arrangements.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);

    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);
  }
}
6116
// Exercises Orn(vd, vn, vm) (bitwise OR NOT: vn OR NOT vm) in the 16B and 8B
// arrangements, both with a register combined with itself (expected to give
// all ones) and with two different operands. The 8B forms are expected to
// leave the upper 64 bits of the result clear.
TEST(neon_3same_orn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
    ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
  }
}
6139
// Exercises Eor(vd, vn, vm) (bitwise exclusive OR) in the 16B and 8B
// arrangements, both with a register XORed with itself (expected to give
// zero) and with two different operands. The 8B forms are expected to leave
// the upper 64 bits of the result clear.
TEST(neon_3same_eor) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
  }
}
6162
// Exercises Bif(vd, vn, vm) (bitwise insert if false) in the 16B and 8B
// arrangements. The destination is also an input: per the expected values,
// each destination bit is replaced by the corresponding bit of vn where the
// vm bit is 0 and kept where the vm bit is 1.
TEST(neon_3same_bif) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is (destination, vn, vm) for one Bif below.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
    ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  }
}
6193
// Exercises Bit(vd, vn, vm) (bitwise insert if true) in the 16B and 8B
// arrangements. The destination is also an input: per the expected values,
// each destination bit is replaced by the corresponding bit of vn where the
// vm bit is 1 and kept where the vm bit is 0.
TEST(neon_3same_bit) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is (destination, vn, vm) for one Bit below.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
    ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  }
}
6224
// Exercises Bsl(vd, vn, vm) (bitwise select) in the 16B and 8B arrangements.
// The destination supplies the selection mask: per the expected values, each
// result bit comes from vn where the original destination bit is 1 and from
// vm where it is 0.
TEST(neon_3same_bsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is (destination/mask, vn, vm) for one Bsl below.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
    ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
    ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  }
}
6255
6256
// Exercises Smax(vd, vn, vm) (signed maximum, lane by lane) for the B, H and
// S lane sizes in both 64-bit and 128-bit arrangements. The 64-bit forms are
// expected to leave the upper 64 bits of the result clear.
TEST(neon_3same_smax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements.
  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
    ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  }
}
6285
6286
// Checks Smaxp (signed pairwise maximum) for the 8B, 4H, 2S, 16B, 8H and 4S
// arrangements.
//
// In the expected values, the lower result lanes hold the maxima of adjacent
// lane pairs of the first source and the upper result lanes hold those of the
// second source.
TEST(neon_3same_smaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements.
  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
    ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
    ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
    ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  }
}
6315
6316
// Checks the scalar form of Addp, which adds the two 64-bit lanes of a V2D
// source into a D register.
//
// The second case carries out of the low 32 bits, and the third overflows 64
// bits and is expected to wrap.
TEST(neon_addp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of each destination are expected to be zero.
    ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
    ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
    ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
  }
}
6340
// Checks Addv (add across lanes) for the 8B, 16B, 4H, 8H and 4S arrangements.
//
// The destination has the same width as one source lane, so the expected
// values are the lane sums truncated to that width.
TEST(neon_acrosslanes_addv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());
  __ Addv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // All bits above the scalar result are expected to be zero.
    ASSERT_EQUAL_128(0x0, 0xc7, q16);
    ASSERT_EQUAL_128(0x0, 0x99, q17);
    ASSERT_EQUAL_128(0x0, 0x55a9, q18);
    ASSERT_EQUAL_128(0x0, 0x55fc, q19);
    ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
  }
}
6368
6369
// Checks Saddlv (signed add long across lanes) for the 8B, 16B, 4H, 8H and 4S
// arrangements.
//
// Each destination is twice as wide as a source lane. The byte cases sum to
// negative values, so their expected results are sign-extended to 16 bits.
TEST(neon_acrosslanes_saddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());
  __ Saddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0xffc7, q16);
    ASSERT_EQUAL_128(0x0, 0xff99, q17);
    ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
    ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
    // Written with all 16 digits of the D-sized result; the value is unchanged.
    ASSERT_EQUAL_128(0x0, 0x000000001100a9fe, q20);
  }
}
6397
6398
// Checks Uaddlv (unsigned add long across lanes) for the 8B, 16B, 4H, 8H and
// 4S arrangements.
//
// Each destination is twice as wide as a source lane, so the expected values
// keep the carries out of the source lane width.
TEST(neon_acrosslanes_uaddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());
  __ Uaddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x02c7, q16);
    ASSERT_EQUAL_128(0x0, 0x0599, q17);
    ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
    ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
    ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
  }
}
6426
6427
// Checks Smaxv (signed maximum across lanes) for the 8B, 16B, 4H, 8H and 4S
// arrangements.
TEST(neon_acrosslanes_smaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());
  __ Smaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Lanes with the top bit set are negative and must lose to the positive
    // lanes.
    ASSERT_EQUAL_128(0x0, 0x33, q16);
    ASSERT_EQUAL_128(0x0, 0x44, q17);
    ASSERT_EQUAL_128(0x0, 0x55ff, q18);
    ASSERT_EQUAL_128(0x0, 0x55ff, q19);
    ASSERT_EQUAL_128(0x0, 0x66555555, q20);
  }
}
6455
6456
// Checks Sminv (signed minimum across lanes) for the 8B, 16B, 4H, 8H and 4S
// arrangements.
TEST(neon_acrosslanes_sminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());
  __ Sminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // The expected results are the most negative lanes, held in a destination
    // of lane width with everything above it zero.
    ASSERT_EQUAL_128(0x0, 0xaa, q16);
    ASSERT_EQUAL_128(0x0, 0x80, q17);
    ASSERT_EQUAL_128(0x0, 0xffaa, q18);
    ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
    ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
  }
}
6484
// Checks Umaxv (unsigned maximum across lanes) for the 8B, 16B, 4H, 8H and 4S
// arrangements.
TEST(neon_acrosslanes_umaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());
  __ Umaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0xfc, q16);
    ASSERT_EQUAL_128(0x0, 0xfe, q17);
    ASSERT_EQUAL_128(0x0, 0xffaa, q18);
    // The 8H maximum (0xffab) lives in the upper half of v1, so it differs
    // from the 4H result above.
    ASSERT_EQUAL_128(0x0, 0xffab, q19);
    ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
  }
}
6512
6513
// Checks Uminv (unsigned minimum across lanes) for the 8B, 16B, 4H, 8H and 4S
// arrangements.
//
// The low halves of v0 and v1 contain no zero lane while the high halves do,
// so the 64-bit and 128-bit forms are expected to give different results.
TEST(neon_acrosslanes_uminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());
  __ Uminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x01, q16);
    ASSERT_EQUAL_128(0x0, 0x00, q17);
    ASSERT_EQUAL_128(0x0, 0x0001, q18);
    ASSERT_EQUAL_128(0x0, 0x0000, q19);
    ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
  }
}
6541
6542
// Checks Smin (lane-wise signed minimum) for the 8B, 4H, 2S, 16B, 8H and 4S
// arrangements, using the same pair of inputs for every arrangement.
TEST(neon_3same_smin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements.
  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  }
}
6571
6572
// Checks Umax (lane-wise unsigned maximum) for the 8B, 4H, 2S, 16B, 8H and 4S
// arrangements.
//
// With these particular inputs the expected values coincide with those of the
// signed-minimum test: in every lane the unsigned-larger operand is also the
// signed-smaller one.
TEST(neon_3same_umax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements.
  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  }
}
6601
6602
// Checks Umin (lane-wise unsigned minimum) for the 8B, 4H, 2S, 16B, 8H and 4S
// arrangements, using the same pair of inputs for every arrangement.
TEST(neon_3same_umin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements.
  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
    ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  }
}
6631
6632
// Checks Fcadd (floating-point complex add with rotation) with rotations of 90
// and 270 on the 2D, 2S and 4S arrangements.
//
// The operand comments use the notation (imaginary i, real); the imaginary
// part is held in the higher-numbered lane of each pair.
TEST(neon_3same_extra_fcadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (0i, 5) (d)
  __ Movi(v0.V2D(), 0x0, 0x4014000000000000);
  // (5i, 0) (d)
  __ Movi(v1.V2D(), 0x4014000000000000, 0x0);
  // (10i, 10) (d)
  __ Movi(v2.V2D(), 0x4024000000000000, 0x4024000000000000);
  // (5i, 5), (5i, 5) (f)
  __ Movi(v3.V2D(), 0x40A0000040A00000, 0x40A0000040A00000);
  // (5i, 5), (0i, 0) (f)
  __ Movi(v4.V2D(), 0x40A0000040A00000, 0x0);
  // (324567i, 16000) (f)
  __ Movi(v5.V2D(), 0x0, 0x489E7AE0467A0000);

  // Subtraction (10, 10) - (5, 5) == (5, 5)
  // Built from two rotated adds; the second reads the result of the first.
  __ Fcadd(v31.V2D(), v2.V2D(), v1.V2D(), 90);
  __ Fcadd(v31.V2D(), v31.V2D(), v0.V2D(), 270);

  // Addition (10, 10) + (5, 5) == (15, 15)
  __ Fcadd(v30.V2D(), v2.V2D(), v1.V2D(), 270);
  __ Fcadd(v30.V2D(), v30.V2D(), v0.V2D(), 90);

  // 2S
  __ Fcadd(v29.V2S(), v4.V2S(), v5.V2S(), 90);
  __ Fcadd(v28.V2S(), v4.V2S(), v5.V2S(), 270);

  // 4S
  __ Fcadd(v27.V4S(), v3.V4S(), v4.V4S(), 90);
  __ Fcadd(v26.V4S(), v3.V4S(), v4.V4S(), 270);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x4014000000000000, 0x4014000000000000, q31);
    ASSERT_EQUAL_128(0x402E000000000000, 0x402E000000000000, q30);
    ASSERT_EQUAL_128(0x0, 0x467a0000c89e7ae0, q29);  // (16000i, -324567)
    ASSERT_EQUAL_128(0x0, 0xc67a0000489e7ae0, q28);  // (-16000i, 324567)
    ASSERT_EQUAL_128(0x4120000000000000, 0x40A0000040A00000, q27);
    ASSERT_EQUAL_128(0x0000000041200000, 0x40A0000040A00000, q26);
  }
}
6679
6680
// Checks the vector form of Fcmla (floating-point complex multiply-accumulate
// with rotation) on the 2S, 4S and 2D arrangements, using rotations 0, 90, 180
// and 270.
//
// The operand comments use the notation (imaginary i, real). Every destination
// is zeroed first. The "full" products are built from two Fcmla instructions
// (rotations 0 and 90, in either order) issued into the same destination; the
// "partial" checks look at the result of a single rotation.
TEST(neon_3same_extra_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  __ Movi(v1.V2D(), 0x0, 0x40A0000040400000);  // (5i, 3) (f)
  __ Movi(v2.V2D(), 0x0, 0x4040000040A00000);  // (3i, 5) (f)

  __ Movi(v3.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v4.V2D(), 0x0, 0x40E000003F800000);  // (7i, 1) (f)

  __ Movi(v5.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v6.V2D(), 0x0, 0x408000003F800000);  // (4i, 1) (f)

  // (1.5i, 2.5), (31.5i, 1024) (f)
  __ Movi(v7.V2D(), 0x3FC0000040200000, 0x41FC000044800000);
  // (2048i, 412.75), (3645i, 0) (f)
  __ Movi(v8.V2D(), 0x4500000043CE6000, 0x4563D00000000000);
  // (2000i, 450,000) (d)
  __ Movi(v9.V2D(), 0x409F400000000000, 0x411B774000000000);
  // (30,000i, 1250) (d)
  __ Movi(v10.V2D(), 0x40DD4C0000000000, 0x4093880000000000);

  // DST
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculations
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 90);
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 0);

  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 0);
  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 90);

  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 90);
  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 0);

  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 0);
  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 90);

  // Partial checks
  __ Fcmla(v27.V2S(), v1.V2S(), v2.V2S(), 0);
  __ Fcmla(v26.V2S(), v2.V2S(), v1.V2S(), 0);

  __ Fcmla(v25.V4S(), v7.V4S(), v8.V4S(), 270);
  __ Fcmla(v24.V4S(), v7.V4S(), v8.V4S(), 180);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x4208000000000000, q31);  // (34i, 0)
    ASSERT_EQUAL_128(0x0, 0x41B80000C1300000, q30);  // (23i, -11)
    ASSERT_EQUAL_128(0x0, 0x41600000C0A00000, q29);  // (14i, -5)

    // (13502500000i, 502500000)
    ASSERT_EQUAL_128(0x4209267E65000000, 0x41BDF38AA0000000, q28);
    ASSERT_EQUAL_128(0x0, 0x4110000041700000, q27);  // (9i, 15)
    ASSERT_EQUAL_128(0x0, 0x41C8000041700000, q26);  // (25i, 15)
    // (512i, 1.031875E3), (373248i, 0)
    ASSERT_EQUAL_128(0xc41ac80045400000, 0x0000000047e040c0, q25);
    // (619.125i, -3072), (0i, -114817.5)
    ASSERT_EQUAL_128(0xc5a00000c480fc00, 0xca63d00000000000, q24);
  }
}
6753
6754
// Checks the by-element form of Fcmla on the 4S arrangement.
//
// The operand comments use the notation (imaginary i, real), listing the upper
// complex pair of the register first. In the calls below the fourth argument
// is the index of the complex pair taken from the last source register and the
// fifth is the rotation. Every destination is zeroed first; the "full"
// products are built from two Fcmla instructions (rotations 90 and 0) issued
// into the same destination.
TEST(neon_byelement_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (5i, 3), (5i, 3) (f)
  __ Movi(v1.V2D(), 0x40A0000040400000, 0x40A0000040400000);
  // (3i, 5), (3i, 5) (f)
  __ Movi(v2.V2D(), 0x4040000040A00000, 0x4040000040A00000);
  // (7i, 1), (5i, 3) (f)
  __ Movi(v3.V2D(), 0x40E000003F800000, 0x40A0000040400000);
  // (4i, 1), (3i, 5) (f)
  __ Movi(v4.V2D(), 0x408000003F800000, 0x4040000040A00000);
  // (4i, 1), (7i, 1) (f)
  __ Movi(v5.V2D(), 0x408000003F800000, 0x40E000003F800000);
  // (2i, 3), (0, 0) (f)
  __ Movi(v6.V2D(), 0x4000000040400000, 0x0);

  // DST
  __ Movi(v22.V2D(), 0x0, 0x0);
  __ Movi(v23.V2D(), 0x0, 0x0);
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculation (pairs)
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 90);
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 0);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 90);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 0);

  // Rotations
  __ Fcmla(v29.V4S(), v3.V4S(), v4.S(), 1, 0);
  __ Fcmla(v28.V4S(), v3.V4S(), v4.S(), 1, 90);
  __ Fcmla(v27.V4S(), v3.V4S(), v4.S(), 1, 180);
  __ Fcmla(v26.V4S(), v3.V4S(), v4.S(), 1, 270);
  __ Fcmla(v25.V4S(), v3.V4S(), v4.S(), 0, 270);
  __ Fcmla(v24.V4S(), v3.V4S(), v4.S(), 0, 180);
  __ Fcmla(v23.V4S(), v3.V4S(), v4.S(), 0, 90);
  __ Fcmla(v22.V4S(), v3.V4S(), v4.S(), 0, 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // (34i, 0), (34i, 0)
    ASSERT_EQUAL_128(0x4208000000000000, 0x4208000000000000, q31);
    // (14i, -5), (23i, -11)
    ASSERT_EQUAL_128(0x41600000C0A00000, 0x41B80000C1300000, q30);
    // (4i, 1), (12i, 3)
    ASSERT_EQUAL_128(0x408000003f800000, 0x4140000040400000, q29);
    // (7i, -28), (5i, -20)
    ASSERT_EQUAL_128(0x40e00000c1e00000, 0x40a00000c1a00000, q28);
    // (-4i, -1), (-12i, -3)
    ASSERT_EQUAL_128(0xc0800000bf800000, 0xc1400000c0400000, q27);
    // (-7i, 28), (-5i, 20)
    ASSERT_EQUAL_128(0xc0e0000041e00000, 0xc0a0000041a00000, q26);
    // (-35i, 21), (-25i, 15)
    ASSERT_EQUAL_128(0xc20c000041a80000, 0xc1c8000041700000, q25);
    // (-3i, -5), (-9i, -15)
    ASSERT_EQUAL_128(0xc0400000c0a00000, 0xc1100000c1700000, q24);
    // (35i, -21), (25i, -15)
    ASSERT_EQUAL_128(0x420c0000c1a80000, 0x41c80000c1700000, q23);
    // (3i, 5), (9i, 15)
    ASSERT_EQUAL_128(0x4040000040a00000, 0x4110000041700000, q22);
  }
}
6827
6828
// Checks Mvn (bitwise NOT) with every arrangement the test passes to it.
//
// The operation is bitwise, so the expected value is the same for every lane
// size; only the 64-bit forms differ, by leaving the upper 64 bits zero.
TEST(neon_2regmisc_mvn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  // 128-bit arrangements.
  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);

    ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q24);
    ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q25);
    ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q26);
  }
}
6860
6861
// Checks Not (bitwise NOT) for the 16B and 8B arrangements.
TEST(neon_2regmisc_not) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
  __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);

  __ Not(v16.V16B(), v0.V16B());
  __ Not(v17.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
    // The 8B form inverts only the low 64 bits; the upper 64 bits are expected
    // to be zero rather than the inverse of v1's (zero) upper half.
    ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
  }
}
6881
6882
// Checks three per-lane bit-counting instructions:
//  - Cls (count leading sign bits) on the B, H and S arrangements of v1,
//  - Clz (count leading zeros) on the B, H and S arrangements of v0,
//  - Cnt (count set bits) on the 8B arrangement of v0 and the 16B
//    arrangement of v1.
TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Cls.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
    ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
    ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
    ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);

    // Clz.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
    ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
    ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
    ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);

    // Cnt.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
    ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);
  }
}
6931
// Checks the element-reversal instructions Rev16, Rev32 and Rev64, which
// reverse the order of the lanes inside each 16-, 32- or 64-bit container, and
// Rbit, which reverses the bits inside each byte.
TEST(neon_2regmisc_rev) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Rev16.
    ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);

    // Rev32.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
    ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);

    // Rev64.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
    ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
    ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);

    // Rbit.
    ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
    ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);
  }
}
6982
6983
// Checks Sli (shift left and insert) for every vector arrangement plus the
// scalar D form.
//
// Each destination is preloaded with a copy of v0 because the expected results
// keep the low `shift` bits of every destination lane and replace only the
// bits above them with the shifted source.
TEST(neon_sli) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Seed all destinations with the same background pattern.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Sli(v17.V16B(), v1.V16B(), 7);
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Sli(v19.V8H(), v1.V8H(), 15);
  // A shift of zero replaces the whole lane with the source lane.
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Sli(v21.V4S(), v1.V4S(), 31);
  __ Sli(v22.V2D(), v1.V2D(), 48);

  __ Sli(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
    ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);

    ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);
  }
}
7027
7028
// Checks Sri (shift right and insert) for every vector arrangement plus the
// scalar D form.
//
// Each destination is preloaded with a copy of v0 because the expected results
// keep the high `shift` bits of every destination lane and replace only the
// bits below them with the shifted source.
TEST(neon_sri) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Seed all destinations with the same background pattern.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  __ Sri(v16.V8B(), v1.V8B(), 4);
  __ Sri(v17.V16B(), v1.V16B(), 7);
  __ Sri(v18.V4H(), v1.V4H(), 8);
  __ Sri(v19.V8H(), v1.V8H(), 15);
  __ Sri(v20.V2S(), v1.V2S(), 1);
  __ Sri(v21.V4S(), v1.V4S(), 31);
  __ Sri(v22.V2D(), v1.V2D(), 48);

  __ Sri(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
    ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
    ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
    ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
    ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);

    ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);
  }
}
7072
7073
// Checks Shrn/Shrn2 (shift right and narrow) for H->B, S->H and D->S.
//
// Each destination is written twice: the plain form produces the low 64 bits
// and the "2" form then fills the high 64 bits, so the order of each pair of
// instructions matters.
TEST(neon_shrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE: v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
  }
}
7101
7102
// Checks Rshrn/Rshrn2 (rounding shift right and narrow) for H->B, S->H and
// D->S, using the same inputs and shift amounts as the non-rounding Shrn test.
//
// Each destination is written twice: the plain form produces the low 64 bits
// and the "2" form then fills the high 64 bits, so the order of each pair of
// instructions matters.
TEST(neon_rshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE: v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
  }
}
7130
7131
// Checks Uqshrn/Uqshrn2 (unsigned saturating shift right and narrow) for the
// vector H->B, S->H and D->S forms and for the scalar forms.
//
// Each vector destination is written twice: the plain form produces the low 64
// bits and the "2" form then fills the high 64 bits. With a shift of 1 many
// inputs do not fit the narrow lane, so the expected upper halves contain
// all-ones (saturated) lanes.
TEST(neon_uqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE: v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Uqshrn(b19, h0, 8);
  __ Uqshrn(h20, s1, 16);
  __ Uqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7166
7167
// Checks Uqrshrn/Uqrshrn2 (unsigned saturating rounding shift right and
// narrow) for the vector H->B, S->H and D->S forms and for the scalar forms.
//
// Each vector destination is written twice: the plain form produces the low 64
// bits and the "2" form then fills the high 64 bits, so the order of each pair
// of instructions matters.
TEST(neon_uqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE: v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Uqrshrn(b19, h0, 8);
  __ Uqrshrn(h20, s1, 16);
  __ Uqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7202
7203
// Checks Sqshrn/Sqshrn2 (signed saturating shift right and narrow) for the
// vector H->B, S->H and D->S forms and for the scalar forms.
//
// Each vector destination is written twice: the plain form produces the low 64
// bits and the "2" form then fills the high 64 bits. The expected upper halves
// contain lanes saturated to the signed minimum and maximum of the narrow lane
// (for example 0x80 and 0x7f for bytes).
TEST(neon_sqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE: v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqshrn(b19, h0, 8);
  __ Sqshrn(h20, s1, 16);
  __ Sqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7238
7239
// Checks Sqrshrn/Sqrshrn2 (signed saturating rounding shift right and narrow)
// for the vector H->B, S->H and D->S forms and for the scalar forms.
//
// Each vector destination is written twice: the plain form produces the low 64
// bits and the "2" form then fills the high 64 bits, so the order of each pair
// of instructions matters.
TEST(neon_sqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE: v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqrshrn(b19, h0, 8);
  __ Sqrshrn(h20, s1, 16);
  __ Sqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7274
7275
// SQSHRUN / SQSHRUN2 (signed saturating shift right unsigned narrow): negative
// sources are expected to clamp to zero and large positive ones to all-ones.
TEST(neon_sqshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high, low} 64-bit halves loaded into v0-v4.
  static const uint64_t kSrc[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000},
  };

  START();

  __ Movi(v0.V2D(), kSrc[0][0], kSrc[0][1]);
  __ Movi(v1.V2D(), kSrc[1][0], kSrc[1][1]);
  __ Movi(v2.V2D(), kSrc[2][0], kSrc[2][1]);
  __ Movi(v3.V2D(), kSrc[3][0], kSrc[3][1]);
  // v4 is initialised but not read by any instruction below.
  __ Movi(v4.V2D(), kSrc[4][0], kSrc[4][1]);

  // Vector forms: low half first, then the "2" variant for the high half.
  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqshrun(b19, h0, 8);
  __ Sqshrun(h20, s1, 16);
  __ Sqshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7310
7311
// SQRSHRUN / SQRSHRUN2 (signed saturating rounded shift right unsigned
// narrow): same inputs as the non-rounding test, with rounded expectations.
TEST(neon_sqrshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high, low} 64-bit halves loaded into v0-v4.
  static const uint64_t kSrc[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000},
  };

  START();

  __ Movi(v0.V2D(), kSrc[0][0], kSrc[0][1]);
  __ Movi(v1.V2D(), kSrc[1][0], kSrc[1][1]);
  __ Movi(v2.V2D(), kSrc[2][0], kSrc[2][1]);
  __ Movi(v3.V2D(), kSrc[3][0], kSrc[3][1]);
  // v4 is initialised but not read by any instruction below.
  __ Movi(v4.V2D(), kSrc[4][0], kSrc[4][1]);

  // Vector forms: low half first, then the "2" variant for the high half.
  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    // Rounding lifts 0x7fffffffffffffff >> 32 to 0x80000000, which still fits
    // in an unsigned 32-bit lane.
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7346
// BIC (vector, immediate): clears the bits of an 8-bit immediate, shifted left
// by the given amount, in every H or S lane of the destination.
TEST(neon_modimm_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // All twelve destinations start from the same 128-bit pattern.
  const uint64_t kInitHi = 0x00aaff55ff0055aa;
  const uint64_t kInitLo = 0x5555ffff0000aaaa;

  START();

  __ Movi(v16.V2D(), kInitHi, kInitLo);
  __ Movi(v17.V2D(), kInitHi, kInitLo);
  __ Movi(v18.V2D(), kInitHi, kInitLo);
  __ Movi(v19.V2D(), kInitHi, kInitLo);
  __ Movi(v20.V2D(), kInitHi, kInitLo);
  __ Movi(v21.V2D(), kInitHi, kInitLo);
  __ Movi(v22.V2D(), kInitHi, kInitLo);
  __ Movi(v23.V2D(), kInitHi, kInitLo);
  __ Movi(v24.V2D(), kInitHi, kInitLo);
  __ Movi(v25.V2D(), kInitHi, kInitLo);
  __ Movi(v26.V2D(), kInitHi, kInitLo);
  __ Movi(v27.V2D(), kInitHi, kInitLo);

  // 16-bit lanes, 64-bit and 128-bit arrangements.
  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  // 32-bit lanes, 64-bit arrangement.
  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  // 32-bit lanes, 128-bit arrangement.
  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A zero immediate leaves the lanes untouched; the 64-bit arrangements
    // still leave the upper half of the Q register as zero.
    ASSERT_EQUAL_128(0x0, kInitLo, q16);
    ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
    ASSERT_EQUAL_128(kInitHi, kInitLo, q18);
    ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

    ASSERT_EQUAL_128(0x0, kInitLo, q20);
    ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
    ASSERT_EQUAL_128(0x0, kInitLo, q22);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

    ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
    ASSERT_EQUAL_128(kInitHi, kInitLo, q25);
    ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
    ASSERT_EQUAL_128(kInitHi, kInitLo, q27);
  }
}
7401
7402
// Movi macro with assorted 16-bit lane immediates: each value must end up
// replicated into every H lane of the destination.
TEST(neon_modimm_movi_16bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Lane patterns: a repeated byte, a byte paired with 0x00 or 0xff in either
  // position, and two unrelated bytes.
  static const uint64_t kLane[] =
      {0xabab, 0xab00, 0xabff, 0x00ab, 0xffab, 0xabcd};

  START();

  // 64-bit destinations.
  __ Movi(v0.V4H(), kLane[0]);
  __ Movi(v1.V4H(), kLane[1]);
  __ Movi(v2.V4H(), kLane[2]);
  // 128-bit destinations.
  __ Movi(v3.V8H(), kLane[3]);
  __ Movi(v4.V8H(), kLane[4]);
  __ Movi(v5.V8H(), kLane[5]);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
    ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
    ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);
    ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
    ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);
  }
}
7428
7429
// Movi macro with assorted 32-bit lane immediates: each value must end up
// replicated into every S lane of the destination.
TEST(neon_modimm_movi_32bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One non-zero byte, all other bytes zero.
  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  // One arbitrary byte, all other bytes 0xff.
  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  // One arbitrary byte with ones on one side and zeroes on the other.
  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  // Repeating byte / halfword patterns, a fully arbitrary word, and a word
  // whose bytes are each 0x00 or 0xff.
  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
    ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
    ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
    ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

    ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
    ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
    ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
    ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

    ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
    ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
    ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
    ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
    ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  }
}
7481
7482
// Movi macro with assorted 64-bit immediates, written to 1D (low half only)
// and 2D (replicated into both halves) destinations.
TEST(neon_modimm_movi_64bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kByteMask = 0x00ffff0000ffffff;  // Each byte 0x00 or 0xff.
  const uint64_t kRepeatedByte = 0xabababababababab;
  const uint64_t kRepeatedHalf = 0xabcdabcdabcdabcd;
  const uint64_t kRepeatedWord = 0xabcdef01abcdef01;
  const uint64_t kArbitrary = 0xabcdef0123456789;

  START();

  __ Movi(v0.V1D(), kByteMask);
  __ Movi(v1.V2D(), kRepeatedByte);
  __ Movi(v2.V2D(), kRepeatedHalf);
  __ Movi(v3.V2D(), kRepeatedWord);
  __ Movi(v4.V1D(), kArbitrary);
  __ Movi(v5.V2D(), kArbitrary);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the D view is checked for the 1D destinations.
    ASSERT_EQUAL_64(kByteMask, d0);
    ASSERT_EQUAL_128(kRepeatedByte, kRepeatedByte, q1);
    ASSERT_EQUAL_128(kRepeatedHalf, kRepeatedHalf, q2);
    ASSERT_EQUAL_128(kRepeatedWord, kRepeatedWord, q3);
    ASSERT_EQUAL_64(kArbitrary, d4);
    ASSERT_EQUAL_128(kArbitrary, kArbitrary, q5);
  }
}
7508
7509
// MOVI (vector, immediate): byte, 64-bit byte-mask, LSL-shifted and
// MSL-shifted immediate forms.
TEST(neon_modimm_movi) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit immediate in which every byte is either 0x00 or 0xff.
  const uint64_t kByteMask = 0x00ffff0000ffffff;

  START();

  // Per-byte immediates.
  __ Movi(v0.V8B(), 0xaa);
  __ Movi(v1.V16B(), 0x55);

  // 64-bit immediate, scalar and replicated.
  __ Movi(d2, kByteMask);
  __ Movi(v3.V2D(), kByteMask);

  // 16-bit lanes, LSL.
  __ Movi(v16.V4H(), 0x00, LSL, 0);
  __ Movi(v17.V4H(), 0xff, LSL, 8);
  __ Movi(v18.V8H(), 0x00, LSL, 0);
  __ Movi(v19.V8H(), 0xff, LSL, 8);

  // 32-bit lanes (64-bit arrangement), LSL.
  __ Movi(v20.V2S(), 0x00, LSL, 0);
  __ Movi(v21.V2S(), 0xff, LSL, 8);
  __ Movi(v22.V2S(), 0x00, LSL, 16);
  __ Movi(v23.V2S(), 0xff, LSL, 24);

  // 32-bit lanes (128-bit arrangement), LSL.
  __ Movi(v24.V4S(), 0xff, LSL, 0);
  __ Movi(v25.V4S(), 0x00, LSL, 8);
  __ Movi(v26.V4S(), 0xff, LSL, 16);
  __ Movi(v27.V4S(), 0x00, LSL, 24);

  // 32-bit lanes, MSL: the vacated low bits are filled with ones, as the
  // expected values below show.
  __ Movi(v28.V2S(), 0xaa, MSL, 8);
  __ Movi(v29.V2S(), 0x55, MSL, 16);
  __ Movi(v30.V4S(), 0xff, MSL, 8);
  __ Movi(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
    ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);

    ASSERT_EQUAL_128(0x0, kByteMask, q2);
    ASSERT_EQUAL_128(kByteMask, kByteMask, q3);

    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
    ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);

    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);

    ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

    ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);
  }
}
7573
7574
// MVNI (vector, immediate): LSL- and MSL-shifted immediate forms. Each
// expected lane is the bitwise inverse of the shifted immediate.
TEST(neon_modimm_mvni) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // 16-bit lanes, LSL.
  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  // 32-bit lanes (64-bit arrangement), LSL.
  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  // 32-bit lanes (128-bit arrangement), LSL.
  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  // 32-bit lanes, MSL.
  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

    ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

    ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
    ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);
  }
}
7626
7627
// ORR (vector, immediate): sets the bits of an 8-bit immediate, shifted left
// by the given amount, in every H or S lane of the destination.
TEST(neon_modimm_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // All twelve destinations start from the same 128-bit pattern.
  const uint64_t kInitHi = 0x00aaff55ff0055aa;
  const uint64_t kInitLo = 0x5555ffff0000aaaa;

  START();

  __ Movi(v16.V2D(), kInitHi, kInitLo);
  __ Movi(v17.V2D(), kInitHi, kInitLo);
  __ Movi(v18.V2D(), kInitHi, kInitLo);
  __ Movi(v19.V2D(), kInitHi, kInitLo);
  __ Movi(v20.V2D(), kInitHi, kInitLo);
  __ Movi(v21.V2D(), kInitHi, kInitLo);
  __ Movi(v22.V2D(), kInitHi, kInitLo);
  __ Movi(v23.V2D(), kInitHi, kInitLo);
  __ Movi(v24.V2D(), kInitHi, kInitLo);
  __ Movi(v25.V2D(), kInitHi, kInitLo);
  __ Movi(v26.V2D(), kInitHi, kInitLo);
  __ Movi(v27.V2D(), kInitHi, kInitLo);

  // 16-bit lanes, 64-bit and 128-bit arrangements.
  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  // 32-bit lanes, 64-bit arrangement.
  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  // 32-bit lanes, 128-bit arrangement.
  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A zero immediate leaves the lanes untouched; the 64-bit arrangements
    // still leave the upper half of the Q register as zero.
    ASSERT_EQUAL_128(0x0, kInitLo, q16);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
    ASSERT_EQUAL_128(kInitHi, kInitLo, q18);
    ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

    ASSERT_EQUAL_128(0x0, kInitLo, q20);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
    ASSERT_EQUAL_128(0x0, kInitLo, q22);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

    ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
    ASSERT_EQUAL_128(kInitHi, kInitLo, q25);
    ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
    ASSERT_EQUAL_128(kInitHi, kInitLo, q27);
  }
}
7682
// Loads every {high, low} pairing of a small set of 64-bit patterns into a Q
// register with the literal form of Ldr, and counts in x0 how many loads did
// not reproduce the requested 128-bit value.
TEST(ldr_literal_values_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  static const uint64_t kHalfValues[] = {0x8000000000000000,
                                         0x7fffffffffffffff,
                                         0x0000000000000000,
                                         0xffffffffffffffff,
                                         0x00ff00ff00ff00ff,
                                         0x1234567890abcdef};
  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
  const Register& ref_low64 = x1;
  const Register& ref_high64 = x2;
  const Register& loaded_low64 = x3;
  const Register& loaded_high64 = x4;
  const VRegister& tgt = q0;

  START();
  // x0 counts mismatches; it must still be zero when the code finishes.
  __ Mov(x0, 0);

  for (int i = 0; i < card; i++) {
    __ Mov(ref_low64, kHalfValues[i]);
    for (int j = 0; j < card; j++) {
      __ Mov(ref_high64, kHalfValues[j]);
      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
      __ Mov(loaded_low64, tgt.V2D(), 0);
      __ Mov(loaded_high64, tgt.V2D(), 1);
      __ Cmp(loaded_low64, ref_low64);
      // Compare the high halves only if the low halves matched; otherwise the
      // flags are forced to NoFlag, which leaves `ne` true.
      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
      // Accumulate instead of overwriting: Cset would let a later matching
      // load erase the record of an earlier mismatch.
      __ Cinc(x0, x0, ne);
    }
  }
  END();

  if (CAN_RUN()) {
    RUN();

    // If one of the values differs, the trace can be used to identify which
    // one.
    ASSERT_EQUAL_64(0, x0);
  }
}
7724
// Fmov macro with vector destinations: single-precision and half-precision
// immediates replicated into every lane.
TEST(fmov_vec_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  // Single-precision lanes: 20.0f is 0x41a00000, 1024.0f is 0x44800000.
  __ Fmov(v0.V2S(), 20.0);
  __ Fmov(v1.V4S(), 1024.0);

  // Half-precision lanes, given as raw bits: 0xc500 is -5.0, 0x4a80 is 13.0.
  __ Fmov(v2.V4H(), RawbitsToFloat16(0xC500U));
  __ Fmov(v3.V8H(), RawbitsToFloat16(0x4A80U));

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the D view is checked for the 64-bit arrangements.
    ASSERT_EQUAL_64(0x41A0000041A00000, d0);
    ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1);
    ASSERT_EQUAL_64(0xC500C500C500C500, d2);
    ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3);
  }
}
7748
7749 // TODO: add arbitrary values once load literal to Q registers is supported.
TEST(neon_modimm_fmov) {
  // Exercises the vector Fmov macro with immediates that the instruction can
  // encode, immediates that it cannot, zero and positive infinity.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Immediates which can be encoded in the instructions.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Immediates which cannot be encoded in the instructions.
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  // Encodable values.
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  // Non-encodable values.
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  // Zero.
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  // Positive infinity.
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected bit patterns. "1S" is a single lane; "2S" is that lane
    // duplicated into both halves of a 64-bit value; "1D" is one double lane.
    const uint64_t kOne1S = FloatToRawbits(kOne);
    const uint64_t kPointFive1S = FloatToRawbits(kPointFive);
    const uint64_t kNonImmFP321S = FloatToRawbits(kNonImmFP32);
    const uint64_t kFP32Inf1S = FloatToRawbits(kFP32PositiveInfinity);

    const uint64_t kOne2S = (kOne1S << 32) | kOne1S;
    const uint64_t kPointFive2S = (kPointFive1S << 32) | kPointFive1S;
    const uint64_t kNonImmFP322S = (kNonImmFP321S << 32) | kNonImmFP321S;
    const uint64_t kFP32Inf2S = (kFP32Inf1S << 32) | kFP32Inf1S;

    const uint64_t kMinusThirteen1D = DoubleToRawbits(kMinusThirteen);
    const uint64_t kNonImmFP641D = DoubleToRawbits(kNonImmFP64);
    const uint64_t kFP64Inf1D = DoubleToRawbits(kFP64PositiveInfinity);

    ASSERT_EQUAL_128(0x0, kOne2S, q11);
    ASSERT_EQUAL_128(kPointFive2S, kPointFive2S, q12);
    ASSERT_EQUAL_128(kMinusThirteen1D, kMinusThirteen1D, q22);
    ASSERT_EQUAL_128(0x0, kNonImmFP322S, q13);
    ASSERT_EQUAL_128(kNonImmFP322S, kNonImmFP322S, q14);
    ASSERT_EQUAL_128(kNonImmFP641D, kNonImmFP641D, q23);
    ASSERT_EQUAL_128(0x0, 0x0, q1);
    ASSERT_EQUAL_128(0x0, 0x0, q2);
    ASSERT_EQUAL_128(0x0, 0x0, q3);
    ASSERT_EQUAL_128(0x0, kFP32Inf2S, q4);
    ASSERT_EQUAL_128(kFP32Inf2S, kFP32Inf2S, q5);
    ASSERT_EQUAL_128(kFP64Inf1D, kFP64Inf1D, q6);
  }
}
7805
7806
// Byte-wise permutes: TRN1/TRN2 (transpose), ZIP1/ZIP2 (interleave) and
// UZP1/UZP2 (de-interleave) on 16B vectors.
TEST(neon_perm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte i of v0 holds 0x0f - i and byte i of v1 holds 0x1f - i, so every
  // result byte identifies the source register and lane it came from.
  const uint64_t kFirstHi = 0x0001020304050607;
  const uint64_t kFirstLo = 0x08090a0b0c0d0e0f;
  const uint64_t kSecondHi = 0x1011121314151617;
  const uint64_t kSecondLo = 0x18191a1b1c1d1e1f;

  START();

  __ Movi(v0.V2D(), kFirstHi, kFirstLo);
  __ Movi(v1.V2D(), kSecondHi, kSecondLo);

  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
    ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
    ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
    ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
    ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
    ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);
  }
}
7835
7836
// DUP (element): broadcasts one lane of a vector to every lane of a vector
// destination, or copies it to a scalar destination.
TEST(neon_copy_dup_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high, low} 64-bit halves loaded into v0-v5.
  static const uint64_t kSrc[][2] = {
      {0x0011223344556677, 0x8899aabbccddeeff},
      {0xffeddccbbaae9988, 0x7766554433221100},
      {0xffeddccbbaae9988, 0x0011223344556677},
      {0x7766554433221100, 0x8899aabbccddeeff},
      {0x7766554433221100, 0x0123456789abcdef},
      {0x0011223344556677, 0x0123456789abcdef},
  };

  START();

  __ Movi(v0.V2D(), kSrc[0][0], kSrc[0][1]);
  __ Movi(v1.V2D(), kSrc[1][0], kSrc[1][1]);
  __ Movi(v2.V2D(), kSrc[2][0], kSrc[2][1]);
  __ Movi(v3.V2D(), kSrc[3][0], kSrc[3][1]);
  __ Movi(v4.V2D(), kSrc[4][0], kSrc[4][1]);
  __ Movi(v5.V2D(), kSrc[5][0], kSrc[5][1]);

  // 128-bit destinations.
  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  // 64-bit destinations.
  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  // Scalar destinations.
  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  // Destination is also the source.
  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
    ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
    ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

    ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
    ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
    ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
    ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);

    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
    ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
    ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
  }
}
7893
7894
// DUP (general): broadcasts the low bits of a general-purpose register, or
// the zero register, to every lane of a vector.
TEST(neon_copy_dup_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kPattern = 0x0011223344556677;

  START();

  __ Mov(x0, kPattern);

  // 128-bit destinations.
  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  // 64-bit destinations.
  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  // Zero register as the source.
  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
    ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
    ASSERT_EQUAL_128(kPattern, kPattern, q19);

    ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
    ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
    ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

    ASSERT_EQUAL_128(0, 0, q2);
    ASSERT_EQUAL_128(0, 0, q3);
    ASSERT_EQUAL_128(0, 0, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7936
7937
// INS (element): copies one lane of a source vector into one lane of the
// destination, leaving the destination's other lanes unchanged.
TEST(neon_copy_ins_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kAscending = 0x0123456789abcdef;
  const uint64_t kDescending = 0xfedcba9876543210;
  const uint64_t kLowNibbles = 0x0011223344556677;
  const uint64_t kHighNibbles = 0x8899aabbccddeeff;

  START();

  // Sources.
  __ Movi(v0.V2D(), kLowNibbles, kHighNibbles);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  // Destinations distinct from their source.
  __ Movi(v16.V2D(), kAscending, kDescending);
  __ Movi(v17.V2D(), kDescending, kAscending);
  __ Movi(v18.V2D(), kLowNibbles, kHighNibbles);
  __ Movi(v19.V2D(), kLowNibbles, kHighNibbles);

  // Registers used as both source and destination.
  __ Movi(v2.V2D(), 0, kLowNibbles);
  __ Movi(v3.V2D(), 0, kHighNibbles);
  __ Movi(v4.V2D(), 0, kAscending);
  __ Movi(v5.V2D(), 0, kAscending);

  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
    ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7981
7982
// Mov (element to element): same inputs and expectations as the Ins element
// test, issued through the Mov macro.
TEST(neon_copy_mov_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kAscending = 0x0123456789abcdef;
  const uint64_t kDescending = 0xfedcba9876543210;
  const uint64_t kLowNibbles = 0x0011223344556677;
  const uint64_t kHighNibbles = 0x8899aabbccddeeff;

  START();

  // Sources.
  __ Movi(v0.V2D(), kLowNibbles, kHighNibbles);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  // Destinations distinct from their source.
  __ Movi(v16.V2D(), kAscending, kDescending);
  __ Movi(v17.V2D(), kDescending, kAscending);
  __ Movi(v18.V2D(), kLowNibbles, kHighNibbles);
  __ Movi(v19.V2D(), kLowNibbles, kHighNibbles);

  // Registers used as both source and destination.
  __ Movi(v2.V2D(), 0, kLowNibbles);
  __ Movi(v3.V2D(), 0, kHighNibbles);
  __ Movi(v4.V2D(), 0, kAscending);
  __ Movi(v5.V2D(), 0, kAscending);

  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
  __ Mov(v17.V8H(), 0, v1.V8H(), 7);
  __ Mov(v18.V4S(), 3, v1.V4S(), 0);
  __ Mov(v19.V2D(), 1, v0.V2D(), 0);

  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
  __ Mov(v3.V8H(), 0, v3.V8H(), 7);
  __ Mov(v4.V4S(), 3, v4.V4S(), 0);
  __ Mov(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
    ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
8026
8027
// SMOV: moves a vector lane to a general-purpose register with sign
// extension, for W and X destinations.
TEST(neon_copy_smov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kSrcHi = 0x0123456789abcdef;
  const uint64_t kSrcLo = 0xfedcba9876543210;

  START();

  __ Movi(v0.V2D(), kSrcHi, kSrcLo);

  // Each pair reads one lane with its top bit set and one with it clear.
  // B and H lanes to W.
  __ Smov(w0, v0.B(), 7);
  __ Smov(w1, v0.B(), 15);

  __ Smov(w2, v0.H(), 0);
  __ Smov(w3, v0.H(), 3);

  // B, H and S lanes to X.
  __ Smov(x4, v0.B(), 7);
  __ Smov(x5, v0.B(), 15);

  __ Smov(x6, v0.H(), 0);
  __ Smov(x7, v0.H(), 3);

  __ Smov(x16, v0.S(), 0);
  __ Smov(x17, v0.S(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_32(0xfffffffe, w0);
    ASSERT_EQUAL_32(0x00000001, w1);
    ASSERT_EQUAL_32(0x00003210, w2);
    ASSERT_EQUAL_32(0xfffffedc, w3);
    ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
    ASSERT_EQUAL_64(0x0000000000000001, x5);
    ASSERT_EQUAL_64(0x0000000000003210, x6);
    ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
    ASSERT_EQUAL_64(0x0000000076543210, x16);
    ASSERT_EQUAL_64(0xfffffffffedcba98, x17);
  }
}
8067
8068
// UMOV, and Mov from a vector lane: moves a lane to a general-purpose
// register with zero extension.
TEST(neon_copy_umov_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kSrcHi = 0x0123456789abcdef;
  const uint64_t kSrcLo = 0xfedcba9876543210;

  START();

  __ Movi(v0.V2D(), kSrcHi, kSrcLo);

  __ Umov(w0, v0.B(), 15);
  __ Umov(w1, v0.H(), 0);
  __ Umov(w2, v0.S(), 3);
  __ Umov(x3, v0.D(), 1);

  // Mov is expected to give the same results as Umov for S and D lanes.
  __ Mov(w4, v0.S(), 3);
  __ Mov(x5, v0.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_32(0x00000001, w0);
    ASSERT_EQUAL_32(0x00003210, w1);
    ASSERT_EQUAL_32(0x01234567, w2);
    ASSERT_EQUAL_64(kSrcHi, x3);
    ASSERT_EQUAL_32(0x01234567, w4);
    ASSERT_EQUAL_64(kSrcHi, x5);
  }
}
8097
8098
// INS (general): writes the low bits of a general-purpose register into one
// lane of a vector, leaving the other lanes unchanged.
TEST(neon_copy_ins_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kAscending = 0x0123456789abcdef;
  const uint64_t kDescending = 0xfedcba9876543210;
  const uint64_t kLowNibbles = 0x0011223344556677;
  const uint64_t kHighNibbles = 0x8899aabbccddeeff;

  START();

  // The general-purpose source.
  __ Mov(x0, kLowNibbles);
  // Destinations with both halves populated.
  __ Movi(v16.V2D(), kAscending, kDescending);
  __ Movi(v17.V2D(), kDescending, kAscending);
  __ Movi(v18.V2D(), kLowNibbles, kHighNibbles);
  __ Movi(v19.V2D(), kLowNibbles, kHighNibbles);

  // Destinations whose upper half starts as zero.
  __ Movi(v2.V2D(), 0, kLowNibbles);
  __ Movi(v3.V2D(), 0, kHighNibbles);
  __ Movi(v4.V2D(), 0, kAscending);
  __ Movi(v5.V2D(), 0, kAscending);

  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
    ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
    ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
  }
}
8141
8142
// EXT: extracts a vector from a byte offset into the concatenation of two
// source vectors, including cases where the destination aliases a source.
TEST(neon_extract_ext) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kLowNibbles = 0x0011223344556677;
  const uint64_t kHighNibbles = 0x8899aabbccddeeff;

  START();

  // Sources for the 16B forms.
  __ Movi(v0.V2D(), kLowNibbles, kHighNibbles);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);

  // Sources for the 8B forms.
  __ Movi(v2.V2D(), 0, kLowNibbles);
  __ Movi(v3.V2D(), 0, kHighNibbles);

  // 16B: smallest and largest offsets, then the aliasing cases.
  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Dest is same as one Src
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All reg are the same

  // 8B: smallest and largest offsets, then the aliasing cases.
  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // Dest is same as one Src
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // All reg are the same

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
    ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
    ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);

    ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
    ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
    ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
    ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
  }
}
8180
8181
// UADDL: unsigned add long. Each result lane is twice the width of the source
// lanes, so sums that would overflow the source lane size are kept in full.
TEST(neon_3different_uaddl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Destinations are cleared before use.
  const uint64_t kZero = 0x0000000000000000;

  START();

  // 8B -> 8H: destination v0, operands v1 + v2 (every byte of v2 is 1).
  __ Movi(v0.V2D(), kZero, kZero);
  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
  __ Movi(v2.V2D(), 0, 0x0101010101010101);

  // 4H -> 4S: destinations v3 and v4, operands v5 + v7 and v6 + v7.
  __ Movi(v3.V2D(), kZero, kZero);
  __ Movi(v4.V2D(), kZero, kZero);
  __ Movi(v5.V2D(), 0, 0x0000000180008001);
  __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
  __ Movi(v7.V2D(), 0, 0x0001000100010001);

  // 2S -> 2D: destinations v16 and v17, operands v18 + v20 and v19 + v20.
  __ Movi(v16.V2D(), kZero, kZero);
  __ Movi(v17.V2D(), kZero, kZero);
  __ Movi(v18.V2D(), 0, 0x0000000000000001);
  __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
  __ Movi(v20.V2D(), 0, 0x0000000100000001);

  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());

  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());

  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
    ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
    ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
    ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
    ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
  }
}
8224
8225
// Exercises Addhn, Raddhn, Subhn and Rsubhn together with their "2" variants.
// Each destination is written twice: first through a V8B view from v0/v1, then
// through a V16B view by the "2" form from v2/v3. The asserts check all 128
// bits of every result.
TEST(neon_3different_addhn_subhn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is loaded but not referenced by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
  __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
  __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
  __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
    ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
  }
}
8257
// Exercises the scalar D-register forms of Add, Sub, Ushl, Sshl, Ushr, Sshr
// and Shl. Only the low 64 bits of v0-v4 are consumed; every assert expects
// the upper 64 bits of the destination Q register to read as zero.
TEST(neon_d_only_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
  // Shift counts for the register-shift forms: +2 in d3 and -2 in d4. The
  // expected results below are d0 shifted left by two and right by two.
  __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
  __ Movi(v4.V2D(), 0xffffffffffffffff, -2);

  __ Add(d16, d0, d0);
  __ Add(d17, d1, d1);
  __ Add(d18, d2, d2);
  __ Sub(d19, d0, d0);
  __ Sub(d20, d0, d1);
  __ Sub(d21, d1, d0);
  __ Ushl(d22, d0, d3);
  __ Ushl(d23, d0, d4);
  __ Sshl(d24, d0, d3);
  __ Sshl(d25, d0, d4);
  __ Ushr(d26, d0, 1);
  __ Sshr(d27, d0, 3);
  __ Shl(d28, d0, 0);
  __ Shl(d29, d0, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // Add / Sub.
    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
    ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
    // Register shifts: the right shift of d0 fills with zeros for Ushl (q23)
    // and with copies of the top bit for Sshl (q25).
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
    ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
    ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
    // Immediate shifts.
    ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
    ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
    ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
    ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);
  }
}
8305
8306
// Exercises the scalar immediate form of Sqshl (shift by 1) on B, H, S and D
// registers. For each width the inputs are the largest positive value, the
// sign-bit-only value and 1; the first two are expected back unchanged (the
// result is clamped to the lane's signed range) and the third is expected as 2.
TEST(neon_sqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 are reloaded before every width group, so the order of the Movi and
  // Sqshl instructions below must be preserved.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshl(b16, b0, 1);
  __ Sqshl(b17, b1, 1);
  __ Sqshl(b18, b2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshl(h19, h0, 1);
  __ Sqshl(h20, h1, 1);
  __ Sqshl(h21, h2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshl(s22, s0, 1);
  __ Sqshl(s23, s1, 1);
  __ Sqshl(s24, s2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshl(d25, d0, 1);
  __ Sqshl(d26, d1, 1);
  __ Sqshl(d27, d2, 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x7f, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    ASSERT_EQUAL_128(0, 0x7fff, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    ASSERT_EQUAL_128(0, 0x7fffffff, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8362
8363
// Exercises the scalar immediate form of Uqshl (shift by 1) on B, H, S and D
// registers. For each width the inputs are all-ones-below-the-top-bit, the
// top-bit-only value and 1; the expected results are the plain doubled value,
// the all-ones value (clamped to the lane's unsigned maximum) and 2.
TEST(neon_uqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 are reloaded before every width group, so the order of the Movi and
  // Uqshl instructions below must be preserved.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Uqshl(b16, b0, 1);
  __ Uqshl(b17, b1, 1);
  __ Uqshl(b18, b2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Uqshl(h19, h0, 1);
  __ Uqshl(h20, h1, 1);
  __ Uqshl(h21, h2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Uqshl(s22, s0, 1);
  __ Uqshl(s23, s1, 1);
  __ Uqshl(s24, s2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Uqshl(d25, d0, 1);
  __ Uqshl(d26, d1, 1);
  __ Uqshl(d27, d2, 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0xfe, q16);
    ASSERT_EQUAL_128(0, 0xff, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    ASSERT_EQUAL_128(0, 0xfffe, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    ASSERT_EQUAL_128(0, 0xfffffffe, q22);
    ASSERT_EQUAL_128(0, 0xffffffff, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8419
8420
// Exercises the scalar form of Sqshlu (shift by 2) on B, H, S and D registers.
// For each width the inputs are the largest positive value, the sign-bit-only
// value and 1; the expected results are all-ones, zero and 4 respectively.
TEST(neon_sqshlu_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 are reloaded before every width group, so the order of the Movi and
  // Sqshlu instructions below must be preserved.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshlu(b16, b0, 2);
  __ Sqshlu(b17, b1, 2);
  __ Sqshlu(b18, b2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshlu(h19, h0, 2);
  __ Sqshlu(h20, h1, 2);
  __ Sqshlu(h21, h2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshlu(s22, s0, 2);
  __ Sqshlu(s23, s1, 2);
  __ Sqshlu(s24, s2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshlu(d25, d0, 2);
  __ Sqshlu(d26, d1, 2);
  __ Sqshlu(d27, d2, 2);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x00, q17);
    ASSERT_EQUAL_128(0, 0x04, q18);

    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0x0000, q20);
    ASSERT_EQUAL_128(0, 0x0004, q21);

    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0x00000000, q23);
    ASSERT_EQUAL_128(0, 0x00000004, q24);

    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000004, q27);
  }
}
8476
8477
// Exercises Sshll and Sshll2 for byte, halfword and word source lanes with
// shifts of 4, 8 and 16. The expected values are the sign-extended source
// lanes shifted left (for example byte 0x81 is expected as halfword 0xf810).
TEST(neon_sshll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form is given a 64-bit source view, the "2" form a 128-bit one.
  __ Sshll(v16.V8H(), v0.V8B(), 4);
  __ Sshll2(v17.V8H(), v0.V16B(), 4);

  __ Sshll(v18.V4S(), v1.V4H(), 8);
  __ Sshll2(v19.V4S(), v1.V8H(), 8);

  __ Sshll(v20.V2D(), v2.V2S(), 16);
  __ Sshll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
    ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
    ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
  }
}
8509
// Exercises Shll and Shll2 for byte, halfword and word source lanes. The shift
// passed in each call equals the source lane width (8, 16, 32), so every
// expected destination lane holds the source lane in its upper half and zeros
// in its lower half.
TEST(neon_shll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form is given a 64-bit source view, the "2" form a 128-bit one.
  __ Shll(v16.V8H(), v0.V8B(), 8);
  __ Shll2(v17.V8H(), v0.V16B(), 8);

  __ Shll(v18.V4S(), v1.V4H(), 16);
  __ Shll2(v19.V4S(), v1.V8H(), 16);

  __ Shll(v20.V2D(), v2.V2S(), 32);
  __ Shll2(v21.V2D(), v2.V4S(), 32);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
    ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
    ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
    ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
    ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
  }
}
8541
// Exercises Ushll and Ushll2 for byte, halfword and word source lanes with
// shifts of 4, 8 and 16. The expected values are the zero-extended source
// lanes shifted left (for example byte 0x81 is expected as halfword 0x0810).
TEST(neon_ushll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form is given a 64-bit source view, the "2" form a 128-bit one.
  __ Ushll(v16.V8H(), v0.V8B(), 4);
  __ Ushll2(v17.V8H(), v0.V16B(), 4);

  __ Ushll(v18.V4S(), v1.V4H(), 8);
  __ Ushll2(v19.V4S(), v1.V8H(), 8);

  __ Ushll(v20.V2D(), v2.V2S(), 16);
  __ Ushll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
    ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
    ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
  }
}
8573
8574
// Exercises Sxtl and Sxtl2 for byte, halfword and word source lanes. The
// expected values are the source lanes sign-extended to twice their width
// (for example byte 0x81 is expected as halfword 0xff81).
TEST(neon_sxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form is given a 64-bit source view, the "2" form a 128-bit one.
  __ Sxtl(v16.V8H(), v0.V8B());
  __ Sxtl2(v17.V8H(), v0.V16B());

  __ Sxtl(v18.V4S(), v1.V4H());
  __ Sxtl2(v19.V4S(), v1.V8H());

  __ Sxtl(v20.V2D(), v2.V2S());
  __ Sxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
    ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
  }
}
8606
8607
// Exercises Uxtl and Uxtl2 for byte, halfword and word source lanes. The
// expected values are the source lanes zero-extended to twice their width
// (for example byte 0x81 is expected as halfword 0x0081).
TEST(neon_uxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form is given a 64-bit source view, the "2" form a 128-bit one.
  __ Uxtl(v16.V8H(), v0.V8B());
  __ Uxtl2(v17.V8H(), v0.V16B());

  __ Uxtl(v18.V4S(), v1.V4H());
  __ Uxtl2(v19.V4S(), v1.V8H());

  __ Uxtl(v20.V2D(), v2.V2S());
  __ Uxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
    ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
  }
}
8639
8640
// Exercises Ssra for 8B/16B, 4H/8H, 2S/4S and 2D arrangements plus the scalar
// D form. Each destination is first seeded with a copy of its source, so the
// expected value is the source plus its own shifted value. For the 64-bit
// arrangements the asserts expect the upper 64 bits to read as zero.
TEST(neon_ssra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Seed the accumulators. v25 is seeded here but is neither written by an
  // Ssra below nor checked by an assert.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Ssra(v16.V8B(), v0.V8B(), 4);
  __ Ssra(v17.V16B(), v0.V16B(), 4);

  __ Ssra(v18.V4H(), v1.V4H(), 8);
  __ Ssra(v19.V8H(), v1.V8H(), 8);

  __ Ssra(v20.V2S(), v2.V2S(), 16);
  __ Ssra(v21.V4S(), v2.V4S(), 16);

  __ Ssra(v22.V2D(), v3.V2D(), 32);
  __ Ssra(v23.V2D(), v4.V2D(), 32);

  __ Ssra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
    ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
    ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8693
// Exercises Srsra for 8B/16B, 4H/8H, 2S/4S and 2D arrangements plus the scalar
// D form. Each destination is first seeded with a copy of its source. The
// expected scalar result is d3 + 0x8000, i.e. 0x7fffffffffffffff >> 48 is
// accumulated as 0x8000 rather than 0x7fff.
TEST(neon_srsra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Seed the accumulators. v25 is seeded here but is neither written by an
  // Srsra below nor checked by an assert.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Srsra(v16.V8B(), v0.V8B(), 4);
  __ Srsra(v17.V16B(), v0.V16B(), 4);

  __ Srsra(v18.V4H(), v1.V4H(), 8);
  __ Srsra(v19.V8H(), v1.V8H(), 8);

  __ Srsra(v20.V2S(), v2.V2S(), 16);
  __ Srsra(v21.V4S(), v2.V4S(), 16);

  __ Srsra(v22.V2D(), v3.V2D(), 32);
  __ Srsra(v23.V2D(), v4.V2D(), 32);

  __ Srsra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
    ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
    ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8746
// Exercises Usra for 8B/16B, 4H/8H, 2S/4S and 2D arrangements plus the scalar
// D form. Each destination is first seeded with a copy of its source, so the
// expected value is the source plus its own zero-filled right shift (for
// example byte 0x81 is expected as 0x81 + 0x08 = 0x89).
TEST(neon_usra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Seed the accumulators. v25 is seeded here but is neither written by a
  // Usra below nor checked by an assert.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Usra(v16.V8B(), v0.V8B(), 4);
  __ Usra(v17.V16B(), v0.V16B(), 4);

  __ Usra(v18.V4H(), v1.V4H(), 8);
  __ Usra(v19.V8H(), v1.V8H(), 8);

  __ Usra(v20.V2S(), v2.V2S(), 16);
  __ Usra(v21.V4S(), v2.V4S(), 16);

  __ Usra(v22.V2D(), v3.V2D(), 32);
  __ Usra(v23.V2D(), v4.V2D(), 32);

  __ Usra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
    ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
    ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8799
// Exercises Ursra for 8B/16B, 4H/8H, 2S/4S and 2D arrangements plus the scalar
// D form. Each destination is first seeded with a copy of its source. The
// expected scalar result is d3 + 0x8000, i.e. 0x7fffffffffffffff >> 48 is
// accumulated as 0x8000 rather than 0x7fff.
TEST(neon_ursra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Seed the accumulators. v25 is seeded here but is neither written by a
  // Ursra below nor checked by an assert.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Ursra(v16.V8B(), v0.V8B(), 4);
  __ Ursra(v17.V16B(), v0.V16B(), 4);

  __ Ursra(v18.V4H(), v1.V4H(), 8);
  __ Ursra(v19.V8H(), v1.V8H(), 8);

  __ Ursra(v20.V2S(), v2.V2S(), 16);
  __ Ursra(v21.V4S(), v2.V4S(), 16);

  __ Ursra(v22.V2D(), v3.V2D(), 32);
  __ Ursra(v23.V2D(), v4.V2D(), 32);

  __ Ursra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
    ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
    ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8852
8853
// Exercises the scalar register-shift form of Uqshl on B, H, S and D
// registers. v2 supplies a shift value of 1 and v3 an all-ones shift value;
// the expected results for v3 are the inputs shifted right by one, and the
// expected results for shifting v0 by v2 are all-ones (clamped).
TEST(neon_uqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqshl(b16, b0, b2);
  __ Uqshl(b17, b0, b3);
  __ Uqshl(b18, b1, b2);
  __ Uqshl(b19, b1, b3);
  __ Uqshl(h20, h0, h2);
  __ Uqshl(h21, h0, h3);
  __ Uqshl(h22, h1, h2);
  __ Uqshl(h23, h1, h3);
  __ Uqshl(s24, s0, s2);
  __ Uqshl(s25, s0, s3);
  __ Uqshl(s26, s1, s2);
  __ Uqshl(s27, s1, s3);
  __ Uqshl(d28, d0, d2);
  __ Uqshl(d29, d0, d3);
  __ Uqshl(d30, d1, d2);
  __ Uqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // The upper bits of each destination Q register are expected to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x3f, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fbf, q23);
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);
  }
}
8904
8905
// Exercises the scalar register-shift form of Sqshl on B, H, S and D
// registers. v2 supplies a shift value of 1 and v3 an all-ones shift value.
// Shifting by v2 is expected to clamp: v0's lanes to the sign-bit-only value
// and v1's lanes to the largest positive value. Shifting by v3 is expected to
// give the inputs shifted right by one with the top bit preserved.
TEST(neon_sqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqshl(b16, b0, b2);
  __ Sqshl(b17, b0, b3);
  __ Sqshl(b18, b1, b2);
  __ Sqshl(b19, b1, b3);
  __ Sqshl(h20, h0, h2);
  __ Sqshl(h21, h0, h3);
  __ Sqshl(h22, h1, h2);
  __ Sqshl(h23, h1, h3);
  __ Sqshl(s24, s0, s2);
  __ Sqshl(s25, s0, s3);
  __ Sqshl(s26, s1, s2);
  __ Sqshl(s27, s1, s3);
  __ Sqshl(d28, d0, d2);
  __ Sqshl(d29, d0, d3);
  __ Sqshl(d30, d1, d2);
  __ Sqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // The upper bits of each destination Q register are expected to be zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xdf, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfdf, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8956
8957
// Exercises the scalar D-register form of Urshl. v2 supplies a shift value of
// 1 and v3 an all-ones shift value. The left shift of d0 is expected to wrap
// (0xe0000001e001e1e0) rather than clamp, and d1 shifted by v3 is expected as
// 0x3fffffffbfffbfc0, i.e. the one-bit right shift rounded up.
TEST(neon_urshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Urshl(d28, d0, d2);
  __ Urshl(d29, d0, d3);
  __ Urshl(d30, d1, d2);
  __ Urshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
8984
8985
// Exercises the scalar D-register form of Srshl. v2 supplies a shift value of
// 1 and v3 an all-ones shift value. The left shifts are expected to wrap
// rather than clamp (d0 -> 0x7fffffff7fff7f7e), and d0 shifted by v3 is
// expected as 0xdfffffffdfffdfe0, i.e. the one-bit right shift rounded up.
TEST(neon_srshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Srshl(d28, d0, d2);
  __ Srshl(d29, d0, d3);
  __ Srshl(d30, d1, d2);
  __ Srshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
9012
9013
// Exercises the scalar register-shift form of Uqrshl on B, H, S and D
// registers. v2 supplies a shift value of 1 and v3 an all-ones shift value.
// Shifting v0 by v2 is expected to clamp to all-ones; shifting v1 by v3 is
// expected to give the one-bit right shift rounded up (byte 0x7f -> 0x40).
TEST(neon_uqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqrshl(b16, b0, b2);
  __ Uqrshl(b17, b0, b3);
  __ Uqrshl(b18, b1, b2);
  __ Uqrshl(b19, b1, b3);
  __ Uqrshl(h20, h0, h2);
  __ Uqrshl(h21, h0, h3);
  __ Uqrshl(h22, h1, h2);
  __ Uqrshl(h23, h1, h3);
  __ Uqrshl(s24, s0, s2);
  __ Uqrshl(s25, s0, s3);
  __ Uqrshl(s26, s1, s2);
  __ Uqrshl(s27, s1, s3);
  __ Uqrshl(d28, d0, d2);
  __ Uqrshl(d29, d0, d3);
  __ Uqrshl(d30, d1, d2);
  __ Uqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // The upper bits of each destination Q register are expected to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x40, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fc0, q23);
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
9064
9065
// Exercises the scalar register-shift form of Sqrshl on B, H, S and D
// registers. v2 supplies a shift value of 1 and v3 an all-ones shift value.
// Shifting by v2 is expected to clamp (v0 -> sign-bit-only, v1 -> largest
// positive); shifting v0 by v3 is expected to give the one-bit right shift
// rounded up (byte 0xbf -> 0xe0).
TEST(neon_sqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqrshl(b16, b0, b2);
  __ Sqrshl(b17, b0, b3);
  __ Sqrshl(b18, b1, b2);
  __ Sqrshl(b19, b1, b3);
  __ Sqrshl(h20, h0, h2);
  __ Sqrshl(h21, h0, h3);
  __ Sqrshl(h22, h1, h2);
  __ Sqrshl(h23, h1, h3);
  __ Sqrshl(s24, s0, s2);
  __ Sqrshl(s25, s0, s3);
  __ Sqrshl(s26, s1, s2);
  __ Sqrshl(s27, s1, s3);
  __ Sqrshl(d28, d0, d2);
  __ Sqrshl(d29, d0, d3);
  __ Sqrshl(d30, d1, d2);
  __ Sqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // The upper bits of each destination Q register are expected to be zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xe0, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfe0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
9116
9117
// Exercises the scalar form of Uqadd on B, H, S and D registers by adding each
// input to itself. Doubling v0's lanes is expected to clamp to all-ones, while
// doubling v1's and v2's lanes is expected to give the plain sum.
TEST(neon_uqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Uqadd(b16, b0, b0);
  __ Uqadd(b17, b1, b1);
  __ Uqadd(b18, b2, b2);
  __ Uqadd(h19, h0, h0);
  __ Uqadd(h20, h1, h1);
  __ Uqadd(h21, h2, h2);
  __ Uqadd(s22, s0, s0);
  __ Uqadd(s23, s1, s1);
  __ Uqadd(s24, s2, s2);
  __ Uqadd(d25, d0, d0);
  __ Uqadd(d26, d1, d1);
  __ Uqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    // The upper bits of each destination Q register are expected to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0xfe, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);
    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0xfefe, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);
    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0xfffefefe, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9159
9160
// Exercises the scalar form of Sqadd on B, H, S and D registers by adding each
// input to itself. Doubling v0's lanes is expected to clamp to the
// sign-bit-only value, doubling v1's lanes to the largest positive value, and
// doubling v2's lanes is expected to give the plain sum.
TEST(neon_sqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Sqadd(b16, b0, b0);
  __ Sqadd(b17, b1, b1);
  __ Sqadd(b18, b2, b2);
  __ Sqadd(h19, h0, h0);
  __ Sqadd(h20, h1, h1);
  __ Sqadd(h21, h2, h2);
  __ Sqadd(s22, s0, s0);
  __ Sqadd(s23, s1, s1);
  __ Sqadd(s24, s2, s2);
  __ Sqadd(d25, d0, d0);
  __ Sqadd(d26, d1, d1);
  __ Sqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    // The upper bits of each destination Q register are expected to be zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0x7f, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);
    ASSERT_EQUAL_128(0, 0x8000, q19);
    ASSERT_EQUAL_128(0, 0x7fff, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);
    ASSERT_EQUAL_128(0, 0x80000000, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9202
9203
// Exercises the scalar form of Uqsub on B, H, S and D registers. For each
// width it computes v0 - v0, v0 - v1 and v1 - v0; the last of these (smaller
// minus larger as unsigned values) is expected to clamp to zero.
TEST(neon_uqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);

  __ Uqsub(b16, b0, b0);
  __ Uqsub(b17, b0, b1);
  __ Uqsub(b18, b1, b0);
  __ Uqsub(h19, h0, h0);
  __ Uqsub(h20, h0, h1);
  __ Uqsub(h21, h1, h0);
  __ Uqsub(s22, s0, s0);
  __ Uqsub(s23, s0, s1);
  __ Uqsub(s24, s1, s0);
  __ Uqsub(d25, d0, d0);
  __ Uqsub(d26, d0, d1);
  __ Uqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The upper bits of each destination Q register are expected to be zero.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x71, q17);
    ASSERT_EQUAL_128(0, 0, q18);

    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7171, q20);
    ASSERT_EQUAL_128(0, 0, q21);

    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x70017171, q23);
    ASSERT_EQUAL_128(0, 0, q24);

    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
    ASSERT_EQUAL_128(0, 0, q27);
  }
}
9247
9248
// Exercises the scalar form of Sqsub on B, H, S and D registers. For each
// width it computes v0 - v0, v0 - v1 and v1 - v0; the expected results are
// zero, the sign-bit-only value and the largest positive value respectively,
// i.e. both non-trivial differences are expected to clamp.
TEST(neon_sqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);

  __ Sqsub(b16, b0, b0);
  __ Sqsub(b17, b0, b1);
  __ Sqsub(b18, b1, b0);
  __ Sqsub(h19, h0, h0);
  __ Sqsub(h20, h0, h1);
  __ Sqsub(h21, h1, h0);
  __ Sqsub(s22, s0, s0);
  __ Sqsub(s23, s0, s1);
  __ Sqsub(s24, s1, s0);
  __ Sqsub(d25, d0, d0);
  __ Sqsub(d26, d0, d1);
  __ Sqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The upper bits of each destination Q register are expected to be zero.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);

    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x7fff, q21);

    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x7fffffff, q24);

    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
9292
9293
// Check the vector forms of Fmla and Fmls with 2S, 4S and 2D arrangements.
// Every destination starts as a copy of v0 (the accumulator); v1 and v2 supply
// the multiplicands. The single-precision inputs include 1.0 * -Inf and
// 0.0 * +Inf, so the expected results contain infinities and the default NaN
// (0x7fc00000).
TEST(neon_fmla_fmls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
  __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
  __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);
  // Seed each accumulator with the same starting value.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V16B(), v0.V16B());
  __ Mov(v18.V16B(), v0.V16B());
  __ Mov(v19.V16B(), v0.V16B());
  __ Mov(v20.V16B(), v0.V16B());
  __ Mov(v21.V16B(), v0.V16B());

  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 2S forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
    ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
    ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
    ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
    ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);
  }
}
9327
9328
// Check the half-precision vector form of Fmla with 8H and 4H arrangements.
//
// Inputs (every lane holds the same value):
//   v0: 2.0        v1: 45.0       v2: +Inf       v3: -Inf
//   v4: 0x7e00 (quiet NaN)        v5: 0x7c01 (signalling NaN)    v6: 0.0
//
// Cases, for each arrangement:
//   2.0 + (2.0 * 45.0)        = 92.0 (0x55c0)
//   2.0 + (+Inf * -Inf)       = -Inf
//   quiet NaN + (+Inf * 0.0)  = 0x7e00
//   0x7c01 + (-Inf * 0.0)     = 0x7e01
TEST(neon_fmla_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
  __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
  // Accumulators: v16-v19 for the 8H forms, v20-v23 for the 4H forms.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmla(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmla(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmla(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmla(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmla(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmla(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmla(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmla(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x55c055c055c055c0, 0x55c055c055c055c0, v16);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v17);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x55c055c055c055c0, v20);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v21);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
  }
}
9374
9375
// Check the half-precision vector form of Fmls with 8H and 4H arrangements.
//
// Inputs (every lane holds the same value):
//   v0: 2.0        v1: 45.0       v2: +Inf       v3: -Inf
//   v4: 0x7e00 (quiet NaN)        v5: 0x7c01 (signalling NaN)    v6: 0.0
//
// Cases, for each arrangement:
//   2.0 - (2.0 * 45.0)        = -88.0 (0xd580)
//   2.0 - (+Inf * -Inf)       = +Inf
//   quiet NaN - (+Inf * 0.0)  = 0x7e00
//   0x7c01 - (-Inf * 0.0)     = 0x7e01
TEST(neon_fmls_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
  __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
  // Accumulators: v16-v19 for the 8H forms, v20-v23 for the 4H forms.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmls(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmls(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmls(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmls(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmls(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmls(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmls(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmls(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v16);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v17);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0xd580d580d580d580, v20);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v21);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
  }
}
9421
9422
TEST(neon_fhm) {
  // Test basic operation of fmlal{2} and fmlsl{2}. The simulator tests have
  // more comprehensive input sets.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kNEON,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFHM);

  START();
  // Test multiplications:
  //        v30                               v31
  //  [0]   65504 (max normal)          *     65504 (max normal)
  //  [1]   -1                          *     0
  //  [2]   2^-24 (min subnormal)       *     2^-24 (min subnormal)
  //  [3]   -2^-24 (min subnormal)      *     65504 (max normal)
  //  [4]   6.10e-5 (min normal)        *     0.99...
  //  [5]   0                           *     -0
  //  [6]   -0                          *     0
  //  [7]   -Inf                        *     -Inf
  __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
  __ Movi(v31.V8H(), 0xfc00000080003bff, 0x7bff000100007bff);

  // Accumulators for use with Fmlal{2}:
  // v0.S[0] = 384
  // v0.S[1] = -0
  __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x8000000043c00000);
  // v1.S[0] = -(2^-48 + 2^-71)
  // v1.S[1] = 0
  __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7800001);
  // v2.S[0] = 128
  // v2.S[1] = 0
  // v2.S[2] = 1
  // v2.S[3] = 1
  __ Movi(v2.V4S(), 0x3f8000003f800000, 0x0000000043000000);
  // v3.S[0] = 0
  // v3.S[1] = -0
  // v3.S[2] = -0
  // v3.S[3] = 0
  __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
  // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
  // results are just the negation of the Fmlal{2} results.
  __ Fneg(v4.V4S(), v0.V4S());
  __ Fneg(v5.V4S(), v1.V4S());
  __ Fneg(v6.V4S(), v2.V4S());
  __ Fneg(v7.V4S(), v3.V4S());

  __ Fmlal(v0.V2S(), v30.V2H(), v31.V2H());
  __ Fmlal2(v1.V2S(), v30.V2H(), v31.V2H());
  __ Fmlal(v2.V4S(), v30.V4H(), v31.V4H());
  __ Fmlal2(v3.V4S(), v30.V4H(), v31.V4H());

  __ Fmlsl(v4.V2S(), v30.V2H(), v31.V2H());
  __ Fmlsl2(v5.V2S(), v30.V2H(), v31.V2H());
  __ Fmlsl(v6.V4S(), v30.V4H(), v31.V4H());
  __ Fmlsl2(v7.V4S(), v30.V4H(), v31.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Fmlal(2S)
    // v0.S[0] = 384 + (65504 * 65504) = 4290774528 (rounded from 4290774400)
    // v0.S[1] = -0 + (-1 * 0) = -0
    ASSERT_EQUAL_128(0x0000000000000000, 0x800000004f7fc006, v0);
    // Fmlal2(2S)
    // v1.S[0] = -(2^-48 + 2^-71) + (2^-24 * 2^-24) = -2^-71
    // v1.S[1] = 0 + (-2^-24 * 65504) = -0.003904...
    ASSERT_EQUAL_128(0x0000000000000000, 0xbb7fe0009c000000, v1);
    // Fmlal(4S)
    // v2.S[0] = 128 + (65504 * 65504) = 4290774016 (rounded from 4290774144)
    // v2.S[1] = 0 + (-1 * 0) = 0
    // v2.S[2] = 1 + (2^-24 * 2^-24) = 1 (rounded)
    // v2.S[3] = 1 + (-2^-24 * 65504) = 0.996...
    ASSERT_EQUAL_128(0x3f7f00203f800000, 0x000000004f7fc004, v2);
    // Fmlal2(4S)
    // v3.S[0] = 0 + (6.103516e-5 * 0.99...) = 6.100535e-5
    // v3.S[1] = -0 + (0 * -0) = -0
    // v3.S[2] = -0 + (-0 * 0) = -0
    // v3.S[3] = 0 + (-Inf * -Inf) = Inf
    ASSERT_EQUAL_128(0x7f80000080000000, 0x80000000387fe000, v3);

    // Fmlsl results are mostly the same, but negated.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000cf7fc006, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3b7fe0001c000000, v5);
    // In this case: v6.S[1] = -0 - (-1 * 0) = 0, which has the same sign as
    // the corresponding Fmlal result (v2.S[1]) rather than its negation.
    ASSERT_EQUAL_128(0xbf7f0020bf800000, 0x00000000cf7fc004, v6);
    ASSERT_EQUAL_128(0xff80000000000000, 0x00000000b87fe000, v7);
  }
}
9512
9513
TEST(neon_byelement_fhm) {
  // Test basic operation of fmlal{2} and fmlsl{2} (by element). The simulator
  // tests have more comprehensive input sets.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kNEON,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFHM);

  START();
  // Set up multiplication inputs.
  //
  // v30.H[0] = 65504 (max normal)
  // v30.H[1] = -1
  // v30.H[2] = 2^-24 (min subnormal)
  // v30.H[3] = -2^-24 (min subnormal)
  // v30.H[4] = 6.10e-5 (min normal)
  // v30.H[5] = 0
  // v30.H[6] = -0
  // v30.H[7] = -Inf
  __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);

  // Each test instruction should only use one lane of vm, so set up unique
  // registers with poison values in other lanes. The poison NaN avoids the
  // default NaN (so it shouldn't be encountered accidentally), but is otherwise
  // arbitrary.
  VRegister poison = v29;
  __ Movi(v29.V8H(), 0x7f417f417f417f41, 0x7f417f417f417f41);
  // v31.H[0,2,4,...]: 1.000977 (0x3c01, the value just above 1)
  // v31.H[1,3,5,...]: 0.9995117 (0x3bff, the value just below 1)
  __ Movi(v31.V8H(), 0x3bff3c013bff3c01, 0x3bff3c013bff3c01);
  // Set up [v8,v15] as vm inputs: register v(8 + i) is all poison except for
  // lane i, which is copied from v31.H[i].
  for (int i = 0; i <= 7; i++) {
    VRegister vm(i + 8);
    __ Mov(vm, poison);
    __ Ins(vm.V8H(), i, v31.V8H(), i);
  }

  // Accumulators for use with Fmlal{2}:
  // v0.S[0] = 2^-8
  // v0.S[1] = 1
  __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x3f8000003b800000);
  // v1.S[0] = -1.5 * 2^-49
  // v1.S[1] = 0
  __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7400000);
  // v2.S[0] = 0
  // v2.S[1] = -2^14
  // v2.S[2] = 1.5 * 2^-48
  // v2.S[3] = Inf
  __ Movi(v2.V4S(), 0x7f80000027c00000, 0xc680000000000000);
  // v3.S[0] = 0
  // v3.S[1] = -0
  // v3.S[2] = -0
  // v3.S[3] = 0
  __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
  // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
  // results are just the negation of the Fmlal{2} results.
  __ Fneg(v4.V4S(), v0.V4S());
  __ Fneg(v5.V4S(), v1.V4S());
  __ Fneg(v6.V4S(), v2.V4S());
  __ Fneg(v7.V4S(), v3.V4S());

  __ Fmlal(v0.V2S(), v30.V2H(), v8.H(), 0);
  __ Fmlal2(v1.V2S(), v30.V2H(), v9.H(), 1);
  __ Fmlal(v2.V4S(), v30.V4H(), v10.H(), 2);
  __ Fmlal2(v3.V4S(), v30.V4H(), v11.H(), 3);

  __ Fmlsl(v4.V2S(), v30.V2H(), v12.H(), 4);
  __ Fmlsl2(v5.V2S(), v30.V2H(), v13.H(), 5);
  __ Fmlsl(v6.V4S(), v30.V4H(), v14.H(), 6);
  __ Fmlsl2(v7.V4S(), v30.V4H(), v15.H(), 7);
  END();

  if (CAN_RUN()) {
    RUN();

    // Fmlal(2S)
    // v0.S[0] = 2^-8 + (65504 * 1.000977) = 65567.96875 (rounded)
    // v0.S[1] = 1 + (-1 * 1.000977) = -0.000976...
    ASSERT_EQUAL_128(0x0000000000000000, 0xba80000047800ffc, v0);
    // Fmlal2(2S)
    // v1.S[0] = (-1.5 * 2^-49) + (2^-24 * 0.9995117) = 5.958e-8 (rounded)
    // v1.S[1] = 0 + (-2^-24 * 0.9995117) = -5.958e-8
    ASSERT_EQUAL_128(0x0000000000000000, 0xb37fe000337fdfff, v1);
    // Fmlal(4S)
    // v2.S[0] = 0 + (65504 * 1.000977) = 65567.96875
    // v2.S[1] = -2^14 + (-1 * 1.000977) = -16385 (rounded from -16385.000977)
    // v2.S[2] = (1.5 * 2^-48) + (2^-24 * 1.000977) = 5.966e-8 (rounded up)
    // v2.S[3] = Inf + (-2^-24 * 1.000977) = Inf
    ASSERT_EQUAL_128(0x7f80000033802001, 0xc680020047800ffc, v2);
    // Fmlal2(4S)
    // v3.S[0] = 0 + (6.103516e-5 * 0.9995117) = 6.100535e-5
    // v3.S[1] = -0 + (0 * 0.9995117) = 0
    // v3.S[2] = -0 + (-0 * 0.9995117) = -0
    // v3.S[3] = 0 + (-Inf * 0.9995117) = -Inf
    ASSERT_EQUAL_128(0xff80000080000000, 0x00000000387fe000, v3);

    // Fmlsl results are mostly the same, but negated.
    ASSERT_EQUAL_128(0x0000000000000000, 0x3a800000c7800ffc, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x337fe000b37fdfff, v5);
    ASSERT_EQUAL_128(0xff800000b3802001, 0x46800200c7800ffc, v6);
    // In this case: v7.S[1] = 0 - (0 * 0.9995117) = 0, which has the same sign
    // as the corresponding Fmlal2 result (v3.S[1]) rather than its negation.
    ASSERT_EQUAL_128(0x7f80000000000000, 0x00000000b87fe000, v7);
  }
}
9618
9619
// Check the scalar form of Fmulx on S and D registers. Besides an ordinary
// product (2.0 * 0.5), the test multiplies each signed zero by each signed
// infinity; those cases are expected to give 2.0 when the operand signs match
// and -2.0 when they differ.
TEST(neon_fmulx_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single precision.
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmulx(s16, s0, s1);
  __ Fmulx(s17, s2, s4);
  __ Fmulx(s18, s2, s5);
  __ Fmulx(s19, s3, s4);
  __ Fmulx(s20, s3, s5);

  // Double precision.
  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fmulx(d27, d21, d22);
  __ Fmulx(d28, d23, d25);
  __ Fmulx(d29, d23, d26);
  __ Fmulx(d30, d24, d25);
  __ Fmulx(d31, d24, d26);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(1.0, s16);
    ASSERT_EQUAL_FP32(2.0, s17);
    ASSERT_EQUAL_FP32(-2.0, s18);
    ASSERT_EQUAL_FP32(-2.0, s19);
    ASSERT_EQUAL_FP32(2.0, s20);
    ASSERT_EQUAL_FP64(1.0, d27);
    ASSERT_EQUAL_FP64(2.0, d28);
    ASSERT_EQUAL_FP64(-2.0, d29);
    ASSERT_EQUAL_FP64(-2.0, d30);
    ASSERT_EQUAL_FP64(2.0, d31);
  }
}
9664
9665
// Check the half-precision vector form of Fmulx with 8H and 4H arrangements.
//
// Inputs (every lane holds the same value):
//   v0: 2.0     v1: 0.5     v2: 0.0     v3: -0.0     v4: +Inf     v5: -Inf
//
// Besides 2.0 * 0.5 = 1.0 (0x3c00), each signed zero is multiplied by each
// signed infinity; the expected result is 2.0 (0x4000) when the signs match
// and -2.0 (0xc000) when they differ.
TEST(neon_fmulx_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Fmulx(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmulx(v7.V8H(), v2.V8H(), v4.V8H());
  __ Fmulx(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fmulx(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fmulx(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fmulx(v11.V4H(), v0.V4H(), v1.V4H());
  __ Fmulx(v12.V4H(), v2.V4H(), v4.V4H());
  __ Fmulx(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fmulx(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fmulx(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v7);
    ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v8);
    ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v9);
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v10);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v11);
    ASSERT_EQUAL_128(0, 0x4000400040004000, v12);
    ASSERT_EQUAL_128(0, 0xc000c000c000c000, v13);
    ASSERT_EQUAL_128(0, 0xc000c000c000c000, v14);
    ASSERT_EQUAL_128(0, 0x4000400040004000, v15);
  }
}
9704
9705
// Check the scalar form of Fmulx on H registers. Besides an ordinary product
// (2.0 * 0.5), each signed zero is multiplied by each signed infinity; those
// cases are expected to give 2.0 when the operand signs match and -2.0 when
// they differ.
TEST(neon_fmulx_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fmulx(h6, h0, h1);
  __ Fmulx(h7, h2, h4);
  __ Fmulx(h8, h2, h5);
  __ Fmulx(h9, h3, h4);
  __ Fmulx(h10, h3, h5);
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_FP16(Float16(1.0), h6);
    ASSERT_EQUAL_FP16(Float16(2.0), h7);
    ASSERT_EQUAL_FP16(Float16(-2.0), h8);
    ASSERT_EQUAL_FP16(Float16(-2.0), h9);
    ASSERT_EQUAL_FP16(Float16(2.0), h10);
  }
}
9735
// Check the half-precision vector form of Fabd (absolute difference) with 8H
// and 4H arrangements.
//
// Inputs (every lane holds the same value):
//   v0: 2.0     v1: 0.5     v2: 0.0     v3: -0.0     v4: +Inf     v5: -Inf
//
// Cases, for each arrangement:
//   |0.5 - 2.0|   = 1.5 (0x3e00)
//   |0.0 - -0.0|  = 0.0
//   |0.0 - -Inf|  = +Inf
//   |-0.0 - +Inf| = +Inf
//   |-0.0 - -Inf| = +Inf
TEST(neon_fabd_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Fabd(v6.V8H(), v1.V8H(), v0.V8H());
  __ Fabd(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fabd(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fabd(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fabd(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fabd(v11.V4H(), v1.V4H(), v0.V4H());
  __ Fabd(v12.V4H(), v2.V4H(), v3.V4H());
  __ Fabd(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fabd(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fabd(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x3e003e003e003e00, 0x3e003e003e003e00, v6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v9);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v10);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x3e003e003e003e00, v11);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v12);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v13);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v14);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v15);
  }
}
9776
9777
// Check the scalar form of Fabd (absolute difference) on H registers, using an
// ordinary pair (0.5, 2.0), the two signed zeroes, and zero against each
// signed infinity.
TEST(neon_fabd_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fabd(h16, h1, h0);
  __ Fabd(h17, h2, h3);
  __ Fabd(h18, h2, h5);
  __ Fabd(h19, h3, h4);
  __ Fabd(h20, h3, h5);
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_FP16(Float16(1.5), h16);
    ASSERT_EQUAL_FP16(Float16(0.0), h17);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h18);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h19);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h20);
  }
}
9807
9808
// Check the scalar form of Fabd (absolute difference) on S and D registers,
// using an ordinary pair (0.5 and 2.0), the two signed zeroes, and zero
// against each signed infinity.
TEST(neon_fabd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single precision.
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fabd(s16, s1, s0);
  __ Fabd(s17, s2, s3);
  __ Fabd(s18, s2, s5);
  __ Fabd(s19, s3, s4);
  __ Fabd(s20, s3, s5);

  // Double precision.
  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fabd(d27, d21, d22);
  __ Fabd(d28, d23, d24);
  __ Fabd(d29, d23, d26);
  __ Fabd(d30, d24, d25);
  __ Fabd(d31, d24, d26);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(1.5, s16);
    ASSERT_EQUAL_FP32(0.0, s17);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
    ASSERT_EQUAL_FP64(1.5, d27);
    ASSERT_EQUAL_FP64(0.0, d28);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);
  }
}
9853
9854
// Check the half-precision vector form of Frecps with 8H and 4H arrangements.
// The expected values below are consistent with a result of 2.0 - (a * b).
//
// Inputs (every lane holds the same value):
//   v0: 2.0     v1: -1.0     v2: 45.0     v3: +Inf     v4: -Inf
//
// Cases, for each arrangement:
//   (2.0, 45.0)  -> -88.0 (0xd580)
//   (-1.0, 45.0) ->  47.0 (0x51e0)
//   (2.0, +Inf)  -> -Inf
//   (2.0, -Inf)  -> +Inf
TEST(neon_frecps_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
  __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Frecps(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frecps(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frecps(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frecps(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frecps(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frecps(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frecps(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frecps(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v5);
    ASSERT_EQUAL_128(0x51e051e051e051e0, 0x51e051e051e051e0, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0xd580d580d580d580, v9);
    ASSERT_EQUAL_128(0, 0x51e051e051e051e0, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);
  }
}
9890
9891
// Check the scalar form of Frecps on H registers. The expected values below
// are consistent with a result of 2.0 - (a * b), including for infinite
// operands.
TEST(neon_frecps_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frecps(h5, h0, h2);
  __ Frecps(h6, h1, h2);
  __ Frecps(h7, h0, h3);
  __ Frecps(h8, h0, h4);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(-88.0), h5);
    ASSERT_EQUAL_FP16(Float16(47.0), h6);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9920
9921
// Check the half-precision vector form of Frsqrts with 8H and 4H arrangements.
// The expected values below are consistent with a result of
// (3.0 - (a * b)) / 2.0.
//
// Inputs (every lane holds the same value):
//   v0: 2.0     v1: -1.0     v2: 45.0     v3: +Inf     v4: -Inf
//
// Cases, for each arrangement:
//   (2.0, 45.0)  -> -43.5 (0xd170)
//   (-1.0, 45.0) ->  24.0 (0x4e00)
//   (2.0, +Inf)  -> -Inf
//   (2.0, -Inf)  -> +Inf
TEST(neon_frsqrts_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
  __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Frsqrts(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frsqrts(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frsqrts(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frsqrts(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frsqrts(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frsqrts(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frsqrts(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frsqrts(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xd170d170d170d170, 0xd170d170d170d170, v5);
    ASSERT_EQUAL_128(0x4e004e004e004e00, 0x4e004e004e004e00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0xd170d170d170d170, v9);
    ASSERT_EQUAL_128(0, 0x4e004e004e004e00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);
  }
}
9957
9958
// Check the scalar form of Frsqrts on H registers. The expected values below
// are consistent with a result of (3.0 - (a * b)) / 2.0, including for
// infinite operands.
TEST(neon_frsqrts_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frsqrts(h5, h0, h2);
  __ Frsqrts(h6, h1, h2);
  __ Frsqrts(h7, h0, h3);
  __ Frsqrts(h8, h0, h4);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(-43.5), h5);
    ASSERT_EQUAL_FP16(Float16(24.0), h6);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9987
9988
// Check the half-precision vector form of Faddp (pairwise add) with 8H and 4H
// arrangements.
//
// Adjacent lane pairs in the inputs:
//   v0: (2.0, 1.0)       -> 3.0 (0x4200)
//   v1: (+Inf, -Inf)     -> 0x7e00
//   v2: (-0.0, 0.0)      -> 0.0
//   v3: (0x7c01, 0x7e00) -> 0x7e01
//
// In the expected values, the sums from the first source operand occupy the
// low half of the result and those from the second occupy the high half.
TEST(neon_faddp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x0000800000008000, 0x0000800000008000);
  __ Movi(v3.V2D(), 0x7e007c017e007c01, 0x7e007c017e007c01);

  __ Faddp(v4.V8H(), v1.V8H(), v0.V8H());
  __ Faddp(v5.V8H(), v3.V8H(), v2.V8H());
  __ Faddp(v6.V4H(), v1.V4H(), v0.V4H());
  __ Faddp(v7.V4H(), v3.V4H(), v2.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x4200420042004200, 0x7e007e007e007e00, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e017e017e017e01, v5);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x420042007e007e00, v6);
    ASSERT_EQUAL_128(0, 0x000000007e017e01, v7);
  }
}
10015
10016
// Check the scalar form of Faddp (add the two lanes of a 2S or 2D vector into
// an S or D register).
//
// Lane pairs:
//   2S: (2.0, 1.0), (+Inf, -Inf), (-0.0, 0.0)
//   2D: (2.0, -2.0), (0x7ff8000000000000, 0xfff8000000000000), (-0.0, 0.0)
// The second pair of each size is expected to give the default NaN. Each
// result overwrites the low lane of its own source register.
TEST(neon_faddp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single precision.
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x0000000080000000);
  __ Faddp(s0, v0.V2S());
  __ Faddp(s1, v1.V2S());
  __ Faddp(s2, v2.V2S());

  // Double precision.
  __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Faddp(d3, v3.V2D());
  __ Faddp(d4, v4.V2D());
  __ Faddp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(3.0, s0);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
    ASSERT_EQUAL_FP32(0.0, s2);
    ASSERT_EQUAL_FP64(0.0, d3);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
    ASSERT_EQUAL_FP64(0.0, d5);
  }
}
10047
10048
// Check the scalar form of Faddp on half-precision inputs (add the two lanes
// of a 2H vector into an H register).
//
// Lane pairs: (2.0, 1.0), (+Inf, -Inf), (-0.0, 0.0). The second pair is
// expected to give the default NaN. Each result overwrites the low lane of its
// own source register.
TEST(neon_faddp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x00008000);
  __ Faddp(h0, v0.V2H());
  __ Faddp(h1, v1.V2H());
  __ Faddp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(3.0), h0);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h1);
    ASSERT_EQUAL_FP16(Float16(0.0), h2);
  }
}
10071
10072
// Check the scalar form of Fmaxp (maximum of the two lanes of a 2S or 2D
// vector, written to an S or D register).
//
// Lane pairs:
//   2S: (2.0, 1.0), (+Inf, -Inf), (-Inf, 0x7fc00000)
//   2D: (2.0, 1.0), (+Inf, -Inf), (0x7ff8000000000000, +Inf)
// The last pair of each size contains a NaN and is expected to give the
// default NaN. Each result overwrites the low lane of its own source register.
TEST(neon_fmaxp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single precision.
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxp(s0, v0.V2S());
  __ Fmaxp(s1, v1.V2S());
  __ Fmaxp(s2, v2.V2S());

  // Double precision.
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fmaxp(d3, v3.V2D());
  __ Fmaxp(d4, v4.V2D());
  __ Fmaxp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10103
10104
// Check the scalar form of Fmaxp on half-precision inputs (maximum of the two
// lanes of a 2H vector, written to an H register).
//
// Lane pairs: (2.0, 1.0), (+Inf, -Inf), (-Inf, 0x7e00). The last pair contains
// a NaN and is expected to give the default NaN. Each result overwrites the
// low lane of its own source register.
TEST(neon_fmaxp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxp(h0, v0.V2H());
  __ Fmaxp(h1, v1.V2H());
  __ Fmaxp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10127
10128
// Check the half-precision vector form of Fmax with 8H and 4H arrangements.
//
// Inputs (every lane holds the same value):
//   v0: 1.0     v1: 2.0     v2: -Inf     v3: +Inf
//   v4: 0x7e00 (quiet NaN)  v5: 0x7c01 (signalling NaN)
//
// Cases, for each arrangement:
//   max(1.0, 2.0)    = 2.0
//   max(-Inf, +Inf)  = +Inf
//   max(0x7e00, 1.0) = 0x7e00
//   max(0x7c01, 2.0) = 0x7e01
TEST(neon_fmax_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmax(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmax(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmax(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmax(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmax(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmax(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10165
10166
// Check the half-precision vector form of Fmaxp (pairwise maximum) with 8H and
// 4H arrangements.
//
// Adjacent lane pairs in the inputs:
//   v0: (2.0, 1.0)    -> 2.0 (0x4000)
//   v1: (+Inf, -Inf)  -> +Inf (0x7c00)
//   v2: (1.0, 0x7e00) -> 0x7e00
//   v3: (2.0, 0x7c01) -> 0x7e01
//
// In the expected values, the maxima from the first source operand occupy the
// low half of the result and those from the second occupy the high half.
TEST(neon_fmaxp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10193
10194
// Check the half-precision vector form of Fmaxnm with 8H and 4H arrangements.
//
// Inputs (every lane holds the same value):
//   v0: 1.0     v1: 2.0     v2: -Inf     v3: +Inf
//   v4: 0x7e00 (quiet NaN)  v5: 0x7c01 (signalling NaN)
//
// Cases, for each arrangement:
//   maxnm(1.0, 2.0)    = 2.0
//   maxnm(-Inf, +Inf)  = +Inf
//   maxnm(0x7e00, 1.0) = 1.0     (the quiet NaN operand is not returned)
//   maxnm(0x7c01, 2.0) = 0x7e01
TEST(neon_fmaxnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmaxnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmaxnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmaxnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmaxnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmaxnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // The 4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10231
10232
// Check the vector pairwise "maximum number" operation (Fmaxnmp) on
// half-precision lanes, in both the 8H and 4H arrangements.
TEST(neon_fmaxnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every source register repeats one pair of FP16 values in adjacent lanes:
  //   v0: 2.0 (0x4000) and 1.0 (0x3c00)
  //   v1: +infinity (0x7c00) and -infinity (0xfc00)
  //   v2: 1.0 (0x3c00) and a quiet NaN (0x7e00)
  //   v3: 2.0 (0x4000) and a signalling NaN (0x7c01)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result holds the pair results from the first
    // source and the high half those from the second source.
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
    // The quiet NaN pair yields the number (1.0); the signalling NaN pair
    // yields a quiet NaN (0x7e01).
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
    // The 4H forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10259
10260
// Check the scalar (reducing) form of Fmaxnmp, which combines the two lanes
// of a V2S or V2D source into a single S or D result.
TEST(neon_fmaxnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs: {2.0, 1.0}, {+inf, -inf} and {-inf, quiet NaN}.
  // Each result overwrites the register that supplied its inputs.
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxnmp(s0, v0.V2S());
  __ Fmaxnmp(s1, v1.V2S());
  __ Fmaxnmp(s2, v2.V2S());

  // Double-precision pairs: {2.0, 1.0}, {+inf, -inf} and {-inf, quiet NaN}.
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fmaxnmp(d3, v3.V2D());
  __ Fmaxnmp(d4, v4.V2D());
  __ Fmaxnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    // The quiet NaN is ignored, so the other lane (-inf) is the result.
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10291
10292
// Check the scalar (reducing) form of Fmaxnmp on a pair of half-precision
// lanes (V2H source, H result).
TEST(neon_fmaxnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // FP16 pairs: {2.0, 1.0}, {+inf, -inf} and {-inf, quiet NaN}. Each result
  // overwrites the register that supplied its inputs.
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxnmp(h0, v0.V2H());
  __ Fmaxnmp(h1, v1.V2H());
  __ Fmaxnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    // The quiet NaN is ignored, so the other lane (-inf) is the result.
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10315
10316
// Check the scalar (reducing) form of Fminp, which combines the two lanes of
// a V2S or V2D source into a single S or D result.
TEST(neon_fminp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs: {2.0, 1.0}, {+inf, -inf} and {-inf, quiet NaN}.
  // Each result overwrites the register that supplied its inputs.
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminp(s0, v0.V2S());
  __ Fminp(s1, v1.V2S());
  __ Fminp(s2, v2.V2S());

  // Double-precision pairs: {2.0, 1.0}, {+inf, -inf} and {quiet NaN, +inf}.
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fminp(d3, v3.V2D());
  __ Fminp(d4, v4.V2D());
  __ Fminp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    // Unlike the "number" variants, a quiet NaN input produces a NaN result.
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10347
10348
// Check the scalar (reducing) form of Fminp on a pair of half-precision lanes
// (V2H source, H result).
TEST(neon_fminp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // FP16 pairs: {2.0, 1.0}, {+inf, -inf} and {-inf, quiet NaN}. Each result
  // overwrites the register that supplied its inputs.
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminp(h0, v0.V2H());
  __ Fminp(h1, v1.V2H());
  __ Fminp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    // A quiet NaN input produces a NaN result.
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10371
10372
// Check the vector minimum (Fmin) on half-precision lanes, in both the 8H and
// 4H arrangements.
TEST(neon_fmin_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each source register holds the same FP16 value in every lane:
  //   v0: 1.0, v1: 2.0, v2: -infinity, v3: +infinity,
  //   v4: quiet NaN (0x7e00), v5: signalling NaN (0x7c01).
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmin(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmin(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmin(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmin(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmin(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmin(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmin(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    // A quiet NaN operand is propagated to the result.
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
    // A signalling NaN operand is propagated with the quiet bit set (0x7e01).
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // The 4H forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10409
10410
// Check the vector pairwise minimum (Fminp) on half-precision lanes, in both
// the 8H and 4H arrangements.
TEST(neon_fminp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every source register repeats one pair of FP16 values in adjacent lanes:
  //   v0: 2.0 (0x4000) and 1.0 (0x3c00)
  //   v1: +infinity (0x7c00) and -infinity (0xfc00)
  //   v2: 1.0 (0x3c00) and a quiet NaN (0x7e00)
  //   v3: 2.0 (0x4000) and a signalling NaN (0x7c01)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result holds the pair results from the first
    // source and the high half those from the second source. A NaN in a pair
    // produces a quiet NaN (0x7e00, or 0x7e01 for the signalling input).
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
    // The 4H forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10437
10438
// Check the vector "minimum number" operation (Fminnm) on half-precision
// lanes, in both the 8H and 4H arrangements.
TEST(neon_fminnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each source register holds the same FP16 value in every lane:
  //   v0: 1.0, v1: 2.0, v2: -infinity, v3: +infinity,
  //   v4: quiet NaN (0x7e00), v5: signalling NaN (0x7c01).
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fminnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fminnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fminnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fminnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fminnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    // A quiet NaN paired with a number yields the number (1.0).
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
    // A signalling NaN still yields a NaN, with the quiet bit set (0x7e01).
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // The 4H forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10475
10476
// Check the vector pairwise "minimum number" operation (Fminnmp) on
// half-precision lanes, in both the 8H and 4H arrangements.
TEST(neon_fminnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every source register repeats one pair of FP16 values in adjacent lanes:
  //   v0: 2.0 (0x4000) and 1.0 (0x3c00)
  //   v1: +infinity (0x7c00) and -infinity (0xfc00)
  //   v2: 1.0 (0x3c00) and a quiet NaN (0x7e00)
  //   v3: 2.0 (0x4000) and a signalling NaN (0x7c01)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result holds the pair results from the first
    // source and the high half those from the second source.
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
    // The quiet NaN pair yields the number (1.0); the signalling NaN pair
    // yields a quiet NaN (0x7e01).
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
    // The 4H forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10503
10504
// Check the scalar (reducing) form of Fminnmp, which combines the two lanes
// of a V2S or V2D source into a single S or D result.
TEST(neon_fminnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs: {2.0, 1.0}, {+inf, -inf} and {-inf, quiet NaN}.
  // Each result overwrites the register that supplied its inputs.
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminnmp(s0, v0.V2S());
  __ Fminnmp(s1, v1.V2S());
  __ Fminnmp(s2, v2.V2S());

  // Double-precision pairs: {2.0, 1.0}, {+inf, -inf} and {-inf, quiet NaN}.
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fminnmp(d3, v3.V2D());
  __ Fminnmp(d4, v4.V2D());
  __ Fminnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    // The quiet NaN is ignored, so the other lane (-inf) is the result.
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10535
10536
// Check the scalar (reducing) form of Fminnmp on a pair of half-precision
// lanes (V2H source, H result).
TEST(neon_fminnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // FP16 pairs: {2.0, 1.0}, {+inf, -inf} and {-inf, quiet NaN}. Each result
  // overwrites the register that supplied its inputs.
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminnmp(h0, v0.V2H());
  __ Fminnmp(h1, v1.V2H());
  __ Fminnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    // The quiet NaN is ignored, so the other lane (-inf) is the result.
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10559
Float16ToV4H(Float16 f)10560 static uint64_t Float16ToV4H(Float16 f) {
10561 uint64_t bits = static_cast<uint64_t>(Float16ToRawbits(f));
10562 return (bits << 48) | (bits << 32) | (bits << 16) | bits;
10563 }
10564
10565
// Run Fmin, Fmax, Fminnm and Fmaxnm on the half-precision operands `n` and
// `m`, in scalar (H), 4H and 8H forms, and check each result.
//
//   n, m    The first and second source operands.
//   min     Expected Fmin result.
//   max     Expected Fmax result.
//   minnm   Expected Fminnm result.
//   maxnm   Expected Fmaxnm result.
//
// The vector forms operate on registers with every lane set to `n` or `m`, so
// every result lane is expected to hold the same value as the scalar result.
static void FminFmaxFloat16Helper(Float16 n,
                                  Float16 m,
                                  Float16 min,
                                  Float16 max,
                                  Float16 minnm,
                                  Float16 maxnm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, n);
  __ Fmov(h1, m);
  // The scalar moves above are superseded by these full-vector moves, which
  // also set lane 0 of v0 and v1.
  __ Fmov(v0.V8H(), n);
  __ Fmov(v1.V8H(), m);
  __ Fmin(h28, h0, h1);
  __ Fmin(v2.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v3.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(h29, h0, h1);
  __ Fmax(v4.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v5.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(h30, h0, h1);
  __ Fminnm(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v7.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(h31, h0, h1);
  __ Fmaxnm(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v9.V8H(), v0.V8H(), v1.V8H());
  END();

  // Expected 64-bit patterns with the scalar result replicated in four lanes.
  uint64_t min_vec = Float16ToV4H(min);
  uint64_t max_vec = Float16ToV4H(max);
  uint64_t minnm_vec = Float16ToV4H(minnm);
  uint64_t maxnm_vec = Float16ToV4H(maxnm);

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(min, h28);
    ASSERT_EQUAL_FP16(max, h29);
    ASSERT_EQUAL_FP16(minnm, h30);
    ASSERT_EQUAL_FP16(maxnm, h31);


    // 4H results occupy the low 64 bits with the upper half clear; 8H results
    // fill both halves.
    ASSERT_EQUAL_128(0, min_vec, v2);
    ASSERT_EQUAL_128(min_vec, min_vec, v3);
    ASSERT_EQUAL_128(0, max_vec, v4);
    ASSERT_EQUAL_128(max_vec, max_vec, v5);
    ASSERT_EQUAL_128(0, minnm_vec, v6);
    ASSERT_EQUAL_128(minnm_vec, minnm_vec, v7);
    ASSERT_EQUAL_128(0, maxnm_vec, v8);
    ASSERT_EQUAL_128(maxnm_vec, maxnm_vec, v9);
  }
}
10620
MinMaxHelper(Float16 n, Float16 m, bool min, Float16 quiet_nan_substitute = Float16(0.0))10621 static Float16 MinMaxHelper(Float16 n,
10622 Float16 m,
10623 bool min,
10624 Float16 quiet_nan_substitute = Float16(0.0)) {
10625 const uint64_t kFP16QuietNaNMask = 0x0200;
10626 uint16_t raw_n = Float16ToRawbits(n);
10627 uint16_t raw_m = Float16ToRawbits(m);
10628
10629 if (IsSignallingNaN(n)) {
10630 // n is signalling NaN.
10631 return RawbitsToFloat16(raw_n | kFP16QuietNaNMask);
10632 } else if (IsSignallingNaN(m)) {
10633 // m is signalling NaN.
10634 return RawbitsToFloat16(raw_m | kFP16QuietNaNMask);
10635 } else if (IsZero(quiet_nan_substitute)) {
10636 if (IsNaN(n)) {
10637 // n is quiet NaN.
10638 return n;
10639 } else if (IsNaN(m)) {
10640 // m is quiet NaN.
10641 return m;
10642 }
10643 } else {
10644 // Substitute n or m if one is quiet, but not both.
10645 if (IsNaN(n) && !IsNaN(m)) {
10646 // n is quiet NaN: replace with substitute.
10647 n = quiet_nan_substitute;
10648 } else if (!IsNaN(n) && IsNaN(m)) {
10649 // m is quiet NaN: replace with substitute.
10650 m = quiet_nan_substitute;
10651 }
10652 }
10653
10654 uint16_t sign_mask = 0x8000;
10655 if (IsZero(n) && IsZero(m) && ((raw_n & sign_mask) != (raw_m & sign_mask))) {
10656 return min ? Float16(-0.0) : Float16(0.0);
10657 }
10658
10659 if (FPToDouble(n, kIgnoreDefaultNaN) < FPToDouble(m, kIgnoreDefaultNaN)) {
10660 return min ? n : m;
10661 }
10662 return min ? m : n;
10663 }
10664
// Exercise half-precision Fmin/Fmax/Fminnm/Fmaxnm through
// FminFmaxFloat16Helper: first with a handful of hand-written expectations,
// then with every ordered pair drawn from a table of interesting values, using
// MinMaxHelper as the reference model.
TEST(fmax_fmin_h) {
  // NaNs with distinctive payloads, so that payload preservation is checked.
  Float16 snan = RawbitsToFloat16(0x7c12);
  Float16 qnan = RawbitsToFloat16(0x7e34);

  // What each NaN should look like once an instruction has propagated it: the
  // signalling NaN gains the quiet bit, the quiet NaN is unchanged.
  Float16 snan_processed = RawbitsToFloat16(0x7e12);
  Float16 qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  Float16 zero = Float16(0);
  Float16 one = Float16(1);

  // Bootstrap tests.
  // Arguments are: n, m, then expected min, max, minnm, maxnm.
  FminFmaxFloat16Helper(zero, zero, zero, zero, zero, zero);
  FminFmaxFloat16Helper(zero, one, zero, one, zero, one);
  FminFmaxFloat16Helper(kFP16PositiveInfinity,
                        kFP16NegativeInfinity,
                        kFP16NegativeInfinity,
                        kFP16PositiveInfinity,
                        kFP16NegativeInfinity,
                        kFP16PositiveInfinity);
  // A signalling NaN in either position dominates all four operations.
  FminFmaxFloat16Helper(snan,
                        zero,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);
  FminFmaxFloat16Helper(zero,
                        snan,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);
  // A quiet NaN is propagated by min/max but ignored by minnm/maxnm.
  FminFmaxFloat16Helper(qnan,
                        zero,
                        qnan_processed,
                        qnan_processed,
                        zero,
                        zero);
  FminFmaxFloat16Helper(zero,
                        qnan,
                        qnan_processed,
                        qnan_processed,
                        zero,
                        zero);
  // A signalling NaN takes priority over a quiet NaN, whatever the order.
  FminFmaxFloat16Helper(qnan,
                        snan,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);
  FminFmaxFloat16Helper(snan,
                        qnan,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);

  // Iterate over all combinations of inputs.
  Float16 inputs[] = {RawbitsToFloat16(0x7bff),
                      RawbitsToFloat16(0x0400),
                      Float16(1.0),
                      Float16(0.0),
                      RawbitsToFloat16(0xfbff),
                      RawbitsToFloat16(0x8400),
                      Float16(-1.0),
                      Float16(-0.0),
                      kFP16PositiveInfinity,
                      kFP16NegativeInfinity,
                      kFP16QuietNaN,
                      kFP16SignallingNaN};

  for (Float16 n : inputs) {
    for (Float16 m : inputs) {
      // Evaluate the reference model first, in the original argument order.
      Float16 expected_min = MinMaxHelper(n, m, true);
      Float16 expected_max = MinMaxHelper(n, m, false);
      Float16 expected_minnm = MinMaxHelper(n, m, true, kFP16PositiveInfinity);
      Float16 expected_maxnm = MinMaxHelper(n, m, false, kFP16NegativeInfinity);
      FminFmaxFloat16Helper(n,
                            m,
                            expected_min,
                            expected_max,
                            expected_minnm,
                            expected_maxnm);
    }
  }
}
10763
// Check the vector Frint32x, Frint64x, Frint32z and Frint64z instructions on
// V2S, V4S and V2D arrangements, including their handling of infinities.
TEST(neon_frint_saturating) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kFrintToFixedSizedInt);

  START();

  // v0 (as floats): 1.9 and 1.5 in the low half, used by the V2S forms.
  // v1 (as floats): -inf, +inf, -0.15625 and 0.15625.
  // v2 (as doubles): +inf and -inf.
  __ Movi(v0.V2D(), 0x3f8000003f8ccccd, 0x3fc000003ff33333);
  __ Movi(v1.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v2.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Frint32x(v16.V2S(), v0.V2S());
  __ Frint32x(v17.V4S(), v1.V4S());
  __ Frint32x(v18.V2D(), v2.V2D());
  __ Frint64x(v19.V2S(), v0.V2S());
  __ Frint64x(v20.V4S(), v1.V4S());
  __ Frint64x(v21.V2D(), v2.V2D());
  __ Frint32z(v22.V2S(), v0.V2S());
  __ Frint32z(v23.V4S(), v1.V4S());
  __ Frint32z(v24.V2D(), v2.V2D());
  __ Frint64z(v25.V2S(), v0.V2S());
  __ Frint64z(v26.V4S(), v1.V4S());
  __ Frint64z(v27.V2D(), v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // Expected values, as established by the patterns below:
    //  - The "x" forms turn 1.5 and 1.9 into 2.0 (0x40000000); the "z" forms
    //    turn them into 1.0 (0x3f800000).
    //  - +/-0.15625 become zeroes that keep the input sign.
    //  - Infinities of either sign become -2^31 for the 32-bit forms
    //    (0xcf000000 / 0xc1e0000000000000) and -2^63 for the 64-bit forms
    //    (0xdf000000 / 0xc3e0000000000000).
    ASSERT_EQUAL_128(0x0000000000000000, 0x4000000040000000, q16);
    ASSERT_EQUAL_128(0x0000000080000000, 0xcf000000cf000000, q17);
    ASSERT_EQUAL_128(0xc1e0000000000000, 0xc1e0000000000000, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x4000000040000000, q19);
    ASSERT_EQUAL_128(0x0000000080000000, 0xdf000000df000000, q20);
    ASSERT_EQUAL_128(0xc3e0000000000000, 0xc3e0000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f8000003f800000, q22);
    ASSERT_EQUAL_128(0x0000000080000000, 0xcf000000cf000000, q23);
    ASSERT_EQUAL_128(0xc1e0000000000000, 0xc1e0000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f8000003f800000, q25);
    ASSERT_EQUAL_128(0x0000000080000000, 0xdf000000df000000, q26);
    ASSERT_EQUAL_128(0xc3e0000000000000, 0xc3e0000000000000, q27);
  }
}
10806
10807
// Check the table lookup instructions Tbl and Tbx with one to four table
// registers, for both 16B and 8B destination arrangements.
TEST(neon_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // Table contents. The multi-register lookups below use these in the order
  // v30, v31, v0, v1.
  __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
  __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
  __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
  __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);

  // Index vectors for the one-, two-, three- and four-register lookups.
  __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
  __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
  __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
  __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);

  // Preload the Tbl destinations with the same values later given to the Tbx
  // destinations, so the two sets of results can be compared directly.
  __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
  __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
  __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());

  __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
  __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
  __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Tbl results: bytes that were not looked up are zero, and the 8B forms
    // leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
    ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
    ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
    ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);

    // Tbx results: bytes that were not looked up keep the value preloaded
    // into the destination; the 8B forms still clear the upper 64 bits.
    ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
    ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
    ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
    ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);
  }
}
10881
// Check the vector form of Usdot (mixed-sign dot product), by comparing it
// with Udot and Sdot on inputs where the results must agree, and with a
// hand-built case whose correct result is zero.
TEST(neon_usdot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kDotProduct,
                      CPUFeatures::kI8MM);

  START();
  // v0: every byte 0xff, v1: every byte 0x7f, v2: every byte 0x80.
  __ Movi(v0.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);
  __ Movi(v1.V2D(), 0x7f7f7f7f7f7f7f7f, 0x7f7f7f7f7f7f7f7f);
  __ Movi(v2.V2D(), 0x8080808080808080, 0x8080808080808080);
  // Zero all of the accumulators (v3-v11).
  __ Movi(v3.V2D(), 0, 0);
  __ Mov(q4, q3);
  __ Mov(q5, q3);
  __ Mov(q6, q3);
  __ Mov(q7, q3);
  __ Mov(q8, q3);
  __ Mov(q9, q3);
  __ Mov(q10, q3);
  __ Mov(q11, q3);

  // Test Usdot against Udot/Sdot over the range of inputs where they should be
  // equal.
  // Each Cmeq sets a lane to all ones where the two results match.
  __ Usdot(v3.V2S(), v0.V8B(), v1.V8B());
  __ Udot(v4.V2S(), v0.V8B(), v1.V8B());
  __ Cmeq(v3.V4S(), v3.V4S(), v4.V4S());
  __ Usdot(v5.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v6.V4S(), v0.V16B(), v1.V16B());
  __ Cmeq(v5.V4S(), v5.V4S(), v6.V4S());

  __ Usdot(v7.V2S(), v1.V8B(), v2.V8B());
  __ Sdot(v8.V2S(), v1.V8B(), v2.V8B());
  __ Cmeq(v7.V4S(), v7.V4S(), v8.V4S());
  __ Usdot(v9.V4S(), v1.V16B(), v2.V16B());
  __ Sdot(v10.V4S(), v1.V16B(), v2.V16B());
  __ Cmeq(v9.V4S(), v9.V4S(), v10.V4S());

  // Construct values which, when interpreted correctly as signed/unsigned,
  // should give a zero result for dot product.
  __ Mov(w0, 0x8101ff40);  // [-127, 1, -1, 64] as signed bytes.
  __ Mov(w1, 0x02fe8002);  // [2, 254, 128, 2] as unsigned bytes.
  __ Dup(v0.V4S(), w0);
  __ Dup(v1.V4S(), w1);
  // The unsigned bytes (v1) are the first source; the signed bytes (v0) are
  // the second.
  __ Usdot(v11.V4S(), v1.V16B(), v0.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // All comparison lanes must be all ones, including the upper lanes of the
    // V2S cases.
    ASSERT_EQUAL_128(-1, -1, q3);
    ASSERT_EQUAL_128(-1, -1, q5);
    ASSERT_EQUAL_128(-1, -1, q7);
    ASSERT_EQUAL_128(-1, -1, q9);
    ASSERT_EQUAL_128(0, 0, q11);
  }
}
10937
// Check the by-element forms of Usdot and Sudot, by comparing each against
// the vector form of Usdot applied to a register in which the selected
// element has been broadcast to every lane.
TEST(neon_usdot_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kI8MM);

  START();
  __ Movi(v0.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v1.V2D(), 0x4242424242424242, 0x5555aaaaaaaa5555);

  // Test element Usdot against vector variant.
  // v2, v3 and v4 hold S lanes 0, 1 and 3 of v1, broadcast to every lane.
  __ Dup(v2.V4S(), v1.V4S(), 0);
  __ Dup(v3.V4S(), v1.V4S(), 1);
  __ Dup(v4.V4S(), v1.V4S(), 3);

  // Each accumulator is seeded with the same value (v1) before the
  // accumulating dot product, so the results are comparable. Cmeq then sets a
  // lane to all ones where the by-element and vector results match.
  __ Mov(q10, q1);
  __ Usdot(v10.V2S(), v0.V8B(), v2.V8B());
  __ Mov(q11, q1);
  __ Usdot(v11.V2S(), v0.V8B(), v1.S4B(), 0);
  __ Cmeq(v11.V4S(), v11.V4S(), v10.V4S());

  __ Mov(q12, q1);
  __ Usdot(v12.V4S(), v0.V16B(), v3.V16B());
  __ Mov(q13, q1);
  __ Usdot(v13.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Cmeq(v13.V4S(), v13.V4S(), v12.V4S());

  // Sudot is checked against Usdot with the two source operands exchanged.
  __ Mov(q14, q1);
  __ Usdot(v14.V4S(), v4.V16B(), v0.V16B());
  __ Mov(q15, q1);
  __ Sudot(v15.V4S(), v0.V16B(), v1.S4B(), 3);
  __ Cmeq(v15.V4S(), v15.V4S(), v14.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(-1, -1, q11);
    ASSERT_EQUAL_128(-1, -1, q13);
    ASSERT_EQUAL_128(-1, -1, q15);
  }
}
10977
// Check that scalar instructions writing a B-sized destination clear every
// other byte of the destination register.
//
// For each instruction, q9 is first filled with 0x55 bytes, the instruction
// writes b9, and q9 is then ORed into the accumulator q30. After the final
// pass, byte 0 of q30 (the only byte allowed to be non-zero) is cleared and
// the whole register must read as zero.
TEST(zero_high_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kRDM);

  // Raw encodings of the instructions under test, emitted in this order.
  static const uint32_t kEncodings[] = {
      0x5e010409,  // mov b9, v0.b[0]
      0x5e207809,  // sqabs b9, b0
      0x5e200c29,  // sqadd b9, b1, b0
      0x7e207809,  // sqneg b9, b0
      0x7e008429,  // sqrdmlah b9, b1, b0
      0x7e008c29,  // sqrdmlsh b9, b1, b0
      0x5e205c29,  // sqrshl b9, b1, b0
      0x5f089c09,  // sqrshrn b9, h0, #8
      0x7f088c09,  // sqrshrun b9, h0, #8
      0x5e204c29,  // sqshl b9, b1, b0
      0x5f087409,  // sqshl b9, b0, #0
      0x7f086409,  // sqshlu b9, b0, #0
      0x5f089409,  // sqshrn b9, h0, #8
      0x7f088409,  // sqshrun b9, h0, #8
      0x5e202c29,  // sqsub b9, b1, b0
      0x5e214809,  // sqxtn b9, h0
      0x7e212809,  // sqxtun b9, h0
      0x5e203809,  // suqadd b9, b0
      0x7e200c29,  // uqadd b9, b1, b0
      0x7e205c29,  // uqrshl b9, b1, b0
      0x7f089c09,  // uqrshrn b9, h0, #8
      0x7e204c29,  // uqshl b9, b1, b0
      0x7f087409,  // uqshl b9, b0, #0
      0x7f089409,  // uqshrn b9, h0, #8
      0x7e202c29,  // uqsub b9, b1, b0
      0x7e214809,  // uqxtn b9, h0
      0x7e203809,  // usqadd b9, b0
  };
  const int kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();

  __ Mov(x0, 0x55aa42ffaa42ff55);
  __ Mov(x1, 4);
  __ Movi(q30.V16B(), 0);

  // Iterate over the SISD instructions using different input values on each
  // loop.
  Label loop;
  __ Bind(&loop);

  __ Dup(q0.V16B(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q1.V16B(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q2.V16B(), w0);
  __ Ror(x0, x0, 8);

  {
    // Three instructions are emitted per encoding: fill, test, accumulate.
    ExactAssemblyScope scope(&masm, 3 * kEncodingCount * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ movi(q9.V16B(), 0x55);
      __ dci(encoding);
      __ orr(q30.V16B(), q30.V16B(), q9.V16B());
    }
  }
  __ Sub(x1, x1, 1);
  __ Cbnz(x1, &loop);

  // Discard byte 0, which legitimately holds the instruction results.
  __ Ins(q30.V16B(), 0, wzr);

  END();
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0, q30);
  }
}
11119
// Checks that SISD instructions producing an H-sized result leave every bit of
// the destination above lane 0 clear.
//
// For each encoding, q9 is first filled with 0x55 bytes, the instruction under
// test is executed, and q9 is then ORed into the accumulator q30. Once all
// iterations are done, H lane 0 of q30 is cleared, so the final comparison
// against zero only passes if no instruction left any higher bit of q9 set.
TEST(zero_high_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                      CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kRDM);

  // Raw encodings of the instructions under test, in emission order, with
  // their disassembly.
  static const uint32_t kEncodings[] = {
      0x5e020409,  // mov h9, v0.h[0]
      0x7ec01429,  // fabd h9, h1, h0
      0x7e402c29,  // facge h9, h1, h0
      0x7ec02c29,  // facgt h9, h1, h0
      0x5e30d809,  // faddp h9, v0.2h
      0x5ef8d809,  // fcmeq h9, h0, #0.0
      0x5e402429,  // fcmeq h9, h1, h0
      0x7ef8c809,  // fcmge h9, h0, #0.0
      0x7e402429,  // fcmge h9, h1, h0
      0x5ef8c809,  // fcmgt h9, h0, #0.0
      0x7ec02429,  // fcmgt h9, h1, h0
      0x7ef8d809,  // fcmle h9, h0, #0.0
      0x5ef8e809,  // fcmlt h9, h0, #0.0
      0x5e79c809,  // fcvtas h9, h0
      0x7e79c809,  // fcvtau h9, h0
      0x5e79b809,  // fcvtms h9, h0
      0x7e79b809,  // fcvtmu h9, h0
      0x5e79a809,  // fcvtns h9, h0
      0x7e79a809,  // fcvtnu h9, h0
      0x5ef9a809,  // fcvtps h9, h0
      0x7ef9a809,  // fcvtpu h9, h0
      0x5ef9b809,  // fcvtzs h9, h0
      0x5f10fc09,  // fcvtzs h9, h0, #16
      0x7ef9b809,  // fcvtzu h9, h0
      0x7f10fc09,  // fcvtzu h9, h0, #16
      0x5e30c809,  // fmaxnmp h9, v0.2h
      0x5e30f809,  // fmaxp h9, v0.2h
      0x5eb0c809,  // fminnmp h9, v0.2h
      0x5eb0f809,  // fminp h9, v0.2h
      0x5f001029,  // fmla h9, h1, v0.h[0]
      0x5f005029,  // fmls h9, h1, v0.h[0]
      0x5f009029,  // fmul h9, h1, v0.h[0]
      0x7f009029,  // fmulx h9, h1, v0.h[0]
      0x5e401c29,  // fmulx h9, h1, h0
      0x5ef9d809,  // frecpe h9, h0
      0x5e403c29,  // frecps h9, h1, h0
      0x5ef9f809,  // frecpx h9, h0
      0x7ef9d809,  // frsqrte h9, h0
      0x5ec03c29,  // frsqrts h9, h1, h0
      0x5e79d809,  // scvtf h9, h0
      0x5f10e409,  // scvtf h9, h0, #16
      0x5e607809,  // sqabs h9, h0
      0x5e600c29,  // sqadd h9, h1, h0
      0x5f40c029,  // sqdmulh h9, h1, v0.h[0]
      0x5e60b429,  // sqdmulh h9, h1, h0
      0x7e607809,  // sqneg h9, h0
      0x7f40d029,  // sqrdmlah h9, h1, v0.h[0]
      0x7e408429,  // sqrdmlah h9, h1, h0
      0x7f40f029,  // sqrdmlsh h9, h1, v0.h[0]
      0x7e408c29,  // sqrdmlsh h9, h1, h0
      0x5f40d029,  // sqrdmulh h9, h1, v0.h[0]
      0x7e60b429,  // sqrdmulh h9, h1, h0
      0x5e605c29,  // sqrshl h9, h1, h0
      0x5f109c09,  // sqrshrn h9, s0, #16
      0x7f108c09,  // sqrshrun h9, s0, #16
      0x5e604c29,  // sqshl h9, h1, h0
      0x5f107409,  // sqshl h9, h0, #0
      0x7f106409,  // sqshlu h9, h0, #0
      0x5f109409,  // sqshrn h9, s0, #16
      0x7f108409,  // sqshrun h9, s0, #16
      0x5e602c29,  // sqsub h9, h1, h0
      0x5e614809,  // sqxtn h9, s0
      0x7e612809,  // sqxtun h9, s0
      0x5e603809,  // suqadd h9, h0
      0x7e79d809,  // ucvtf h9, h0
      0x7f10e409,  // ucvtf h9, h0, #16
      0x7e600c29,  // uqadd h9, h1, h0
      0x7e605c29,  // uqrshl h9, h1, h0
      0x7f109c09,  // uqrshrn h9, s0, #16
      0x7e604c29,  // uqshl h9, h1, h0
      0x7f107409,  // uqshl h9, h0, #0
      0x7f109409,  // uqshrn h9, s0, #16
      0x7e602c29,  // uqsub h9, h1, h0
      0x7e614809,  // uqxtn h9, s0
      0x7e603809,  // usqadd h9, h0
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);
  // Each encoding is wrapped in a movi (poison) and an orr (accumulate).
  const size_t kInstructionsPerEncoding = 3;

  START();

  __ Mov(x0, 0x55aa42ffaa42ff55);  // Seed for the input patterns.
  __ Mov(x1, 4);                   // Number of passes over the table.
  __ Movi(q30.V16B(), 0);          // Accumulator for the results.

  // Every pass runs the whole table with fresh inputs: the seed is rotated by
  // eight bits after each source register is loaded.
  Label loop;
  __ Bind(&loop);

  __ Dup(q0.V8H(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q1.V8H(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q2.V8H(), w0);
  __ Ror(x0, x0, 8);

  {
    ExactAssemblyScope scope(&masm,
                             kInstructionsPerEncoding * kEncodingCount *
                                 kInstructionSize);
    for (size_t i = 0; i < kEncodingCount; i++) {
      __ movi(q9.V16B(), 0x55);
      __ dci(kEncodings[i]);
      __ orr(q30.V16B(), q30.V16B(), q9.V16B());
    }
  }
  __ Sub(x1, x1, 1);
  __ Cbnz(x1, &loop);

  // Discard the H-sized result lane; only the bits above it are checked.
  __ Ins(q30.V8H(), 0, wzr);

  END();
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0, q30);
  }
}
11457
// Checks that SISD instructions producing an S-sized result leave every bit of
// the destination above lane 0 clear.
//
// For each encoding, q9 is first filled with 0x55 bytes, the instruction under
// test is executed, and q9 is then ORed into the accumulator q30. Once all
// iterations are done, S lane 0 of q30 is cleared, so the final comparison
// against zero only passes if no instruction left any higher bit of q9 set.
TEST(zero_high_s) {
  SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                      CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kRDM);

  // Raw encodings of the instructions under test, in emission order, with
  // their disassembly.
  static const uint32_t kEncodings[] = {
      0x5e040409,  // mov s9, v0.s[0]
      0x7ea0d429,  // fabd s9, s1, s0
      0x7e20ec29,  // facge s9, s1, s0
      0x7ea0ec29,  // facgt s9, s1, s0
      0x7e30d809,  // faddp s9, v0.2s
      0x5ea0d809,  // fcmeq s9, s0, #0.0
      0x5e20e429,  // fcmeq s9, s1, s0
      0x7ea0c809,  // fcmge s9, s0, #0.0
      0x7e20e429,  // fcmge s9, s1, s0
      0x5ea0c809,  // fcmgt s9, s0, #0.0
      0x7ea0e429,  // fcmgt s9, s1, s0
      0x7ea0d809,  // fcmle s9, s0, #0.0
      0x5ea0e809,  // fcmlt s9, s0, #0.0
      0x5e21c809,  // fcvtas s9, s0
      0x7e21c809,  // fcvtau s9, s0
      0x5e21b809,  // fcvtms s9, s0
      0x7e21b809,  // fcvtmu s9, s0
      0x5e21a809,  // fcvtns s9, s0
      0x7e21a809,  // fcvtnu s9, s0
      0x5ea1a809,  // fcvtps s9, s0
      0x7ea1a809,  // fcvtpu s9, s0
      0x7e616809,  // fcvtxn s9, d0
      0x5ea1b809,  // fcvtzs s9, s0
      0x5f20fc09,  // fcvtzs s9, s0, #32
      0x7ea1b809,  // fcvtzu s9, s0
      0x7f20fc09,  // fcvtzu s9, s0, #32
      0x7e30c809,  // fmaxnmp s9, v0.2s
      0x7e30f809,  // fmaxp s9, v0.2s
      0x7eb0c809,  // fminnmp s9, v0.2s
      0x7eb0f809,  // fminp s9, v0.2s
      0x5f801029,  // fmla s9, s1, v0.s[0]
      0x5f805029,  // fmls s9, s1, v0.s[0]
      0x5f809029,  // fmul s9, s1, v0.s[0]
      0x7f809029,  // fmulx s9, s1, v0.s[0]
      0x5e20dc29,  // fmulx s9, s1, s0
      0x5ea1d809,  // frecpe s9, s0
      0x5e20fc29,  // frecps s9, s1, s0
      0x5ea1f809,  // frecpx s9, s0
      0x7ea1d809,  // frsqrte s9, s0
      0x5ea0fc29,  // frsqrts s9, s1, s0
      0x5e21d809,  // scvtf s9, s0
      0x5f20e409,  // scvtf s9, s0, #32
      0x5ea07809,  // sqabs s9, s0
      0x5ea00c29,  // sqadd s9, s1, s0
      0x5e609029,  // sqdmlal s9, h1, h0
      0x5f403029,  // sqdmlal s9, h1, v0.h[0]
      0x5e60b029,  // sqdmlsl s9, h1, h0
      0x5f407029,  // sqdmlsl s9, h1, v0.h[0]
      0x5f80c029,  // sqdmulh s9, s1, v0.s[0]
      0x5ea0b429,  // sqdmulh s9, s1, s0
      0x5e60d029,  // sqdmull s9, h1, h0
      0x5f40b029,  // sqdmull s9, h1, v0.h[0]
      0x7ea07809,  // sqneg s9, s0
      0x7f80d029,  // sqrdmlah s9, s1, v0.s[0]
      0x7e808429,  // sqrdmlah s9, s1, s0
      0x7f80f029,  // sqrdmlsh s9, s1, v0.s[0]
      0x7e808c29,  // sqrdmlsh s9, s1, s0
      0x5f80d029,  // sqrdmulh s9, s1, v0.s[0]
      0x7ea0b429,  // sqrdmulh s9, s1, s0
      0x5ea05c29,  // sqrshl s9, s1, s0
      0x5f209c09,  // sqrshrn s9, d0, #32
      0x7f208c09,  // sqrshrun s9, d0, #32
      0x5ea04c29,  // sqshl s9, s1, s0
      0x5f207409,  // sqshl s9, s0, #0
      0x7f206409,  // sqshlu s9, s0, #0
      0x5f209409,  // sqshrn s9, d0, #32
      0x7f208409,  // sqshrun s9, d0, #32
      0x5ea02c29,  // sqsub s9, s1, s0
      0x5ea14809,  // sqxtn s9, d0
      0x7ea12809,  // sqxtun s9, d0
      0x5ea03809,  // suqadd s9, s0
      0x7e21d809,  // ucvtf s9, s0
      0x7f20e409,  // ucvtf s9, s0, #32
      0x7ea00c29,  // uqadd s9, s1, s0
      0x7ea05c29,  // uqrshl s9, s1, s0
      0x7f209c09,  // uqrshrn s9, d0, #32
      0x7ea04c29,  // uqshl s9, s1, s0
      0x7f207409,  // uqshl s9, s0, #0
      0x7f209409,  // uqshrn s9, d0, #32
      0x7ea02c29,  // uqsub s9, s1, s0
      0x7ea14809,  // uqxtn s9, d0
      0x7ea03809,  // usqadd s9, s0
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);
  // Each encoding is wrapped in a movi (poison) and an orr (accumulate).
  const size_t kInstructionsPerEncoding = 3;

  START();

  __ Mov(x0, 0x55aa42ffaa42ff55);  // Seed for the input patterns.
  __ Mov(x1, 4);                   // Number of passes over the table.
  __ Movi(q30.V16B(), 0);          // Accumulator for the results.

  // Every pass runs the whole table with fresh inputs: the seed is rotated by
  // eight bits after each source register is loaded.
  Label loop;
  __ Bind(&loop);

  __ Dup(q0.V4S(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q1.V4S(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q2.V4S(), w0);
  __ Ror(x0, x0, 8);

  {
    ExactAssemblyScope scope(&masm,
                             kInstructionsPerEncoding * kEncodingCount *
                                 kInstructionSize);
    for (size_t i = 0; i < kEncodingCount; i++) {
      __ movi(q9.V16B(), 0x55);
      __ dci(kEncodings[i]);
      __ orr(q30.V16B(), q30.V16B(), q9.V16B());
    }
  }
  __ Sub(x1, x1, 1);
  __ Cbnz(x1, &loop);

  // Discard the S-sized result lane; only the bits above it are checked.
  __ Ins(q30.V4S(), 0, wzr);

  END();
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0, q30);
  }
}
11822
TEST(zero_high_d)11823 TEST(zero_high_d) {
11824 SETUP_WITH_FEATURES(CPUFeatures::kSVE,
11825 CPUFeatures::kNEON,
11826 CPUFeatures::kFP,
11827 CPUFeatures::kRDM);
11828 START();
11829
11830 __ Mov(x0, 0x55aa42ffaa42ff55);
11831 __ Mov(x1, 4);
11832 __ Movi(q30.V16B(), 0);
11833
11834 // Iterate over the SISD instructions using different input values on each
11835 // loop.
11836 Label loop;
11837 __ Bind(&loop);
11838
11839 __ Dup(q0.V2D(), x0);
11840 __ Ror(x0, x0, 8);
11841 __ Dup(q1.V2D(), x0);
11842 __ Ror(x0, x0, 8);
11843 __ Dup(q2.V2D(), x0);
11844 __ Ror(x0, x0, 8);
11845
11846 {
11847 ExactAssemblyScope scope(&masm, 291 * kInstructionSize);
11848 __ movi(q9.V16B(), 0x55);
11849 __ dci(0x5ee0b809); // abs d9, d0
11850 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11851
11852 __ movi(q9.V16B(), 0x55);
11853 __ dci(0x5ee08429); // add d9, d1, d0
11854 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11855
11856 __ movi(q9.V16B(), 0x55);
11857 __ dci(0x5ef1b809); // addp d9, v0.2d
11858 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11859
11860 __ movi(q9.V16B(), 0x55);
11861 __ dci(0x5ee09809); // cmeq d9, d0, #0
11862 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11863
11864 __ movi(q9.V16B(), 0x55);
11865 __ dci(0x7ee08c29); // cmeq d9, d1, d0
11866 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11867
11868 __ movi(q9.V16B(), 0x55);
11869 __ dci(0x7ee08809); // cmge d9, d0, #0
11870 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11871
11872 __ movi(q9.V16B(), 0x55);
11873 __ dci(0x5ee03c29); // cmge d9, d1, d0
11874 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11875
11876 __ movi(q9.V16B(), 0x55);
11877 __ dci(0x5ee08809); // cmgt d9, d0, #0
11878 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11879
11880 __ movi(q9.V16B(), 0x55);
11881 __ dci(0x5ee03429); // cmgt d9, d1, d0
11882 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11883
11884 __ movi(q9.V16B(), 0x55);
11885 __ dci(0x7ee03429); // cmhi d9, d1, d0
11886 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11887
11888 __ movi(q9.V16B(), 0x55);
11889 __ dci(0x7ee03c29); // cmhs d9, d1, d0
11890 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11891
11892 __ movi(q9.V16B(), 0x55);
11893 __ dci(0x7ee09809); // cmle d9, d0, #0
11894 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11895
11896 __ movi(q9.V16B(), 0x55);
11897 __ dci(0x5ee0a809); // cmlt d9, d0, #0
11898 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11899
11900 __ movi(q9.V16B(), 0x55);
11901 __ dci(0x5ee08c29); // cmtst d9, d1, d0
11902 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11903
11904 __ movi(q9.V16B(), 0x55);
11905 __ dci(0x5e080409); // mov d9, v0.d[0]
11906 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11907
11908 __ movi(q9.V16B(), 0x55);
11909 __ dci(0x7ee0d429); // fabd d9, d1, d0
11910 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11911
11912 __ movi(q9.V16B(), 0x55);
11913 __ dci(0x7e60ec29); // facge d9, d1, d0
11914 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11915
11916 __ movi(q9.V16B(), 0x55);
11917 __ dci(0x7ee0ec29); // facgt d9, d1, d0
11918 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11919
11920 __ movi(q9.V16B(), 0x55);
11921 __ dci(0x7e70d809); // faddp d9, v0.2d
11922 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11923
11924 __ movi(q9.V16B(), 0x55);
11925 __ dci(0x5ee0d809); // fcmeq d9, d0, #0.0
11926 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11927
11928 __ movi(q9.V16B(), 0x55);
11929 __ dci(0x5e60e429); // fcmeq d9, d1, d0
11930 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11931
11932 __ movi(q9.V16B(), 0x55);
11933 __ dci(0x7ee0c809); // fcmge d9, d0, #0.0
11934 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11935
11936 __ movi(q9.V16B(), 0x55);
11937 __ dci(0x7e60e429); // fcmge d9, d1, d0
11938 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11939
11940 __ movi(q9.V16B(), 0x55);
11941 __ dci(0x5ee0c809); // fcmgt d9, d0, #0.0
11942 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11943
11944 __ movi(q9.V16B(), 0x55);
11945 __ dci(0x7ee0e429); // fcmgt d9, d1, d0
11946 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11947
11948 __ movi(q9.V16B(), 0x55);
11949 __ dci(0x7ee0d809); // fcmle d9, d0, #0.0
11950 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11951
11952 __ movi(q9.V16B(), 0x55);
11953 __ dci(0x5ee0e809); // fcmlt d9, d0, #0.0
11954 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11955
11956 __ movi(q9.V16B(), 0x55);
11957 __ dci(0x5e61c809); // fcvtas d9, d0
11958 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11959
11960 __ movi(q9.V16B(), 0x55);
11961 __ dci(0x7e61c809); // fcvtau d9, d0
11962 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11963
11964 __ movi(q9.V16B(), 0x55);
11965 __ dci(0x5e61b809); // fcvtms d9, d0
11966 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11967
11968 __ movi(q9.V16B(), 0x55);
11969 __ dci(0x7e61b809); // fcvtmu d9, d0
11970 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11971
11972 __ movi(q9.V16B(), 0x55);
11973 __ dci(0x5e61a809); // fcvtns d9, d0
11974 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11975
11976 __ movi(q9.V16B(), 0x55);
11977 __ dci(0x7e61a809); // fcvtnu d9, d0
11978 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11979
11980 __ movi(q9.V16B(), 0x55);
11981 __ dci(0x5ee1a809); // fcvtps d9, d0
11982 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11983
11984 __ movi(q9.V16B(), 0x55);
11985 __ dci(0x7ee1a809); // fcvtpu d9, d0
11986 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11987
11988 __ movi(q9.V16B(), 0x55);
11989 __ dci(0x5ee1b809); // fcvtzs d9, d0
11990 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11991
11992 __ movi(q9.V16B(), 0x55);
11993 __ dci(0x5f40fc09); // fcvtzs d9, d0, #64
11994 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11995
11996 __ movi(q9.V16B(), 0x55);
11997 __ dci(0x7ee1b809); // fcvtzu d9, d0
11998 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11999
12000 __ movi(q9.V16B(), 0x55);
12001 __ dci(0x7f40fc09); // fcvtzu d9, d0, #64
12002 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12003
12004 __ movi(q9.V16B(), 0x55);
12005 __ dci(0x7e70c809); // fmaxnmp d9, v0.2d
12006 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12007
12008 __ movi(q9.V16B(), 0x55);
12009 __ dci(0x7e70f809); // fmaxp d9, v0.2d
12010 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12011
12012 __ movi(q9.V16B(), 0x55);
12013 __ dci(0x7ef0c809); // fminnmp d9, v0.2d
12014 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12015
12016 __ movi(q9.V16B(), 0x55);
12017 __ dci(0x7ef0f809); // fminp d9, v0.2d
12018 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12019
12020 __ movi(q9.V16B(), 0x55);
12021 __ dci(0x5fc01029); // fmla d9, d1, v0.d[0]
12022 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12023
12024 __ movi(q9.V16B(), 0x55);
12025 __ dci(0x5fc05029); // fmls d9, d1, v0.d[0]
12026 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12027
12028 __ movi(q9.V16B(), 0x55);
12029 __ dci(0x5fc09029); // fmul d9, d1, v0.d[0]
12030 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12031
12032 __ movi(q9.V16B(), 0x55);
12033 __ dci(0x7fc09029); // fmulx d9, d1, v0.d[0]
12034 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12035
12036 __ movi(q9.V16B(), 0x55);
12037 __ dci(0x5e60dc29); // fmulx d9, d1, d0
12038 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12039
12040 __ movi(q9.V16B(), 0x55);
12041 __ dci(0x5ee1d809); // frecpe d9, d0
12042 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12043
12044 __ movi(q9.V16B(), 0x55);
12045 __ dci(0x5e60fc29); // frecps d9, d1, d0
12046 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12047
12048 __ movi(q9.V16B(), 0x55);
12049 __ dci(0x5ee1f809); // frecpx d9, d0
12050 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12051
12052 __ movi(q9.V16B(), 0x55);
12053 __ dci(0x7ee1d809); // frsqrte d9, d0
12054 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12055
12056 __ movi(q9.V16B(), 0x55);
12057 __ dci(0x5ee0fc29); // frsqrts d9, d1, d0
12058 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12059
12060 __ movi(q9.V16B(), 0x55);
12061 __ dci(0x7ee0b809); // neg d9, d0
12062 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12063
12064 __ movi(q9.V16B(), 0x55);
12065 __ dci(0x5e61d809); // scvtf d9, d0
12066 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12067
12068 __ movi(q9.V16B(), 0x55);
12069 __ dci(0x5f40e409); // scvtf d9, d0, #64
12070 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12071
12072 __ movi(q9.V16B(), 0x55);
12073 __ dci(0x5f405409); // shl d9, d0, #0
12074 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12075
12076 __ movi(q9.V16B(), 0x55);
12077 __ dci(0x7f405409); // sli d9, d0, #0
12078 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12079
12080 __ movi(q9.V16B(), 0x55);
12081 __ dci(0x5ee07809); // sqabs d9, d0
12082 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12083
12084 __ movi(q9.V16B(), 0x55);
12085 __ dci(0x5ee00c29); // sqadd d9, d1, d0
12086 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12087
12088 __ movi(q9.V16B(), 0x55);
12089 __ dci(0x5ea09029); // sqdmlal d9, s1, s0
12090 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12091
12092 __ movi(q9.V16B(), 0x55);
12093 __ dci(0x5f803029); // sqdmlal d9, s1, v0.s[0]
12094 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12095
12096 __ movi(q9.V16B(), 0x55);
12097 __ dci(0x5ea0b029); // sqdmlsl d9, s1, s0
12098 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12099
12100 __ movi(q9.V16B(), 0x55);
12101 __ dci(0x5f807029); // sqdmlsl d9, s1, v0.s[0]
12102 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12103
12104 __ movi(q9.V16B(), 0x55);
12105 __ dci(0x5ea0d029); // sqdmull d9, s1, s0
12106 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12107
12108 __ movi(q9.V16B(), 0x55);
12109 __ dci(0x5f80b029); // sqdmull d9, s1, v0.s[0]
12110 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12111
12112 __ movi(q9.V16B(), 0x55);
12113 __ dci(0x7ee07809); // sqneg d9, d0
12114 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12115
12116 __ movi(q9.V16B(), 0x55);
12117 __ dci(0x7ec08429); // sqrdmlah d9, d1, d0
12118 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12119
12120 __ movi(q9.V16B(), 0x55);
12121 __ dci(0x7ec08c29); // sqrdmlsh d9, d1, d0
12122 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12123
12124 __ movi(q9.V16B(), 0x55);
12125 __ dci(0x5ee05c29); // sqrshl d9, d1, d0
12126 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12127
12128 __ movi(q9.V16B(), 0x55);
12129 __ dci(0x5ee04c29); // sqshl d9, d1, d0
12130 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12131
12132 __ movi(q9.V16B(), 0x55);
12133 __ dci(0x5f407409); // sqshl d9, d0, #0
12134 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12135
12136 __ movi(q9.V16B(), 0x55);
12137 __ dci(0x7f406409); // sqshlu d9, d0, #0
12138 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12139
12140 __ movi(q9.V16B(), 0x55);
12141 __ dci(0x5ee02c29); // sqsub d9, d1, d0
12142 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12143
12144 __ movi(q9.V16B(), 0x55);
12145 __ dci(0x7f404409); // sri d9, d0, #64
12146 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12147
12148 __ movi(q9.V16B(), 0x55);
12149 __ dci(0x5ee05429); // srshl d9, d1, d0
12150 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12151
12152 __ movi(q9.V16B(), 0x55);
12153 __ dci(0x5f402409); // srshr d9, d0, #64
12154 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12155
12156 __ movi(q9.V16B(), 0x55);
12157 __ dci(0x5f403409); // srsra d9, d0, #64
12158 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12159
12160 __ movi(q9.V16B(), 0x55);
12161 __ dci(0x5ee04429); // sshl d9, d1, d0
12162 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12163
12164 __ movi(q9.V16B(), 0x55);
12165 __ dci(0x5f400409); // sshr d9, d0, #64
12166 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12167
12168 __ movi(q9.V16B(), 0x55);
12169 __ dci(0x5f401409); // ssra d9, d0, #64
12170 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12171
12172 __ movi(q9.V16B(), 0x55);
12173 __ dci(0x7ee08429); // sub d9, d1, d0
12174 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12175
12176 __ movi(q9.V16B(), 0x55);
12177 __ dci(0x5ee03809); // suqadd d9, d0
12178 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12179
12180 __ movi(q9.V16B(), 0x55);
12181 __ dci(0x7e61d809); // ucvtf d9, d0
12182 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12183
12184 __ movi(q9.V16B(), 0x55);
12185 __ dci(0x7f40e409); // ucvtf d9, d0, #64
12186 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12187
12188 __ movi(q9.V16B(), 0x55);
12189 __ dci(0x7ee00c29); // uqadd d9, d1, d0
12190 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12191
12192 __ movi(q9.V16B(), 0x55);
12193 __ dci(0x7ee05c29); // uqrshl d9, d1, d0
12194 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12195
12196 __ movi(q9.V16B(), 0x55);
12197 __ dci(0x7ee04c29); // uqshl d9, d1, d0
12198 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12199
12200 __ movi(q9.V16B(), 0x55);
12201 __ dci(0x7f407409); // uqshl d9, d0, #0
12202 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12203
12204 __ movi(q9.V16B(), 0x55);
12205 __ dci(0x7ee02c29); // uqsub d9, d1, d0
12206 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12207
12208 __ movi(q9.V16B(), 0x55);
12209 __ dci(0x7ee05429); // urshl d9, d1, d0
12210 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12211
12212 __ movi(q9.V16B(), 0x55);
12213 __ dci(0x7f402409); // urshr d9, d0, #64
12214 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12215
12216 __ movi(q9.V16B(), 0x55);
12217 __ dci(0x7f403409); // ursra d9, d0, #64
12218 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12219
12220 __ movi(q9.V16B(), 0x55);
12221 __ dci(0x7ee04429); // ushl d9, d1, d0
12222 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12223
12224 __ movi(q9.V16B(), 0x55);
12225 __ dci(0x7f400409); // ushr d9, d0, #64
12226 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12227
12228 __ movi(q9.V16B(), 0x55);
12229 __ dci(0x7ee03809); // usqadd d9, d0
12230 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12231
12232 __ movi(q9.V16B(), 0x55);
12233 __ dci(0x7f401409); // usra d9, d0, #64
12234 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12235 }
12236 __ Sub(x1, x1, 1);
12237 __ Cbnz(x1, &loop);
12238
12239 __ Ins(q30.V2D(), 0, xzr);
12240
12241 END();
12242 if (CAN_RUN()) {
12243 RUN();
12244 ASSERT_EQUAL_128(0, 0, q30);
12245 }
12246 }
12247
12248 } // namespace aarch64
12249 } // namespace vixl
12250