Lines Matching refs:__

56   __ Mov(x17, src_base);
57 __ Mov(x18, dst_base);
58 __ Mov(x19, src_base);
59 __ Mov(x20, dst_base);
60 __ Mov(x21, src_base);
61 __ Mov(x22, dst_base);
62 __ Ldr(b0, MemOperand(x17, sizeof(src[0])));
63 __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex));
64 __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex));
65 __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
66 __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
67 __ Str(b2, MemOperand(x22, sizeof(dst[0])));
98 __ Mov(x17, src_base);
99 __ Mov(x18, dst_base);
100 __ Mov(x19, src_base);
101 __ Mov(x20, dst_base);
102 __ Mov(x21, src_base);
103 __ Mov(x22, dst_base);
104 __ Ldr(h0, MemOperand(x17, sizeof(src[0])));
105 __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex));
106 __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex));
107 __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
108 __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
109 __ Str(h2, MemOperand(x22, sizeof(dst[0])));
145 __ Mov(x17, src_base);
146 __ Mov(x18, dst_base);
147 __ Mov(x19, src_base);
148 __ Mov(x20, dst_base);
149 __ Mov(x21, src_base);
150 __ Mov(x22, dst_base);
151 __ Ldr(q0, MemOperand(x17, 16));
152 __ Str(q0, MemOperand(x18, 16, PostIndex));
153 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
154 __ Str(q1, MemOperand(x20, 32, PreIndex));
155 __ Ldr(q2, MemOperand(x21, 32, PreIndex));
156 __ Str(q2, MemOperand(x22, 16));
195 __ Mov(x17, src_base + 16);
196 __ Mov(x18, 1);
197 __ Mov(w19, -1);
198 __ Mov(x20, dst_base - 1);
200 __ Ldr(b0, MemOperand(x17, x18));
201 __ Ldr(b1, MemOperand(x17, x19, SXTW));
203 __ Ldr(h2, MemOperand(x17, x18));
204 __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
205 __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
206 __ Ldr(h5, MemOperand(x17, x18, LSL, 1));
208 __ Ldr(s16, MemOperand(x17, x18));
209 __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
210 __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
211 __ Ldr(s19, MemOperand(x17, x18, LSL, 2));
213 __ Ldr(d20, MemOperand(x17, x18));
214 __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
215 __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
216 __ Ldr(d23, MemOperand(x17, x18, LSL, 3));
218 __ Ldr(q24, MemOperand(x17, x18));
219 __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
220 __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
221 __ Ldr(q27, MemOperand(x17, x18, LSL, 4));
224 __ Str(b27, MemOperand(x20, x18));
225 __ Str(h27, MemOperand(x20, x18, UXTW, 1));
226 __ Add(x20, x20, 8);
227 __ Str(s27, MemOperand(x20, x19, SXTW, 2));
228 __ Sub(x20, x20, 8);
229 __ Str(d27, MemOperand(x20, x18, LSL, 3));
230 __ Add(x20, x20, 32);
231 __ Str(q27, MemOperand(x20, x19, SXTW, 4));
233 __ Sub(x20, x20, 32);
234 __ Ldr(q6, MemOperand(x20, x18));
235 __ Ldr(q7, MemOperand(x20, x18, LSL, 4));
277 __ Mov(x16, src_base);
278 __ Mov(x17, dst_base);
279 __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(src[0]), PostIndex));
280 __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(dst[1]), PreIndex));
309 __ Mov(x17, src_base);
310 __ Ldr(q2, MemOperand(x17)); // Initialise top 64-bits of Q register.
311 __ Ld1(v2.V8B(), MemOperand(x17));
312 __ Add(x17, x17, 1);
313 __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
314 __ Add(x17, x17, 1);
315 __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
316 __ Add(x17, x17, 1);
317 __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
318 __ Add(x17, x17, 1);
319 __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
320 __ Add(x17, x17, 1);
321 __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
359 __ Mov(x17, src_base);
360 __ Mov(x18, src_base + 1);
361 __ Mov(x19, src_base + 2);
362 __ Mov(x20, src_base + 3);
363 __ Mov(x21, src_base + 4);
364 __ Mov(x22, src_base + 5);
365 __ Mov(x23, 1);
366 __ Ldr(q2, MemOperand(x17)); // Initialise top 64-bits of Q register.
367 __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
368 __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
369 __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
370 __ Ld1(v16.V2S(),
375 __ Ld1(v30.V2S(),
380 __ Ld1(v20.V1D(),
428 __ Mov(x17, src_base);
429 __ Ld1(v2.V16B(), MemOperand(x17));
430 __ Add(x17, x17, 1);
431 __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
432 __ Add(x17, x17, 1);
433 __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
434 __ Add(x17, x17, 1);
435 __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
436 __ Add(x17, x17, 1);
437 __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
471 __ Mov(x17, src_base);
472 __ Mov(x18, src_base + 1);
473 __ Mov(x19, src_base + 2);
474 __ Mov(x20, src_base + 3);
475 __ Mov(x21, src_base + 4);
476 __ Mov(x22, 1);
477 __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
478 __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
479 __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
480 __ Ld1(v16.V4S(),
485 __ Ld1(v30.V2D(),
530 __ Mov(x17, src_base);
532 __ Ld1(v0.B(), i, MemOperand(x17));
533 __ Add(x17, x17, 1);
536 __ Mov(x17, src_base);
538 __ Ld1(v1.H(), i, MemOperand(x17));
539 __ Add(x17, x17, 1);
542 __ Mov(x17, src_base);
544 __ Ld1(v2.S(), i, MemOperand(x17));
545 __ Add(x17, x17, 1);
548 __ Mov(x17, src_base);
550 __ Ld1(v3.D(), i, MemOperand(x17));
551 __ Add(x17, x17, 1);
555 __ Mov(x17, src_base);
556 __ Ldr(q4, MemOperand(x17));
557 __ Ld1(v4.B(), 4, MemOperand(x17));
558 __ Ldr(q5, MemOperand(x17));
559 __ Ld1(v5.H(), 3, MemOperand(x17));
560 __ Ldr(q6, MemOperand(x17));
561 __ Ld1(v6.S(), 2, MemOperand(x17));
562 __ Ldr(q7, MemOperand(x17));
563 __ Ld1(v7.D(), 1, MemOperand(x17));
591 __ Mov(x17, src_base);
592 __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
593 __ Add(x17, x17, 1);
594 __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
595 __ Add(x17, x17, 1);
596 __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
597 __ Add(x17, x17, 1);
598 __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
625 __ Mov(x17, src_base);
626 __ Mov(x18, src_base + 1);
627 __ Mov(x19, src_base + 2);
628 __ Mov(x20, src_base + 3);
629 __ Mov(x21, src_base + 4);
630 __ Mov(x22, 1);
631 __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
632 __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
633 __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
634 __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
635 __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
670 __ Mov(x17, src_base);
671 __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
672 __ Add(x17, x17, 1);
673 __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
674 __ Add(x17, x17, 1);
675 __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
676 __ Add(x17, x17, 1);
677 __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
678 __ Add(x17, x17, 1);
679 __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
709 __ Mov(x17, src_base);
710 __ Mov(x18, src_base + 1);
711 __ Mov(x19, src_base + 2);
712 __ Mov(x20, src_base + 3);
713 __ Mov(x21, src_base + 4);
714 __ Mov(x22, 1);
715 __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
716 __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
717 __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
718 __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
719 __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
758 __ Mov(x17, src_base);
760 __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
761 __ Add(x17, x17, 1);
764 __ Mov(x17, src_base);
766 __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
767 __ Add(x17, x17, 1);
770 __ Mov(x17, src_base);
772 __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
773 __ Add(x17, x17, 1);
776 __ Mov(x17, src_base);
778 __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
779 __ Add(x17, x17, 1);
783 __ Mov(x17, src_base);
784 __ Mov(x4, x17);
785 __ Ldr(q8, MemOperand(x4, 16, PostIndex));
786 __ Ldr(q9, MemOperand(x4));
787 __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
788 __ Mov(x5, x17);
789 __ Ldr(q10, MemOperand(x5, 16, PostIndex));
790 __ Ldr(q11, MemOperand(x5));
791 __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
792 __ Mov(x6, x17);
793 __ Ldr(q12, MemOperand(x6, 16, PostIndex));
794 __ Ldr(q13, MemOperand(x6));
795 __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
796 __ Mov(x7, x17);
797 __ Ldr(q14, MemOperand(x7, 16, PostIndex));
798 __ Ldr(q15, MemOperand(x7));
799 __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));
836 __ Mov(x17, src_base);
837 __ Mov(x18, src_base);
838 __ Mov(x19, src_base);
839 __ Mov(x20, src_base);
840 __ Mov(x21, src_base);
841 __ Mov(x22, src_base);
842 __ Mov(x23, src_base);
843 __ Mov(x24, src_base);
847 __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
851 __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
855 __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
859 __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
863 __ Mov(x25, 1);
864 __ Mov(x4, x21);
865 __ Ldr(q8, MemOperand(x4, 16, PostIndex));
866 __ Ldr(q9, MemOperand(x4));
867 __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
868 __ Add(x25, x25, 1);
870 __ Mov(x5, x22);
871 __ Ldr(q10, MemOperand(x5, 16, PostIndex));
872 __ Ldr(q11, MemOperand(x5));
873 __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
874 __ Add(x25, x25, 1);
876 __ Mov(x6, x23);
877 __ Ldr(q12, MemOperand(x6, 16, PostIndex));
878 __ Ldr(q13, MemOperand(x6));
879 __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
880 __ Add(x25, x25, 1);
882 __ Mov(x7, x24);
883 __ Ldr(q14, MemOperand(x7, 16, PostIndex));
884 __ Ldr(q15, MemOperand(x7));
885 __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));
932 __ Mov(x17, src_base + 1);
933 __ Mov(x18, 1);
934 __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
935 __ Add(x17, x17, 2);
936 __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
937 __ Add(x17, x17, 1);
938 __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
939 __ Add(x17, x17, 1);
940 __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
941 __ Add(x17, x17, 4);
942 __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
943 __ Add(x17, x17, 1);
944 __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
945 __ Add(x17, x17, 8);
946 __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
980 __ Mov(x17, src_base + 1);
981 __ Mov(x18, 1);
982 __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
983 __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
984 __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
985 __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
986 __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
987 __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
988 __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
1023 __ Mov(x17, src_base);
1024 __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
1025 __ Add(x17, x17, 1);
1026 __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
1027 __ Add(x17, x17, 1);
1028 __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
1029 __ Add(x17, x17, 1);
1030 __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
1062 __ Mov(x17, src_base);
1063 __ Mov(x18, src_base + 1);
1064 __ Mov(x19, src_base + 2);
1065 __ Mov(x20, src_base + 3);
1066 __ Mov(x21, src_base + 4);
1067 __ Mov(x22, 1);
1068 __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
1069 __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
1070 __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
1071 __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
1072 __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
1113 __ Mov(x17, src_base);
1114 __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
1115 __ Add(x17, x17, 1);
1116 __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
1117 __ Add(x17, x17, 1);
1118 __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
1119 __ Add(x17, x17, 1);
1120 __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
1121 __ Add(x17, x17, 1);
1122 __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
1157 __ Mov(x17, src_base);
1158 __ Mov(x18, src_base + 1);
1159 __ Mov(x19, src_base + 2);
1160 __ Mov(x20, src_base + 3);
1161 __ Mov(x21, src_base + 4);
1162 __ Mov(x22, 1);
1164 __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
1165 __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
1166 __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
1167 __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
1168 __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
1211 __ Mov(x17, src_base);
1213 __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
1214 __ Add(x17, x17, 1);
1217 __ Mov(x17, src_base);
1219 __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
1220 __ Add(x17, x17, 1);
1223 __ Mov(x17, src_base);
1225 __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
1226 __ Add(x17, x17, 1);
1229 __ Mov(x17, src_base);
1231 __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
1232 __ Add(x17, x17, 1);
1236 __ Mov(x17, src_base);
1237 __ Mov(x4, x17);
1238 __ Ldr(q12, MemOperand(x4, 16, PostIndex));
1239 __ Ldr(q13, MemOperand(x4, 16, PostIndex));
1240 __ Ldr(q14, MemOperand(x4));
1241 __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
1242 __ Mov(x5, x17);
1243 __ Ldr(q15, MemOperand(x5, 16, PostIndex));
1244 __ Ldr(q16, MemOperand(x5, 16, PostIndex));
1245 __ Ldr(q17, MemOperand(x5));
1246 __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
1247 __ Mov(x6, x17);
1248 __ Ldr(q18, MemOperand(x6, 16, PostIndex));
1249 __ Ldr(q19, MemOperand(x6, 16, PostIndex));
1250 __ Ldr(q20, MemOperand(x6));
1251 __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
1252 __ Mov(x7, x17);
1253 __ Ldr(q21, MemOperand(x7, 16, PostIndex));
1254 __ Ldr(q22, MemOperand(x7, 16, PostIndex));
1255 __ Ldr(q23, MemOperand(x7));
1256 __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));
1297 __ Mov(x17, src_base);
1298 __ Mov(x18, src_base);
1299 __ Mov(x19, src_base);
1300 __ Mov(x20, src_base);
1301 __ Mov(x21, src_base);
1302 __ Mov(x22, src_base);
1303 __ Mov(x23, src_base);
1304 __ Mov(x24, src_base);
1306 __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex));
1310 __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex));
1314 __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex));
1318 __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex));
1323 __ Mov(x25, 1);
1324 __ Mov(x4, x21);
1325 __ Ldr(q12, MemOperand(x4, 16, PostIndex));
1326 __ Ldr(q13, MemOperand(x4, 16, PostIndex));
1327 __ Ldr(q14, MemOperand(x4));
1328 __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
1329 __ Add(x25, x25, 1);
1331 __ Mov(x5, x22);
1332 __ Ldr(q15, MemOperand(x5, 16, PostIndex));
1333 __ Ldr(q16, MemOperand(x5, 16, PostIndex));
1334 __ Ldr(q17, MemOperand(x5));
1335 __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
1336 __ Add(x25, x25, 1);
1338 __ Mov(x6, x23);
1339 __ Ldr(q18, MemOperand(x6, 16, PostIndex));
1340 __ Ldr(q19, MemOperand(x6, 16, PostIndex));
1341 __ Ldr(q20, MemOperand(x6));
1342 __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
1343 __ Add(x25, x25, 1);
1345 __ Mov(x7, x24);
1346 __ Ldr(q21, MemOperand(x7, 16, PostIndex));
1347 __ Ldr(q22, MemOperand(x7, 16, PostIndex));
1348 __ Ldr(q23, MemOperand(x7));
1349 __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));
1403 __ Mov(x17, src_base + 1);
1404 __ Mov(x18, 1);
1405 __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
1406 __ Add(x17, x17, 3);
1407 __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
1408 __ Add(x17, x17, 1);
1409 __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
1410 __ Add(x17, x17, 1);
1411 __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
1412 __ Add(x17, x17, 6);
1413 __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
1414 __ Add(x17, x17, 1);
1415 __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
1416 __ Add(x17, x17, 12);
1417 __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
1456 __ Mov(x17, src_base + 1);
1457 __ Mov(x18, 1);
1460 __ Mov(x17, src_base + 1);
1461 __ Mov(x18, 1);
1462 __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
1463 __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
1464 __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
1465 __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
1466 __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
1467 __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
1468 __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
1509 __ Mov(x17, src_base);
1510 __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
1511 __ Add(x17, x17, 1);
1512 __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
1513 __ Add(x17, x17, 1);
1514 __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
1515 __ Add(x17, x17, 1);
1516 __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
1552 __ Mov(x17, src_base);
1553 __ Mov(x18, src_base + 1);
1554 __ Mov(x19, src_base + 2);
1555 __ Mov(x20, src_base + 3);
1556 __ Mov(x21, src_base + 4);
1557 __ Mov(x22, 1);
1558 __ Ld4(v2.V8B(),
1563 __ Ld4(v6.V8B(),
1568 __ Ld4(v10.V4H(),
1573 __ Ld4(v14.V2S(),
1578 __ Ld4(v30.V2S(),
1629 __ Mov(x17, src_base);
1630 __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
1631 __ Add(x17, x17, 1);
1632 __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
1633 __ Add(x17, x17, 1);
1634 __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
1635 __ Add(x17, x17, 1);
1636 __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
1637 __ Add(x17, x17, 1);
1638 __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
1678 __ Mov(x17, src_base);
1679 __ Mov(x18, src_base + 1);
1680 __ Mov(x19, src_base + 2);
1681 __ Mov(x20, src_base + 3);
1682 __ Mov(x21, src_base + 4);
1683 __ Mov(x22, 1);
1685 __ Ld4(v2.V16B(),
1690 __ Ld4(v6.V16B(),
1695 __ Ld4(v10.V8H(),
1700 __ Ld4(v14.V4S(),
1705 __ Ld4(v30.V2D(),
1758 __ Mov(x17, src_base);
1760 __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17));
1761 __ Add(x17, x17, 1);
1764 __ Mov(x17, src_base);
1766 __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17));
1767 __ Add(x17, x17, 1);
1770 __ Mov(x17, src_base);
1772 __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17));
1773 __ Add(x17, x17, 1);
1776 __ Mov(x17, src_base);
1778 __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17));
1779 __ Add(x17, x17, 1);
1783 __ Mov(x17, src_base);
1784 __ Mov(x4, x17);
1785 __ Ldr(q16, MemOperand(x4, 16, PostIndex));
1786 __ Ldr(q17, MemOperand(x4, 16, PostIndex));
1787 __ Ldr(q18, MemOperand(x4, 16, PostIndex));
1788 __ Ldr(q19, MemOperand(x4));
1789 __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));
1791 __ Mov(x5, x17);
1792 __ Ldr(q20, MemOperand(x5, 16, PostIndex));
1793 __ Ldr(q21, MemOperand(x5, 16, PostIndex));
1794 __ Ldr(q22, MemOperand(x5, 16, PostIndex));
1795 __ Ldr(q23, MemOperand(x5));
1796 __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));
1798 __ Mov(x6, x17);
1799 __ Ldr(q24, MemOperand(x6, 16, PostIndex));
1800 __ Ldr(q25, MemOperand(x6, 16, PostIndex));
1801 __ Ldr(q26, MemOperand(x6, 16, PostIndex));
1802 __ Ldr(q27, MemOperand(x6));
1803 __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));
1805 __ Mov(x7, x17);
1806 __ Ldr(q28, MemOperand(x7, 16, PostIndex));
1807 __ Ldr(q29, MemOperand(x7, 16, PostIndex));
1808 __ Ldr(q30, MemOperand(x7, 16, PostIndex));
1809 __ Ldr(q31, MemOperand(x7));
1810 __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));
1865 __ Mov(x17, src_base);
1867 __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex));
1870 __ Mov(x18, src_base);
1872 __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex));
1875 __ Mov(x19, src_base);
1877 __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex));
1880 __ Mov(x20, src_base);
1882 __ Ld4(v12.D(),
1891 __ Mov(x25, 1);
1892 __ Mov(x21, src_base);
1893 __ Mov(x22, src_base);
1894 __ Mov(x23, src_base);
1895 __ Mov(x24, src_base);
1897 __ Mov(x4, x21);
1898 __ Ldr(q16, MemOperand(x4, 16, PostIndex));
1899 __ Ldr(q17, MemOperand(x4, 16, PostIndex));
1900 __ Ldr(q18, MemOperand(x4, 16, PostIndex));
1901 __ Ldr(q19, MemOperand(x4));
1902 __ Ld4(v16.B(),
1908 __ Add(x25, x25, 1);
1910 __ Mov(x5, x22);
1911 __ Ldr(q20, MemOperand(x5, 16, PostIndex));
1912 __ Ldr(q21, MemOperand(x5, 16, PostIndex));
1913 __ Ldr(q22, MemOperand(x5, 16, PostIndex));
1914 __ Ldr(q23, MemOperand(x5));
1915 __ Ld4(v20.H(),
1921 __ Add(x25, x25, 1);
1923 __ Mov(x6, x23);
1924 __ Ldr(q24, MemOperand(x6, 16, PostIndex));
1925 __ Ldr(q25, MemOperand(x6, 16, PostIndex));
1926 __ Ldr(q26, MemOperand(x6, 16, PostIndex));
1927 __ Ldr(q27, MemOperand(x6));
1928 __ Ld4(v24.S(),
1934 __ Add(x25, x25, 1);
1936 __ Mov(x7, x24);
1937 __ Ldr(q28, MemOperand(x7, 16, PostIndex));
1938 __ Ldr(q29, MemOperand(x7, 16, PostIndex));
1939 __ Ldr(q30, MemOperand(x7, 16, PostIndex));
1940 __ Ldr(q31, MemOperand(x7));
1941 __ Ld4(v28.D(),
2008 __ Mov(x17, src_base + 1);
2009 __ Mov(x18, 1);
2010 __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
2011 __ Add(x17, x17, 4);
2012 __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
2013 __ Add(x17, x17, 1);
2014 __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
2015 __ Add(x17, x17, 1);
2016 __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
2017 __ Add(x17, x17, 8);
2018 __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
2019 __ Add(x17, x17, 1);
2020 __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
2021 __ Add(x17, x17, 16);
2022 __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
2070 __ Mov(x17, src_base + 1);
2071 __ Mov(x18, 1);
2074 __ Mov(x17, src_base + 1);
2075 __ Mov(x18, 1);
2076 __ Ld4r(v0.V8B(),
2081 __ Ld4r(v4.V16B(),
2086 __ Ld4r(v8.V4H(),
2091 __ Ld4r(v12.V8H(),
2096 __ Ld4r(v16.V2S(),
2101 __ Ld4r(v20.V4S(),
2106 __ Ld4r(v24.V2D(),
2159 __ Mov(x17, src_base);
2160 __ Mov(x18, -16);
2161 __ Ldr(q0, MemOperand(x17));
2164 __ St1(v0.B(), i, MemOperand(x17));
2165 __ Add(x17, x17, 1);
2167 __ Ldr(q1, MemOperand(x17, x18));
2170 __ St1(v0.H(), i, MemOperand(x17));
2171 __ Add(x17, x17, 2);
2173 __ Ldr(q2, MemOperand(x17, x18));
2176 __ St1(v0.S(), i, MemOperand(x17));
2177 __ Add(x17, x17, 4);
2179 __ Ldr(q3, MemOperand(x17, x18));
2182 __ St1(v0.D(), i, MemOperand(x17));
2183 __ Add(x17, x17, 8);
2185 __ Ldr(q4, MemOperand(x17, x18));
2209 __ Mov(x17, dst_base);
2210 __ Mov(x18, dst_base);
2211 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2212 __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2216 __ St2(v0.B(), v1.B(), i, MemOperand(x18));
2217 __ Add(x18, x18, 2);
2220 __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
2222 __ Ldr(q2, MemOperand(x17, 0 * 16));
2223 __ Ldr(q3, MemOperand(x17, 1 * 16));
2224 __ Ldr(q4, MemOperand(x17, 2 * 16));
2225 __ Ldr(q5, MemOperand(x17, 3 * 16));
2228 __ Mov(x0, 4);
2230 __ St2(v0.H(), v1.H(), i, MemOperand(x18));
2231 __ Add(x18, x18, 4);
2234 __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
2236 __ Ldr(q6, MemOperand(x17, 4 * 16));
2237 __ Ldr(q7, MemOperand(x17, 5 * 16));
2238 __ Ldr(q16, MemOperand(x17, 6 * 16));
2239 __ Ldr(q17, MemOperand(x17, 7 * 16));
2243 __ St2(v0.S(), v1.S(), i, MemOperand(x18));
2244 __ Add(x18, x18, 8);
2247 __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
2249 __ Ldr(q18, MemOperand(x17, 8 * 16));
2250 __ Ldr(q19, MemOperand(x17, 9 * 16));
2251 __ Ldr(q20, MemOperand(x17, 10 * 16));
2252 __ Ldr(q21, MemOperand(x17, 11 * 16));
2255 __ Mov(x0, 16);
2256 __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
2257 __ Add(x18, x18, 16);
2258 __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
2259 __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
2260 __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
2261 __ Ldr(q22, MemOperand(x17, 12 * 16));
2262 __ Ldr(q23, MemOperand(x17, 13 * 16));
2263 __ Ldr(q24, MemOperand(x17, 14 * 16));
2264 __ Ldr(q25, MemOperand(x17, 15 * 16));
2302 __ Mov(x17, dst_base);
2303 __ Mov(x18, dst_base);
2304 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2305 __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2306 __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2310 __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
2311 __ Add(x18, x18, 3);
2314 __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
2316 __ Ldr(q3, MemOperand(x17, 0 * 16));
2317 __ Ldr(q4, MemOperand(x17, 1 * 16));
2318 __ Ldr(q5, MemOperand(x17, 2 * 16));
2319 __ Ldr(q6, MemOperand(x17, 3 * 16));
2320 __ Ldr(q7, MemOperand(x17, 4 * 16));
2321 __ Ldr(q16, MemOperand(x17, 5 * 16));
2324 __ Mov(x0, 6);
2326 __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
2327 __ Add(x18, x18, 6);
2330 __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
2332 __ Ldr(q17, MemOperand(x17, 6 * 16));
2333 __ Ldr(q18, MemOperand(x17, 7 * 16));
2334 __ Ldr(q19, MemOperand(x17, 8 * 16));
2335 __ Ldr(q20, MemOperand(x17, 9 * 16));
2336 __ Ldr(q21, MemOperand(x17, 10 * 16));
2337 __ Ldr(q22, MemOperand(x17, 11 * 16));
2341 __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
2342 __ Add(x18, x18, 12);
2345 __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
2347 __ Ldr(q23, MemOperand(x17, 12 * 16));
2348 __ Ldr(q24, MemOperand(x17, 13 * 16));
2349 __ Ldr(q25, MemOperand(x17, 14 * 16));
2350 __ Ldr(q26, MemOperand(x17, 15 * 16));
2351 __ Ldr(q27, MemOperand(x17, 16 * 16));
2352 __ Ldr(q28, MemOperand(x17, 17 * 16));
2355 __ Mov(x0, 24);
2356 __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
2357 __ Add(x18, x18, 24);
2358 __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
2359 __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
2360 __ Ldr(q29, MemOperand(x17, 18 * 16));
2361 __ Ldr(q30, MemOperand(x17, 19 * 16));
2362 __ Ldr(q31, MemOperand(x17, 20 * 16));
2401 __ Mov(x17, dst_base);
2402 __ Mov(x18, dst_base);
2403 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2404 __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2405 __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2406 __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2410 __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
2411 __ Add(x18, x18, 4);
2413 __ Ldr(q4, MemOperand(x17, 0 * 16));
2414 __ Ldr(q5, MemOperand(x17, 1 * 16));
2415 __ Ldr(q6, MemOperand(x17, 2 * 16));
2416 __ Ldr(q7, MemOperand(x17, 3 * 16));
2419 __ Mov(x0, 8);
2421 __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
2423 __ Ldr(q16, MemOperand(x17, 4 * 16));
2424 __ Ldr(q17, MemOperand(x17, 5 * 16));
2425 __ Ldr(q18, MemOperand(x17, 6 * 16));
2426 __ Ldr(q19, MemOperand(x17, 7 * 16));
2430 __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
2431 __ Add(x18, x18, 16);
2433 __ Ldr(q20, MemOperand(x17, 8 * 16));
2434 __ Ldr(q21, MemOperand(x17, 9 * 16));
2435 __ Ldr(q22, MemOperand(x17, 10 * 16));
2436 __ Ldr(q23, MemOperand(x17, 11 * 16));
2439 __ Mov(x0, 32);
2440 __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
2441 __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));
2443 __ Ldr(q24, MemOperand(x17, 12 * 16));
2444 __ Ldr(q25, MemOperand(x17, 13 * 16));
2445 __ Ldr(q26, MemOperand(x17, 14 * 16));
2446 __ Ldr(q27, MemOperand(x17, 15 * 16));
2485 __ Mov(x17, src_base);
2486 __ Mov(x18, src_base);
2487 __ Mov(x19, src_base);
2488 __ Mov(x20, src_base);
2489 __ Mov(x21, src_base);
2490 __ Mov(x22, src_base);
2491 __ Mov(x23, src_base);
2492 __ Mov(x24, src_base);
2496 __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex));
2500 __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex));
2504 __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex));
2508 __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex));
2512 __ Mov(x25, 1);
2513 __ Ldr(q4, MemOperand(x21));
2514 __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
2515 __ Add(x25, x25, 1);
2517 __ Ldr(q5, MemOperand(x22));
2518 __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
2519 __ Add(x25, x25, 1);
2521 __ Ldr(q6, MemOperand(x23));
2522 __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
2523 __ Add(x25, x25, 1);
2525 __ Ldr(q7, MemOperand(x24));
2526 __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));
2563 __ Mov(x17, src_base);
2564 __ Mov(x18, -16);
2565 __ Ldr(q0, MemOperand(x17));
2568 __ St1(v0.B(), i, MemOperand(x17, 1, PostIndex));
2570 __ Ldr(q1, MemOperand(x17, x18));
2573 __ St1(v0.H(), i, MemOperand(x17, 2, PostIndex));
2575 __ Ldr(q2, MemOperand(x17, x18));
2578 __ St1(v0.S(), i, MemOperand(x17, 4, PostIndex));
2580 __ Ldr(q3, MemOperand(x17, x18));
2583 __ St1(v0.D(), i, MemOperand(x17, 8, PostIndex));
2585 __ Ldr(q4, MemOperand(x17, x18));
2610 __ Mov(x17, src_base + 1);
2611 __ Ld1r(v0.V8B(), MemOperand(x17));
2612 __ Add(x17, x17, 1);
2613 __ Ld1r(v1.V16B(), MemOperand(x17));
2614 __ Add(x17, x17, 1);
2615 __ Ld1r(v2.V4H(), MemOperand(x17));
2616 __ Add(x17, x17, 1);
2617 __ Ld1r(v3.V8H(), MemOperand(x17));
2618 __ Add(x17, x17, 1);
2619 __ Ld1r(v4.V2S(), MemOperand(x17));
2620 __ Add(x17, x17, 1);
2621 __ Ld1r(v5.V4S(), MemOperand(x17));
2622 __ Add(x17, x17, 1);
2623 __ Ld1r(v6.V1D(), MemOperand(x17));
2624 __ Add(x17, x17, 1);
2625 __ Ld1r(v7.V2D(), MemOperand(x17));
2653 __ Mov(x17, src_base + 1);
2654 __ Mov(x18, 1);
2655 __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
2656 __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
2657 __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
2658 __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
2659 __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
2660 __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
2661 __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
2689 __ Mov(x17, src_base);
2690 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
2691 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
2692 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
2693 __ Ldr(q3, MemOperand(x17, 16, PostIndex));
2694 __ Mov(x17, src_base);
2696 __ St1(v0.V8B(), MemOperand(x17));
2697 __ Ldr(d16, MemOperand(x17, 8, PostIndex));
2699 __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
2700 __ Ldr(q17, MemOperand(x17, 16, PostIndex));
2702 __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
2703 __ Ldr(d18, MemOperand(x17, 8, PostIndex));
2704 __ Ldr(d19, MemOperand(x17, 8, PostIndex));
2705 __ Ldr(d20, MemOperand(x17, 8, PostIndex));
2707 __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
2708 __ Ldr(q21, MemOperand(x17, 16, PostIndex));
2709 __ Ldr(q22, MemOperand(x17, 16, PostIndex));
2711 __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
2712 __ Ldr(q23, MemOperand(x17, 16, PostIndex));
2713 __ Ldr(q24, MemOperand(x17));
2746 __ Mov(x17, src_base);
2747 __ Mov(x18, -8);
2748 __ Mov(x19, -16);
2749 __ Mov(x20, -24);
2750 __ Mov(x21, -32);
2751 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
2752 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
2753 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
2754 __ Ldr(q3, MemOperand(x17, 16, PostIndex));
2755 __ Mov(x17, src_base);
2757 __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
2758 __ Ldr(d16, MemOperand(x17, x18));
2760 __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
2761 __ Ldr(q17, MemOperand(x17, x19));
2763 __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
2764 __ Ldr(d18, MemOperand(x17, x20));
2765 __ Ldr(d19, MemOperand(x17, x19));
2766 __ Ldr(d20, MemOperand(x17, x18));
2768 __ St1(v0.V2S(),
2773 __ Ldr(q21, MemOperand(x17, x21));
2774 __ Ldr(q22, MemOperand(x17, x19));
2776 __ St1(v0.V1D(),
2781 __ Ldr(q23, MemOperand(x17, x21));
2782 __ Ldr(q24, MemOperand(x17, x19));
2811 __ Mov(x17, src_base);
2812 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
2813 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
2814 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
2815 __ Ldr(q3, MemOperand(x17, 16, PostIndex));
2817 __ St1(v0.V16B(), MemOperand(x17));
2818 __ Ldr(q16, MemOperand(x17, 16, PostIndex));
2820 __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
2821 __ Ldr(q17, MemOperand(x17, 16, PostIndex));
2822 __ Ldr(q18, MemOperand(x17, 16, PostIndex));
2824 __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
2825 __ Ldr(q19, MemOperand(x17, 16, PostIndex));
2826 __ Ldr(q20, MemOperand(x17, 16, PostIndex));
2827 __ Ldr(q21, MemOperand(x17, 16, PostIndex));
2829 __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
2830 __ Ldr(q22, MemOperand(x17, 16, PostIndex));
2831 __ Ldr(q23, MemOperand(x17, 16, PostIndex));
2832 __ Ldr(q24, MemOperand(x17, 16, PostIndex));
2833 __ Ldr(q25, MemOperand(x17));
2863 __ Mov(x17, src_base);
2864 __ Mov(x18, -16);
2865 __ Mov(x19, -32);
2866 __ Mov(x20, -48);
2867 __ Mov(x21, -64);
2868 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
2869 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
2870 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
2871 __ Ldr(q3, MemOperand(x17, 16, PostIndex));
2873 __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
2874 __ Ldr(q16, MemOperand(x17, x18));
2876 __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
2877 __ Ldr(q17, MemOperand(x17, x19));
2878 __ Ldr(q18, MemOperand(x17, x18));
2880 __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
2881 __ Ldr(q19, MemOperand(x17, x20));
2882 __ Ldr(q20, MemOperand(x17, x19));
2883 __ Ldr(q21, MemOperand(x17, x18));
2885 __ St1(v0.V2D(),
2890 __ Ldr(q22, MemOperand(x17, x21));
2891 __ Ldr(q23, MemOperand(x17, x20));
2892 __ Ldr(q24, MemOperand(x17, x19));
2893 __ Ldr(q25, MemOperand(x17, x18));
2924 __ Mov(x17, src_base);
2925 __ Mov(x18, src_base);
2926 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
2927 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
2929 __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
2930 __ Add(x18, x18, 22);
2931 __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
2932 __ Add(x18, x18, 11);
2933 __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));
2935 __ Mov(x19, src_base);
2936 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
2937 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
2938 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
2939 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
2964 __ Mov(x22, 5);
2965 __ Mov(x17, src_base);
2966 __ Mov(x18, src_base);
2967 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
2968 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
2970 __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
2971 __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
2972 __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));
2975 __ Mov(x19, src_base);
2976 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
2977 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
2978 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3002 __ Mov(x17, src_base);
3003 __ Mov(x18, src_base);
3004 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3005 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3007 __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
3008 __ Add(x18, x18, 8);
3009 __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
3010 __ Add(x18, x18, 22);
3011 __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
3012 __ Add(x18, x18, 2);
3013 __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));
3015 __ Mov(x19, src_base);
3016 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3017 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3018 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3019 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3044 __ Mov(x22, 5);
3045 __ Mov(x17, src_base);
3046 __ Mov(x18, src_base);
3047 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3048 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3050 __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
3051 __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
3052 __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
3053 __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));
3055 __ Mov(x19, src_base);
3056 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3057 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3058 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3059 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3060 __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3086 __ Mov(x17, src_base);
3087 __ Mov(x18, src_base);
3088 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3089 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3090 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3092 __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
3093 __ Add(x18, x18, 3);
3094 __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
3095 __ Add(x18, x18, 2);
3096 __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));
3099 __ Mov(x19, src_base);
3100 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3101 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3124 __ Mov(x22, 5);
3125 __ Mov(x17, src_base);
3126 __ Mov(x18, src_base);
3127 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3128 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3129 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3131 __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
3132 __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
3133 __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));
3136 __ Mov(x19, src_base);
3137 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3138 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3139 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3140 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3165 __ Mov(x17, src_base);
3166 __ Mov(x18, src_base);
3167 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3168 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3169 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3171 __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
3172 __ Add(x18, x18, 5);
3173 __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
3174 __ Add(x18, x18, 12);
3175 __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
3176 __ Add(x18, x18, 22);
3177 __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));
3179 __ Mov(x19, src_base);
3180 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3181 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3182 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3183 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3184 __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3185 __ Ldr(q5, MemOperand(x19, 16, PostIndex));
3212 __ Mov(x22, 5);
3213 __ Mov(x17, src_base);
3214 __ Mov(x18, src_base);
3215 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3216 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3217 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3219 __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
3220 __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
3221 __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
3222 __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));
3224 __ Mov(x19, src_base);
3225 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3226 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3227 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3228 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3229 __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3230 __ Ldr(q5, MemOperand(x19, 16, PostIndex));
3231 __ Ldr(q6, MemOperand(x19, 16, PostIndex));
3259 __ Mov(x17, src_base);
3260 __ Mov(x18, src_base);
3261 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3262 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3263 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3264 __ Ldr(q3, MemOperand(x17, 16, PostIndex));
3266 __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
3267 __ Add(x18, x18, 12);
3268 __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
3269 __ Add(x18, x18, 15);
3270 __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));
3273 __ Mov(x19, src_base);
3274 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3275 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3276 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3277 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3302 __ Mov(x22, 5);
3303 __ Mov(x17, src_base);
3304 __ Mov(x18, src_base);
3305 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3306 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3307 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3308 __ Ldr(q3, MemOperand(x17, 16, PostIndex));
3310 __ St4(v0.V8B(),
3315 __ St4(v0.V4H(),
3320 __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));
3323 __ Mov(x19, src_base);
3324 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3325 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3326 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3327 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3328 __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3354 __ Mov(x17, src_base);
3355 __ Mov(x18, src_base);
3356 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3357 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3358 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3359 __ Ldr(q3, MemOperand(x17, 16, PostIndex));
3361 __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
3362 __ Add(x18, x18, 5);
3363 __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
3364 __ Add(x18, x18, 12);
3365 __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
3366 __ Add(x18, x18, 22);
3367 __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
3368 __ Add(x18, x18, 10);
3370 __ Mov(x19, src_base);
3371 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3372 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3373 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3374 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3375 __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3376 __ Ldr(q5, MemOperand(x19, 16, PostIndex));
3377 __ Ldr(q6, MemOperand(x19, 16, PostIndex));
3405 __ Mov(x22, 5);
3406 __ Mov(x17, src_base);
3407 __ Mov(x18, src_base);
3408 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3409 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3410 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3411 __ Ldr(q3, MemOperand(x17, 16, PostIndex));
3413 __ St4(v0.V16B(),
3418 __ St4(v0.V8H(),
3423 __ St4(v0.V4S(),
3428 __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
3430 __ Mov(x19, src_base);
3431 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3432 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3433 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3434 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3435 __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3436 __ Ldr(q5, MemOperand(x19, 16, PostIndex));
3437 __ Ldr(q6, MemOperand(x19, 16, PostIndex));
3438 __ Ldr(q7, MemOperand(x19, 16, PostIndex));
3439 __ Ldr(q8, MemOperand(x19, 16, PostIndex));
3463 __ Movi(v0.V2D(), 0, 0x2222222233333333);
3464 __ Movi(v1.V2D(), 0, 0x0000000011111111);
3466 __ Sminp(v16.V2S(), v0.V2S(), v1.V2S());
3467 __ Mov(v17, v0);
3468 __ Sminp(v17.V2S(), v17.V2S(), v1.V2S());
3469 __ Mov(v18, v1);
3470 __ Sminp(v18.V2S(), v0.V2S(), v18.V2S());
3471 __ Mov(v19, v0);
3472 __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());
3474 __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
3475 __ Mov(v21, v0);
3476 __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S());
3477 __ Mov(v22, v1);
3478 __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S());
3479 __ Mov(v23, v0);
3480 __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());
3482 __ Uminp(v24.V2S(), v0.V2S(), v1.V2S());
3483 __ Mov(v25, v0);
3484 __ Uminp(v25.V2S(), v25.V2S(), v1.V2S());
3485 __ Mov(v26, v1);
3486 __ Uminp(v26.V2S(), v0.V2S(), v26.V2S());
3487 __ Mov(v27, v0);
3488 __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());
3490 __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S());
3491 __ Mov(v29, v0);
3492 __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S());
3493 __ Mov(v30, v1);
3494 __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S());
3495 __ Mov(v31, v0);
3496 __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
3529 __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
3530 __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
3531 __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
3532 __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
3533 __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);
3535 __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
3536 __ Tbl(v16.V16B(), v1.V16B(), v0.V16B());
3537 __ Mov(v17, v0);
3538 __ Tbl(v17.V16B(), v1.V16B(), v17.V16B());
3539 __ Mov(v18, v1);
3540 __ Tbl(v18.V16B(), v18.V16B(), v0.V16B());
3541 __ Mov(v19, v0);
3542 __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());
3544 __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
3545 __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
3546 __ Mov(v21, v0);
3547 __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
3548 __ Mov(v22, v1);
3549 __ Mov(v23, v2);
3550 __ Mov(v24, v3);
3551 __ Mov(v25, v4);
3552 __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
3553 __ Mov(v26, v0);
3554 __ Mov(v27, v1);
3555 __ Mov(v28, v2);
3556 __ Mov(v29, v3);
3557 __ Tbl(v26.V16B(),
3585 __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
3586 __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
3587 __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
3588 __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
3589 __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);
3591 __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
3592 __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
3593 __ Mov(v17, v0);
3594 __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
3595 __ Mov(v18, v1);
3596 __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
3597 __ Mov(v19, v0);
3598 __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());
3600 __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
3601 __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
3602 __ Mov(v21, v0);
3603 __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
3604 __ Mov(v22, v1);
3605 __ Mov(v23, v2);
3606 __ Mov(v24, v3);
3607 __ Mov(v25, v4);
3608 __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
3609 __ Mov(v26, v0);
3610 __ Mov(v27, v1);
3611 __ Mov(v28, v2);
3612 __ Mov(v29, v3);
3613 __ Tbx(v26.V16B(),
3641 __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
3642 __ Fcvtl(v16.V2D(), v0.V2S());
3643 __ Fcvtl2(v17.V2D(), v0.V4S());
3644 __ Mov(v18, v0);
3645 __ Mov(v19, v0);
3646 __ Fcvtl(v18.V2D(), v18.V2S());
3647 __ Fcvtl2(v19.V2D(), v19.V4S());
3649 __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
3650 __ Fcvtl(v20.V4S(), v1.V4H());
3651 __ Fcvtl2(v21.V4S(), v1.V8H());
3652 __ Mov(v22, v1);
3653 __ Mov(v23, v1);
3654 __ Fcvtl(v22.V4S(), v22.V4H());
3655 __ Fcvtl2(v23.V4S(), v23.V8H());
3680 __ Fmov(v0.V4H(), 24.0);
3681 __ Fmov(v1.V4H(), 1024.0);
3682 __ Fmov(v2.V8H(), 5.5);
3683 __ Fmov(v3.V8H(), 2048.0);
3684 __ Fmov(v4.V8H(), kFP16PositiveInfinity);
3685 __ Fmov(v5.V8H(), kFP16NegativeInfinity);
3686 __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c2f));
3687 __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe0f));
3689 __ Fadd(v8.V4H(), v1.V4H(), v0.V4H());
3690 __ Fadd(v9.V8H(), v3.V8H(), v2.V8H());
3691 __ Fadd(v10.V4H(), v4.V4H(), v3.V4H());
3693 __ Fadd(v11.V4H(), v6.V4H(), v1.V4H());
3694 __ Fadd(v12.V4H(), v7.V4H(), v7.V4H());
3722 __ Fmov(v0.V4H(), 24.0);
3723 __ Fmov(v1.V4H(), 1024.0);
3724 __ Fmov(v2.V8H(), 5.5);
3725 __ Fmov(v3.V8H(), 2048.0);
3726 __ Fmov(v4.V4H(), kFP16PositiveInfinity);
3727 __ Fmov(v5.V4H(), kFP16NegativeInfinity);
3728 __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c22));
3729 __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe02));
3731 __ Fsub(v0.V4H(), v1.V4H(), v0.V4H());
3732 __ Fsub(v8.V8H(), v3.V8H(), v2.V8H());
3733 __ Fsub(v9.V4H(), v4.V4H(), v3.V4H());
3734 __ Fsub(v10.V4H(), v0.V4H(), v1.V4H());
3736 __ Fsub(v11.V4H(), v6.V4H(), v2.V4H());
3737 __ Fsub(v12.V4H(), v7.V4H(), v7.V4H());
3765 __ Fmov(v0.V4H(), 24.0);
3766 __ Fmov(v1.V4H(), -2.0);
3767 __ Fmov(v2.V8H(), 5.5);
3768 __ Fmov(v3.V8H(), 0.5);
3769 __ Fmov(v4.V4H(), kFP16PositiveInfinity);
3770 __ Fmov(v5.V4H(), kFP16NegativeInfinity);
3772 __ Fmul(v6.V4H(), v1.V4H(), v0.V4H());
3773 __ Fmul(v7.V8H(), v3.V8H(), v2.V8H());
3774 __ Fmul(v8.V4H(), v4.V4H(), v3.V4H());
3775 __ Fmul(v9.V4H(), v0.V4H(), v1.V4H());
3776 __ Fmul(v10.V4H(), v5.V4H(), v0.V4H());
3796 __ Fmov(v0.V4H(), 24.0);
3797 __ Fmov(v1.V4H(), -2.0);
3798 __ Fmov(v2.V8H(), 5.5);
3799 __ Fmov(v3.V8H(), 0.5);
3800 __ Fmov(v4.V4H(), kFP16PositiveInfinity);
3801 __ Fmov(v5.V4H(), kFP16NegativeInfinity);
3803 __ Fdiv(v6.V4H(), v0.V4H(), v1.V4H());
3804 __ Fdiv(v7.V8H(), v2.V8H(), v3.V8H());
3805 __ Fdiv(v8.V4H(), v4.V4H(), v3.V4H());
3806 __ Fdiv(v9.V4H(), v1.V4H(), v0.V4H());
3807 __ Fdiv(v10.V4H(), v5.V4H(), v0.V4H());
3827 __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
3828 __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
3829 __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
3830 __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
3831 __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
3832 __ Fcvtl(v16.V4S(), v0.V4H());
3833 __ Fcvtl2(v17.V4S(), v0.V8H());
3834 __ Fcvtl(v18.V4S(), v1.V4H());
3835 __ Fcvtl2(v19.V4S(), v1.V8H());
3837 __ Fcvtl(v20.V2D(), v2.V2S());
3838 __ Fcvtl2(v21.V2D(), v2.V4S());
3839 __ Fcvtl(v22.V2D(), v3.V2S());
3840 __ Fcvtl2(v23.V2D(), v3.V4S());
3841 __ Fcvtl(v24.V2D(), v4.V2S());
3842 __ Fcvtl2(v25.V2D(), v4.V4S());
3867 __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
3868 __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
3869 __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
3870 __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
3871 __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
3872 __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
3873 __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
3874 __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
3875 __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
3877 __ Fcvtn(v16.V4H(), v0.V4S());
3878 __ Fcvtn2(v16.V8H(), v1.V4S());
3879 __ Fcvtn(v17.V4H(), v2.V4S());
3880 __ Fcvtn(v18.V2S(), v3.V2D());
3881 __ Fcvtn2(v18.V4S(), v4.V2D());
3882 __ Fcvtn(v19.V2S(), v5.V2D());
3883 __ Fcvtn2(v19.V4S(), v6.V2D());
3884 __ Fcvtn(v20.V2S(), v7.V2D());
3885 __ Fcvtn2(v20.V4S(), v8.V2D());
3902 __ Movi(v0.V2D(), 0x3ff0000000000000, 0xbff0000000000000);
3903 __ Movi(v1.V2D(), 0x3f800000bf800000, 0x40000000c0000000);
3904 __ Movi(v2.V2D(), 0x3ff0000000000000, 0xbff0000000000000);
3906 __ Fcvtn(v16.V2S(), v0.V2D());
3907 __ Fcvtn(v17.V4H(), v1.V4S());
3908 __ Fcvtn(v0.V2S(), v0.V2D());
3909 __ Fcvtn(v1.V4H(), v1.V4S());
3910 __ Fcvtxn(v2.V2S(), v2.V2D());
3927 __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
3928 __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
3929 __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
3930 __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
3931 __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
3932 __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
3933 __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
3934 __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
3935 __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
3936 __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
3937 __ Fcvtxn(v16.V2S(), v0.V2D());
3938 __ Fcvtxn2(v16.V4S(), v1.V2D());
3939 __ Fcvtxn(v17.V2S(), v2.V2D());
3940 __ Fcvtxn2(v17.V4S(), v3.V2D());
3941 __ Fcvtxn(v18.V2S(), v4.V2D());
3942 __ Fcvtxn2(v18.V4S(), v5.V2D());
3943 __ Fcvtxn(v19.V2S(), v6.V2D());
3944 __ Fcvtxn2(v19.V4S(), v7.V2D());
3945 __ Fcvtxn(v20.V2S(), v8.V2D());
3946 __ Fcvtxn2(v20.V4S(), v9.V2D());
3947 __ Fcvtxn(s21, d0);
3966 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
3967 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
3968 __ Addp(v16.V16B(), v0.V16B(), v1.V16B());
3983 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
3984 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
3985 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
3986 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
3988 __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
3989 __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
3990 __ Sqdmulh(h18, h0, h1);
3991 __ Sqdmulh(s19, s2, s3);
3993 __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
3994 __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
3995 __ Sqrdmulh(h22, h0, h1);
3996 __ Sqrdmulh(s23, s2, s3);
4018 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
4019 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
4020 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4021 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4023 __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
4024 __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
4025 __ Sqdmulh(h18, h0, v1.H(), 0);
4026 __ Sqdmulh(s19, s2, v3.S(), 0);
4028 __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
4029 __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
4030 __ Sqrdmulh(h22, h0, v1.H(), 0);
4031 __ Sqrdmulh(s23, s2, v3.S(), 0);
4053 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
4054 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
4055 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4056 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4058 __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
4059 __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
4060 __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
4061 __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);
4063 __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.V4H());
4064 __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.V4S());
4065 __ Sqrdmlah(h18, h0, h1);
4066 __ Sqrdmlah(s19, s2, s3);
4084 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
4085 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
4086 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4087 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4089 __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
4090 __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
4091 __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
4092 __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);
4094 __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.H(), 1);
4095 __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.S(), 1);
4096 __ Sqrdmlah(h18, h0, v1.H(), 0);
4097 __ Sqrdmlah(s19, s2, v3.S(), 0);
4115 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004000500);
4116 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000100080);
4117 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4118 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4120 __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
4121 __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
4122 __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
4123 __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);
4125 __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.V4H());
4126 __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.V4S());
4127 __ Sqrdmlsh(h18, h0, h1);
4128 __ Sqrdmlsh(s19, s2, s3);
4146 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
4147 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
4148 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4149 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4151 __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
4152 __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
4153 __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
4154 __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);
4156 __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.H(), 1);
4157 __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.S(), 1);
4158 __ Sqrdmlsh(h18, h0, v1.H(), 0);
4159 __ Sqrdmlsh(s19, s2, v3.S(), 0);
4177 __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
4178 __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
4179 __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);
4181 __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
4182 __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
4183 __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
4184 __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);
4186 __ Sdot(v16.V4S(), v0.V16B(), v1.V16B());
4187 __ Sdot(v17.V2S(), v1.V8B(), v2.V8B());
4189 __ Udot(v18.V4S(), v0.V16B(), v1.V16B());
4190 __ Udot(v19.V2S(), v1.V8B(), v2.V8B());
4208 __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
4209 __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
4210 __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);
4212 __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
4213 __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
4214 __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
4215 __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);
4217 __ Sdot(v16.V4S(), v0.V16B(), v1.S4B(), 1);
4218 __ Sdot(v17.V2S(), v1.V8B(), v2.S4B(), 1);
4220 __ Udot(v18.V4S(), v0.V16B(), v1.S4B(), 1);
4221 __ Udot(v19.V2S(), v1.V8B(), v2.S4B(), 1);
4240 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
4242 __ Saddlp(v16.V8H(), v0.V16B());
4243 __ Saddlp(v17.V4H(), v0.V8B());
4245 __ Saddlp(v18.V4S(), v0.V8H());
4246 __ Saddlp(v19.V2S(), v0.V4H());
4248 __ Saddlp(v20.V2D(), v0.V4S());
4249 __ Saddlp(v21.V1D(), v0.V2S());
4269 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
4271 __ Uaddlp(v16.V8H(), v0.V16B());
4272 __ Uaddlp(v17.V4H(), v0.V8B());
4274 __ Uaddlp(v18.V4S(), v0.V8H());
4275 __ Uaddlp(v19.V2S(), v0.V4H());
4277 __ Uaddlp(v20.V2D(), v0.V4S());
4278 __ Uaddlp(v21.V1D(), v0.V2S());
4298 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
4299 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
4300 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
4301 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
4302 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
4304 __ Mov(v16.V16B(), v1.V16B());
4305 __ Mov(v17.V16B(), v1.V16B());
4306 __ Sadalp(v16.V8H(), v0.V16B());
4307 __ Sadalp(v17.V4H(), v0.V8B());
4309 __ Mov(v18.V16B(), v2.V16B());
4310 __ Mov(v19.V16B(), v2.V16B());
4311 __ Sadalp(v18.V4S(), v1.V8H());
4312 __ Sadalp(v19.V2S(), v1.V4H());
4314 __ Mov(v20.V16B(), v3.V16B());
4315 __ Mov(v21.V16B(), v4.V16B());
4316 __ Sadalp(v20.V2D(), v2.V4S());
4317 __ Sadalp(v21.V1D(), v2.V2S());
4337 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
4338 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
4339 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
4340 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
4341 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
4343 __ Mov(v16.V16B(), v1.V16B());
4344 __ Mov(v17.V16B(), v1.V16B());
4345 __ Uadalp(v16.V8H(), v0.V16B());
4346 __ Uadalp(v17.V4H(), v0.V8B());
4348 __ Mov(v18.V16B(), v2.V16B());
4349 __ Mov(v19.V16B(), v2.V16B());
4350 __ Uadalp(v18.V4S(), v1.V8H());
4351 __ Uadalp(v19.V2S(), v1.V4H());
4353 __ Mov(v20.V16B(), v3.V16B());
4354 __ Mov(v21.V16B(), v4.V16B());
4355 __ Uadalp(v20.V2D(), v2.V4S());
4356 __ Uadalp(v21.V1D(), v2.V2S());
4376 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4377 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
4378 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4379 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4381 __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
4382 __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
4383 __ Mul(v18.V16B(), v0.V16B(), v1.V16B());
4401 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4402 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
4403 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4404 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4406 __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
4407 __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
4408 __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
4409 __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());
4428 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4429 __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
4432 __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
4433 __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
4434 __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
4435 __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);
4437 __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
4438 __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
4439 __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
4440 __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);
4442 __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
4443 __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
4444 __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
4445 __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);
4447 __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
4448 __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4449 __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
4450 __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);
4452 __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
4453 __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
4454 __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
4455 __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);
4484 __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
4485 __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
4488 __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
4489 __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
4490 __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
4491 __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);
4493 __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
4494 __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
4495 __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
4496 __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);
4498 __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
4499 __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
4500 __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
4501 __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);
4503 __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
4504 __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
4505 __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
4506 __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);
4508 __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
4509 __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
4510 __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
4511 __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);
4541 __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
4542 __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
4544 __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
4545 __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
4546 __ Sqdmull(s18, h0, v1.H(), 7);
4548 __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
4549 __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
4550 __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
4552 __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
4553 __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
4554 __ Sqdmlal(s22, h0, v1.H(), 7);
4556 __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
4557 __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
4558 __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);
4560 __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
4561 __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
4562 __ Sqdmlsl(s26, h0, v1.H(), 7);
4589 __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
4590 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
4591 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4592 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4593 __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4594 __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4596 __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
4597 __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
4598 __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
4599 __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());
4618 __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
4619 __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
4620 __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
4621 __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);
4623 __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
4624 __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
4625 __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
4626 __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
4627 __ Sqdmull(s20, h0, h1);
4628 __ Sqdmull(d21, s2, s3);
4649 __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
4650 __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
4651 __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
4652 __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);
4654 __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
4655 __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
4656 __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
4657 __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
4658 __ Movi(v20.V2D(), 0, 0x00000001);
4659 __ Movi(v21.V2D(), 0, 0x00000001);
4661 __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
4662 __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
4663 __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
4664 __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
4665 __ Sqdmlal(s20, h0, h1);
4666 __ Sqdmlal(d21, s2, s3);
4687 __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
4688 __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
4689 __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
4690 __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);
4692 __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
4693 __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
4694 __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
4695 __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
4696 __ Movi(v20.V2D(), 0, 0x00000001);
4697 __ Movi(v21.V2D(), 0, 0x00000001);
4699 __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
4700 __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
4701 __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
4702 __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
4703 __ Sqdmlsl(s20, h0, h1);
4704 __ Sqdmlsl(d21, s2, s3);
4725 __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
4726 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
4727 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4728 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4729 __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4730 __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4732 __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
4733 __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
4734 __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
4735 __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());
4754 __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
4755 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
4756 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4757 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4758 __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4759 __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4761 __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
4762 __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
4763 __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
4764 __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());
4783 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4784 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
4786 __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
4787 __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
4788 __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
4789 __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
4790 __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
4791 __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
4792 __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
4793 __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
4794 __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
4795 __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());
4820 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4821 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
4823 __ Cmeq(d16, d0, d0);
4824 __ Cmeq(d17, d0, d1);
4825 __ Cmeq(d18, d1, d0);
4826 __ Cmge(d19, d0, d0);
4827 __ Cmge(d20, d0, d1);
4828 __ Cmge(d21, d1, d0);
4829 __ Cmgt(d22, d0, d0);
4830 __ Cmgt(d23, d0, d1);
4831 __ Cmhi(d24, d0, d0);
4832 __ Cmhi(d25, d0, d1);
4833 __ Cmhs(d26, d0, d0);
4834 __ Cmhs(d27, d0, d1);
4835 __ Cmhs(d28, d1, d0);
4865 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // 0.
4866 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // NaN.
4867 __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00); // -1.0.
4868 __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00); // 1.0.
4870 __ Fcmeq(v4.V8H(), v0.V8H(), v0.V8H());
4871 __ Fcmeq(v5.V8H(), v1.V8H(), v0.V8H());
4872 __ Fcmeq(v6.V8H(), v2.V8H(), v0.V8H());
4873 __ Fcmeq(v7.V8H(), v3.V8H(), v0.V8H());
4874 __ Fcmeq(v8.V4H(), v0.V4H(), v0.V4H());
4875 __ Fcmeq(v9.V4H(), v1.V4H(), v0.V4H());
4876 __ Fcmeq(v10.V4H(), v2.V4H(), v0.V4H());
4877 __ Fcmeq(v11.V4H(), v3.V4H(), v0.V4H());
4903 __ Fmov(h0, Float16(0.0));
4904 __ Fmov(h1, RawbitsToFloat16(0xffff));
4905 __ Fmov(h2, Float16(-1.0));
4906 __ Fmov(h3, Float16(1.0));
4907 __ Fcmeq(h4, h0, h0);
4908 __ Fcmeq(h5, h1, h0);
4909 __ Fcmeq(h6, h2, h0);
4910 __ Fcmeq(h7, h3, h0);
4931 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // 0.
4932 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // NaN.
4933 __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00); // -1.0.
4934 __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00); // 1.0.
4936 __ Fcmge(v4.V8H(), v0.V8H(), v0.V8H());
4937 __ Fcmge(v5.V8H(), v1.V8H(), v0.V8H());
4938 __ Fcmge(v6.V8H(), v2.V8H(), v0.V8H());
4939 __ Fcmge(v7.V8H(), v3.V8H(), v0.V8H());
4940 __ Fcmge(v8.V4H(), v0.V4H(), v0.V4H());
4941 __ Fcmge(v9.V4H(), v1.V4H(), v0.V4H());
4942 __ Fcmge(v10.V4H(), v2.V4H(), v0.V4H());
4943 __ Fcmge(v11.V4H(), v3.V4H(), v0.V4H());
4969 __ Fmov(h0, Float16(0.0));
4970 __ Fmov(h1, RawbitsToFloat16(0xffff));
4971 __ Fmov(h2, Float16(-1.0));
4972 __ Fmov(h3, Float16(1.0));
4973 __ Fcmge(h4, h0, h0);
4974 __ Fcmge(h5, h1, h0);
4975 __ Fcmge(h6, h2, h0);
4976 __ Fcmge(h7, h3, h0);
4997 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // 0.
4998 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // NaN.
4999 __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00); // -1.0.
5000 __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00); // 1.0.
5002 __ Fcmgt(v4.V8H(), v0.V8H(), v0.V8H());
5003 __ Fcmgt(v5.V8H(), v1.V8H(), v0.V8H());
5004 __ Fcmgt(v6.V8H(), v2.V8H(), v0.V8H());
5005 __ Fcmgt(v7.V8H(), v3.V8H(), v0.V8H());
5006 __ Fcmgt(v8.V4H(), v0.V4H(), v0.V4H());
5007 __ Fcmgt(v9.V4H(), v1.V4H(), v0.V4H());
5008 __ Fcmgt(v10.V4H(), v2.V4H(), v0.V4H());
5009 __ Fcmgt(v11.V4H(), v3.V4H(), v0.V4H());
5035 __ Fmov(h0, Float16(0.0));
5036 __ Fmov(h1, RawbitsToFloat16(0xffff));
5037 __ Fmov(h2, Float16(-1.0));
5038 __ Fmov(h3, Float16(1.0));
5039 __ Fcmgt(h4, h0, h0);
5040 __ Fcmgt(h5, h1, h0);
5041 __ Fcmgt(h6, h2, h0);
5042 __ Fcmgt(h7, h3, h0);
5063 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // 0.
5064 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // NaN.
5065 __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00); // -1.0.
5066 __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00); // 1.0.
5068 __ Facge(v4.V8H(), v0.V8H(), v0.V8H());
5069 __ Facge(v5.V8H(), v1.V8H(), v0.V8H());
5070 __ Facge(v6.V8H(), v2.V8H(), v0.V8H());
5071 __ Facge(v7.V8H(), v3.V8H(), v0.V8H());
5072 __ Facge(v8.V4H(), v0.V4H(), v0.V4H());
5073 __ Facge(v9.V4H(), v1.V4H(), v0.V4H());
5074 __ Facge(v10.V4H(), v2.V4H(), v0.V4H());
5075 __ Facge(v11.V4H(), v3.V4H(), v0.V4H());
5101 __ Fmov(h0, Float16(0.0));
5102 __ Fmov(h1, RawbitsToFloat16(0xffff));
5103 __ Fmov(h2, Float16(-1.0));
5104 __ Fmov(h3, Float16(1.0));
5105 __ Facge(h4, h0, h0);
5106 __ Facge(h5, h1, h0);
5107 __ Facge(h6, h2, h0);
5108 __ Facge(h7, h3, h0);
5129 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // 0.
5130 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // NaN.
5131 __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00); // -1.0.
5132 __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00); // 1.0.
5134 __ Facgt(v4.V8H(), v0.V8H(), v0.V8H());
5135 __ Facgt(v5.V8H(), v1.V8H(), v0.V8H());
5136 __ Facgt(v6.V8H(), v2.V8H(), v0.V8H());
5137 __ Facgt(v7.V8H(), v3.V8H(), v0.V8H());
5138 __ Facgt(v8.V4H(), v0.V4H(), v0.V4H());
5139 __ Facgt(v9.V4H(), v1.V4H(), v0.V4H());
5140 __ Facgt(v10.V4H(), v2.V4H(), v0.V4H());
5141 __ Facgt(v11.V4H(), v3.V4H(), v0.V4H());
5167 __ Fmov(h0, Float16(0.0));
5168 __ Fmov(h1, RawbitsToFloat16(0xffff));
5169 __ Fmov(h2, Float16(-1.0));
5170 __ Fmov(h3, Float16(1.0));
5171 __ Facgt(h4, h0, h0);
5172 __ Facgt(h5, h1, h0);
5173 __ Facgt(h6, h2, h0);
5174 __ Facgt(h7, h3, h0);
5193 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
5194 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
5195 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
5196 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
5198 __ Fcmeq(s16, s0, 0.0);
5199 __ Fcmeq(s17, s1, 0.0);
5200 __ Fcmeq(s18, s2, 0.0);
5201 __ Fcmeq(d19, d0, 0.0);
5202 __ Fcmeq(d20, d1, 0.0);
5203 __ Fcmeq(d21, d2, 0.0);
5204 __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
5205 __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
5206 __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
5207 __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);
5231 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
5232 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
5233 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
5234 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
5236 __ Fcmge(s16, s0, 0.0);
5237 __ Fcmge(s17, s1, 0.0);
5238 __ Fcmge(s18, s2, 0.0);
5239 __ Fcmge(d19, d0, 0.0);
5240 __ Fcmge(d20, d1, 0.0);
5241 __ Fcmge(d21, d3, 0.0);
5242 __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
5243 __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
5244 __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
5245 __ Fcmge(v25.V2D(), v3.V2D(), 0.0);
5270 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
5271 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
5272 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
5273 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
5275 __ Fcmgt(s16, s0, 0.0);
5276 __ Fcmgt(s17, s1, 0.0);
5277 __ Fcmgt(s18, s2, 0.0);
5278 __ Fcmgt(d19, d0, 0.0);
5279 __ Fcmgt(d20, d1, 0.0);
5280 __ Fcmgt(d21, d3, 0.0);
5281 __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
5282 __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
5283 __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
5284 __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);
5308 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
5309 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
5310 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
5311 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
5313 __ Fcmle(s16, s0, 0.0);
5314 __ Fcmle(s17, s1, 0.0);
5315 __ Fcmle(s18, s3, 0.0);
5316 __ Fcmle(d19, d0, 0.0);
5317 __ Fcmle(d20, d1, 0.0);
5318 __ Fcmle(d21, d2, 0.0);
5319 __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
5320 __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
5321 __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
5322 __ Fcmle(v25.V2D(), v2.V2D(), 0.0);
5347 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
5348 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
5349 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
5350 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
5352 __ Fcmlt(s16, s0, 0.0);
5353 __ Fcmlt(s17, s1, 0.0);
5354 __ Fcmlt(s18, s3, 0.0);
5355 __ Fcmlt(d19, d0, 0.0);
5356 __ Fcmlt(d20, d1, 0.0);
5357 __ Fcmlt(d21, d2, 0.0);
5358 __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
5359 __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
5360 __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
5361 __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);
5385 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
5386 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
5388 __ Cmeq(v16.V8B(), v1.V8B(), 0);
5389 __ Cmeq(v17.V16B(), v1.V16B(), 0);
5390 __ Cmeq(v18.V4H(), v1.V4H(), 0);
5391 __ Cmeq(v19.V8H(), v1.V8H(), 0);
5392 __ Cmeq(v20.V2S(), v0.V2S(), 0);
5393 __ Cmeq(v21.V4S(), v0.V4S(), 0);
5394 __ Cmeq(d22, d0, 0);
5395 __ Cmeq(d23, d1, 0);
5396 __ Cmeq(v24.V2D(), v0.V2D(), 0);
5420 __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
5421 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
5423 __ Cmge(v16.V8B(), v1.V8B(), 0);
5424 __ Cmge(v17.V16B(), v1.V16B(), 0);
5425 __ Cmge(v18.V4H(), v1.V4H(), 0);
5426 __ Cmge(v19.V8H(), v1.V8H(), 0);
5427 __ Cmge(v20.V2S(), v0.V2S(), 0);
5428 __ Cmge(v21.V4S(), v0.V4S(), 0);
5429 __ Cmge(d22, d0, 0);
5430 __ Cmge(d23, d1, 0);
5431 __ Cmge(v24.V2D(), v0.V2D(), 0);
5455 __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
5456 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
5458 __ Cmlt(v16.V8B(), v1.V8B(), 0);
5459 __ Cmlt(v17.V16B(), v1.V16B(), 0);
5460 __ Cmlt(v18.V4H(), v1.V4H(), 0);
5461 __ Cmlt(v19.V8H(), v1.V8H(), 0);
5462 __ Cmlt(v20.V2S(), v1.V2S(), 0);
5463 __ Cmlt(v21.V4S(), v1.V4S(), 0);
5464 __ Cmlt(d22, d0, 0);
5465 __ Cmlt(d23, d1, 0);
5466 __ Cmlt(v24.V2D(), v0.V2D(), 0);
5490 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
5491 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
5493 __ Cmle(v16.V8B(), v1.V8B(), 0);
5494 __ Cmle(v17.V16B(), v1.V16B(), 0);
5495 __ Cmle(v18.V4H(), v1.V4H(), 0);
5496 __ Cmle(v19.V8H(), v1.V8H(), 0);
5497 __ Cmle(v20.V2S(), v1.V2S(), 0);
5498 __ Cmle(v21.V4S(), v1.V4S(), 0);
5499 __ Cmle(d22, d0, 0);
5500 __ Cmle(d23, d1, 0);
5501 __ Cmle(v24.V2D(), v0.V2D(), 0);
5525 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
5526 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
5528 __ Cmgt(v16.V8B(), v1.V8B(), 0);
5529 __ Cmgt(v17.V16B(), v1.V16B(), 0);
5530 __ Cmgt(v18.V4H(), v1.V4H(), 0);
5531 __ Cmgt(v19.V8H(), v1.V8H(), 0);
5532 __ Cmgt(v20.V2S(), v0.V2S(), 0);
5533 __ Cmgt(v21.V4S(), v0.V4S(), 0);
5534 __ Cmgt(d22, d0, 0);
5535 __ Cmgt(d23, d1, 0);
5536 __ Cmgt(v24.V2D(), v0.V2D(), 0);
5560 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
5561 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
5562 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
5563 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5564 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
5566 __ Neg(v16.V8B(), v0.V8B());
5567 __ Neg(v17.V16B(), v0.V16B());
5568 __ Neg(v18.V4H(), v1.V4H());
5569 __ Neg(v19.V8H(), v1.V8H());
5570 __ Neg(v20.V2S(), v2.V2S());
5571 __ Neg(v21.V4S(), v2.V4S());
5572 __ Neg(d22, d3);
5573 __ Neg(v23.V2D(), v3.V2D());
5574 __ Neg(v24.V2D(), v4.V2D());
5598 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
5599 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
5600 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
5601 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5602 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
5604 __ Sqneg(v16.V8B(), v0.V8B());
5605 __ Sqneg(v17.V16B(), v0.V16B());
5606 __ Sqneg(v18.V4H(), v1.V4H());
5607 __ Sqneg(v19.V8H(), v1.V8H());
5608 __ Sqneg(v20.V2S(), v2.V2S());
5609 __ Sqneg(v21.V4S(), v2.V4S());
5610 __ Sqneg(v22.V2D(), v3.V2D());
5611 __ Sqneg(v23.V2D(), v4.V2D());
5613 __ Sqneg(b24, b0);
5614 __ Sqneg(h25, h1);
5615 __ Sqneg(s26, s2);
5616 __ Sqneg(d27, d3);
5644 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
5645 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
5646 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
5647 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5648 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
5650 __ Abs(v16.V8B(), v0.V8B());
5651 __ Abs(v17.V16B(), v0.V16B());
5652 __ Abs(v18.V4H(), v1.V4H());
5653 __ Abs(v19.V8H(), v1.V8H());
5654 __ Abs(v20.V2S(), v2.V2S());
5655 __ Abs(v21.V4S(), v2.V4S());
5656 __ Abs(d22, d3);
5657 __ Abs(v23.V2D(), v3.V2D());
5658 __ Abs(v24.V2D(), v4.V2D());
5682 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
5683 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
5684 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
5685 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5686 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
5688 __ Sqabs(v16.V8B(), v0.V8B());
5689 __ Sqabs(v17.V16B(), v0.V16B());
5690 __ Sqabs(v18.V4H(), v1.V4H());
5691 __ Sqabs(v19.V8H(), v1.V8H());
5692 __ Sqabs(v20.V2S(), v2.V2S());
5693 __ Sqabs(v21.V4S(), v2.V4S());
5694 __ Sqabs(v22.V2D(), v3.V2D());
5695 __ Sqabs(v23.V2D(), v4.V2D());
5697 __ Sqabs(b24, b0);
5698 __ Sqabs(h25, h1);
5699 __ Sqabs(s26, s2);
5700 __ Sqabs(d27, d3);
5727 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
5728 __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);
5730 __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
5731 __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);
5733 __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
5734 __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);
5736 __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5737 __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);
5739 __ Mov(v16.V2D(), v0.V2D());
5740 __ Mov(v17.V2D(), v0.V2D());
5741 __ Mov(v18.V2D(), v2.V2D());
5742 __ Mov(v19.V2D(), v2.V2D());
5743 __ Mov(v20.V2D(), v4.V2D());
5744 __ Mov(v21.V2D(), v4.V2D());
5745 __ Mov(v22.V2D(), v6.V2D());
5747 __ Mov(v23.V2D(), v0.V2D());
5748 __ Mov(v24.V2D(), v2.V2D());
5749 __ Mov(v25.V2D(), v4.V2D());
5750 __ Mov(v26.V2D(), v6.V2D());
5752 __ Suqadd(v16.V8B(), v1.V8B());
5753 __ Suqadd(v17.V16B(), v1.V16B());
5754 __ Suqadd(v18.V4H(), v3.V4H());
5755 __ Suqadd(v19.V8H(), v3.V8H());
5756 __ Suqadd(v20.V2S(), v5.V2S());
5757 __ Suqadd(v21.V4S(), v5.V4S());
5758 __ Suqadd(v22.V2D(), v7.V2D());
5760 __ Suqadd(b23, b1);
5761 __ Suqadd(h24, h3);
5762 __ Suqadd(s25, s5);
5763 __ Suqadd(d26, d7);
5789 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
5790 __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);
5792 __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
5793 __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);
5795 __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
5796 __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);
5798 __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
5799 __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);
5801 __ Mov(v16.V2D(), v0.V2D());
5802 __ Mov(v17.V2D(), v0.V2D());
5803 __ Mov(v18.V2D(), v2.V2D());
5804 __ Mov(v19.V2D(), v2.V2D());
5805 __ Mov(v20.V2D(), v4.V2D());
5806 __ Mov(v21.V2D(), v4.V2D());
5807 __ Mov(v22.V2D(), v6.V2D());
5809 __ Mov(v23.V2D(), v0.V2D());
5810 __ Mov(v24.V2D(), v2.V2D());
5811 __ Mov(v25.V2D(), v4.V2D());
5812 __ Mov(v26.V2D(), v6.V2D());
5814 __ Usqadd(v16.V8B(), v1.V8B());
5815 __ Usqadd(v17.V16B(), v1.V16B());
5816 __ Usqadd(v18.V4H(), v3.V4H());
5817 __ Usqadd(v19.V8H(), v3.V8H());
5818 __ Usqadd(v20.V2S(), v5.V2S());
5819 __ Usqadd(v21.V4S(), v5.V4S());
5820 __ Usqadd(v22.V2D(), v7.V2D());
5822 __ Usqadd(b23, b1);
5823 __ Usqadd(h24, h3);
5824 __ Usqadd(s25, s5);
5825 __ Usqadd(d26, d7);
5851 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
5852 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
5853 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
5854 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5855 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
5857 __ Xtn(v16.V8B(), v0.V8H());
5858 __ Xtn2(v16.V16B(), v1.V8H());
5859 __ Xtn(v17.V4H(), v1.V4S());
5860 __ Xtn2(v17.V8H(), v2.V4S());
5861 __ Xtn(v18.V2S(), v3.V2D());
5862 __ Xtn2(v18.V4S(), v4.V2D());
5880 __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
5881 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
5882 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
5883 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5884 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
5886 __ Sqxtn(v16.V8B(), v0.V8H());
5887 __ Sqxtn2(v16.V16B(), v1.V8H());
5888 __ Sqxtn(v17.V4H(), v1.V4S());
5889 __ Sqxtn2(v17.V8H(), v2.V4S());
5890 __ Sqxtn(v18.V2S(), v3.V2D());
5891 __ Sqxtn2(v18.V4S(), v4.V2D());
5892 __ Sqxtn(b19, h0);
5893 __ Sqxtn(h20, s0);
5894 __ Sqxtn(s21, d0);
5915 __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
5916 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
5917 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
5918 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5919 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
5921 __ Uqxtn(v16.V8B(), v0.V8H());
5922 __ Uqxtn2(v16.V16B(), v1.V8H());
5923 __ Uqxtn(v17.V4H(), v1.V4S());
5924 __ Uqxtn2(v17.V8H(), v2.V4S());
5925 __ Uqxtn(v18.V2S(), v3.V2D());
5926 __ Uqxtn2(v18.V4S(), v4.V2D());
5927 __ Uqxtn(b19, h0);
5928 __ Uqxtn(h20, s0);
5929 __ Uqxtn(s21, d0);
5950 __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
5951 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
5952 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
5953 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5954 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
5956 __ Sqxtun(v16.V8B(), v0.V8H());
5957 __ Sqxtun2(v16.V16B(), v1.V8H());
5958 __ Sqxtun(v17.V4H(), v1.V4S());
5959 __ Sqxtun2(v17.V8H(), v2.V4S());
5960 __ Sqxtun(v18.V2S(), v3.V2D());
5961 __ Sqxtun2(v18.V4S(), v4.V2D());
5962 __ Sqxtun(b19, h0);
5963 __ Sqxtun(h20, s0);
5964 __ Sqxtun(s21, d0);
5984 __ Movi(v0.V2D(), 0x5555555555555555, 0x5555555555555555);
5985 __ Movi(v1.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
5986 __ Movi(v2.V2D(), 0x5555555555555555, 0x5555555555555555);
5987 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
5988 __ Movi(v4.V2D(), 0x5555555555555555, 0x5555555555555555);
5989 __ Movi(v5.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
5990 __ Movi(v6.V2D(), 0x5555555555555555, 0x5555555555555555);
5991 __ Movi(v7.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
5993 __ Xtn(v0.V2S(), v0.V2D());
5994 __ Xtn2(v1.V4S(), v1.V2D());
5995 __ Sqxtn(v2.V2S(), v2.V2D());
5996 __ Sqxtn2(v3.V4S(), v3.V2D());
5997 __ Uqxtn(v4.V2S(), v4.V2D());
5998 __ Uqxtn2(v5.V4S(), v5.V2D());
5999 __ Sqxtun(v6.V2S(), v6.V2D());
6000 __ Sqxtun2(v7.V4S(), v7.V2D());
6022 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
6023 __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);
6025 __ And(v16.V16B(), v0.V16B(), v0.V16B()); // self test
6026 __ And(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
6027 __ And(v24.V8B(), v0.V8B(), v0.V8B()); // self test
6028 __ And(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
6045 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
6046 __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);
6048 __ Bic(v16.V16B(), v0.V16B(), v0.V16B()); // self test
6049 __ Bic(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
6050 __ Bic(v24.V8B(), v0.V8B(), v0.V8B()); // self test
6051 __ Bic(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
6068 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
6069 __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);
6071 __ Orr(v16.V16B(), v0.V16B(), v0.V16B()); // self test
6072 __ Orr(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
6073 __ Orr(v24.V8B(), v0.V8B(), v0.V8B()); // self test
6074 __ Orr(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
6091 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
6093 __ Mov(v16.V16B(), v0.V16B());
6094 __ Mov(v17.V8H(), v0.V8H());
6095 __ Mov(v18.V4S(), v0.V4S());
6096 __ Mov(v19.V2D(), v0.V2D());
6098 __ Mov(v24.V8B(), v0.V8B());
6099 __ Mov(v25.V4H(), v0.V4H());
6100 __ Mov(v26.V2S(), v0.V2S());
6122 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
6123 __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);
6125 __ Orn(v16.V16B(), v0.V16B(), v0.V16B()); // self test
6126 __ Orn(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
6127 __ Orn(v24.V8B(), v0.V8B(), v0.V8B()); // self test
6128 __ Orn(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
6145 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
6146 __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);
6148 __ Eor(v16.V16B(), v0.V16B(), v0.V16B()); // self test
6149 __ Eor(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
6150 __ Eor(v24.V8B(), v0.V8B(), v0.V8B()); // self test
6151 __ Eor(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
6168 __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
6169 __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
6170 __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
6172 __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
6173 __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
6174 __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);
6176 __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
6177 __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
6178 __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);
6180 __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
6181 __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
6182 __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
6199 __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
6200 __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
6201 __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
6203 __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
6204 __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
6205 __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);
6207 __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
6208 __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
6209 __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);
6211 __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
6212 __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
6213 __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
6230 __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
6231 __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
6232 __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
6234 __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
6235 __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
6236 __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);
6238 __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
6239 __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
6240 __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);
6242 __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
6243 __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
6244 __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
6262 __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
6263 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6265 __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
6266 __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
6267 __ Smax(v20.V2S(), v0.V2S(), v1.V2S());
6269 __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
6270 __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
6271 __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
6292 __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
6293 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6295 __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
6296 __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
6297 __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
6299 __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
6300 __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
6301 __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
6322 __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
6323 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6324 __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
6326 __ Addp(d16, v0.V2D());
6327 __ Addp(d17, v1.V2D());
6328 __ Addp(d18, v2.V2D());
6346 __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
6347 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6348 __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
6350 __ Addv(b16, v0.V8B());
6351 __ Addv(b17, v0.V16B());
6352 __ Addv(h18, v1.V4H());
6353 __ Addv(h19, v1.V8H());
6354 __ Addv(s20, v2.V4S());
6375 __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
6376 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6377 __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
6379 __ Saddlv(h16, v0.V8B());
6380 __ Saddlv(h17, v0.V16B());
6381 __ Saddlv(s18, v1.V4H());
6382 __ Saddlv(s19, v1.V8H());
6383 __ Saddlv(d20, v2.V4S());
6404 __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
6405 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6406 __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
6408 __ Uaddlv(h16, v0.V8B());
6409 __ Uaddlv(h17, v0.V16B());
6410 __ Uaddlv(s18, v1.V4H());
6411 __ Uaddlv(s19, v1.V8H());
6412 __ Uaddlv(d20, v2.V4S());
6433 __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
6434 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6435 __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
6437 __ Smaxv(b16, v0.V8B());
6438 __ Smaxv(b17, v0.V16B());
6439 __ Smaxv(h18, v1.V4H());
6440 __ Smaxv(h19, v1.V8H());
6441 __ Smaxv(s20, v2.V4S());
6462 __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
6463 __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
6464 __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
6466 __ Sminv(b16, v0.V8B());
6467 __ Sminv(b17, v0.V16B());
6468 __ Sminv(h18, v1.V4H());
6469 __ Sminv(h19, v1.V8H());
6470 __ Sminv(s20, v2.V4S());
6490 __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
6491 __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
6492 __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
6494 __ Umaxv(b16, v0.V8B());
6495 __ Umaxv(b17, v0.V16B());
6496 __ Umaxv(h18, v1.V4H());
6497 __ Umaxv(h19, v1.V8H());
6498 __ Umaxv(s20, v2.V4S());
6519 __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
6520 __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
6521 __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
6523 __ Uminv(b16, v0.V8B());
6524 __ Uminv(b17, v0.V16B());
6525 __ Uminv(h18, v1.V4H());
6526 __ Uminv(h19, v1.V8H());
6527 __ Uminv(s20, v2.V4S());
6548 __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
6549 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6551 __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
6552 __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
6553 __ Smin(v20.V2S(), v0.V2S(), v1.V2S());
6555 __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
6556 __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
6557 __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
6578 __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
6579 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6581 __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
6582 __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
6583 __ Umax(v20.V2S(), v0.V2S(), v1.V2S());
6585 __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
6586 __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
6587 __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
6608 __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
6609 __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
6611 __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
6612 __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
6613 __ Umin(v20.V2S(), v0.V2S(), v1.V2S());
6615 __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
6616 __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
6617 __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
6639 __ Movi(v0.V2D(), 0x0, 0x4014000000000000);
6641 __ Movi(v1.V2D(), 0x4014000000000000, 0x0);
6643 __ Movi(v2.V2D(), 0x4024000000000000, 0x4024000000000000);
6645 __ Movi(v3.V2D(), 0x40A0000040A00000, 0x40A0000040A00000);
6647 __ Movi(v4.V2D(), 0x40A0000040A00000, 0x0);
6649 __ Movi(v5.V2D(), 0x0, 0x489E7AE0467A0000);
6652 __ Fcadd(v31.V2D(), v2.V2D(), v1.V2D(), 90);
6653 __ Fcadd(v31.V2D(), v31.V2D(), v0.V2D(), 270);
6656 __ Fcadd(v30.V2D(), v2.V2D(), v1.V2D(), 270);
6657 __ Fcadd(v30.V2D(), v30.V2D(), v0.V2D(), 90);
6660 __ Fcadd(v29.V2S(), v4.V2S(), v5.V2S(), 90);
6661 __ Fcadd(v28.V2S(), v4.V2S(), v5.V2S(), 270);
6664 __ Fcadd(v27.V4S(), v3.V4S(), v4.V4S(), 90);
6665 __ Fcadd(v26.V4S(), v3.V4S(), v4.V4S(), 270);
6686 __ Movi(v1.V2D(), 0x0, 0x40A0000040400000); // (5i, 3) (f)
6687 __ Movi(v2.V2D(), 0x0, 0x4040000040A00000); // (3i, 5) (f)
6689 __ Movi(v3.V2D(), 0x0, 0x4000000040400000); // (2i, 3) (f)
6690 __ Movi(v4.V2D(), 0x0, 0x40E000003F800000); // (7i, 1) (f)
6692 __ Movi(v5.V2D(), 0x0, 0x4000000040400000); // (2i, 3) (f)
6693 __ Movi(v6.V2D(), 0x0, 0x408000003F800000); // (4i, 1) (f)
6696 __ Movi(v7.V2D(), 0x3FC0000040200000, 0x41FC000044800000);
6698 __ Movi(v8.V2D(), 0x4500000043CE6000, 0x4563D00000000000);
6700 __ Movi(v9.V2D(), 0x409F400000000000, 0x411B774000000000);
6702 __ Movi(v10.V2D(), 0x40DD4C0000000000, 0x4093880000000000);
6705 __ Movi(v24.V2D(), 0x0, 0x0);
6706 __ Movi(v25.V2D(), 0x0, 0x0);
6707 __ Movi(v26.V2D(), 0x0, 0x0);
6708 __ Movi(v27.V2D(), 0x0, 0x0);
6709 __ Movi(v28.V2D(), 0x0, 0x0);
6710 __ Movi(v29.V2D(), 0x0, 0x0);
6711 __ Movi(v30.V2D(), 0x0, 0x0);
6712 __ Movi(v31.V2D(), 0x0, 0x0);
6715 __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 90);
6716 __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 0);
6718 __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 0);
6719 __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 90);
6721 __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 90);
6722 __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 0);
6724 __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 0);
6725 __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 90);
6728 __ Fcmla(v27.V2S(), v1.V2S(), v2.V2S(), 0);
6729 __ Fcmla(v26.V2S(), v2.V2S(), v1.V2S(), 0);
6731 __ Fcmla(v25.V4S(), v7.V4S(), v8.V4S(), 270);
6732 __ Fcmla(v24.V4S(), v7.V4S(), v8.V4S(), 180);
6761 __ Movi(v1.V2D(), 0x40A0000040400000, 0x40A0000040400000);
6763 __ Movi(v2.V2D(), 0x4040000040A00000, 0x4040000040A00000);
6765 __ Movi(v3.V2D(), 0x40E000003F800000, 0x40A0000040400000);
6767 __ Movi(v4.V2D(), 0x408000003F800000, 0x4040000040A00000);
6769 __ Movi(v5.V2D(), 0x408000003F800000, 0x40E000003F800000);
6771 __ Movi(v6.V2D(), 0x4000000040400000, 0x0);
6774 __ Movi(v22.V2D(), 0x0, 0x0);
6775 __ Movi(v23.V2D(), 0x0, 0x0);
6776 __ Movi(v24.V2D(), 0x0, 0x0);
6777 __ Movi(v25.V2D(), 0x0, 0x0);
6778 __ Movi(v26.V2D(), 0x0, 0x0);
6779 __ Movi(v27.V2D(), 0x0, 0x0);
6780 __ Movi(v28.V2D(), 0x0, 0x0);
6781 __ Movi(v29.V2D(), 0x0, 0x0);
6782 __ Movi(v30.V2D(), 0x0, 0x0);
6783 __ Movi(v31.V2D(), 0x0, 0x0);
6786 __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 90);
6787 __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 0);
6788 __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 90);
6789 __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 0);
6792 __ Fcmla(v29.V4S(), v3.V4S(), v4.S(), 1, 0);
6793 __ Fcmla(v28.V4S(), v3.V4S(), v4.S(), 1, 90);
6794 __ Fcmla(v27.V4S(), v3.V4S(), v4.S(), 1, 180);
6795 __ Fcmla(v26.V4S(), v3.V4S(), v4.S(), 1, 270);
6796 __ Fcmla(v25.V4S(), v3.V4S(), v4.S(), 0, 270);
6797 __ Fcmla(v24.V4S(), v3.V4S(), v4.S(), 0, 180);
6798 __ Fcmla(v23.V4S(), v3.V4S(), v4.S(), 0, 90);
6799 __ Fcmla(v22.V4S(), v3.V4S(), v4.S(), 0, 0);
6834 __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
6836 __ Mvn(v16.V16B(), v0.V16B());
6837 __ Mvn(v17.V8H(), v0.V8H());
6838 __ Mvn(v18.V4S(), v0.V4S());
6839 __ Mvn(v19.V2D(), v0.V2D());
6841 __ Mvn(v24.V8B(), v0.V8B());
6842 __ Mvn(v25.V4H(), v0.V4H());
6843 __ Mvn(v26.V2S(), v0.V2S());
6867 __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
6868 __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);
6870 __ Not(v16.V16B(), v0.V16B());
6871 __ Not(v17.V8B(), v1.V8B());
6888 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
6889 __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
6891 __ Cls(v16.V8B(), v1.V8B());
6892 __ Cls(v17.V16B(), v1.V16B());
6893 __ Cls(v18.V4H(), v1.V4H());
6894 __ Cls(v19.V8H(), v1.V8H());
6895 __ Cls(v20.V2S(), v1.V2S());
6896 __ Cls(v21.V4S(), v1.V4S());
6898 __ Clz(v22.V8B(), v0.V8B());
6899 __ Clz(v23.V16B(), v0.V16B());
6900 __ Clz(v24.V4H(), v0.V4H());
6901 __ Clz(v25.V8H(), v0.V8H());
6902 __ Clz(v26.V2S(), v0.V2S());
6903 __ Clz(v27.V4S(), v0.V4S());
6905 __ Cnt(v28.V8B(), v0.V8B());
6906 __ Cnt(v29.V16B(), v1.V16B());
6937 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
6938 __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
6940 __ Rev16(v16.V8B(), v0.V8B());
6941 __ Rev16(v17.V16B(), v0.V16B());
6943 __ Rev32(v18.V8B(), v0.V8B());
6944 __ Rev32(v19.V16B(), v0.V16B());
6945 __ Rev32(v20.V4H(), v0.V4H());
6946 __ Rev32(v21.V8H(), v0.V8H());
6948 __ Rev64(v22.V8B(), v0.V8B());
6949 __ Rev64(v23.V16B(), v0.V16B());
6950 __ Rev64(v24.V4H(), v0.V4H());
6951 __ Rev64(v25.V8H(), v0.V8H());
6952 __ Rev64(v26.V2S(), v0.V2S());
6953 __ Rev64(v27.V4S(), v0.V4S());
6955 __ Rbit(v28.V8B(), v1.V8B());
6956 __ Rbit(v29.V16B(), v1.V16B());
6989 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
6990 __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
6992 __ Mov(v16.V2D(), v0.V2D());
6993 __ Mov(v17.V2D(), v0.V2D());
6994 __ Mov(v18.V2D(), v0.V2D());
6995 __ Mov(v19.V2D(), v0.V2D());
6996 __ Mov(v20.V2D(), v0.V2D());
6997 __ Mov(v21.V2D(), v0.V2D());
6998 __ Mov(v22.V2D(), v0.V2D());
6999 __ Mov(v23.V2D(), v0.V2D());
7001 __ Sli(v16.V8B(), v1.V8B(), 4);
7002 __ Sli(v17.V16B(), v1.V16B(), 7);
7003 __ Sli(v18.V4H(), v1.V4H(), 8);
7004 __ Sli(v19.V8H(), v1.V8H(), 15);
7005 __ Sli(v20.V2S(), v1.V2S(), 0);
7006 __ Sli(v21.V4S(), v1.V4S(), 31);
7007 __ Sli(v22.V2D(), v1.V2D(), 48);
7009 __ Sli(d23, d1, 48);
7034 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
7035 __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
7037 __ Mov(v16.V2D(), v0.V2D());
7038 __ Mov(v17.V2D(), v0.V2D());
7039 __ Mov(v18.V2D(), v0.V2D());
7040 __ Mov(v19.V2D(), v0.V2D());
7041 __ Mov(v20.V2D(), v0.V2D());
7042 __ Mov(v21.V2D(), v0.V2D());
7043 __ Mov(v22.V2D(), v0.V2D());
7044 __ Mov(v23.V2D(), v0.V2D());
7046 __ Sri(v16.V8B(), v1.V8B(), 4);
7047 __ Sri(v17.V16B(), v1.V16B(), 7);
7048 __ Sri(v18.V4H(), v1.V4H(), 8);
7049 __ Sri(v19.V8H(), v1.V8H(), 15);
7050 __ Sri(v20.V2S(), v1.V2S(), 1);
7051 __ Sri(v21.V4S(), v1.V4S(), 31);
7052 __ Sri(v22.V2D(), v1.V2D(), 48);
7054 __ Sri(d23, d1, 48);
7079 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
7080 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
7081 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
7082 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
7083 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
7085 __ Shrn(v16.V8B(), v0.V8H(), 8);
7086 __ Shrn2(v16.V16B(), v1.V8H(), 1);
7087 __ Shrn(v17.V4H(), v1.V4S(), 16);
7088 __ Shrn2(v17.V8H(), v2.V4S(), 1);
7089 __ Shrn(v18.V2S(), v3.V2D(), 32);
7090 __ Shrn2(v18.V4S(), v3.V2D(), 1);
7108 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
7109 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
7110 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
7111 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
7112 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
7114 __ Rshrn(v16.V8B(), v0.V8H(), 8);
7115 __ Rshrn2(v16.V16B(), v1.V8H(), 1);
7116 __ Rshrn(v17.V4H(), v1.V4S(), 16);
7117 __ Rshrn2(v17.V8H(), v2.V4S(), 1);
7118 __ Rshrn(v18.V2S(), v3.V2D(), 32);
7119 __ Rshrn2(v18.V4S(), v3.V2D(), 1);
7137 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
7138 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
7139 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
7140 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
7141 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
7143 __ Uqshrn(v16.V8B(), v0.V8H(), 8);
7144 __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
7145 __ Uqshrn(v17.V4H(), v1.V4S(), 16);
7146 __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
7147 __ Uqshrn(v18.V2S(), v3.V2D(), 32);
7148 __ Uqshrn2(v18.V4S(), v3.V2D(), 1);
7150 __ Uqshrn(b19, h0, 8);
7151 __ Uqshrn(h20, s1, 16);
7152 __ Uqshrn(s21, d3, 32);
7173 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
7174 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
7175 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
7176 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
7177 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
7179 __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
7180 __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
7181 __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
7182 __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
7183 __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
7184 __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);
7186 __ Uqrshrn(b19, h0, 8);
7187 __ Uqrshrn(h20, s1, 16);
7188 __ Uqrshrn(s21, d3, 32);
7209 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
7210 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
7211 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
7212 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
7213 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
7215 __ Sqshrn(v16.V8B(), v0.V8H(), 8);
7216 __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
7217 __ Sqshrn(v17.V4H(), v1.V4S(), 16);
7218 __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
7219 __ Sqshrn(v18.V2S(), v3.V2D(), 32);
7220 __ Sqshrn2(v18.V4S(), v3.V2D(), 1);
7222 __ Sqshrn(b19, h0, 8);
7223 __ Sqshrn(h20, s1, 16);
7224 __ Sqshrn(s21, d3, 32);
7245 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
7246 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
7247 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
7248 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
7249 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
7251 __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
7252 __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
7253 __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
7254 __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
7255 __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
7256 __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);
7258 __ Sqrshrn(b19, h0, 8);
7259 __ Sqrshrn(h20, s1, 16);
7260 __ Sqrshrn(s21, d3, 32);
7281 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
7282 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
7283 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
7284 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
7285 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
7287 __ Sqshrun(v16.V8B(), v0.V8H(), 8);
7288 __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
7289 __ Sqshrun(v17.V4H(), v1.V4S(), 16);
7290 __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
7291 __ Sqshrun(v18.V2S(), v3.V2D(), 32);
7292 __ Sqshrun2(v18.V4S(), v3.V2D(), 1);
7294 __ Sqshrun(b19, h0, 8);
7295 __ Sqshrun(h20, s1, 16);
7296 __ Sqshrun(s21, d3, 32);
7317 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
7318 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
7319 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
7320 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
7321 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
7323 __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
7324 __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
7325 __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
7326 __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
7327 __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
7328 __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);
7330 __ Sqrshrun(b19, h0, 8);
7331 __ Sqrshrun(h20, s1, 16);
7332 __ Sqrshrun(s21, d3, 32);
7352 __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7353 __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7354 __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7355 __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7356 __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7357 __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7358 __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7359 __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7360 __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7361 __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7362 __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7363 __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7365 __ Bic(v16.V4H(), 0x00, 0);
7366 __ Bic(v17.V4H(), 0xff, 8);
7367 __ Bic(v18.V8H(), 0x00, 0);
7368 __ Bic(v19.V8H(), 0xff, 8);
7370 __ Bic(v20.V2S(), 0x00, 0);
7371 __ Bic(v21.V2S(), 0xff, 8);
7372 __ Bic(v22.V2S(), 0x00, 16);
7373 __ Bic(v23.V2S(), 0xff, 24);
7375 __ Bic(v24.V4S(), 0xff, 0);
7376 __ Bic(v25.V4S(), 0x00, 8);
7377 __ Bic(v26.V4S(), 0xff, 16);
7378 __ Bic(v27.V4S(), 0x00, 24);
7408 __ Movi(v0.V4H(), 0xabab);
7409 __ Movi(v1.V4H(), 0xab00);
7410 __ Movi(v2.V4H(), 0xabff);
7411 __ Movi(v3.V8H(), 0x00ab);
7412 __ Movi(v4.V8H(), 0xffab);
7413 __ Movi(v5.V8H(), 0xabcd);
7435 __ Movi(v0.V2S(), 0x000000ab);
7436 __ Movi(v1.V2S(), 0x0000ab00);
7437 __ Movi(v2.V4S(), 0x00ab0000);
7438 __ Movi(v3.V4S(), 0xab000000);
7440 __ Movi(v4.V2S(), 0xffffffab);
7441 __ Movi(v5.V2S(), 0xffffabff);
7442 __ Movi(v6.V4S(), 0xffabffff);
7443 __ Movi(v7.V4S(), 0xabffffff);
7445 __ Movi(v16.V2S(), 0x0000abff);
7446 __ Movi(v17.V2S(), 0x00abffff);
7447 __ Movi(v18.V4S(), 0xffab0000);
7448 __ Movi(v19.V4S(), 0xffffab00);
7450 __ Movi(v20.V4S(), 0xabababab);
7451 __ Movi(v21.V4S(), 0xabcdabcd);
7452 __ Movi(v22.V4S(), 0xabcdef01);
7453 __ Movi(v23.V4S(), 0x00ffff00);
7488 __ Movi(v0.V1D(), 0x00ffff0000ffffff);
7489 __ Movi(v1.V2D(), 0xabababababababab);
7490 __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
7491 __ Movi(v3.V2D(), 0xabcdef01abcdef01);
7492 __ Movi(v4.V1D(), 0xabcdef0123456789);
7493 __ Movi(v5.V2D(), 0xabcdef0123456789);
7515 __ Movi(v0.V8B(), 0xaa);
7516 __ Movi(v1.V16B(), 0x55);
7518 __ Movi(d2, 0x00ffff0000ffffff);
7519 __ Movi(v3.V2D(), 0x00ffff0000ffffff);
7521 __ Movi(v16.V4H(), 0x00, LSL, 0);
7522 __ Movi(v17.V4H(), 0xff, LSL, 8);
7523 __ Movi(v18.V8H(), 0x00, LSL, 0);
7524 __ Movi(v19.V8H(), 0xff, LSL, 8);
7526 __ Movi(v20.V2S(), 0x00, LSL, 0);
7527 __ Movi(v21.V2S(), 0xff, LSL, 8);
7528 __ Movi(v22.V2S(), 0x00, LSL, 16);
7529 __ Movi(v23.V2S(), 0xff, LSL, 24);
7531 __ Movi(v24.V4S(), 0xff, LSL, 0);
7532 __ Movi(v25.V4S(), 0x00, LSL, 8);
7533 __ Movi(v26.V4S(), 0xff, LSL, 16);
7534 __ Movi(v27.V4S(), 0x00, LSL, 24);
7536 __ Movi(v28.V2S(), 0xaa, MSL, 8);
7537 __ Movi(v29.V2S(), 0x55, MSL, 16);
7538 __ Movi(v30.V4S(), 0xff, MSL, 8);
7539 __ Movi(v31.V4S(), 0x00, MSL, 16);
7580 __ Mvni(v16.V4H(), 0x00, LSL, 0);
7581 __ Mvni(v17.V4H(), 0xff, LSL, 8);
7582 __ Mvni(v18.V8H(), 0x00, LSL, 0);
7583 __ Mvni(v19.V8H(), 0xff, LSL, 8);
7585 __ Mvni(v20.V2S(), 0x00, LSL, 0);
7586 __ Mvni(v21.V2S(), 0xff, LSL, 8);
7587 __ Mvni(v22.V2S(), 0x00, LSL, 16);
7588 __ Mvni(v23.V2S(), 0xff, LSL, 24);
7590 __ Mvni(v24.V4S(), 0xff, LSL, 0);
7591 __ Mvni(v25.V4S(), 0x00, LSL, 8);
7592 __ Mvni(v26.V4S(), 0xff, LSL, 16);
7593 __ Mvni(v27.V4S(), 0x00, LSL, 24);
7595 __ Mvni(v28.V2S(), 0xaa, MSL, 8);
7596 __ Mvni(v29.V2S(), 0x55, MSL, 16);
7597 __ Mvni(v30.V4S(), 0xff, MSL, 8);
7598 __ Mvni(v31.V4S(), 0x00, MSL, 16);
7633 __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7634 __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7635 __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7636 __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7637 __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7638 __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7639 __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7640 __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7641 __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7642 __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7643 __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7644 __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
7646 __ Orr(v16.V4H(), 0x00, 0);
7647 __ Orr(v17.V4H(), 0xff, 8);
7648 __ Orr(v18.V8H(), 0x00, 0);
7649 __ Orr(v19.V8H(), 0xff, 8);
7651 __ Orr(v20.V2S(), 0x00, 0);
7652 __ Orr(v21.V2S(), 0xff, 8);
7653 __ Orr(v22.V2S(), 0x00, 16);
7654 __ Orr(v23.V2S(), 0xff, 24);
7656 __ Orr(v24.V4S(), 0xff, 0);
7657 __ Orr(v25.V4S(), 0x00, 8);
7658 __ Orr(v26.V4S(), 0xff, 16);
7659 __ Orr(v27.V4S(), 0x00, 24);
7700 __ Mov(x0, 0);
7703 __ Mov(ref_low64, kHalfValues[i]);
7705 __ Mov(ref_high64, kHalfValues[j]);
7706 __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
7707 __ Mov(loaded_low64, tgt.V2D(), 0);
7708 __ Mov(loaded_high64, tgt.V2D(), 1);
7709 __ Cmp(loaded_low64, ref_low64);
7710 __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
7711 __ Cset(x0, ne);
7732 __ Fmov(v0.V2S(), 20.0);
7733 __ Fmov(v1.V4S(), 1024.0);
7735 __ Fmov(v2.V4H(), RawbitsToFloat16(0xC500U));
7736 __ Fmov(v3.V8H(), RawbitsToFloat16(0x4A80U));
7762 __ Fmov(v11.V2S(), kOne);
7763 __ Fmov(v12.V4S(), kPointFive);
7764 __ Fmov(v22.V2D(), kMinusThirteen);
7765 __ Fmov(v13.V2S(), kNonImmFP32);
7766 __ Fmov(v14.V4S(), kNonImmFP32);
7767 __ Fmov(v23.V2D(), kNonImmFP64);
7768 __ Fmov(v1.V2S(), 0.0);
7769 __ Fmov(v2.V4S(), 0.0);
7770 __ Fmov(v3.V2D(), 0.0);
7771 __ Fmov(v4.V2S(), kFP32PositiveInfinity);
7772 __ Fmov(v5.V4S(), kFP32PositiveInfinity);
7773 __ Fmov(v6.V2D(), kFP64PositiveInfinity);
7812 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
7813 __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
7815 __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
7816 __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
7817 __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
7818 __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
7819 __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
7820 __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());
7842 __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
7843 __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
7844 __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
7845 __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
7846 __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
7847 __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);
7849 __ Dup(v16.V16B(), v0.B(), 0);
7850 __ Dup(v17.V8H(), v1.H(), 7);
7851 __ Dup(v18.V4S(), v1.S(), 3);
7852 __ Dup(v19.V2D(), v0.D(), 0);
7854 __ Dup(v20.V8B(), v0.B(), 0);
7855 __ Dup(v21.V4H(), v1.H(), 7);
7856 __ Dup(v22.V2S(), v1.S(), 3);
7858 __ Dup(v23.B(), v0.B(), 0);
7859 __ Dup(v24.H(), v1.H(), 7);
7860 __ Dup(v25.S(), v1.S(), 3);
7861 __ Dup(v26.D(), v0.D(), 0);
7863 __ Dup(v2.V16B(), v2.B(), 0);
7864 __ Dup(v3.V8H(), v3.H(), 7);
7865 __ Dup(v4.V4S(), v4.S(), 0);
7866 __ Dup(v5.V2D(), v5.D(), 1);
7900 __ Mov(x0, 0x0011223344556677);
7902 __ Dup(v16.V16B(), w0);
7903 __ Dup(v17.V8H(), w0);
7904 __ Dup(v18.V4S(), w0);
7905 __ Dup(v19.V2D(), x0);
7907 __ Dup(v20.V8B(), w0);
7908 __ Dup(v21.V4H(), w0);
7909 __ Dup(v22.V2S(), w0);
7911 __ Dup(v2.V16B(), wzr);
7912 __ Dup(v3.V8H(), wzr);
7913 __ Dup(v4.V4S(), wzr);
7914 __ Dup(v5.V2D(), xzr);
7943 __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
7944 __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
7945 __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
7946 __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
7947 __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
7948 __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
7950 __ Movi(v2.V2D(), 0, 0x0011223344556677);
7951 __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
7952 __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
7953 __ Movi(v5.V2D(), 0, 0x0123456789abcdef);
7955 __ Ins(v16.V16B(), 15, v0.V16B(), 0);
7956 __ Ins(v17.V8H(), 0, v1.V8H(), 7);
7957 __ Ins(v18.V4S(), 3, v1.V4S(), 0);
7958 __ Ins(v19.V2D(), 1, v0.V2D(), 0);
7960 __ Ins(v2.V16B(), 2, v2.V16B(), 0);
7961 __ Ins(v3.V8H(), 0, v3.V8H(), 7);
7962 __ Ins(v4.V4S(), 3, v4.V4S(), 0);
7963 __ Ins(v5.V2D(), 0, v5.V2D(), 1);
7988 __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
7989 __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
7990 __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
7991 __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
7992 __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
7993 __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
7995 __ Movi(v2.V2D(), 0, 0x0011223344556677);
7996 __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
7997 __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
7998 __ Movi(v5.V2D(), 0, 0x0123456789abcdef);
8000 __ Mov(v16.V16B(), 15, v0.V16B(), 0);
8001 __ Mov(v17.V8H(), 0, v1.V8H(), 7);
8002 __ Mov(v18.V4S(), 3, v1.V4S(), 0);
8003 __ Mov(v19.V2D(), 1, v0.V2D(), 0);
8005 __ Mov(v2.V16B(), 2, v2.V16B(), 0);
8006 __ Mov(v3.V8H(), 0, v3.V8H(), 7);
8007 __ Mov(v4.V4S(), 3, v4.V4S(), 0);
8008 __ Mov(v5.V2D(), 0, v5.V2D(), 1);
8033 __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
8035 __ Smov(w0, v0.B(), 7);
8036 __ Smov(w1, v0.B(), 15);
8038 __ Smov(w2, v0.H(), 0);
8039 __ Smov(w3, v0.H(), 3);
8041 __ Smov(x4, v0.B(), 7);
8042 __ Smov(x5, v0.B(), 15);
8044 __ Smov(x6, v0.H(), 0);
8045 __ Smov(x7, v0.H(), 3);
8047 __ Smov(x16, v0.S(), 0);
8048 __ Smov(x17, v0.S(), 1);
8074 __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
8076 __ Umov(w0, v0.B(), 15);
8077 __ Umov(w1, v0.H(), 0);
8078 __ Umov(w2, v0.S(), 3);
8079 __ Umov(x3, v0.D(), 1);
8081 __ Mov(w4, v0.S(), 3);
8082 __ Mov(x5, v0.D(), 1);
8104 __ Mov(x0, 0x0011223344556677);
8105 __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
8106 __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
8107 __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
8108 __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
8110 __ Movi(v2.V2D(), 0, 0x0011223344556677);
8111 __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
8112 __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
8113 __ Movi(v5.V2D(), 0, 0x0123456789abcdef);
8115 __ Ins(v16.V16B(), 15, w0);
8116 __ Ins(v17.V8H(), 0, w0);
8117 __ Ins(v18.V4S(), 3, w0);
8118 __ Ins(v19.V2D(), 0, x0);
8120 __ Ins(v2.V16B(), 2, w0);
8121 __ Ins(v3.V8H(), 0, w0);
8122 __ Ins(v4.V4S(), 3, w0);
8123 __ Ins(v5.V2D(), 1, x0);
8148 __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
8149 __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
8151 __ Movi(v2.V2D(), 0, 0x0011223344556677);
8152 __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
8154 __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
8155 __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
8156 __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8); // Dest is same as one Src
8157 __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8); // All reg are the same
8159 __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
8160 __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
8161 __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4); // Dest is same as one Src
8162 __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4); // All reg are the same
8187 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
8188 __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
8189 __ Movi(v2.V2D(), 0, 0x0101010101010101);
8191 __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
8192 __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
8193 __ Movi(v5.V2D(), 0, 0x0000000180008001);
8194 __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
8195 __ Movi(v7.V2D(), 0, 0x0001000100010001);
8197 __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
8198 __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
8199 __ Movi(v18.V2D(), 0, 0x0000000000000001);
8200 __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
8201 __ Movi(v20.V2D(), 0, 0x0000000100000001);
8203 __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());
8205 __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
8206 __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());
8208 __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
8209 __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());
8231 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8232 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8233 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8234 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
8235 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
8237 __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
8238 __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
8239 __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
8240 __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
8241 __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
8242 __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
8243 __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
8244 __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());
8263 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
8264 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
8265 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
8266 __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
8267 __ Movi(v4.V2D(), 0xffffffffffffffff, -2);
8269 __ Add(d16, d0, d0);
8270 __ Add(d17, d1, d1);
8271 __ Add(d18, d2, d2);
8272 __ Sub(d19, d0, d0);
8273 __ Sub(d20, d0, d1);
8274 __ Sub(d21, d1, d0);
8275 __ Ushl(d22, d0, d3);
8276 __ Ushl(d23, d0, d4);
8277 __ Sshl(d24, d0, d3);
8278 __ Sshl(d25, d0, d4);
8279 __ Ushr(d26, d0, 1);
8280 __ Sshr(d27, d0, 3);
8281 __ Shl(d28, d0, 0);
8282 __ Shl(d29, d0, 16);
8312 __ Movi(v0.V2D(), 0x0, 0x7f);
8313 __ Movi(v1.V2D(), 0x0, 0x80);
8314 __ Movi(v2.V2D(), 0x0, 0x01);
8315 __ Sqshl(b16, b0, 1);
8316 __ Sqshl(b17, b1, 1);
8317 __ Sqshl(b18, b2, 1);
8319 __ Movi(v0.V2D(), 0x0, 0x7fff);
8320 __ Movi(v1.V2D(), 0x0, 0x8000);
8321 __ Movi(v2.V2D(), 0x0, 0x0001);
8322 __ Sqshl(h19, h0, 1);
8323 __ Sqshl(h20, h1, 1);
8324 __ Sqshl(h21, h2, 1);
8326 __ Movi(v0.V2D(), 0x0, 0x7fffffff);
8327 __ Movi(v1.V2D(), 0x0, 0x80000000);
8328 __ Movi(v2.V2D(), 0x0, 0x00000001);
8329 __ Sqshl(s22, s0, 1);
8330 __ Sqshl(s23, s1, 1);
8331 __ Sqshl(s24, s2, 1);
8333 __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
8334 __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
8335 __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
8336 __ Sqshl(d25, d0, 1);
8337 __ Sqshl(d26, d1, 1);
8338 __ Sqshl(d27, d2, 1);
8369 __ Movi(v0.V2D(), 0x0, 0x7f);
8370 __ Movi(v1.V2D(), 0x0, 0x80);
8371 __ Movi(v2.V2D(), 0x0, 0x01);
8372 __ Uqshl(b16, b0, 1);
8373 __ Uqshl(b17, b1, 1);
8374 __ Uqshl(b18, b2, 1);
8376 __ Movi(v0.V2D(), 0x0, 0x7fff);
8377 __ Movi(v1.V2D(), 0x0, 0x8000);
8378 __ Movi(v2.V2D(), 0x0, 0x0001);
8379 __ Uqshl(h19, h0, 1);
8380 __ Uqshl(h20, h1, 1);
8381 __ Uqshl(h21, h2, 1);
8383 __ Movi(v0.V2D(), 0x0, 0x7fffffff);
8384 __ Movi(v1.V2D(), 0x0, 0x80000000);
8385 __ Movi(v2.V2D(), 0x0, 0x00000001);
8386 __ Uqshl(s22, s0, 1);
8387 __ Uqshl(s23, s1, 1);
8388 __ Uqshl(s24, s2, 1);
8390 __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
8391 __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
8392 __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
8393 __ Uqshl(d25, d0, 1);
8394 __ Uqshl(d26, d1, 1);
8395 __ Uqshl(d27, d2, 1);
8426 __ Movi(v0.V2D(), 0x0, 0x7f);
8427 __ Movi(v1.V2D(), 0x0, 0x80);
8428 __ Movi(v2.V2D(), 0x0, 0x01);
8429 __ Sqshlu(b16, b0, 2);
8430 __ Sqshlu(b17, b1, 2);
8431 __ Sqshlu(b18, b2, 2);
8433 __ Movi(v0.V2D(), 0x0, 0x7fff);
8434 __ Movi(v1.V2D(), 0x0, 0x8000);
8435 __ Movi(v2.V2D(), 0x0, 0x0001);
8436 __ Sqshlu(h19, h0, 2);
8437 __ Sqshlu(h20, h1, 2);
8438 __ Sqshlu(h21, h2, 2);
8440 __ Movi(v0.V2D(), 0x0, 0x7fffffff);
8441 __ Movi(v1.V2D(), 0x0, 0x80000000);
8442 __ Movi(v2.V2D(), 0x0, 0x00000001);
8443 __ Sqshlu(s22, s0, 2);
8444 __ Sqshlu(s23, s1, 2);
8445 __ Sqshlu(s24, s2, 2);
8447 __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
8448 __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
8449 __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
8450 __ Sqshlu(d25, d0, 2);
8451 __ Sqshlu(d26, d1, 2);
8452 __ Sqshlu(d27, d2, 2);
8483 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8484 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8485 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8487 __ Sshll(v16.V8H(), v0.V8B(), 4);
8488 __ Sshll2(v17.V8H(), v0.V16B(), 4);
8490 __ Sshll(v18.V4S(), v1.V4H(), 8);
8491 __ Sshll2(v19.V4S(), v1.V8H(), 8);
8493 __ Sshll(v20.V2D(), v2.V2S(), 16);
8494 __ Sshll2(v21.V2D(), v2.V4S(), 16);
8515 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8516 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8517 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8519 __ Shll(v16.V8H(), v0.V8B(), 8);
8520 __ Shll2(v17.V8H(), v0.V16B(), 8);
8522 __ Shll(v18.V4S(), v1.V4H(), 16);
8523 __ Shll2(v19.V4S(), v1.V8H(), 16);
8525 __ Shll(v20.V2D(), v2.V2S(), 32);
8526 __ Shll2(v21.V2D(), v2.V4S(), 32);
8547 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8548 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8549 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8551 __ Ushll(v16.V8H(), v0.V8B(), 4);
8552 __ Ushll2(v17.V8H(), v0.V16B(), 4);
8554 __ Ushll(v18.V4S(), v1.V4H(), 8);
8555 __ Ushll2(v19.V4S(), v1.V8H(), 8);
8557 __ Ushll(v20.V2D(), v2.V2S(), 16);
8558 __ Ushll2(v21.V2D(), v2.V4S(), 16);
8580 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8581 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8582 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8584 __ Sxtl(v16.V8H(), v0.V8B());
8585 __ Sxtl2(v17.V8H(), v0.V16B());
8587 __ Sxtl(v18.V4S(), v1.V4H());
8588 __ Sxtl2(v19.V4S(), v1.V8H());
8590 __ Sxtl(v20.V2D(), v2.V2S());
8591 __ Sxtl2(v21.V2D(), v2.V4S());
8613 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8614 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8615 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8617 __ Uxtl(v16.V8H(), v0.V8B());
8618 __ Uxtl2(v17.V8H(), v0.V16B());
8620 __ Uxtl(v18.V4S(), v1.V4H());
8621 __ Uxtl2(v19.V4S(), v1.V8H());
8623 __ Uxtl(v20.V2D(), v2.V2S());
8624 __ Uxtl2(v21.V2D(), v2.V4S());
8646 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8647 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8648 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8649 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
8650 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
8652 __ Mov(v16.V2D(), v0.V2D());
8653 __ Mov(v17.V2D(), v0.V2D());
8654 __ Mov(v18.V2D(), v1.V2D());
8655 __ Mov(v19.V2D(), v1.V2D());
8656 __ Mov(v20.V2D(), v2.V2D());
8657 __ Mov(v21.V2D(), v2.V2D());
8658 __ Mov(v22.V2D(), v3.V2D());
8659 __ Mov(v23.V2D(), v4.V2D());
8660 __ Mov(v24.V2D(), v3.V2D());
8661 __ Mov(v25.V2D(), v4.V2D());
8663 __ Ssra(v16.V8B(), v0.V8B(), 4);
8664 __ Ssra(v17.V16B(), v0.V16B(), 4);
8666 __ Ssra(v18.V4H(), v1.V4H(), 8);
8667 __ Ssra(v19.V8H(), v1.V8H(), 8);
8669 __ Ssra(v20.V2S(), v2.V2S(), 16);
8670 __ Ssra(v21.V4S(), v2.V4S(), 16);
8672 __ Ssra(v22.V2D(), v3.V2D(), 32);
8673 __ Ssra(v23.V2D(), v4.V2D(), 32);
8675 __ Ssra(d24, d3, 48);
8699 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8700 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8701 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8702 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
8703 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
8705 __ Mov(v16.V2D(), v0.V2D());
8706 __ Mov(v17.V2D(), v0.V2D());
8707 __ Mov(v18.V2D(), v1.V2D());
8708 __ Mov(v19.V2D(), v1.V2D());
8709 __ Mov(v20.V2D(), v2.V2D());
8710 __ Mov(v21.V2D(), v2.V2D());
8711 __ Mov(v22.V2D(), v3.V2D());
8712 __ Mov(v23.V2D(), v4.V2D());
8713 __ Mov(v24.V2D(), v3.V2D());
8714 __ Mov(v25.V2D(), v4.V2D());
8716 __ Srsra(v16.V8B(), v0.V8B(), 4);
8717 __ Srsra(v17.V16B(), v0.V16B(), 4);
8719 __ Srsra(v18.V4H(), v1.V4H(), 8);
8720 __ Srsra(v19.V8H(), v1.V8H(), 8);
8722 __ Srsra(v20.V2S(), v2.V2S(), 16);
8723 __ Srsra(v21.V4S(), v2.V4S(), 16);
8725 __ Srsra(v22.V2D(), v3.V2D(), 32);
8726 __ Srsra(v23.V2D(), v4.V2D(), 32);
8728 __ Srsra(d24, d3, 48);
8752 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8753 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8754 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8755 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
8756 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
8758 __ Mov(v16.V2D(), v0.V2D());
8759 __ Mov(v17.V2D(), v0.V2D());
8760 __ Mov(v18.V2D(), v1.V2D());
8761 __ Mov(v19.V2D(), v1.V2D());
8762 __ Mov(v20.V2D(), v2.V2D());
8763 __ Mov(v21.V2D(), v2.V2D());
8764 __ Mov(v22.V2D(), v3.V2D());
8765 __ Mov(v23.V2D(), v4.V2D());
8766 __ Mov(v24.V2D(), v3.V2D());
8767 __ Mov(v25.V2D(), v4.V2D());
8769 __ Usra(v16.V8B(), v0.V8B(), 4);
8770 __ Usra(v17.V16B(), v0.V16B(), 4);
8772 __ Usra(v18.V4H(), v1.V4H(), 8);
8773 __ Usra(v19.V8H(), v1.V8H(), 8);
8775 __ Usra(v20.V2S(), v2.V2S(), 16);
8776 __ Usra(v21.V4S(), v2.V4S(), 16);
8778 __ Usra(v22.V2D(), v3.V2D(), 32);
8779 __ Usra(v23.V2D(), v4.V2D(), 32);
8781 __ Usra(d24, d3, 48);
8805 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
8806 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
8807 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
8808 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
8809 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
8811 __ Mov(v16.V2D(), v0.V2D());
8812 __ Mov(v17.V2D(), v0.V2D());
8813 __ Mov(v18.V2D(), v1.V2D());
8814 __ Mov(v19.V2D(), v1.V2D());
8815 __ Mov(v20.V2D(), v2.V2D());
8816 __ Mov(v21.V2D(), v2.V2D());
8817 __ Mov(v22.V2D(), v3.V2D());
8818 __ Mov(v23.V2D(), v4.V2D());
8819 __ Mov(v24.V2D(), v3.V2D());
8820 __ Mov(v25.V2D(), v4.V2D());
8822 __ Ursra(v16.V8B(), v0.V8B(), 4);
8823 __ Ursra(v17.V16B(), v0.V16B(), 4);
8825 __ Ursra(v18.V4H(), v1.V4H(), 8);
8826 __ Ursra(v19.V8H(), v1.V8H(), 8);
8828 __ Ursra(v20.V2S(), v2.V2S(), 16);
8829 __ Ursra(v21.V4S(), v2.V4S(), 16);
8831 __ Ursra(v22.V2D(), v3.V2D(), 32);
8832 __ Ursra(v23.V2D(), v4.V2D(), 32);
8834 __ Ursra(d24, d3, 48);
8859 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
8860 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
8861 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
8862 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
8864 __ Uqshl(b16, b0, b2);
8865 __ Uqshl(b17, b0, b3);
8866 __ Uqshl(b18, b1, b2);
8867 __ Uqshl(b19, b1, b3);
8868 __ Uqshl(h20, h0, h2);
8869 __ Uqshl(h21, h0, h3);
8870 __ Uqshl(h22, h1, h2);
8871 __ Uqshl(h23, h1, h3);
8872 __ Uqshl(s24, s0, s2);
8873 __ Uqshl(s25, s0, s3);
8874 __ Uqshl(s26, s1, s2);
8875 __ Uqshl(s27, s1, s3);
8876 __ Uqshl(d28, d0, d2);
8877 __ Uqshl(d29, d0, d3);
8878 __ Uqshl(d30, d1, d2);
8879 __ Uqshl(d31, d1, d3);
8911 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
8912 __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
8913 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
8914 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
8916 __ Sqshl(b16, b0, b2);
8917 __ Sqshl(b17, b0, b3);
8918 __ Sqshl(b18, b1, b2);
8919 __ Sqshl(b19, b1, b3);
8920 __ Sqshl(h20, h0, h2);
8921 __ Sqshl(h21, h0, h3);
8922 __ Sqshl(h22, h1, h2);
8923 __ Sqshl(h23, h1, h3);
8924 __ Sqshl(s24, s0, s2);
8925 __ Sqshl(s25, s0, s3);
8926 __ Sqshl(s26, s1, s2);
8927 __ Sqshl(s27, s1, s3);
8928 __ Sqshl(d28, d0, d2);
8929 __ Sqshl(d29, d0, d3);
8930 __ Sqshl(d30, d1, d2);
8931 __ Sqshl(d31, d1, d3);
8963 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
8964 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
8965 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
8966 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
8968 __ Urshl(d28, d0, d2);
8969 __ Urshl(d29, d0, d3);
8970 __ Urshl(d30, d1, d2);
8971 __ Urshl(d31, d1, d3);
8991 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
8992 __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
8993 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
8994 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
8996 __ Srshl(d28, d0, d2);
8997 __ Srshl(d29, d0, d3);
8998 __ Srshl(d30, d1, d2);
8999 __ Srshl(d31, d1, d3);
9019 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
9020 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
9021 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
9022 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
9024 __ Uqrshl(b16, b0, b2);
9025 __ Uqrshl(b17, b0, b3);
9026 __ Uqrshl(b18, b1, b2);
9027 __ Uqrshl(b19, b1, b3);
9028 __ Uqrshl(h20, h0, h2);
9029 __ Uqrshl(h21, h0, h3);
9030 __ Uqrshl(h22, h1, h2);
9031 __ Uqrshl(h23, h1, h3);
9032 __ Uqrshl(s24, s0, s2);
9033 __ Uqrshl(s25, s0, s3);
9034 __ Uqrshl(s26, s1, s2);
9035 __ Uqrshl(s27, s1, s3);
9036 __ Uqrshl(d28, d0, d2);
9037 __ Uqrshl(d29, d0, d3);
9038 __ Uqrshl(d30, d1, d2);
9039 __ Uqrshl(d31, d1, d3);
9071 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
9072 __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
9073 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
9074 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
9076 __ Sqrshl(b16, b0, b2);
9077 __ Sqrshl(b17, b0, b3);
9078 __ Sqrshl(b18, b1, b2);
9079 __ Sqrshl(b19, b1, b3);
9080 __ Sqrshl(h20, h0, h2);
9081 __ Sqrshl(h21, h0, h3);
9082 __ Sqrshl(h22, h1, h2);
9083 __ Sqrshl(h23, h1, h3);
9084 __ Sqrshl(s24, s0, s2);
9085 __ Sqrshl(s25, s0, s3);
9086 __ Sqrshl(s26, s1, s2);
9087 __ Sqrshl(s27, s1, s3);
9088 __ Sqrshl(d28, d0, d2);
9089 __ Sqrshl(d29, d0, d3);
9090 __ Sqrshl(d30, d1, d2);
9091 __ Sqrshl(d31, d1, d3);
9123 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
9124 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
9125 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
9127 __ Uqadd(b16, b0, b0);
9128 __ Uqadd(b17, b1, b1);
9129 __ Uqadd(b18, b2, b2);
9130 __ Uqadd(h19, h0, h0);
9131 __ Uqadd(h20, h1, h1);
9132 __ Uqadd(h21, h2, h2);
9133 __ Uqadd(s22, s0, s0);
9134 __ Uqadd(s23, s1, s1);
9135 __ Uqadd(s24, s2, s2);
9136 __ Uqadd(d25, d0, d0);
9137 __ Uqadd(d26, d1, d1);
9138 __ Uqadd(d27, d2, d2);
9166 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
9167 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
9168 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
9170 __ Sqadd(b16, b0, b0);
9171 __ Sqadd(b17, b1, b1);
9172 __ Sqadd(b18, b2, b2);
9173 __ Sqadd(h19, h0, h0);
9174 __ Sqadd(h20, h1, h1);
9175 __ Sqadd(h21, h2, h2);
9176 __ Sqadd(s22, s0, s0);
9177 __ Sqadd(s23, s1, s1);
9178 __ Sqadd(s24, s2, s2);
9179 __ Sqadd(d25, d0, d0);
9180 __ Sqadd(d26, d1, d1);
9181 __ Sqadd(d27, d2, d2);
9209 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
9210 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
9212 __ Uqsub(b16, b0, b0);
9213 __ Uqsub(b17, b0, b1);
9214 __ Uqsub(b18, b1, b0);
9215 __ Uqsub(h19, h0, h0);
9216 __ Uqsub(h20, h0, h1);
9217 __ Uqsub(h21, h1, h0);
9218 __ Uqsub(s22, s0, s0);
9219 __ Uqsub(s23, s0, s1);
9220 __ Uqsub(s24, s1, s0);
9221 __ Uqsub(d25, d0, d0);
9222 __ Uqsub(d26, d0, d1);
9223 __ Uqsub(d27, d1, d0);
9254 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
9255 __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);
9257 __ Sqsub(b16, b0, b0);
9258 __ Sqsub(b17, b0, b1);
9259 __ Sqsub(b18, b1, b0);
9260 __ Sqsub(h19, h0, h0);
9261 __ Sqsub(h20, h0, h1);
9262 __ Sqsub(h21, h1, h0);
9263 __ Sqsub(s22, s0, s0);
9264 __ Sqsub(s23, s0, s1);
9265 __ Sqsub(s24, s1, s0);
9266 __ Sqsub(d25, d0, d0);
9267 __ Sqsub(d26, d0, d1);
9268 __ Sqsub(d27, d1, d0);
9298 __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
9299 __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
9300 __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);
9301 __ Mov(v16.V16B(), v0.V16B());
9302 __ Mov(v17.V16B(), v0.V16B());
9303 __ Mov(v18.V16B(), v0.V16B());
9304 __ Mov(v19.V16B(), v0.V16B());
9305 __ Mov(v20.V16B(), v0.V16B());
9306 __ Mov(v21.V16B(), v0.V16B());
9308 __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
9309 __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
9310 __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
9311 __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
9312 __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
9313 __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
9335 __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
9336 __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
9337 __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
9338 __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
9339 __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
9340 __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
9341 __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
9342 __ Mov(v16.V2D(), v0.V2D());
9343 __ Mov(v17.V2D(), v0.V2D());
9344 __ Mov(v18.V2D(), v4.V2D());
9345 __ Mov(v19.V2D(), v5.V2D());
9346 __ Mov(v20.V2D(), v0.V2D());
9347 __ Mov(v21.V2D(), v0.V2D());
9348 __ Mov(v22.V2D(), v4.V2D());
9349 __ Mov(v23.V2D(), v5.V2D());
9351 __ Fmla(v16.V8H(), v0.V8H(), v1.V8H());
9352 __ Fmla(v17.V8H(), v2.V8H(), v3.V8H());
9353 __ Fmla(v18.V8H(), v2.V8H(), v6.V8H());
9354 __ Fmla(v19.V8H(), v3.V8H(), v6.V8H());
9355 __ Fmla(v20.V4H(), v0.V4H(), v1.V4H());
9356 __ Fmla(v21.V4H(), v2.V4H(), v3.V4H());
9357 __ Fmla(v22.V4H(), v2.V4H(), v6.V4H());
9358 __ Fmla(v23.V4H(), v3.V4H(), v6.V4H());
9382 __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
9383 __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
9384 __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
9385 __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
9386 __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
9387 __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
9388 __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
9389 __ Mov(v16.V2D(), v0.V2D());
9390 __ Mov(v17.V2D(), v0.V2D());
9391 __ Mov(v18.V2D(), v4.V2D());
9392 __ Mov(v19.V2D(), v5.V2D());
9393 __ Mov(v20.V2D(), v0.V2D());
9394 __ Mov(v21.V2D(), v0.V2D());
9395 __ Mov(v22.V2D(), v4.V2D());
9396 __ Mov(v23.V2D(), v5.V2D());
9398 __ Fmls(v16.V8H(), v0.V8H(), v1.V8H());
9399 __ Fmls(v17.V8H(), v2.V8H(), v3.V8H());
9400 __ Fmls(v18.V8H(), v2.V8H(), v6.V8H());
9401 __ Fmls(v19.V8H(), v3.V8H(), v6.V8H());
9402 __ Fmls(v20.V4H(), v0.V4H(), v1.V4H());
9403 __ Fmls(v21.V4H(), v2.V4H(), v3.V4H());
9404 __ Fmls(v22.V4H(), v2.V4H(), v6.V4H());
9405 __ Fmls(v23.V4H(), v3.V4H(), v6.V4H());
9442 __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
9443 __ Movi(v31.V8H(), 0xfc00000080003bff, 0x7bff000100007bff);
9448 __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x8000000043c00000);
9451 __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7800001);
9456 __ Movi(v2.V4S(), 0x3f8000003f800000, 0x0000000043000000);
9461 __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
9464 __ Fneg(v4.V4S(), v0.V4S());
9465 __ Fneg(v5.V4S(), v1.V4S());
9466 __ Fneg(v6.V4S(), v2.V4S());
9467 __ Fneg(v7.V4S(), v3.V4S());
9469 __ Fmlal(v0.V2S(), v30.V2H(), v31.V2H());
9470 __ Fmlal2(v1.V2S(), v30.V2H(), v31.V2H());
9471 __ Fmlal(v2.V4S(), v30.V4H(), v31.V4H());
9472 __ Fmlal2(v3.V4S(), v30.V4H(), v31.V4H());
9474 __ Fmlsl(v4.V2S(), v30.V2H(), v31.V2H());
9475 __ Fmlsl2(v5.V2S(), v30.V2H(), v31.V2H());
9476 __ Fmlsl(v6.V4S(), v30.V4H(), v31.V4H());
9477 __ Fmlsl2(v7.V4S(), v30.V4H(), v31.V4H());
9533 __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
9540 __ Movi(v29.V8H(), 0x7f417f417f417f41, 0x7f417f417f417f41);
9543 __ Movi(v31.V8H(), 0x3bff3c013bff3c01, 0x3bff3c013bff3c01);
9547 __ Mov(vm, poison);
9548 __ Ins(vm.V8H(), i, v31.V8H(), i);
9554 __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x3f8000003b800000);
9557 __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7400000);
9562 __ Movi(v2.V4S(), 0x7f80000027c00000, 0xc680000000000000);
9567 __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
9570 __ Fneg(v4.V4S(), v0.V4S());
9571 __ Fneg(v5.V4S(), v1.V4S());
9572 __ Fneg(v6.V4S(), v2.V4S());
9573 __ Fneg(v7.V4S(), v3.V4S());
9575 __ Fmlal(v0.V2S(), v30.V2H(), v8.H(), 0);
9576 __ Fmlal2(v1.V2S(), v30.V2H(), v9.H(), 1);
9577 __ Fmlal(v2.V4S(), v30.V4H(), v10.H(), 2);
9578 __ Fmlal2(v3.V4S(), v30.V4H(), v11.H(), 3);
9580 __ Fmlsl(v4.V2S(), v30.V2H(), v12.H(), 4);
9581 __ Fmlsl2(v5.V2S(), v30.V2H(), v13.H(), 5);
9582 __ Fmlsl(v6.V4S(), v30.V4H(), v14.H(), 6);
9583 __ Fmlsl2(v7.V4S(), v30.V4H(), v15.H(), 7);
9624 __ Fmov(s0, 2.0);
9625 __ Fmov(s1, 0.5);
9626 __ Fmov(s2, 0.0);
9627 __ Fmov(s3, -0.0);
9628 __ Fmov(s4, kFP32PositiveInfinity);
9629 __ Fmov(s5, kFP32NegativeInfinity);
9630 __ Fmulx(s16, s0, s1);
9631 __ Fmulx(s17, s2, s4);
9632 __ Fmulx(s18, s2, s5);
9633 __ Fmulx(s19, s3, s4);
9634 __ Fmulx(s20, s3, s5);
9636 __ Fmov(d21, 2.0);
9637 __ Fmov(d22, 0.5);
9638 __ Fmov(d23, 0.0);
9639 __ Fmov(d24, -0.0);
9640 __ Fmov(d25, kFP64PositiveInfinity);
9641 __ Fmov(d26, kFP64NegativeInfinity);
9642 __ Fmulx(d27, d21, d22);
9643 __ Fmulx(d28, d23, d25);
9644 __ Fmulx(d29, d23, d26);
9645 __ Fmulx(d30, d24, d25);
9646 __ Fmulx(d31, d24, d26);
9672 __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
9673 __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
9674 __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
9675 __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
9676 __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
9677 __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
9678 __ Fmulx(v6.V8H(), v0.V8H(), v1.V8H());
9679 __ Fmulx(v7.V8H(), v2.V8H(), v4.V8H());
9680 __ Fmulx(v8.V8H(), v2.V8H(), v5.V8H());
9681 __ Fmulx(v9.V8H(), v3.V8H(), v4.V8H());
9682 __ Fmulx(v10.V8H(), v3.V8H(), v5.V8H());
9683 __ Fmulx(v11.V4H(), v0.V4H(), v1.V4H());
9684 __ Fmulx(v12.V4H(), v2.V4H(), v4.V4H());
9685 __ Fmulx(v13.V4H(), v2.V4H(), v5.V4H());
9686 __ Fmulx(v14.V4H(), v3.V4H(), v4.V4H());
9687 __ Fmulx(v15.V4H(), v3.V4H(), v5.V4H());
9713 __ Fmov(h0, Float16(2.0));
9714 __ Fmov(h1, Float16(0.5));
9715 __ Fmov(h2, Float16(0.0));
9716 __ Fmov(h3, Float16(-0.0));
9717 __ Fmov(h4, kFP16PositiveInfinity);
9718 __ Fmov(h5, kFP16NegativeInfinity);
9719 __ Fmulx(h6, h0, h1);
9720 __ Fmulx(h7, h2, h4);
9721 __ Fmulx(h8, h2, h5);
9722 __ Fmulx(h9, h3, h4);
9723 __ Fmulx(h10, h3, h5);
9742 __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
9743 __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
9744 __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
9745 __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
9746 __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
9747 __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
9749 __ Fabd(v6.V8H(), v1.V8H(), v0.V8H());
9750 __ Fabd(v7.V8H(), v2.V8H(), v3.V8H());
9751 __ Fabd(v8.V8H(), v2.V8H(), v5.V8H());
9752 __ Fabd(v9.V8H(), v3.V8H(), v4.V8H());
9753 __ Fabd(v10.V8H(), v3.V8H(), v5.V8H());
9754 __ Fabd(v11.V4H(), v1.V4H(), v0.V4H());
9755 __ Fabd(v12.V4H(), v2.V4H(), v3.V4H());
9756 __ Fabd(v13.V4H(), v2.V4H(), v5.V4H());
9757 __ Fabd(v14.V4H(), v3.V4H(), v4.V4H());
9758 __ Fabd(v15.V4H(), v3.V4H(), v5.V4H());
9785 __ Fmov(h0, Float16(2.0));
9786 __ Fmov(h1, Float16(0.5));
9787 __ Fmov(h2, Float16(0.0));
9788 __ Fmov(h3, Float16(-0.0));
9789 __ Fmov(h4, kFP16PositiveInfinity);
9790 __ Fmov(h5, kFP16NegativeInfinity);
9791 __ Fabd(h16, h1, h0);
9792 __ Fabd(h17, h2, h3);
9793 __ Fabd(h18, h2, h5);
9794 __ Fabd(h19, h3, h4);
9795 __ Fabd(h20, h3, h5);
9813 __ Fmov(s0, 2.0);
9814 __ Fmov(s1, 0.5);
9815 __ Fmov(s2, 0.0);
9816 __ Fmov(s3, -0.0);
9817 __ Fmov(s4, kFP32PositiveInfinity);
9818 __ Fmov(s5, kFP32NegativeInfinity);
9819 __ Fabd(s16, s1, s0);
9820 __ Fabd(s17, s2, s3);
9821 __ Fabd(s18, s2, s5);
9822 __ Fabd(s19, s3, s4);
9823 __ Fabd(s20, s3, s5);
9825 __ Fmov(d21, 2.0);
9826 __ Fmov(d22, 0.5);
9827 __ Fmov(d23, 0.0);
9828 __ Fmov(d24, -0.0);
9829 __ Fmov(d25, kFP64PositiveInfinity);
9830 __ Fmov(d26, kFP64NegativeInfinity);
9831 __ Fabd(d27, d21, d22);
9832 __ Fabd(d28, d23, d24);
9833 __ Fabd(d29, d23, d26);
9834 __ Fabd(d30, d24, d25);
9835 __ Fabd(d31, d24, d26);
9861 __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
9862 __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
9863 __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
9864 __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
9865 __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
9867 __ Frecps(v5.V8H(), v0.V8H(), v2.V8H());
9868 __ Frecps(v6.V8H(), v1.V8H(), v2.V8H());
9869 __ Frecps(v7.V8H(), v0.V8H(), v3.V8H());
9870 __ Frecps(v8.V8H(), v0.V8H(), v4.V8H());
9871 __ Frecps(v9.V4H(), v0.V4H(), v2.V4H());
9872 __ Frecps(v10.V4H(), v1.V4H(), v2.V4H());
9873 __ Frecps(v11.V4H(), v0.V4H(), v3.V4H());
9874 __ Frecps(v12.V4H(), v0.V4H(), v4.V4H());
9899 __ Fmov(h0, Float16(2.0));
9900 __ Fmov(h1, Float16(-1.0));
9901 __ Fmov(h2, Float16(45.0));
9902 __ Fmov(h3, kFP16PositiveInfinity);
9903 __ Fmov(h4, kFP16NegativeInfinity);
9905 __ Frecps(h5, h0, h2);
9906 __ Frecps(h6, h1, h2);
9907 __ Frecps(h7, h0, h3);
9908 __ Frecps(h8, h0, h4);
9928 __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
9929 __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
9930 __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
9931 __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
9932 __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
9934 __ Frsqrts(v5.V8H(), v0.V8H(), v2.V8H());
9935 __ Frsqrts(v6.V8H(), v1.V8H(), v2.V8H());
9936 __ Frsqrts(v7.V8H(), v0.V8H(), v3.V8H());
9937 __ Frsqrts(v8.V8H(), v0.V8H(), v4.V8H());
9938 __ Frsqrts(v9.V4H(), v0.V4H(), v2.V4H());
9939 __ Frsqrts(v10.V4H(), v1.V4H(), v2.V4H());
9940 __ Frsqrts(v11.V4H(), v0.V4H(), v3.V4H());
9941 __ Frsqrts(v12.V4H(), v0.V4H(), v4.V4H());
9966 __ Fmov(h0, Float16(2.0));
9967 __ Fmov(h1, Float16(-1.0));
9968 __ Fmov(h2, Float16(45.0));
9969 __ Fmov(h3, kFP16PositiveInfinity);
9970 __ Fmov(h4, kFP16NegativeInfinity);
9972 __ Frsqrts(h5, h0, h2);
9973 __ Frsqrts(h6, h1, h2);
9974 __ Frsqrts(h7, h0, h3);
9975 __ Frsqrts(h8, h0, h4);
9995 __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
9996 __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
9997 __ Movi(v2.V2D(), 0x0000800000008000, 0x0000800000008000);
9998 __ Movi(v3.V2D(), 0x7e007c017e007c01, 0x7e007c017e007c01);
10000 __ Faddp(v4.V8H(), v1.V8H(), v0.V8H());
10001 __ Faddp(v5.V8H(), v3.V8H(), v2.V8H());
10002 __ Faddp(v6.V4H(), v1.V4H(), v0.V4H());
10003 __ Faddp(v7.V4H(), v3.V4H(), v2.V4H());
10021 __ Movi(d0, 0x3f80000040000000);
10022 __ Movi(d1, 0xff8000007f800000);
10023 __ Movi(d2, 0x0000000080000000);
10024 __ Faddp(s0, v0.V2S());
10025 __ Faddp(s1, v1.V2S());
10026 __ Faddp(s2, v2.V2S());
10028 __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
10029 __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
10030 __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
10031 __ Faddp(d3, v3.V2D());
10032 __ Faddp(d4, v4.V2D());
10033 __ Faddp(d5, v5.V2D());
10055 __ Movi(s0, 0x3c004000);
10056 __ Movi(s1, 0xfc007c00);
10057 __ Movi(s2, 0x00008000);
10058 __ Faddp(h0, v0.V2H());
10059 __ Faddp(h1, v1.V2H());
10060 __ Faddp(h2, v2.V2H());
10077 __ Movi(d0, 0x3f80000040000000);
10078 __ Movi(d1, 0xff8000007f800000);
10079 __ Movi(d2, 0x7fc00000ff800000);
10080 __ Fmaxp(s0, v0.V2S());
10081 __ Fmaxp(s1, v1.V2S());
10082 __ Fmaxp(s2, v2.V2S());
10084 __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
10085 __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
10086 __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
10087 __ Fmaxp(d3, v3.V2D());
10088 __ Fmaxp(d4, v4.V2D());
10089 __ Fmaxp(d5, v5.V2D());
10111 __ Movi(s0, 0x3c004000);
10112 __ Movi(s1, 0xfc007c00);
10113 __ Movi(s2, 0x7e00fc00);
10114 __ Fmaxp(h0, v0.V2H());
10115 __ Fmaxp(h1, v1.V2H());
10116 __ Fmaxp(h2, v2.V2H());
10135 __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
10136 __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
10137 __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
10138 __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
10139 __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
10140 __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
10142 __ Fmax(v6.V8H(), v0.V8H(), v1.V8H());
10143 __ Fmax(v7.V8H(), v2.V8H(), v3.V8H());
10144 __ Fmax(v8.V8H(), v4.V8H(), v0.V8H());
10145 __ Fmax(v9.V8H(), v5.V8H(), v1.V8H());
10146 __ Fmax(v10.V4H(), v0.V4H(), v1.V4H());
10147 __ Fmax(v11.V4H(), v2.V4H(), v3.V4H());
10148 __ Fmax(v12.V4H(), v4.V4H(), v0.V4H());
10149 __ Fmax(v13.V4H(), v5.V4H(), v1.V4H());
10173 __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
10174 __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
10175 __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
10176 __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);
10178 __ Fmaxp(v6.V8H(), v0.V8H(), v1.V8H());
10179 __ Fmaxp(v7.V8H(), v2.V8H(), v3.V8H());
10180 __ Fmaxp(v8.V4H(), v0.V4H(), v1.V4H());
10181 __ Fmaxp(v9.V4H(), v2.V4H(), v3.V4H());
10201 __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
10202 __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
10203 __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
10204 __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
10205 __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
10206 __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
10208 __ Fmaxnm(v6.V8H(), v0.V8H(), v1.V8H());
10209 __ Fmaxnm(v7.V8H(), v2.V8H(), v3.V8H());
10210 __ Fmaxnm(v8.V8H(), v4.V8H(), v0.V8H());
10211 __ Fmaxnm(v9.V8H(), v5.V8H(), v1.V8H());
10212 __ Fmaxnm(v10.V4H(), v0.V4H(), v1.V4H());
10213 __ Fmaxnm(v11.V4H(), v2.V4H(), v3.V4H());
10214 __ Fmaxnm(v12.V4H(), v4.V4H(), v0.V4H());
10215 __ Fmaxnm(v13.V4H(), v5.V4H(), v1.V4H());
10239 __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
10240 __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
10241 __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
10242 __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);
10244 __ Fmaxnmp(v6.V8H(), v0.V8H(), v1.V8H());
10245 __ Fmaxnmp(v7.V8H(), v2.V8H(), v3.V8H());
10246 __ Fmaxnmp(v8.V4H(), v0.V4H(), v1.V4H());
10247 __ Fmaxnmp(v9.V4H(), v2.V4H(), v3.V4H());
10265 __ Movi(d0, 0x3f80000040000000);
10266 __ Movi(d1, 0xff8000007f800000);
10267 __ Movi(d2, 0x7fc00000ff800000);
10268 __ Fmaxnmp(s0, v0.V2S());
10269 __ Fmaxnmp(s1, v1.V2S());
10270 __ Fmaxnmp(s2, v2.V2S());
10272 __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
10273 __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
10274 __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
10275 __ Fmaxnmp(d3, v3.V2D());
10276 __ Fmaxnmp(d4, v4.V2D());
10277 __ Fmaxnmp(d5, v5.V2D());
10299 __ Movi(s0, 0x3c004000);
10300 __ Movi(s1, 0xfc007c00);
10301 __ Movi(s2, 0x7e00fc00);
10302 __ Fmaxnmp(h0, v0.V2H());
10303 __ Fmaxnmp(h1, v1.V2H());
10304 __ Fmaxnmp(h2, v2.V2H());
10321 __ Movi(d0, 0x3f80000040000000);
10322 __ Movi(d1, 0xff8000007f800000);
10323 __ Movi(d2, 0x7fc00000ff800000);
10324 __ Fminp(s0, v0.V2S());
10325 __ Fminp(s1, v1.V2S());
10326 __ Fminp(s2, v2.V2S());
10328 __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
10329 __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
10330 __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
10331 __ Fminp(d3, v3.V2D());
10332 __ Fminp(d4, v4.V2D());
10333 __ Fminp(d5, v5.V2D());
10355 __ Movi(s0, 0x3c004000);
10356 __ Movi(s1, 0xfc007c00);
10357 __ Movi(s2, 0x7e00fc00);
10358 __ Fminp(h0, v0.V2H());
10359 __ Fminp(h1, v1.V2H());
10360 __ Fminp(h2, v2.V2H());
10379 __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
10380 __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
10381 __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
10382 __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
10383 __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
10384 __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
10386 __ Fmin(v6.V8H(), v0.V8H(), v1.V8H());
10387 __ Fmin(v7.V8H(), v2.V8H(), v3.V8H());
10388 __ Fmin(v8.V8H(), v4.V8H(), v0.V8H());
10389 __ Fmin(v9.V8H(), v5.V8H(), v1.V8H());
10390 __ Fmin(v10.V4H(), v0.V4H(), v1.V4H());
10391 __ Fmin(v11.V4H(), v2.V4H(), v3.V4H());
10392 __ Fmin(v12.V4H(), v4.V4H(), v0.V4H());
10393 __ Fmin(v13.V4H(), v5.V4H(), v1.V4H());
10417 __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
10418 __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
10419 __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
10420 __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);
10422 __ Fminp(v6.V8H(), v0.V8H(), v1.V8H());
10423 __ Fminp(v7.V8H(), v2.V8H(), v3.V8H());
10424 __ Fminp(v8.V4H(), v0.V4H(), v1.V4H());
10425 __ Fminp(v9.V4H(), v2.V4H(), v3.V4H());
10445 __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
10446 __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
10447 __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
10448 __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
10449 __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
10450 __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
10452 __ Fminnm(v6.V8H(), v0.V8H(), v1.V8H());
10453 __ Fminnm(v7.V8H(), v2.V8H(), v3.V8H());
10454 __ Fminnm(v8.V8H(), v4.V8H(), v0.V8H());
10455 __ Fminnm(v9.V8H(), v5.V8H(), v1.V8H());
10456 __ Fminnm(v10.V4H(), v0.V4H(), v1.V4H());
10457 __ Fminnm(v11.V4H(), v2.V4H(), v3.V4H());
10458 __ Fminnm(v12.V4H(), v4.V4H(), v0.V4H());
10459 __ Fminnm(v13.V4H(), v5.V4H(), v1.V4H());
10483 __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
10484 __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
10485 __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
10486 __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);
10488 __ Fminnmp(v6.V8H(), v0.V8H(), v1.V8H());
10489 __ Fminnmp(v7.V8H(), v2.V8H(), v3.V8H());
10490 __ Fminnmp(v8.V4H(), v0.V4H(), v1.V4H());
10491 __ Fminnmp(v9.V4H(), v2.V4H(), v3.V4H());
10509 __ Movi(d0, 0x3f80000040000000);
10510 __ Movi(d1, 0xff8000007f800000);
10511 __ Movi(d2, 0x7fc00000ff800000);
10512 __ Fminnmp(s0, v0.V2S());
10513 __ Fminnmp(s1, v1.V2S());
10514 __ Fminnmp(s2, v2.V2S());
10516 __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
10517 __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
10518 __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
10519 __ Fminnmp(d3, v3.V2D());
10520 __ Fminnmp(d4, v4.V2D());
10521 __ Fminnmp(d5, v5.V2D());
10543 __ Movi(s0, 0x3c004000);
10544 __ Movi(s1, 0xfc007c00);
10545 __ Movi(s2, 0x7e00fc00);
10546 __ Fminnmp(h0, v0.V2H());
10547 __ Fminnmp(h1, v1.V2H());
10548 __ Fminnmp(h2, v2.V2H());
10578 __ Fmov(h0, n);
10579 __ Fmov(h1, m);
10580 __ Fmov(v0.V8H(), n);
10581 __ Fmov(v1.V8H(), m);
10582 __ Fmin(h28, h0, h1);
10583 __ Fmin(v2.V4H(), v0.V4H(), v1.V4H());
10584 __ Fmin(v3.V8H(), v0.V8H(), v1.V8H());
10585 __ Fmax(h29, h0, h1);
10586 __ Fmax(v4.V4H(), v0.V4H(), v1.V4H());
10587 __ Fmax(v5.V8H(), v0.V8H(), v1.V8H());
10588 __ Fminnm(h30, h0, h1);
10589 __ Fminnm(v6.V4H(), v0.V4H(), v1.V4H());
10590 __ Fminnm(v7.V8H(), v0.V8H(), v1.V8H());
10591 __ Fmaxnm(h31, h0, h1);
10592 __ Fmaxnm(v8.V4H(), v0.V4H(), v1.V4H());
10593 __ Fmaxnm(v9.V8H(), v0.V8H(), v1.V8H());
10771 __ Movi(v0.V2D(), 0x3f8000003f8ccccd, 0x3fc000003ff33333);
10772 __ Movi(v1.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
10773 __ Movi(v2.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
10774 __ Frint32x(v16.V2S(), v0.V2S());
10775 __ Frint32x(v17.V4S(), v1.V4S());
10776 __ Frint32x(v18.V2D(), v2.V2D());
10777 __ Frint64x(v19.V2S(), v0.V2S());
10778 __ Frint64x(v20.V4S(), v1.V4S());
10779 __ Frint64x(v21.V2D(), v2.V2D());
10780 __ Frint32z(v22.V2S(), v0.V2S());
10781 __ Frint32z(v23.V4S(), v1.V4S());
10782 __ Frint32z(v24.V2D(), v2.V2D());
10783 __ Frint64z(v25.V2S(), v0.V2S());
10784 __ Frint64z(v26.V4S(), v1.V4S());
10785 __ Frint64z(v27.V2D(), v2.V2D());
10812 __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
10813 __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
10814 __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
10815 __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);
10817 __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
10818 __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
10819 __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
10820 __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);
10822 __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
10823 __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
10824 __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
10825 __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
10826 __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
10827 __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
10828 __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
10829 __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);
10831 __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
10832 __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
10833 __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
10834 __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
10835 __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
10836 __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
10837 __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
10838 __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
10840 __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
10841 __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
10842 __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
10843 __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
10844 __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
10845 __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
10846 __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
10847 __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);
10849 __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
10850 __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
10851 __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
10852 __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
10853 __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
10854 __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
10855 __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
10856 __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
10888 __ Movi(v0.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);
10889 __ Movi(v1.V2D(), 0x7f7f7f7f7f7f7f7f, 0x7f7f7f7f7f7f7f7f);
10890 __ Movi(v2.V2D(), 0x8080808080808080, 0x8080808080808080);
10891 __ Movi(v3.V2D(), 0, 0);
10892 __ Mov(q4, q3);
10893 __ Mov(q5, q3);
10894 __ Mov(q6, q3);
10895 __ Mov(q7, q3);
10896 __ Mov(q8, q3);
10897 __ Mov(q9, q3);
10898 __ Mov(q10, q3);
10899 __ Mov(q11, q3);
10903 __ Usdot(v3.V2S(), v0.V8B(), v1.V8B());
10904 __ Udot(v4.V2S(), v0.V8B(), v1.V8B());
10905 __ Cmeq(v3.V4S(), v3.V4S(), v4.V4S());
10906 __ Usdot(v5.V4S(), v0.V16B(), v1.V16B());
10907 __ Udot(v6.V4S(), v0.V16B(), v1.V16B());
10908 __ Cmeq(v5.V4S(), v5.V4S(), v6.V4S());
10910 __ Usdot(v7.V2S(), v1.V8B(), v2.V8B());
10911 __ Sdot(v8.V2S(), v1.V8B(), v2.V8B());
10912 __ Cmeq(v7.V4S(), v7.V4S(), v8.V4S());
10913 __ Usdot(v9.V4S(), v1.V16B(), v2.V16B());
10914 __ Sdot(v10.V4S(), v1.V16B(), v2.V16B());
10915 __ Cmeq(v9.V4S(), v9.V4S(), v10.V4S());
10919 __ Mov(w0, 0x8101ff40); // [-127, 1, -1, 64] as signed bytes.
10920 __ Mov(w1, 0x02fe8002); // [2, 254, 128, 2] as unsigned bytes.
10921 __ Dup(v0.V4S(), w0);
10922 __ Dup(v1.V4S(), w1);
10923 __ Usdot(v11.V4S(), v1.V16B(), v0.V16B());
10942 __ Movi(v0.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
10943 __ Movi(v1.V2D(), 0x4242424242424242, 0x5555aaaaaaaa5555);
10946 __ Dup(v2.V4S(), v1.V4S(), 0);
10947 __ Dup(v3.V4S(), v1.V4S(), 1);
10948 __ Dup(v4.V4S(), v1.V4S(), 3);
10950 __ Mov(q10, q1);
10951 __ Usdot(v10.V2S(), v0.V8B(), v2.V8B());
10952 __ Mov(q11, q1);
10953 __ Usdot(v11.V2S(), v0.V8B(), v1.S4B(), 0);
10954 __ Cmeq(v11.V4S(), v11.V4S(), v10.V4S());
10956 __ Mov(q12, q1);
10957 __ Usdot(v12.V4S(), v0.V16B(), v3.V16B());
10958 __ Mov(q13, q1);
10959 __ Usdot(v13.V4S(), v0.V16B(), v1.S4B(), 1);
10960 __ Cmeq(v13.V4S(), v13.V4S(), v12.V4S());
10962 __ Mov(q14, q1);
10963 __ Usdot(v14.V4S(), v4.V16B(), v0.V16B());
10964 __ Mov(q15, q1);
10965 __ Sudot(v15.V4S(), v0.V16B(), v1.S4B(), 3);
10966 __ Cmeq(v15.V4S(), v15.V4S(), v14.V4S());
10982 __ Mov(x0, 0x55aa42ffaa42ff55);
10983 __ Mov(x1, 4);
10984 __ Movi(q30.V16B(), 0);
10989 __ Bind(&loop);
10991 __ Dup(q0.V16B(), w0);
10992 __ Ror(x0, x0, 8);
10993 __ Dup(q1.V16B(), w0);
10994 __ Ror(x0, x0, 8);
10995 __ Dup(q2.V16B(), w0);
10996 __ Ror(x0, x0, 8);
11000 __ movi(q9.V16B(), 0x55);
11001 __ dci(0x5e010409); // mov b9, v0.b[0]
11002 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11004 __ movi(q9.V16B(), 0x55);
11005 __ dci(0x5e207809); // sqabs b9, b0
11006 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11008 __ movi(q9.V16B(), 0x55);
11009 __ dci(0x5e200c29); // sqadd b9, b1, b0
11010 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11012 __ movi(q9.V16B(), 0x55);
11013 __ dci(0x7e207809); // sqneg b9, b0
11014 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11016 __ movi(q9.V16B(), 0x55);
11017 __ dci(0x7e008429); // sqrdmlah b9, b1, b0
11018 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11020 __ movi(q9.V16B(), 0x55);
11021 __ dci(0x7e008c29); // sqrdmlsh b9, b1, b0
11022 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11024 __ movi(q9.V16B(), 0x55);
11025 __ dci(0x5e205c29); // sqrshl b9, b1, b0
11026 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11028 __ movi(q9.V16B(), 0x55);
11029 __ dci(0x5f089c09); // sqrshrn b9, h0, #8
11030 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11032 __ movi(q9.V16B(), 0x55);
11033 __ dci(0x7f088c09); // sqrshrun b9, h0, #8
11034 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11036 __ movi(q9.V16B(), 0x55);
11037 __ dci(0x5e204c29); // sqshl b9, b1, b0
11038 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11040 __ movi(q9.V16B(), 0x55);
11041 __ dci(0x5f087409); // sqshl b9, b0, #0
11042 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11044 __ movi(q9.V16B(), 0x55);
11045 __ dci(0x7f086409); // sqshlu b9, b0, #0
11046 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11048 __ movi(q9.V16B(), 0x55);
11049 __ dci(0x5f089409); // sqshrn b9, h0, #8
11050 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11052 __ movi(q9.V16B(), 0x55);
11053 __ dci(0x7f088409); // sqshrun b9, h0, #8
11054 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11056 __ movi(q9.V16B(), 0x55);
11057 __ dci(0x5e202c29); // sqsub b9, b1, b0
11058 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11060 __ movi(q9.V16B(), 0x55);
11061 __ dci(0x5e214809); // sqxtn b9, h0
11062 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11064 __ movi(q9.V16B(), 0x55);
11065 __ dci(0x7e212809); // sqxtun b9, h0
11066 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11068 __ movi(q9.V16B(), 0x55);
11069 __ dci(0x5e203809); // suqadd b9, b0
11070 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11072 __ movi(q9.V16B(), 0x55);
11073 __ dci(0x7e200c29); // uqadd b9, b1, b0
11074 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11076 __ movi(q9.V16B(), 0x55);
11077 __ dci(0x7e205c29); // uqrshl b9, b1, b0
11078 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11080 __ movi(q9.V16B(), 0x55);
11081 __ dci(0x7f089c09); // uqrshrn b9, h0, #8
11082 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11084 __ movi(q9.V16B(), 0x55);
11085 __ dci(0x7e204c29); // uqshl b9, b1, b0
11086 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11088 __ movi(q9.V16B(), 0x55);
11089 __ dci(0x7f087409); // uqshl b9, b0, #0
11090 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11092 __ movi(q9.V16B(), 0x55);
11093 __ dci(0x7f089409); // uqshrn b9, h0, #8
11094 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11096 __ movi(q9.V16B(), 0x55);
11097 __ dci(0x7e202c29); // uqsub b9, b1, b0
11098 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11100 __ movi(q9.V16B(), 0x55);
11101 __ dci(0x7e214809); // uqxtn b9, h0
11102 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11104 __ movi(q9.V16B(), 0x55);
11105 __ dci(0x7e203809); // usqadd b9, b0
11106 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11108 __ Sub(x1, x1, 1);
11109 __ Cbnz(x1, &loop);
11111 __ Ins(q30.V16B(), 0, wzr);
11128 __ Mov(x0, 0x55aa42ffaa42ff55);
11129 __ Mov(x1, 4);
11130 __ Movi(q30.V16B(), 0);
11135 __ Bind(&loop);
11137 __ Dup(q0.V8H(), w0);
11138 __ Ror(x0, x0, 8);
11139 __ Dup(q1.V8H(), w0);
11140 __ Ror(x0, x0, 8);
11141 __ Dup(q2.V8H(), w0);
11142 __ Ror(x0, x0, 8);
11146 __ movi(q9.V16B(), 0x55);
11147 __ dci(0x5e020409); // mov h9, v0.h[0]
11148 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11150 __ movi(q9.V16B(), 0x55);
11151 __ dci(0x7ec01429); // fabd h9, h1, h0
11152 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11154 __ movi(q9.V16B(), 0x55);
11155 __ dci(0x7e402c29); // facge h9, h1, h0
11156 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11158 __ movi(q9.V16B(), 0x55);
11159 __ dci(0x7ec02c29); // facgt h9, h1, h0
11160 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11162 __ movi(q9.V16B(), 0x55);
11163 __ dci(0x5e30d809); // faddp h9, v0.2h
11164 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11166 __ movi(q9.V16B(), 0x55);
11167 __ dci(0x5ef8d809); // fcmeq h9, h0, #0.0
11168 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11170 __ movi(q9.V16B(), 0x55);
11171 __ dci(0x5e402429); // fcmeq h9, h1, h0
11172 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11174 __ movi(q9.V16B(), 0x55);
11175 __ dci(0x7ef8c809); // fcmge h9, h0, #0.0
11176 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11178 __ movi(q9.V16B(), 0x55);
11179 __ dci(0x7e402429); // fcmge h9, h1, h0
11180 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11182 __ movi(q9.V16B(), 0x55);
11183 __ dci(0x5ef8c809); // fcmgt h9, h0, #0.0
11184 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11186 __ movi(q9.V16B(), 0x55);
11187 __ dci(0x7ec02429); // fcmgt h9, h1, h0
11188 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11190 __ movi(q9.V16B(), 0x55);
11191 __ dci(0x7ef8d809); // fcmle h9, h0, #0.0
11192 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11194 __ movi(q9.V16B(), 0x55);
11195 __ dci(0x5ef8e809); // fcmlt h9, h0, #0.0
11196 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11198 __ movi(q9.V16B(), 0x55);
11199 __ dci(0x5e79c809); // fcvtas h9, h0
11200 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11202 __ movi(q9.V16B(), 0x55);
11203 __ dci(0x7e79c809); // fcvtau h9, h0
11204 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11206 __ movi(q9.V16B(), 0x55);
11207 __ dci(0x5e79b809); // fcvtms h9, h0
11208 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11210 __ movi(q9.V16B(), 0x55);
11211 __ dci(0x7e79b809); // fcvtmu h9, h0
11212 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11214 __ movi(q9.V16B(), 0x55);
11215 __ dci(0x5e79a809); // fcvtns h9, h0
11216 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11218 __ movi(q9.V16B(), 0x55);
11219 __ dci(0x7e79a809); // fcvtnu h9, h0
11220 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11222 __ movi(q9.V16B(), 0x55);
11223 __ dci(0x5ef9a809); // fcvtps h9, h0
11224 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11226 __ movi(q9.V16B(), 0x55);
11227 __ dci(0x7ef9a809); // fcvtpu h9, h0
11228 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11230 __ movi(q9.V16B(), 0x55);
11231 __ dci(0x5ef9b809); // fcvtzs h9, h0
11232 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11234 __ movi(q9.V16B(), 0x55);
11235 __ dci(0x5f10fc09); // fcvtzs h9, h0, #16
11236 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11238 __ movi(q9.V16B(), 0x55);
11239 __ dci(0x7ef9b809); // fcvtzu h9, h0
11240 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11242 __ movi(q9.V16B(), 0x55);
11243 __ dci(0x7f10fc09); // fcvtzu h9, h0, #16
11244 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11246 __ movi(q9.V16B(), 0x55);
11247 __ dci(0x5e30c809); // fmaxnmp h9, v0.2h
11248 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11250 __ movi(q9.V16B(), 0x55);
11251 __ dci(0x5e30f809); // fmaxp h9, v0.2h
11252 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11254 __ movi(q9.V16B(), 0x55);
11255 __ dci(0x5eb0c809); // fminnmp h9, v0.2h
11256 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11258 __ movi(q9.V16B(), 0x55);
11259 __ dci(0x5eb0f809); // fminp h9, v0.2h
11260 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11262 __ movi(q9.V16B(), 0x55);
11263 __ dci(0x5f001029); // fmla h9, h1, v0.h[0]
11264 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11266 __ movi(q9.V16B(), 0x55);
11267 __ dci(0x5f005029); // fmls h9, h1, v0.h[0]
11268 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11270 __ movi(q9.V16B(), 0x55);
11271 __ dci(0x5f009029); // fmul h9, h1, v0.h[0]
11272 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11274 __ movi(q9.V16B(), 0x55);
11275 __ dci(0x7f009029); // fmulx h9, h1, v0.h[0]
11276 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11278 __ movi(q9.V16B(), 0x55);
11279 __ dci(0x5e401c29); // fmulx h9, h1, h0
11280 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11282 __ movi(q9.V16B(), 0x55);
11283 __ dci(0x5ef9d809); // frecpe h9, h0
11284 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11286 __ movi(q9.V16B(), 0x55);
11287 __ dci(0x5e403c29); // frecps h9, h1, h0
11288 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11290 __ movi(q9.V16B(), 0x55);
11291 __ dci(0x5ef9f809); // frecpx h9, h0
11292 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11294 __ movi(q9.V16B(), 0x55);
11295 __ dci(0x7ef9d809); // frsqrte h9, h0
11296 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11298 __ movi(q9.V16B(), 0x55);
11299 __ dci(0x5ec03c29); // frsqrts h9, h1, h0
11300 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11302 __ movi(q9.V16B(), 0x55);
11303 __ dci(0x5e79d809); // scvtf h9, h0
11304 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11306 __ movi(q9.V16B(), 0x55);
11307 __ dci(0x5f10e409); // scvtf h9, h0, #16
11308 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11310 __ movi(q9.V16B(), 0x55);
11311 __ dci(0x5e607809); // sqabs h9, h0
11312 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11314 __ movi(q9.V16B(), 0x55);
11315 __ dci(0x5e600c29); // sqadd h9, h1, h0
11316 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11318 __ movi(q9.V16B(), 0x55);
11319 __ dci(0x5f40c029); // sqdmulh h9, h1, v0.h[0]
11320 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11322 __ movi(q9.V16B(), 0x55);
11323 __ dci(0x5e60b429); // sqdmulh h9, h1, h0
11324 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11326 __ movi(q9.V16B(), 0x55);
11327 __ dci(0x7e607809); // sqneg h9, h0
11328 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11330 __ movi(q9.V16B(), 0x55);
11331 __ dci(0x7f40d029); // sqrdmlah h9, h1, v0.h[0]
11332 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11334 __ movi(q9.V16B(), 0x55);
11335 __ dci(0x7e408429); // sqrdmlah h9, h1, h0
11336 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11338 __ movi(q9.V16B(), 0x55);
11339 __ dci(0x7f40f029); // sqrdmlsh h9, h1, v0.h[0]
11340 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11342 __ movi(q9.V16B(), 0x55);
11343 __ dci(0x7e408c29); // sqrdmlsh h9, h1, h0
11344 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11346 __ movi(q9.V16B(), 0x55);
11347 __ dci(0x5f40d029); // sqrdmulh h9, h1, v0.h[0]
11348 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11350 __ movi(q9.V16B(), 0x55);
11351 __ dci(0x7e60b429); // sqrdmulh h9, h1, h0
11352 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11354 __ movi(q9.V16B(), 0x55);
11355 __ dci(0x5e605c29); // sqrshl h9, h1, h0
11356 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11358 __ movi(q9.V16B(), 0x55);
11359 __ dci(0x5f109c09); // sqrshrn h9, s0, #16
11360 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11362 __ movi(q9.V16B(), 0x55);
11363 __ dci(0x7f108c09); // sqrshrun h9, s0, #16
11364 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11366 __ movi(q9.V16B(), 0x55);
11367 __ dci(0x5e604c29); // sqshl h9, h1, h0
11368 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11370 __ movi(q9.V16B(), 0x55);
11371 __ dci(0x5f107409); // sqshl h9, h0, #0
11372 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11374 __ movi(q9.V16B(), 0x55);
11375 __ dci(0x7f106409); // sqshlu h9, h0, #0
11376 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11378 __ movi(q9.V16B(), 0x55);
11379 __ dci(0x5f109409); // sqshrn h9, s0, #16
11380 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11382 __ movi(q9.V16B(), 0x55);
11383 __ dci(0x7f108409); // sqshrun h9, s0, #16
11384 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11386 __ movi(q9.V16B(), 0x55);
11387 __ dci(0x5e602c29); // sqsub h9, h1, h0
11388 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11390 __ movi(q9.V16B(), 0x55);
11391 __ dci(0x5e614809); // sqxtn h9, s0
11392 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11394 __ movi(q9.V16B(), 0x55);
11395 __ dci(0x7e612809); // sqxtun h9, s0
11396 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11398 __ movi(q9.V16B(), 0x55);
11399 __ dci(0x5e603809); // suqadd h9, h0
11400 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11402 __ movi(q9.V16B(), 0x55);
11403 __ dci(0x7e79d809); // ucvtf h9, h0
11404 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11406 __ movi(q9.V16B(), 0x55);
11407 __ dci(0x7f10e409); // ucvtf h9, h0, #16
11408 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11410 __ movi(q9.V16B(), 0x55);
11411 __ dci(0x7e600c29); // uqadd h9, h1, h0
11412 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11414 __ movi(q9.V16B(), 0x55);
11415 __ dci(0x7e605c29); // uqrshl h9, h1, h0
11416 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11418 __ movi(q9.V16B(), 0x55);
11419 __ dci(0x7f109c09); // uqrshrn h9, s0, #16
11420 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11422 __ movi(q9.V16B(), 0x55);
11423 __ dci(0x7e604c29); // uqshl h9, h1, h0
11424 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11426 __ movi(q9.V16B(), 0x55);
11427 __ dci(0x7f107409); // uqshl h9, h0, #0
11428 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11430 __ movi(q9.V16B(), 0x55);
11431 __ dci(0x7f109409); // uqshrn h9, s0, #16
11432 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11434 __ movi(q9.V16B(), 0x55);
11435 __ dci(0x7e602c29); // uqsub h9, h1, h0
11436 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11438 __ movi(q9.V16B(), 0x55);
11439 __ dci(0x7e614809); // uqxtn h9, s0
11440 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11442 __ movi(q9.V16B(), 0x55);
11443 __ dci(0x7e603809); // usqadd h9, h0
11444 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11446 __ Sub(x1, x1, 1);
11447 __ Cbnz(x1, &loop);
11449 __ Ins(q30.V8H(), 0, wzr);
11465 __ Mov(x0, 0x55aa42ffaa42ff55);
11466 __ Mov(x1, 4);
11467 __ Movi(q30.V16B(), 0);
11472 __ Bind(&loop);
11474 __ Dup(q0.V4S(), w0);
11475 __ Ror(x0, x0, 8);
11476 __ Dup(q1.V4S(), w0);
11477 __ Ror(x0, x0, 8);
11478 __ Dup(q2.V4S(), w0);
11479 __ Ror(x0, x0, 8);
11483 __ movi(q9.V16B(), 0x55);
11484 __ dci(0x5e040409); // mov s9, v0.s[0]
11485 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11487 __ movi(q9.V16B(), 0x55);
11488 __ dci(0x7ea0d429); // fabd s9, s1, s0
11489 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11491 __ movi(q9.V16B(), 0x55);
11492 __ dci(0x7e20ec29); // facge s9, s1, s0
11493 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11495 __ movi(q9.V16B(), 0x55);
11496 __ dci(0x7ea0ec29); // facgt s9, s1, s0
11497 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11499 __ movi(q9.V16B(), 0x55);
11500 __ dci(0x7e30d809); // faddp s9, v0.2s
11501 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11503 __ movi(q9.V16B(), 0x55);
11504 __ dci(0x5ea0d809); // fcmeq s9, s0, #0.0
11505 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11507 __ movi(q9.V16B(), 0x55);
11508 __ dci(0x5e20e429); // fcmeq s9, s1, s0
11509 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11511 __ movi(q9.V16B(), 0x55);
11512 __ dci(0x7ea0c809); // fcmge s9, s0, #0.0
11513 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11515 __ movi(q9.V16B(), 0x55);
11516 __ dci(0x7e20e429); // fcmge s9, s1, s0
11517 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11519 __ movi(q9.V16B(), 0x55);
11520 __ dci(0x5ea0c809); // fcmgt s9, s0, #0.0
11521 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11523 __ movi(q9.V16B(), 0x55);
11524 __ dci(0x7ea0e429); // fcmgt s9, s1, s0
11525 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11527 __ movi(q9.V16B(), 0x55);
11528 __ dci(0x7ea0d809); // fcmle s9, s0, #0.0
11529 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11531 __ movi(q9.V16B(), 0x55);
11532 __ dci(0x5ea0e809); // fcmlt s9, s0, #0.0
11533 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11535 __ movi(q9.V16B(), 0x55);
11536 __ dci(0x5e21c809); // fcvtas s9, s0
11537 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11539 __ movi(q9.V16B(), 0x55);
11540 __ dci(0x7e21c809); // fcvtau s9, s0
11541 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11543 __ movi(q9.V16B(), 0x55);
11544 __ dci(0x5e21b809); // fcvtms s9, s0
11545 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11547 __ movi(q9.V16B(), 0x55);
11548 __ dci(0x7e21b809); // fcvtmu s9, s0
11549 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11551 __ movi(q9.V16B(), 0x55);
11552 __ dci(0x5e21a809); // fcvtns s9, s0
11553 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11555 __ movi(q9.V16B(), 0x55);
11556 __ dci(0x7e21a809); // fcvtnu s9, s0
11557 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11559 __ movi(q9.V16B(), 0x55);
11560 __ dci(0x5ea1a809); // fcvtps s9, s0
11561 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11563 __ movi(q9.V16B(), 0x55);
11564 __ dci(0x7ea1a809); // fcvtpu s9, s0
11565 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11567 __ movi(q9.V16B(), 0x55);
11568 __ dci(0x7e616809); // fcvtxn s9, d0
11569 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11571 __ movi(q9.V16B(), 0x55);
11572 __ dci(0x5ea1b809); // fcvtzs s9, s0
11573 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11575 __ movi(q9.V16B(), 0x55);
11576 __ dci(0x5f20fc09); // fcvtzs s9, s0, #32
11577 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11579 __ movi(q9.V16B(), 0x55);
11580 __ dci(0x7ea1b809); // fcvtzu s9, s0
11581 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11583 __ movi(q9.V16B(), 0x55);
11584 __ dci(0x7f20fc09); // fcvtzu s9, s0, #32
11585 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11587 __ movi(q9.V16B(), 0x55);
11588 __ dci(0x7e30c809); // fmaxnmp s9, v0.2s
11589 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11591 __ movi(q9.V16B(), 0x55);
11592 __ dci(0x7e30f809); // fmaxp s9, v0.2s
11593 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11595 __ movi(q9.V16B(), 0x55);
11596 __ dci(0x7eb0c809); // fminnmp s9, v0.2s
11597 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11599 __ movi(q9.V16B(), 0x55);
11600 __ dci(0x7eb0f809); // fminp s9, v0.2s
11601 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11603 __ movi(q9.V16B(), 0x55);
11604 __ dci(0x5f801029); // fmla s9, s1, v0.s[0]
11605 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11607 __ movi(q9.V16B(), 0x55);
11608 __ dci(0x5f805029); // fmls s9, s1, v0.s[0]
11609 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11611 __ movi(q9.V16B(), 0x55);
11612 __ dci(0x5f809029); // fmul s9, s1, v0.s[0]
11613 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11615 __ movi(q9.V16B(), 0x55);
11616 __ dci(0x7f809029); // fmulx s9, s1, v0.s[0]
11617 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11619 __ movi(q9.V16B(), 0x55);
11620 __ dci(0x5e20dc29); // fmulx s9, s1, s0
11621 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11623 __ movi(q9.V16B(), 0x55);
11624 __ dci(0x5ea1d809); // frecpe s9, s0
11625 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11627 __ movi(q9.V16B(), 0x55);
11628 __ dci(0x5e20fc29); // frecps s9, s1, s0
11629 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11631 __ movi(q9.V16B(), 0x55);
11632 __ dci(0x5ea1f809); // frecpx s9, s0
11633 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11635 __ movi(q9.V16B(), 0x55);
11636 __ dci(0x7ea1d809); // frsqrte s9, s0
11637 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11639 __ movi(q9.V16B(), 0x55);
11640 __ dci(0x5ea0fc29); // frsqrts s9, s1, s0
11641 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11643 __ movi(q9.V16B(), 0x55);
11644 __ dci(0x5e21d809); // scvtf s9, s0
11645 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11647 __ movi(q9.V16B(), 0x55);
11648 __ dci(0x5f20e409); // scvtf s9, s0, #32
11649 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11651 __ movi(q9.V16B(), 0x55);
11652 __ dci(0x5ea07809); // sqabs s9, s0
11653 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11655 __ movi(q9.V16B(), 0x55);
11656 __ dci(0x5ea00c29); // sqadd s9, s1, s0
11657 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11659 __ movi(q9.V16B(), 0x55);
11660 __ dci(0x5e609029); // sqdmlal s9, h1, h0
11661 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11663 __ movi(q9.V16B(), 0x55);
11664 __ dci(0x5f403029); // sqdmlal s9, h1, v0.h[0]
11665 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11667 __ movi(q9.V16B(), 0x55);
11668 __ dci(0x5e60b029); // sqdmlsl s9, h1, h0
11669 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11671 __ movi(q9.V16B(), 0x55);
11672 __ dci(0x5f407029); // sqdmlsl s9, h1, v0.h[0]
11673 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11675 __ movi(q9.V16B(), 0x55);
11676 __ dci(0x5f80c029); // sqdmulh s9, s1, v0.s[0]
11677 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11679 __ movi(q9.V16B(), 0x55);
11680 __ dci(0x5ea0b429); // sqdmulh s9, s1, s0
11681 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11683 __ movi(q9.V16B(), 0x55);
11684 __ dci(0x5e60d029); // sqdmull s9, h1, h0
11685 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11687 __ movi(q9.V16B(), 0x55);
11688 __ dci(0x5f40b029); // sqdmull s9, h1, v0.h[0]
11689 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11691 __ movi(q9.V16B(), 0x55);
11692 __ dci(0x7ea07809); // sqneg s9, s0
11693 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11695 __ movi(q9.V16B(), 0x55);
11696 __ dci(0x7f80d029); // sqrdmlah s9, s1, v0.s[0]
11697 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11699 __ movi(q9.V16B(), 0x55);
11700 __ dci(0x7e808429); // sqrdmlah s9, s1, s0
11701 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11703 __ movi(q9.V16B(), 0x55);
11704 __ dci(0x7f80f029); // sqrdmlsh s9, s1, v0.s[0]
11705 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11707 __ movi(q9.V16B(), 0x55);
11708 __ dci(0x7e808c29); // sqrdmlsh s9, s1, s0
11709 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11711 __ movi(q9.V16B(), 0x55);
11712 __ dci(0x5f80d029); // sqrdmulh s9, s1, v0.s[0]
11713 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11715 __ movi(q9.V16B(), 0x55);
11716 __ dci(0x7ea0b429); // sqrdmulh s9, s1, s0
11717 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11719 __ movi(q9.V16B(), 0x55);
11720 __ dci(0x5ea05c29); // sqrshl s9, s1, s0
11721 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11723 __ movi(q9.V16B(), 0x55);
11724 __ dci(0x5f209c09); // sqrshrn s9, d0, #32
11725 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11727 __ movi(q9.V16B(), 0x55);
11728 __ dci(0x7f208c09); // sqrshrun s9, d0, #32
11729 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11731 __ movi(q9.V16B(), 0x55);
11732 __ dci(0x5ea04c29); // sqshl s9, s1, s0
11733 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11735 __ movi(q9.V16B(), 0x55);
11736 __ dci(0x5f207409); // sqshl s9, s0, #0
11737 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11739 __ movi(q9.V16B(), 0x55);
11740 __ dci(0x7f206409); // sqshlu s9, s0, #0
11741 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11743 __ movi(q9.V16B(), 0x55);
11744 __ dci(0x5f209409); // sqshrn s9, d0, #32
11745 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11747 __ movi(q9.V16B(), 0x55);
11748 __ dci(0x7f208409); // sqshrun s9, d0, #32
11749 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11751 __ movi(q9.V16B(), 0x55);
11752 __ dci(0x5ea02c29); // sqsub s9, s1, s0
11753 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11755 __ movi(q9.V16B(), 0x55);
11756 __ dci(0x5ea14809); // sqxtn s9, d0
11757 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11759 __ movi(q9.V16B(), 0x55);
11760 __ dci(0x7ea12809); // sqxtun s9, d0
11761 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11763 __ movi(q9.V16B(), 0x55);
11764 __ dci(0x5ea03809); // suqadd s9, s0
11765 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11767 __ movi(q9.V16B(), 0x55);
11768 __ dci(0x7e21d809); // ucvtf s9, s0
11769 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11771 __ movi(q9.V16B(), 0x55);
11772 __ dci(0x7f20e409); // ucvtf s9, s0, #32
11773 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11775 __ movi(q9.V16B(), 0x55);
11776 __ dci(0x7ea00c29); // uqadd s9, s1, s0
11777 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11779 __ movi(q9.V16B(), 0x55);
11780 __ dci(0x7ea05c29); // uqrshl s9, s1, s0
11781 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11783 __ movi(q9.V16B(), 0x55);
11784 __ dci(0x7f209c09); // uqrshrn s9, d0, #32
11785 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11787 __ movi(q9.V16B(), 0x55);
11788 __ dci(0x7ea04c29); // uqshl s9, s1, s0
11789 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11791 __ movi(q9.V16B(), 0x55);
11792 __ dci(0x7f207409); // uqshl s9, s0, #0
11793 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11795 __ movi(q9.V16B(), 0x55);
11796 __ dci(0x7f209409); // uqshrn s9, d0, #32
11797 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11799 __ movi(q9.V16B(), 0x55);
11800 __ dci(0x7ea02c29); // uqsub s9, s1, s0
11801 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11803 __ movi(q9.V16B(), 0x55);
11804 __ dci(0x7ea14809); // uqxtn s9, d0
11805 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11807 __ movi(q9.V16B(), 0x55);
11808 __ dci(0x7ea03809); // usqadd s9, s0
11809 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11811 __ Sub(x1, x1, 1);
11812 __ Cbnz(x1, &loop);
11814 __ Ins(q30.V4S(), 0, wzr);
11830 __ Mov(x0, 0x55aa42ffaa42ff55);
11831 __ Mov(x1, 4);
11832 __ Movi(q30.V16B(), 0);
11837 __ Bind(&loop);
11839 __ Dup(q0.V2D(), x0);
11840 __ Ror(x0, x0, 8);
11841 __ Dup(q1.V2D(), x0);
11842 __ Ror(x0, x0, 8);
11843 __ Dup(q2.V2D(), x0);
11844 __ Ror(x0, x0, 8);
11848 __ movi(q9.V16B(), 0x55);
11849 __ dci(0x5ee0b809); // abs d9, d0
11850 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11852 __ movi(q9.V16B(), 0x55);
11853 __ dci(0x5ee08429); // add d9, d1, d0
11854 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11856 __ movi(q9.V16B(), 0x55);
11857 __ dci(0x5ef1b809); // addp d9, v0.2d
11858 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11860 __ movi(q9.V16B(), 0x55);
11861 __ dci(0x5ee09809); // cmeq d9, d0, #0
11862 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11864 __ movi(q9.V16B(), 0x55);
11865 __ dci(0x7ee08c29); // cmeq d9, d1, d0
11866 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11868 __ movi(q9.V16B(), 0x55);
11869 __ dci(0x7ee08809); // cmge d9, d0, #0
11870 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11872 __ movi(q9.V16B(), 0x55);
11873 __ dci(0x5ee03c29); // cmge d9, d1, d0
11874 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11876 __ movi(q9.V16B(), 0x55);
11877 __ dci(0x5ee08809); // cmgt d9, d0, #0
11878 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11880 __ movi(q9.V16B(), 0x55);
11881 __ dci(0x5ee03429); // cmgt d9, d1, d0
11882 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11884 __ movi(q9.V16B(), 0x55);
11885 __ dci(0x7ee03429); // cmhi d9, d1, d0
11886 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11888 __ movi(q9.V16B(), 0x55);
11889 __ dci(0x7ee03c29); // cmhs d9, d1, d0
11890 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11892 __ movi(q9.V16B(), 0x55);
11893 __ dci(0x7ee09809); // cmle d9, d0, #0
11894 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11896 __ movi(q9.V16B(), 0x55);
11897 __ dci(0x5ee0a809); // cmlt d9, d0, #0
11898 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11900 __ movi(q9.V16B(), 0x55);
11901 __ dci(0x5ee08c29); // cmtst d9, d1, d0
11902 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11904 __ movi(q9.V16B(), 0x55);
11905 __ dci(0x5e080409); // mov d9, v0.d[0]
11906 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11908 __ movi(q9.V16B(), 0x55);
11909 __ dci(0x7ee0d429); // fabd d9, d1, d0
11910 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11912 __ movi(q9.V16B(), 0x55);
11913 __ dci(0x7e60ec29); // facge d9, d1, d0
11914 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11916 __ movi(q9.V16B(), 0x55);
11917 __ dci(0x7ee0ec29); // facgt d9, d1, d0
11918 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11920 __ movi(q9.V16B(), 0x55);
11921 __ dci(0x7e70d809); // faddp d9, v0.2d
11922 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11924 __ movi(q9.V16B(), 0x55);
11925 __ dci(0x5ee0d809); // fcmeq d9, d0, #0.0
11926 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11928 __ movi(q9.V16B(), 0x55);
11929 __ dci(0x5e60e429); // fcmeq d9, d1, d0
11930 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11932 __ movi(q9.V16B(), 0x55);
11933 __ dci(0x7ee0c809); // fcmge d9, d0, #0.0
11934 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11936 __ movi(q9.V16B(), 0x55);
11937 __ dci(0x7e60e429); // fcmge d9, d1, d0
11938 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11940 __ movi(q9.V16B(), 0x55);
11941 __ dci(0x5ee0c809); // fcmgt d9, d0, #0.0
11942 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11944 __ movi(q9.V16B(), 0x55);
11945 __ dci(0x7ee0e429); // fcmgt d9, d1, d0
11946 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11948 __ movi(q9.V16B(), 0x55);
11949 __ dci(0x7ee0d809); // fcmle d9, d0, #0.0
11950 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11952 __ movi(q9.V16B(), 0x55);
11953 __ dci(0x5ee0e809); // fcmlt d9, d0, #0.0
11954 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11956 __ movi(q9.V16B(), 0x55);
11957 __ dci(0x5e61c809); // fcvtas d9, d0
11958 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11960 __ movi(q9.V16B(), 0x55);
11961 __ dci(0x7e61c809); // fcvtau d9, d0
11962 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11964 __ movi(q9.V16B(), 0x55);
11965 __ dci(0x5e61b809); // fcvtms d9, d0
11966 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11968 __ movi(q9.V16B(), 0x55);
11969 __ dci(0x7e61b809); // fcvtmu d9, d0
11970 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11972 __ movi(q9.V16B(), 0x55);
11973 __ dci(0x5e61a809); // fcvtns d9, d0
11974 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11976 __ movi(q9.V16B(), 0x55);
11977 __ dci(0x7e61a809); // fcvtnu d9, d0
11978 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11980 __ movi(q9.V16B(), 0x55);
11981 __ dci(0x5ee1a809); // fcvtps d9, d0
11982 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11984 __ movi(q9.V16B(), 0x55);
11985 __ dci(0x7ee1a809); // fcvtpu d9, d0
11986 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11988 __ movi(q9.V16B(), 0x55);
11989 __ dci(0x5ee1b809); // fcvtzs d9, d0
11990 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11992 __ movi(q9.V16B(), 0x55);
11993 __ dci(0x5f40fc09); // fcvtzs d9, d0, #64
11994 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11996 __ movi(q9.V16B(), 0x55);
11997 __ dci(0x7ee1b809); // fcvtzu d9, d0
11998 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12000 __ movi(q9.V16B(), 0x55);
12001 __ dci(0x7f40fc09); // fcvtzu d9, d0, #64
12002 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12004 __ movi(q9.V16B(), 0x55);
12005 __ dci(0x7e70c809); // fmaxnmp d9, v0.2d
12006 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12008 __ movi(q9.V16B(), 0x55);
12009 __ dci(0x7e70f809); // fmaxp d9, v0.2d
12010 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12012 __ movi(q9.V16B(), 0x55);
12013 __ dci(0x7ef0c809); // fminnmp d9, v0.2d
12014 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12016 __ movi(q9.V16B(), 0x55);
12017 __ dci(0x7ef0f809); // fminp d9, v0.2d
12018 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12020 __ movi(q9.V16B(), 0x55);
12021 __ dci(0x5fc01029); // fmla d9, d1, v0.d[0]
12022 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12024 __ movi(q9.V16B(), 0x55);
12025 __ dci(0x5fc05029); // fmls d9, d1, v0.d[0]
12026 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12028 __ movi(q9.V16B(), 0x55);
12029 __ dci(0x5fc09029); // fmul d9, d1, v0.d[0]
12030 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12032 __ movi(q9.V16B(), 0x55);
12033 __ dci(0x7fc09029); // fmulx d9, d1, v0.d[0]
12034 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12036 __ movi(q9.V16B(), 0x55);
12037 __ dci(0x5e60dc29); // fmulx d9, d1, d0
12038 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12040 __ movi(q9.V16B(), 0x55);
12041 __ dci(0x5ee1d809); // frecpe d9, d0
12042 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12044 __ movi(q9.V16B(), 0x55);
12045 __ dci(0x5e60fc29); // frecps d9, d1, d0
12046 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12048 __ movi(q9.V16B(), 0x55);
12049 __ dci(0x5ee1f809); // frecpx d9, d0
12050 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12052 __ movi(q9.V16B(), 0x55);
12053 __ dci(0x7ee1d809); // frsqrte d9, d0
12054 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12056 __ movi(q9.V16B(), 0x55);
12057 __ dci(0x5ee0fc29); // frsqrts d9, d1, d0
12058 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12060 __ movi(q9.V16B(), 0x55);
12061 __ dci(0x7ee0b809); // neg d9, d0
12062 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12064 __ movi(q9.V16B(), 0x55);
12065 __ dci(0x5e61d809); // scvtf d9, d0
12066 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12068 __ movi(q9.V16B(), 0x55);
12069 __ dci(0x5f40e409); // scvtf d9, d0, #64
12070 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12072 __ movi(q9.V16B(), 0x55);
12073 __ dci(0x5f405409); // shl d9, d0, #0
12074 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12076 __ movi(q9.V16B(), 0x55);
12077 __ dci(0x7f405409); // sli d9, d0, #0
12078 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12080 __ movi(q9.V16B(), 0x55);
12081 __ dci(0x5ee07809); // sqabs d9, d0
12082 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12084 __ movi(q9.V16B(), 0x55);
12085 __ dci(0x5ee00c29); // sqadd d9, d1, d0
12086 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12088 __ movi(q9.V16B(), 0x55);
12089 __ dci(0x5ea09029); // sqdmlal d9, s1, s0
12090 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12092 __ movi(q9.V16B(), 0x55);
12093 __ dci(0x5f803029); // sqdmlal d9, s1, v0.s[0]
12094 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12096 __ movi(q9.V16B(), 0x55);
12097 __ dci(0x5ea0b029); // sqdmlsl d9, s1, s0
12098 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12100 __ movi(q9.V16B(), 0x55);
12101 __ dci(0x5f807029); // sqdmlsl d9, s1, v0.s[0]
12102 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12104 __ movi(q9.V16B(), 0x55);
12105 __ dci(0x5ea0d029); // sqdmull d9, s1, s0
12106 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12108 __ movi(q9.V16B(), 0x55);
12109 __ dci(0x5f80b029); // sqdmull d9, s1, v0.s[0]
12110 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12112 __ movi(q9.V16B(), 0x55);
12113 __ dci(0x7ee07809); // sqneg d9, d0
12114 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12116 __ movi(q9.V16B(), 0x55);
12117 __ dci(0x7ec08429); // sqrdmlah d9, d1, d0
12118 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12120 __ movi(q9.V16B(), 0x55);
12121 __ dci(0x7ec08c29); // sqrdmlsh d9, d1, d0
12122 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12124 __ movi(q9.V16B(), 0x55);
12125 __ dci(0x5ee05c29); // sqrshl d9, d1, d0
12126 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12128 __ movi(q9.V16B(), 0x55);
12129 __ dci(0x5ee04c29); // sqshl d9, d1, d0
12130 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12132 __ movi(q9.V16B(), 0x55);
12133 __ dci(0x5f407409); // sqshl d9, d0, #0
12134 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12136 __ movi(q9.V16B(), 0x55);
12137 __ dci(0x7f406409); // sqshlu d9, d0, #0
12138 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12140 __ movi(q9.V16B(), 0x55);
12141 __ dci(0x5ee02c29); // sqsub d9, d1, d0
12142 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12144 __ movi(q9.V16B(), 0x55);
12145 __ dci(0x7f404409); // sri d9, d0, #64
12146 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12148 __ movi(q9.V16B(), 0x55);
12149 __ dci(0x5ee05429); // srshl d9, d1, d0
12150 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12152 __ movi(q9.V16B(), 0x55);
12153 __ dci(0x5f402409); // srshr d9, d0, #64
12154 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12156 __ movi(q9.V16B(), 0x55);
12157 __ dci(0x5f403409); // srsra d9, d0, #64
12158 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12160 __ movi(q9.V16B(), 0x55);
12161 __ dci(0x5ee04429); // sshl d9, d1, d0
12162 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12164 __ movi(q9.V16B(), 0x55);
12165 __ dci(0x5f400409); // sshr d9, d0, #64
12166 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12168 __ movi(q9.V16B(), 0x55);
12169 __ dci(0x5f401409); // ssra d9, d0, #64
12170 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12172 __ movi(q9.V16B(), 0x55);
12173 __ dci(0x7ee08429); // sub d9, d1, d0
12174 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12176 __ movi(q9.V16B(), 0x55);
12177 __ dci(0x5ee03809); // suqadd d9, d0
12178 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12180 __ movi(q9.V16B(), 0x55);
12181 __ dci(0x7e61d809); // ucvtf d9, d0
12182 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12184 __ movi(q9.V16B(), 0x55);
12185 __ dci(0x7f40e409); // ucvtf d9, d0, #64
12186 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12188 __ movi(q9.V16B(), 0x55);
12189 __ dci(0x7ee00c29); // uqadd d9, d1, d0
12190 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12192 __ movi(q9.V16B(), 0x55);
12193 __ dci(0x7ee05c29); // uqrshl d9, d1, d0
12194 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12196 __ movi(q9.V16B(), 0x55);
12197 __ dci(0x7ee04c29); // uqshl d9, d1, d0
12198 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12200 __ movi(q9.V16B(), 0x55);
12201 __ dci(0x7f407409); // uqshl d9, d0, #0
12202 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12204 __ movi(q9.V16B(), 0x55);
12205 __ dci(0x7ee02c29); // uqsub d9, d1, d0
12206 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12208 __ movi(q9.V16B(), 0x55);
12209 __ dci(0x7ee05429); // urshl d9, d1, d0
12210 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12212 __ movi(q9.V16B(), 0x55);
12213 __ dci(0x7f402409); // urshr d9, d0, #64
12214 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12216 __ movi(q9.V16B(), 0x55);
12217 __ dci(0x7f403409); // ursra d9, d0, #64
12218 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12220 __ movi(q9.V16B(), 0x55);
12221 __ dci(0x7ee04429); // ushl d9, d1, d0
12222 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12224 __ movi(q9.V16B(), 0x55);
12225 __ dci(0x7f400409); // ushr d9, d0, #64
12226 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12228 __ movi(q9.V16B(), 0x55);
12229 __ dci(0x7ee03809); // usqadd d9, d0
12230 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12232 __ movi(q9.V16B(), 0x55);
12233 __ dci(0x7f401409); // usra d9, d0, #64
12234 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12236 __ Sub(x1, x1, 1);
12237 __ Cbnz(x1, &loop);
12239 __ Ins(q30.V2D(), 0, xzr);