Lines Matching refs:__

38 #define __ assm.
108 __ movprfx(z0.VnB(), p0.Merging(), z9.VnB());
109 __ abs(z0.VnB(), p0.Merging(), z0.VnB());
111 __ movprfx(z1, z17);
112 __ add(z1.VnH(), p2.Merging(), z1.VnH(), z1.VnH());
114 __ movprfx(z12, z13);
115 __ and_(z12.VnD(), p5.Merging(), z12.VnD(), z12.VnD());
117 __ movprfx(z2, z4);
118 __ asr(z2.VnS(), p2.Merging(), z2.VnS(), z2.VnS());
120 __ movprfx(z10, z18);
121 __ asr(z10.VnH(), p2.Merging(), z10.VnH(), z10.VnD());
123 __ movprfx(z17.VnD(), p5.Zeroing(), z20.VnD());
124 __ asr(z17.VnD(), p5.Merging(), z17.VnD(), z17.VnD());
126 __ movprfx(z22, z9);
127 __ asrr(z22.VnH(), p1.Merging(), z22.VnH(), z22.VnH());
129 __ movprfx(z0.VnS(), p6.Zeroing(), z6.VnS());
130 __ bic(z0.VnS(), p6.Merging(), z0.VnS(), z0.VnS());
132 __ movprfx(z12, z16);
133 __ clasta(z12.VnD(), p5, z12.VnD(), z12.VnD());
135 __ movprfx(z7, z15);
136 __ clastb(z7.VnS(), p7, z7.VnS(), z7.VnS());
138 __ movprfx(z10, z29);
139 __ cls(z10.VnH(), p2.Merging(), z10.VnH());
141 __ movprfx(z6, z13);
142 __ clz(z6.VnB(), p4.Merging(), z6.VnB());
144 __ movprfx(z14.VnS(), p6.Zeroing(), z3.VnS());
145 __ cnot(z14.VnS(), p6.Merging(), z14.VnS());
147 __ movprfx(z5.VnD(), p6.Merging(), z4.VnD());
148 __ cnt(z5.VnD(), p6.Merging(), z5.VnD());
150 __ movprfx(z19.VnB(), p6.Zeroing(), z4.VnB());
151 __ eor(z19.VnB(), p6.Merging(), z19.VnB(), z19.VnB());
153 __ movprfx(z27, z2);
154 __ ext(z27.VnB(), z27.VnB(), z27.VnB(), 42);
156 __ movprfx(z4.VnS(), p1.Zeroing(), z22.VnS());
157 __ lsl(z4.VnS(), p1.Merging(), z4.VnS(), z4.VnS());
159 __ movprfx(z4, z5);
160 __ lsl(z4.VnB(), p5.Merging(), z4.VnB(), z4.VnD());
162 __ movprfx(z11.VnD(), p4.Merging(), z29.VnD());
163 __ lsl(z11.VnD(), p4.Merging(), z11.VnD(), z11.VnD());
165 __ movprfx(z12.VnD(), p6.Merging(), z3.VnD());
166 __ lslr(z12.VnD(), p6.Merging(), z12.VnD(), z12.VnD());
168 __ movprfx(z7, z2);
169 __ lsr(z7.VnB(), p4.Merging(), z7.VnB(), z7.VnB());
171 __ movprfx(z25.VnH(), p6.Merging(), z28.VnH());
172 __ lsr(z25.VnH(), p6.Merging(), z25.VnH(), z25.VnD());
174 __ movprfx(z14.VnD(), p6.Merging(), z6.VnD());
175 __ lsr(z14.VnD(), p6.Merging(), z14.VnD(), z14.VnD());
177 __ movprfx(z26.VnH(), p6.Zeroing(), z27.VnH());
178 __ lsrr(z26.VnH(), p6.Merging(), z26.VnH(), z26.VnH());
180 __ movprfx(z17.VnS(), p4.Zeroing(), z29.VnS());
181 __ mad(z17.VnS(), p4.Merging(), z17.VnS(), z23.VnS());
183 __ movprfx(z7, z17);
184 __ mad(z7.VnD(), p5.Merging(), z4.VnD(), z7.VnD());
186 __ movprfx(z11, z7);
187 __ mla(z11.VnS(), p1.Merging(), z11.VnS(), z27.VnS());
189 __ movprfx(z7, z5);
190 __ mla(z7.VnH(), p0.Merging(), z5.VnH(), z7.VnH());
192 __ movprfx(z1.VnH(), p0.Merging(), z17.VnH());
193 __ mls(z1.VnH(), p0.Merging(), z1.VnH(), z31.VnH());
195 __ movprfx(z22.VnB(), p3.Merging(), z18.VnB());
196 __ mls(z22.VnB(), p3.Merging(), z18.VnB(), z22.VnB());
198 __ movprfx(z7.VnS(), p0.Merging(), z10.VnS());
199 __ msb(z7.VnS(), p0.Merging(), z7.VnS(), z10.VnS());
201 __ movprfx(z12, z6);
202 __ msb(z12.VnH(), p7.Merging(), z6.VnH(), z12.VnH());
204 __ movprfx(z8.VnB(), p4.Merging(), z3.VnB());
205 __ mul(z8.VnB(), p4.Merging(), z8.VnB(), z8.VnB());
207 __ movprfx(z9, z26);
208 __ neg(z9.VnS(), p7.Merging(), z9.VnS());
210 __ movprfx(z16, z8);
211 __ not_(z16.VnH(), p6.Merging(), z16.VnH());
213 __ movprfx(z25.VnH(), p5.Zeroing(), z11.VnH());
214 __ orr(z25.VnH(), p5.Merging(), z25.VnH(), z25.VnH());
216 __ movprfx(z17.VnH(), p1.Merging(), z22.VnH());
217 __ rbit(z17.VnH(), p1.Merging(), z17.VnH());
219 __ movprfx(z11, z25);
220 __ revb(z11.VnD(), p6.Merging(), z11.VnD());
222 __ movprfx(z13, z27);
223 __ revh(z13.VnS(), p2.Merging(), z13.VnS());
225 __ movprfx(z30.VnD(), p6.Merging(), z20.VnD());
226 __ revw(z30.VnD(), p6.Merging(), z30.VnD());
228 __ movprfx(z2.VnD(), p2.Merging(), z21.VnD());
229 __ sabd(z2.VnD(), p2.Merging(), z2.VnD(), z2.VnD());
231 __ movprfx(z0, z7);
232 __ sdiv(z0.VnD(), p0.Merging(), z0.VnD(), z0.VnD());
234 __ movprfx(z19, z28);
235 __ sdivr(z19.VnS(), p1.Merging(), z19.VnS(), z19.VnS());
237 __ movprfx(z5, z18);
238 __ sdot(z5.VnS(), z18.VnB(), z5.VnB(), 1);
240 __ movprfx(z15, z11);
241 __ sdot(z15.VnD(), z2.VnH(), z15.VnH(), 1);
243 __ movprfx(z30, z13);
244 __ sdot(z30.VnD(), z30.VnH(), z13.VnH(), 1);
246 __ movprfx(z8, z9);
247 __ sdot(z8.VnS(), z8.VnB(), z9.VnB());
249 __ movprfx(z23, z14);
250 __ sdot(z23.VnS(), z14.VnB(), z23.VnB());
252 __ movprfx(z26, z5);
253 __ sdot(z26.VnS(), z26.VnB(), z5.VnB(), 1);
255 __ movprfx(z14, z15);
256 __ smax(z14.VnB(), p2.Merging(), z14.VnB(), z14.VnB());
258 __ movprfx(z26.VnS(), p0.Merging(), z10.VnS());
259 __ smin(z26.VnS(), p0.Merging(), z26.VnS(), z26.VnS());
261 __ movprfx(z22, z18);
262 __ smulh(z22.VnB(), p2.Merging(), z22.VnB(), z22.VnB());
264 __ movprfx(z8, z19);
265 __ splice(z8.VnD(), p2, z8.VnD(), z8.VnD());
267 __ movprfx(z23.VnH(), p6.Zeroing(), z2.VnH());
268 __ sub(z23.VnH(), p6.Merging(), z23.VnH(), z23.VnH());
270 __ movprfx(z25.VnS(), p2.Merging(), z21.VnS());
271 __ subr(z25.VnS(), p2.Merging(), z25.VnS(), z25.VnS());
273 __ movprfx(z28, z31);
274 __ sxtb(z28.VnS(), p6.Merging(), z28.VnS());
276 __ movprfx(z14.VnD(), p6.Merging(), z17.VnD());
277 __ sxth(z14.VnD(), p6.Merging(), z14.VnD());
279 __ movprfx(z21.VnD(), p0.Zeroing(), z28.VnD());
280 __ sxtw(z21.VnD(), p0.Merging(), z21.VnD());
282 __ movprfx(z25, z30);
283 __ uabd(z25.VnB(), p5.Merging(), z25.VnB(), z25.VnB());
285 __ movprfx(z13.VnD(), p2.Merging(), z30.VnD());
286 __ udiv(z13.VnD(), p2.Merging(), z13.VnD(), z13.VnD());
288 __ movprfx(z19.VnD(), p4.Zeroing(), z6.VnD());
289 __ udivr(z19.VnD(), p4.Merging(), z19.VnD(), z19.VnD());
291 __ movprfx(z1, z20);
292 __ udot(z1.VnS(), z18.VnB(), z1.VnB(), 1);
294 __ movprfx(z8, z2);
295 __ udot(z8.VnD(), z2.VnH(), z8.VnH(), 1);
297 __ movprfx(z28, z10);
298 __ udot(z28.VnD(), z28.VnH(), z7.VnH(), 1);
300 __ movprfx(z21, z11);
301 __ udot(z21.VnD(), z21.VnH(), z11.VnH());
303 __ movprfx(z1, z22);
304 __ udot(z1.VnD(), z10.VnH(), z1.VnH());
306 __ movprfx(z8, z23);
307 __ udot(z8.VnS(), z8.VnB(), z0.VnB(), 1);
309 __ movprfx(z10.VnB(), p5.Zeroing(), z0.VnB());
310 __ umax(z10.VnB(), p5.Merging(), z10.VnB(), z10.VnB());
312 __ movprfx(z0.VnS(), p2.Zeroing(), z30.VnS());
313 __ umin(z0.VnS(), p2.Merging(), z0.VnS(), z0.VnS());
315 __ movprfx(z26.VnD(), p6.Zeroing(), z29.VnD());
316 __ umulh(z26.VnD(), p6.Merging(), z26.VnD(), z26.VnD());
318 __ movprfx(z23, z25);
319 __ uxtb(z23.VnS(), p7.Merging(), z23.VnS());
321 __ movprfx(z14.VnS(), p3.Zeroing(), z5.VnS());
322 __ uxth(z14.VnS(), p3.Merging(), z14.VnS());
324 __ movprfx(z14, z5);
325 __ uxtw(z14.VnD(), p3.Merging(), z14.VnD());
327 __ movprfx(z22, z5);
328 __ smmla(z22.VnS(), z22.VnB(), z0.VnB());
330 __ movprfx(z1, z5);
331 __ ummla(z1.VnS(), z10.VnB(), z1.VnB());
333 __ movprfx(z30, z5);
334 __ usmmla(z30.VnS(), z30.VnB(), z18.VnB());
336 __ movprfx(z4, z5);
337 __ usdot(z4.VnS(), z3.VnB(), z4.VnB());
339 __ movprfx(z10, z5);
340 __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0);
342 __ movprfx(z1, z5);
343 __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1);
363 __ movprfx(z17.VnS(), p1.Zeroing(), z12.VnS());
364 __ fabd(z17.VnS(), p1.Merging(), z17.VnS(), z17.VnS());
366 __ movprfx(z13, z23);
367 __ fabs(z13.VnS(), p4.Merging(), z13.VnS());
369 __ movprfx(z24.VnS(), p5.Merging(), z15.VnS());
370 __ fadd(z24.VnS(), p5.Merging(), z24.VnS(), z24.VnS());
372 __ movprfx(z28.VnD(), p5.Zeroing(), z14.VnD());
373 __ fcadd(z28.VnD(), p5.Merging(), z28.VnD(), z28.VnD(), 90);
375 __ movprfx(z5, z0);
376 __ fcmla(z5.VnH(), z0.VnH(), z5.VnH(), 2, 180);
378 __ movprfx(z10, z4);
379 __ fcmla(z10.VnS(), z8.VnS(), z10.VnS(), 1, 270);
381 __ movprfx(z12, z26);
382 __ fcmla(z12.VnH(), z12.VnH(), z3.VnH(), 2, 180);
384 __ movprfx(z8, z1);
385 __ fcmla(z8.VnS(), z8.VnS(), z1.VnS(), 1, 270);
387 __ movprfx(z16.VnD(), p0.Merging(), z13.VnD());
388 __ fcvt(z16.VnD(), p0.Merging(), z16.VnH());
390 __ movprfx(z12.VnD(), p7.Zeroing(), z13.VnD());
391 __ fcvt(z12.VnD(), p7.Merging(), z12.VnS());
393 __ movprfx(z14, z26);
394 __ fcvt(z14.VnS(), p5.Merging(), z14.VnD());
396 __ movprfx(z26, z2);
397 __ fcvt(z26.VnH(), p7.Merging(), z26.VnD());
399 __ movprfx(z25.VnD(), p2.Merging(), z13.VnD());
400 __ fcvtzs(z25.VnD(), p2.Merging(), z25.VnH());
402 __ movprfx(z31, z2);
403 __ fcvtzs(z31.VnH(), p7.Merging(), z31.VnH());
405 __ movprfx(z21.VnD(), p1.Merging(), z7.VnD());
406 __ fcvtzs(z21.VnD(), p1.Merging(), z21.VnS());
408 __ movprfx(z5, z17);
409 __ fcvtzs(z5.VnS(), p5.Merging(), z5.VnD());
411 __ movprfx(z19.VnD(), p1.Zeroing(), z16.VnD());
412 __ fcvtzu(z19.VnD(), p1.Merging(), z19.VnH());
414 __ movprfx(z2.VnH(), p7.Zeroing(), z28.VnH());
415 __ fcvtzu(z2.VnH(), p7.Merging(), z2.VnH());
417 __ movprfx(z21.VnD(), p7.Zeroing(), z27.VnD());
418 __ fcvtzu(z21.VnD(), p7.Merging(), z21.VnS());
420 __ movprfx(z22.VnD(), p4.Zeroing(), z8.VnD());
421 __ fcvtzu(z22.VnS(), p4.Merging(), z22.VnD());
423 __ movprfx(z0.VnS(), p5.Merging(), z5.VnS());
424 __ fdiv(z0.VnS(), p5.Merging(), z0.VnS(), z0.VnS());
426 __ movprfx(z12, z24);
427 __ fdivr(z12.VnD(), p7.Merging(), z12.VnD(), z12.VnD());
429 __ movprfx(z14.VnD(), p6.Zeroing(), z21.VnD());
430 __ fmad(z14.VnD(), p6.Merging(), z14.VnD(), z3.VnD());
432 __ movprfx(z2.VnS(), p5.Zeroing(), z10.VnS());
433 __ fmad(z2.VnS(), p5.Merging(), z14.VnS(), z2.VnS());
435 __ movprfx(z24, z5);
436 __ fmax(z24.VnS(), p1.Merging(), z24.VnS(), z24.VnS());
438 __ movprfx(z15.VnD(), p2.Merging(), z26.VnD());
439 __ fmaxnm(z15.VnD(), p2.Merging(), z15.VnD(), z15.VnD());
441 __ movprfx(z20, z22);
442 __ fmin(z20.VnH(), p0.Merging(), z20.VnH(), z20.VnH());
444 __ movprfx(z24.VnS(), p6.Zeroing(), z30.VnS());
445 __ fminnm(z24.VnS(), p6.Merging(), z24.VnS(), z24.VnS());
447 __ movprfx(z4, z24);
448 __ fmla(z4.VnH(), z24.VnH(), z4.VnH(), 7);
450 __ movprfx(z4, z7);
451 __ fmla(z4.VnS(), z24.VnS(), z4.VnS(), 3);
453 __ movprfx(z5, z28);
454 __ fmla(z5.VnD(), z28.VnD(), z5.VnD(), 1);
456 __ movprfx(z24, z2);
457 __ fmla(z24.VnD(), z24.VnD(), z2.VnD(), 1);
459 __ movprfx(z7, z21);
460 __ fmla(z7.VnH(), p2.Merging(), z7.VnH(), z31.VnH());
462 __ movprfx(z25.VnH(), p5.Zeroing(), z29.VnH());
463 __ fmla(z25.VnH(), p5.Merging(), z29.VnH(), z25.VnH());
465 __ movprfx(z31, z25);
466 __ fmla(z31.VnH(), z31.VnH(), z2.VnH(), 7);
468 __ movprfx(z15, z4);
469 __ fmla(z15.VnS(), z15.VnS(), z4.VnS(), 3);
471 __ movprfx(z7, z11);
472 __ fmls(z7.VnH(), z11.VnH(), z7.VnH(), 4);
474 __ movprfx(z3, z10);
475 __ fmls(z3.VnS(), z10.VnS(), z3.VnS(), 3);
477 __ movprfx(z5, z16);
478 __ fmls(z5.VnD(), z16.VnD(), z5.VnD(), 1);
480 __ movprfx(z31, z26);
481 __ fmls(z31.VnD(), z31.VnD(), z8.VnD(), 1);
483 __ movprfx(z5.VnH(), p3.Merging(), z2.VnH());
484 __ fmls(z5.VnH(), p3.Merging(), z5.VnH(), z2.VnH());
486 __ movprfx(z22.VnS(), p3.Zeroing(), z17.VnS());
487 __ fmls(z22.VnS(), p3.Merging(), z21.VnS(), z22.VnS());
489 __ movprfx(z17, z2);
490 __ fmls(z17.VnH(), z17.VnH(), z2.VnH(), 4);
492 __ movprfx(z28, z11);
493 __ fmls(z28.VnS(), z28.VnS(), z0.VnS(), 3);
495 __ movprfx(z15.VnD(), p1.Merging(), z31.VnD());
496 __ fmsb(z15.VnD(), p1.Merging(), z15.VnD(), z31.VnD());
498 __ movprfx(z21.VnD(), p0.Zeroing(), z5.VnD());
499 __ fmsb(z21.VnD(), p0.Merging(), z19.VnD(), z21.VnD());
501 __ movprfx(z0.VnH(), p3.Merging(), z31.VnH());
502 __ fmul(z0.VnH(), p3.Merging(), z0.VnH(), z0.VnH());
504 __ movprfx(z31.VnH(), p6.Merging(), z8.VnH());
505 __ fmulx(z31.VnH(), p6.Merging(), z31.VnH(), z31.VnH());
507 __ movprfx(z17.VnH(), p1.Zeroing(), z10.VnH());
508 __ fneg(z17.VnH(), p1.Merging(), z17.VnH());
510 __ movprfx(z22, z31);
511 __ fnmad(z22.VnH(), p1.Merging(), z22.VnH(), z23.VnH());
513 __ movprfx(z14.VnD(), p0.Zeroing(), z26.VnD());
514 __ fnmad(z14.VnD(), p0.Merging(), z2.VnD(), z14.VnD());
516 __ movprfx(z13.VnH(), p6.Zeroing(), z29.VnH());
517 __ fnmla(z13.VnH(), p6.Merging(), z13.VnH(), z26.VnH());
519 __ movprfx(z19.VnH(), p7.Zeroing(), z25.VnH());
520 __ fnmla(z19.VnH(), p7.Merging(), z25.VnH(), z19.VnH());
522 __ movprfx(z27.VnH(), p5.Merging(), z24.VnH());
523 __ fnmls(z27.VnH(), p5.Merging(), z27.VnH(), z24.VnH());
525 __ movprfx(z6.VnH(), p6.Zeroing(), z21.VnH());
526 __ fnmls(z6.VnH(), p6.Merging(), z21.VnH(), z6.VnH());
528 __ movprfx(z7.VnS(), p3.Merging(), z23.VnS());
529 __ fnmsb(z7.VnS(), p3.Merging(), z7.VnS(), z23.VnS());
531 __ movprfx(z29.VnH(), p2.Zeroing(), z24.VnH());
532 __ fnmsb(z29.VnH(), p2.Merging(), z24.VnH(), z29.VnH());
534 __ movprfx(z7.VnH(), p6.Merging(), z23.VnH());
535 __ frecpx(z7.VnH(), p6.Merging(), z7.VnH());
537 __ movprfx(z17.VnS(), p5.Zeroing(), z2.VnS());
538 __ frinta(z17.VnS(), p5.Merging(), z17.VnS());
540 __ movprfx(z0.VnS(), p2.Zeroing(), z7.VnS());
541 __ frinti(z0.VnS(), p2.Merging(), z0.VnS());
543 __ movprfx(z8.VnH(), p3.Merging(), z20.VnH());
544 __ frintm(z8.VnH(), p3.Merging(), z8.VnH());
546 __ movprfx(z3.VnD(), p2.Zeroing(), z20.VnD());
547 __ frintn(z3.VnD(), p2.Merging(), z3.VnD());
549 __ movprfx(z11, z3);
550 __ frintp(z11.VnS(), p4.Merging(), z11.VnS());
552 __ movprfx(z23, z29);
553 __ frintx(z23.VnD(), p4.Merging(), z23.VnD());
555 __ movprfx(z4.VnH(), p4.Zeroing(), z14.VnH());
556 __ frintz(z4.VnH(), p4.Merging(), z4.VnH());
558 __ movprfx(z18.VnH(), p3.Zeroing(), z0.VnH());
559 __ fscale(z18.VnH(), p3.Merging(), z18.VnH(), z18.VnH());
561 __ movprfx(z2.VnS(), p6.Zeroing(), z4.VnS());
562 __ fsqrt(z2.VnS(), p6.Merging(), z2.VnS());
564 __ movprfx(z14.VnD(), p4.Zeroing(), z31.VnD());
565 __ fsub(z14.VnD(), p4.Merging(), z14.VnD(), z14.VnD());
567 __ movprfx(z31.VnH(), p2.Merging(), z6.VnH());
568 __ fsubr(z31.VnH(), p2.Merging(), z31.VnH(), z31.VnH());
570 __ movprfx(z4, z30);
571 __ ftmad(z4.VnH(), z4.VnH(), z4.VnH(), 2);
573 __ movprfx(z25.VnD(), p6.Zeroing(), z2.VnD());
574 __ scvtf(z25.VnD(), p6.Merging(), z25.VnS());
576 __ movprfx(z0.VnD(), p3.Merging(), z16.VnD());
577 __ scvtf(z0.VnD(), p3.Merging(), z0.VnD());
579 __ movprfx(z19, z23);
580 __ scvtf(z19.VnS(), p7.Merging(), z19.VnD());
582 __ movprfx(z19, z4);
583 __ scvtf(z19.VnH(), p4.Merging(), z19.VnD());
585 __ movprfx(z13.VnD(), p4.Zeroing(), z6.VnD());
586 __ ucvtf(z13.VnD(), p4.Merging(), z13.VnS());
588 __ movprfx(z6.VnH(), p0.Zeroing(), z14.VnH());
589 __ ucvtf(z6.VnH(), p0.Merging(), z6.VnH());
591 __ movprfx(z19.VnS(), p4.Merging(), z12.VnS());
592 __ ucvtf(z19.VnH(), p4.Merging(), z19.VnS());
594 __ movprfx(z0.VnD(), p5.Zeroing(), z12.VnD());
595 __ ucvtf(z0.VnH(), p5.Merging(), z0.VnD());
597 __ movprfx(z30, z5);
598 __ fmmla(z30.VnS(), z30.VnS(), z18.VnS());
600 __ movprfx(z31, z5);
601 __ fmmla(z31.VnD(), z31.VnD(), z18.VnD());
617 __ movprfx(z26, z11);
618 __ add(z26.VnB(), z11.VnB(), z4.VnB());
621 __ movprfx(z29.VnB(), p3.Zeroing(), z7.VnB());
622 __ cpy(z29.VnB(), p3.Zeroing(), -42);
625 __ movprfx(z13, z15);
626 __ frecpe(z13.VnD(), z26.VnD());
628 __ movprfx(z19, z1);
629 __ frecps(z19.VnD(), z1.VnD(), z12.VnD());
631 __ movprfx(z6, z12);
632 __ frsqrte(z6.VnS(), z12.VnS());
634 __ movprfx(z29, z5);
635 __ frsqrts(z29.VnH(), z5.VnH(), z20.VnH());
638 __ movprfx(z1, z31);
639 __ ftsmul(z1.VnD(), z31.VnD(), z16.VnD());
641 __ movprfx(z8, z27);
642 __ ftssel(z8.VnH(), z27.VnH(), z1.VnH());
646 __ movprfx(z0, z18);
647 __ mov(z0.VnS(), p6.Merging(), z18.VnS());
650 __ movprfx(z12.VnS(), p2.Merging(), z11.VnS());
651 __ mov(z12.VnS(), p2.Zeroing(), -42);
653 __ movprfx(z13, z6);
654 __ movprfx(z13, z2);
657 __ movprfx(z3.VnD(), p5.Zeroing(), z8.VnD());
658 __ movprfx(z3.VnD(), p5.Merging(), z8.VnD());
660 __ movprfx(z1.VnD(), p3.Zeroing(), z14.VnD());
661 __ movprfx(z1.VnD(), p3.Zeroing(), z18.VnD());
679 __ movprfx(z0.VnH(), p2.Zeroing(), z17.VnH());
680 __ abs(z0.VnS(), p2.Merging(), z17.VnS());
682 __ movprfx(z10.VnD(), p0.Zeroing(), z4.VnD());
683 __ add(z10.VnS(), p0.Merging(), z10.VnS(), z2.VnS());
685 __ movprfx(z25.VnS(), p4.Zeroing(), z26.VnS());
686 __ and_(z25.VnB(), p4.Merging(), z25.VnB(), z27.VnB());
688 __ movprfx(z26.VnD(), p5.Merging(), z23.VnD());
689 __ asr(z26.VnB(), p5.Merging(), z26.VnB(), 3);
691 __ movprfx(z25.VnS(), p7.Zeroing(), z14.VnS());
692 __ asr(z25.VnH(), p7.Merging(), z25.VnH(), z14.VnH());
694 __ movprfx(z12.VnS(), p7.Zeroing(), z23.VnS());
695 __ asr(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnD());
697 __ movprfx(z3.VnH(), p4.Zeroing(), z18.VnH());
698 __ asr(z3.VnD(), p4.Merging(), z3.VnD(), z15.VnD());
700 __ movprfx(z29.VnH(), p4.Merging(), z31.VnH());
701 __ asrd(z29.VnB(), p4.Merging(), z29.VnB(), 3);
703 __ movprfx(z31.VnH(), p5.Zeroing(), z14.VnH());
704 __ asrr(z31.VnB(), p5.Merging(), z31.VnB(), z5.VnB());
706 __ movprfx(z0.VnS(), p6.Zeroing(), z18.VnS());
707 __ bic(z0.VnB(), p6.Merging(), z0.VnB(), z23.VnB());
709 __ movprfx(z19.VnH(), p2.Zeroing(), z24.VnH());
710 __ cls(z19.VnB(), p2.Merging(), z24.VnB());
712 __ movprfx(z14.VnS(), p5.Zeroing(), z4.VnS());
713 __ clz(z14.VnD(), p5.Merging(), z10.VnD());
715 __ movprfx(z0.VnD(), p5.Merging(), z2.VnD());
716 __ cnot(z0.VnH(), p5.Merging(), z2.VnH());
718 __ movprfx(z0.VnB(), p3.Zeroing(), z19.VnB());
719 __ cnt(z0.VnH(), p3.Merging(), z8.VnH());
721 __ movprfx(z29.VnS(), p0.Merging(), z7.VnS());
722 __ cpy(z29.VnD(), p0.Merging(), -42);
724 __ movprfx(z13.VnB(), p2.Merging(), z31.VnB());
725 __ cpy(z13.VnS(), p2.Merging(), w13);
727 __ movprfx(z0.VnS(), p3.Merging(), z15.VnS());
728 __ cpy(z0.VnH(), p3.Merging(), h0);
730 __ movprfx(z2.VnD(), p6.Zeroing(), z26.VnD());
731 __ eor(z2.VnB(), p6.Merging(), z2.VnB(), z26.VnB());
733 __ movprfx(z7.VnS(), p7.Zeroing(), z30.VnS());
734 __ lsl(z7.VnD(), p7.Merging(), z7.VnD(), 3);
736 __ movprfx(z11.VnH(), p3.Merging(), z23.VnH());
737 __ lsl(z11.VnB(), p3.Merging(), z11.VnB(), z21.VnB());
739 __ movprfx(z31.VnS(), p7.Zeroing(), z21.VnS());
740 __ lsl(z31.VnH(), p7.Merging(), z31.VnH(), z21.VnD());
742 __ movprfx(z26.VnH(), p0.Merging(), z0.VnH());
743 __ lsl(z26.VnD(), p0.Merging(), z26.VnD(), z24.VnD());
745 __ movprfx(z1.VnS(), p2.Zeroing(), z6.VnS());
746 __ lslr(z1.VnB(), p2.Merging(), z1.VnB(), z6.VnB());
748 __ movprfx(z4.VnD(), p4.Zeroing(), z6.VnD());
749 __ lsr(z4.VnH(), p4.Merging(), z4.VnH(), 3);
751 __ movprfx(z27.VnH(), p0.Zeroing(), z29.VnH());
752 __ lsr(z27.VnS(), p0.Merging(), z27.VnS(), z29.VnS());
754 __ movprfx(z5.VnD(), p2.Zeroing(), z16.VnD());
755 __ lsr(z5.VnH(), p2.Merging(), z5.VnH(), z2.VnD());
757 __ movprfx(z27.VnB(), p4.Zeroing(), z5.VnB());
758 __ lsr(z27.VnD(), p4.Merging(), z27.VnD(), z5.VnD());
760 __ movprfx(z27.VnS(), p3.Merging(), z13.VnS());
761 __ lsrr(z27.VnD(), p3.Merging(), z27.VnD(), z13.VnD());
763 __ movprfx(z30.VnS(), p2.Zeroing(), z14.VnS());
764 __ mad(z30.VnB(), p2.Merging(), z20.VnB(), z14.VnB());
766 __ movprfx(z14.VnB(), p6.Merging(), z11.VnB());
767 __ mla(z14.VnD(), p6.Merging(), z28.VnD(), z11.VnD());
769 __ movprfx(z28.VnH(), p2.Zeroing(), z22.VnH());
770 __ mls(z28.VnS(), p2.Merging(), z3.VnS(), z22.VnS());
773 __ movprfx(z18.VnH(), p6.Zeroing(), z25.VnH());
774 __ mov(z18.VnD(), p6.Merging(), -42);
776 __ movprfx(z22.VnD(), p2.Zeroing(), z6.VnD());
777 __ mov(z22.VnS(), p2.Merging(), w22);
779 __ movprfx(z3.VnH(), p0.Zeroing(), z13.VnH());
780 __ mov(z3.VnB(), p0.Merging(), b0);
782 __ movprfx(z31.VnS(), p7.Zeroing(), z12.VnS());
783 __ msb(z31.VnH(), p7.Merging(), z14.VnH(), z12.VnH());
785 __ movprfx(z16.VnS(), p7.Zeroing(), z6.VnS());
786 __ mul(z16.VnB(), p7.Merging(), z16.VnB(), z30.VnB());
788 __ movprfx(z17.VnD(), p7.Merging(), z1.VnD());
789 __ neg(z17.VnB(), p7.Merging(), z1.VnB());
791 __ movprfx(z31.VnH(), p4.Zeroing(), z12.VnH());
792 __ not_(z31.VnB(), p4.Merging(), z12.VnB());
794 __ movprfx(z9.VnH(), p3.Zeroing(), z23.VnH());
795 __ orr(z9.VnS(), p3.Merging(), z9.VnS(), z13.VnS());
797 __ movprfx(z25.VnD(), p2.Zeroing(), z21.VnD());
798 __ rbit(z25.VnS(), p2.Merging(), z21.VnS());
800 __ movprfx(z26.VnH(), p3.Merging(), z13.VnH());
801 __ revb(z26.VnD(), p3.Merging(), z13.VnD());
803 __ movprfx(z8.VnH(), p5.Merging(), z20.VnH());
804 __ revh(z8.VnS(), p5.Merging(), z0.VnS());
806 __ movprfx(z22.VnH(), p6.Merging(), z15.VnH());
807 __ revw(z22.VnD(), p6.Merging(), z10.VnD());
809 __ movprfx(z1.VnD(), p3.Merging(), z15.VnD());
810 __ sabd(z1.VnB(), p3.Merging(), z1.VnB(), z15.VnB());
812 __ movprfx(z25.VnD(), p1.Zeroing(), z30.VnD());
813 __ sdiv(z25.VnS(), p1.Merging(), z25.VnS(), z30.VnS());
815 __ movprfx(z19.VnS(), p3.Zeroing(), z11.VnS());
816 __ sdivr(z19.VnD(), p3.Merging(), z19.VnD(), z24.VnD());
818 __ movprfx(z12.VnH(), p2.Merging(), z2.VnH());
819 __ smax(z12.VnS(), p2.Merging(), z12.VnS(), z24.VnS());
821 __ movprfx(z3.VnD(), p1.Merging(), z15.VnD());
822 __ smin(z3.VnS(), p1.Merging(), z3.VnS(), z20.VnS());
824 __ movprfx(z13.VnS(), p5.Merging(), z22.VnS());
825 __ smulh(z13.VnB(), p5.Merging(), z13.VnB(), z27.VnB());
827 __ movprfx(z11.VnH(), p5.Zeroing(), z25.VnH());
828 __ sub(z11.VnB(), p5.Merging(), z11.VnB(), z7.VnB());
830 __ movprfx(z3.VnB(), p6.Merging(), z13.VnB());
831 __ subr(z3.VnS(), p6.Merging(), z3.VnS(), z13.VnS());
833 __ movprfx(z26.VnH(), p5.Merging(), z1.VnH());
834 __ sxtb(z26.VnS(), p5.Merging(), z17.VnS());
836 __ movprfx(z11.VnB(), p7.Zeroing(), z26.VnB());
837 __ sxth(z11.VnS(), p7.Merging(), z26.VnS());
839 __ movprfx(z1.VnS(), p2.Merging(), z21.VnS());
840 __ sxtw(z1.VnD(), p2.Merging(), z21.VnD());
842 __ movprfx(z4.VnS(), p6.Zeroing(), z6.VnS());
843 __ uabd(z4.VnH(), p6.Merging(), z4.VnH(), z6.VnH());
845 __ movprfx(z26.VnB(), p2.Zeroing(), z11.VnB());
846 __ udiv(z26.VnD(), p2.Merging(), z26.VnD(), z11.VnD());
848 __ movprfx(z19.VnB(), p5.Merging(), z6.VnB());
849 __ udivr(z19.VnS(), p5.Merging(), z19.VnS(), z9.VnS());
851 __ movprfx(z16.VnB(), p4.Merging(), z6.VnB());
852 __ umax(z16.VnH(), p4.Merging(), z16.VnH(), z6.VnH());
854 __ movprfx(z1.VnD(), p0.Zeroing(), z4.VnD());
855 __ umin(z1.VnS(), p0.Merging(), z1.VnS(), z28.VnS());
857 __ movprfx(z25.VnD(), p7.Merging(), z4.VnD());
858 __ umulh(z25.VnB(), p7.Merging(), z25.VnB(), z16.VnB());
860 __ movprfx(z29.VnB(), p4.Merging(), z2.VnB());
861 __ uxtb(z29.VnS(), p4.Merging(), z31.VnS());
863 __ movprfx(z27.VnH(), p5.Merging(), z21.VnH());
864 __ uxth(z27.VnD(), p5.Merging(), z1.VnD());
866 __ movprfx(z29.VnB(), p2.Merging(), z7.VnB());
867 __ uxtw(z29.VnD(), p2.Merging(), z7.VnD());
885 __ movprfx(z29.VnD(), p5.Zeroing(), z8.VnD());
886 __ fabd(z29.VnS(), p5.Merging(), z29.VnS(), z26.VnS());
888 __ movprfx(z9.VnB(), p0.Zeroing(), z1.VnB());
889 __ fabs(z9.VnS(), p0.Merging(), z15.VnS());
891 __ movprfx(z24.VnD(), p0.Zeroing(), z8.VnD());
892 __ fadd(z24.VnH(), p0.Merging(), z24.VnH(), 0.5);
894 __ movprfx(z24.VnB(), p1.Zeroing(), z27.VnB());
895 __ fadd(z24.VnH(), p1.Merging(), z24.VnH(), z27.VnH());
897 __ movprfx(z14.VnH(), p7.Merging(), z12.VnH());
898 __ fcadd(z14.VnD(), p7.Merging(), z14.VnD(), z12.VnD(), 90);
900 __ movprfx(z10.VnB(), p6.Merging(), z11.VnB());
901 __ fcpy(z10.VnH(), p6.Merging(), 1.25);
903 __ movprfx(z12.VnB(), p6.Merging(), z18.VnB());
904 __ fcvt(z12.VnD(), p6.Merging(), z18.VnH());
906 __ movprfx(z18.VnH(), p7.Zeroing(), z2.VnH());
907 __ fcvt(z18.VnD(), p7.Merging(), z0.VnS());
909 __ movprfx(z3.VnH(), p5.Merging(), z14.VnH());
910 __ fcvt(z3.VnS(), p5.Merging(), z21.VnD());
912 __ movprfx(z15.VnH(), p1.Zeroing(), z12.VnH());
913 __ fcvt(z15.VnH(), p1.Merging(), z12.VnD());
915 __ movprfx(z3.VnH(), p2.Merging(), z22.VnH());
916 __ fcvtzs(z3.VnD(), p2.Merging(), z7.VnH());
918 __ movprfx(z17.VnS(), p3.Merging(), z14.VnS());
919 __ fcvtzs(z17.VnD(), p3.Merging(), z14.VnD());
921 __ movprfx(z2.VnH(), p1.Zeroing(), z16.VnH());
922 __ fcvtzs(z2.VnS(), p1.Merging(), z31.VnH());
924 __ movprfx(z13.VnB(), p2.Merging(), z9.VnB());
925 __ fcvtzs(z13.VnS(), p2.Merging(), z23.VnD());
927 __ movprfx(z19.VnB(), p1.Merging(), z4.VnB());
928 __ fcvtzu(z19.VnD(), p1.Merging(), z14.VnH());
930 __ movprfx(z29.VnS(), p2.Merging(), z19.VnS());
931 __ fcvtzu(z29.VnD(), p2.Merging(), z19.VnD());
933 __ movprfx(z21.VnS(), p4.Zeroing(), z17.VnS());
934 __ fcvtzu(z21.VnD(), p4.Merging(), z17.VnS());
936 __ movprfx(z19.VnH(), p4.Zeroing(), z30.VnH());
937 __ fcvtzu(z19.VnS(), p4.Merging(), z16.VnD());
939 __ movprfx(z10.VnS(), p7.Zeroing(), z27.VnS());
940 __ fdiv(z10.VnH(), p7.Merging(), z10.VnH(), z27.VnH());
942 __ movprfx(z7.VnD(), p7.Zeroing(), z17.VnD());
943 __ fdivr(z7.VnH(), p7.Merging(), z7.VnH(), z28.VnH());
945 __ movprfx(z22.VnB(), p0.Merging(), z27.VnB());
946 __ fmad(z22.VnH(), p0.Merging(), z27.VnH(), z15.VnH());
948 __ movprfx(z14.VnD(), p1.Zeroing(), z11.VnD());
949 __ fmax(z14.VnS(), p1.Merging(), z14.VnS(), 0.0);
951 __ movprfx(z27.VnB(), p5.Merging(), z14.VnB());
952 __ fmax(z27.VnD(), p5.Merging(), z27.VnD(), z14.VnD());
954 __ movprfx(z31.VnH(), p7.Merging(), z24.VnH());
955 __ fmaxnm(z31.VnD(), p7.Merging(), z31.VnD(), 0.0);
957 __ movprfx(z11.VnD(), p7.Zeroing(), z25.VnD());
958 __ fmaxnm(z11.VnS(), p7.Merging(), z11.VnS(), z28.VnS());
960 __ movprfx(z31.VnD(), p6.Merging(), z19.VnD());
961 __ fmin(z31.VnH(), p6.Merging(), z31.VnH(), 0.0);
963 __ movprfx(z20.VnS(), p3.Zeroing(), z15.VnS());
964 __ fmin(z20.VnH(), p3.Merging(), z20.VnH(), z8.VnH());
966 __ movprfx(z6.VnS(), p0.Merging(), z30.VnS());
967 __ fminnm(z6.VnH(), p0.Merging(), z6.VnH(), 0.0);
969 __ movprfx(z1.VnH(), p1.Zeroing(), z14.VnH());
970 __ fminnm(z1.VnS(), p1.Merging(), z1.VnS(), z14.VnS());
972 __ movprfx(z13.VnB(), p3.Zeroing(), z21.VnB());
973 __ fmla(z13.VnD(), p3.Merging(), z12.VnD(), z21.VnD());
975 __ movprfx(z15.VnS(), p1.Zeroing(), z20.VnS());
976 __ fmls(z15.VnH(), p1.Merging(), z28.VnH(), z20.VnH());
978 __ movprfx(z19.VnD(), p3.Zeroing(), z31.VnD());
979 __ fmov(z19.VnH(), p3.Merging(), 0.0);
981 __ movprfx(z16.VnS(), p7.Merging(), z30.VnS());
982 __ fmov(z16.VnH(), p7.Merging(), 2.5);
984 __ movprfx(z21.VnB(), p1.Merging(), z28.VnB());
985 __ fmsb(z21.VnH(), p1.Merging(), z30.VnH(), z28.VnH());
987 __ movprfx(z21.VnS(), p1.Zeroing(), z19.VnS());
988 __ fmul(z21.VnH(), p1.Merging(), z21.VnH(), 2.0);
990 __ movprfx(z28.VnB(), p7.Zeroing(), z8.VnB());
991 __ fmul(z28.VnS(), p7.Merging(), z28.VnS(), z26.VnS());
993 __ movprfx(z2.VnB(), p4.Merging(), z31.VnB());
994 __ fmulx(z2.VnH(), p4.Merging(), z2.VnH(), z31.VnH());
996 __ movprfx(z6.VnB(), p2.Zeroing(), z0.VnB());
997 __ fneg(z6.VnS(), p2.Merging(), z28.VnS());
999 __ movprfx(z26.VnB(), p0.Zeroing(), z21.VnB());
1000 __ fnmad(z26.VnH(), p0.Merging(), z21.VnH(), z18.VnH());
1002 __ movprfx(z15.VnB(), p1.Zeroing(), z26.VnB());
1003 __ fnmla(z15.VnH(), p1.Merging(), z26.VnH(), z18.VnH());
1005 __ movprfx(z16.VnS(), p0.Merging(), z1.VnS());
1006 __ fnmls(z16.VnD(), p0.Merging(), z1.VnD(), z13.VnD());
1008 __ movprfx(z4.VnH(), p0.Zeroing(), z16.VnH());
1009 __ fnmsb(z4.VnS(), p0.Merging(), z30.VnS(), z3.VnS());
1012 __ movprfx(z9.VnH(), p0.Zeroing(), z21.VnH());
1013 __ frecpx(z9.VnS(), p0.Merging(), z14.VnS());
1015 __ movprfx(z6.VnH(), p2.Zeroing(), z28.VnH());
1016 __ frinta(z6.VnD(), p2.Merging(), z28.VnD());
1018 __ movprfx(z12.VnS(), p4.Zeroing(), z7.VnS());
1019 __ frinti(z12.VnH(), p4.Merging(), z7.VnH());
1021 __ movprfx(z6.VnB(), p5.Merging(), z20.VnB());
1022 __ frintm(z6.VnD(), p5.Merging(), z20.VnD());
1024 __ movprfx(z7.VnB(), p6.Merging(), z19.VnB());
1025 __ frintn(z7.VnH(), p6.Merging(), z11.VnH());
1027 __ movprfx(z12.VnD(), p2.Merging(), z31.VnD());
1028 __ frintp(z12.VnS(), p2.Merging(), z31.VnS());
1030 __ movprfx(z1.VnS(), p5.Merging(), z10.VnS());
1031 __ frintx(z1.VnD(), p5.Merging(), z0.VnD());
1033 __ movprfx(z6.VnH(), p0.Merging(), z12.VnH());
1034 __ frintz(z6.VnS(), p0.Merging(), z7.VnS());
1036 __ movprfx(z8.VnH(), p2.Merging(), z6.VnH());
1037 __ fscale(z8.VnD(), p2.Merging(), z8.VnD(), z6.VnD());
1039 __ movprfx(z20.VnH(), p2.Zeroing(), z2.VnH());
1040 __ fsqrt(z20.VnD(), p2.Merging(), z15.VnD());
1042 __ movprfx(z28.VnS(), p6.Zeroing(), z19.VnS());
1043 __ fsub(z28.VnD(), p6.Merging(), z28.VnD(), 1.0);
1045 __ movprfx(z6.VnB(), p0.Zeroing(), z12.VnB());
1046 __ fsub(z6.VnD(), p0.Merging(), z6.VnD(), z20.VnD());
1048 __ movprfx(z6.VnS(), p7.Zeroing(), z11.VnS());
1049 __ fsubr(z6.VnH(), p7.Merging(), z6.VnH(), 1.0);
1051 __ movprfx(z28.VnB(), p3.Merging(), z10.VnB());
1052 __ fsubr(z28.VnS(), p3.Merging(), z28.VnS(), z9.VnS());
1054 __ movprfx(z22.VnB(), p3.Zeroing(), z14.VnB());
1055 __ scvtf(z22.VnD(), p3.Merging(), z24.VnS());
1057 __ movprfx(z20.VnS(), p2.Merging(), z9.VnS());
1058 __ scvtf(z20.VnH(), p2.Merging(), z9.VnH());
1060 __ movprfx(z19.VnH(), p1.Merging(), z21.VnH());
1061 __ scvtf(z19.VnS(), p1.Merging(), z6.VnD());
1063 __ movprfx(z31.VnS(), p3.Merging(), z22.VnS());
1064 __ scvtf(z31.VnH(), p3.Merging(), z22.VnD());
1066 __ movprfx(z8.VnS(), p3.Merging(), z3.VnS());
1067 __ ucvtf(z8.VnD(), p3.Merging(), z1.VnS());
1069 __ movprfx(z0.VnB(), p0.Merging(), z23.VnB());
1070 __ ucvtf(z0.VnH(), p0.Merging(), z12.VnH());
1072 __ movprfx(z8.VnH(), p3.Zeroing(), z4.VnH());
1073 __ ucvtf(z8.VnH(), p3.Merging(), z4.VnS());
1075 __ movprfx(z20.VnH(), p2.Zeroing(), z10.VnH());
1076 __ ucvtf(z20.VnH(), p2.Merging(), z11.VnD());
1094 __ movprfx(z27.VnS(), p1.Zeroing(), z12.VnS());
1095 __ add(z27.VnS(), z27.VnS(), 42);
1097 __ movprfx(z31.VnS(), p6.Zeroing(), z1.VnS());
1098 __ and_(z31.VnS(), z31.VnS(), 4);
1100 __ movprfx(z27.VnS(), p5.Merging(), z24.VnS());
1101 __ bic(z27.VnS(), z27.VnS(), 4);
1103 __ movprfx(z6.VnH(), p7.Merging(), z30.VnH());
1104 __ clasta(z6.VnH(), p7, z6.VnH(), z14.VnH());
1106 __ movprfx(z11.VnB(), p6.Merging(), z5.VnB());
1107 __ clastb(z11.VnB(), p6, z11.VnB(), z29.VnB());
1109 __ movprfx(z5.VnD(), p0.Merging(), z1.VnD());
1110 __ decd(z5.VnD(), SVE_MUL3);
1112 __ movprfx(z11.VnH(), p7.Zeroing(), z28.VnH());
1113 __ dech(z11.VnH(), SVE_VL2);
1115 __ movprfx(z14.VnS(), p5.Zeroing(), z6.VnS());
1116 __ decp(z14.VnS(), p5);
1118 __ movprfx(z6.VnS(), p5.Merging(), z10.VnS());
1119 __ decw(z6.VnS(), SVE_ALL);
1121 __ movprfx(z27.VnH(), p7.Zeroing(), z9.VnH());
1122 __ eon(z27.VnH(), z27.VnH(), 4);
1124 __ movprfx(z3.VnS(), p3.Zeroing(), z2.VnS());
1125 __ eor(z3.VnS(), z3.VnS(), 4);
1127 __ movprfx(z30.VnB(), p2.Zeroing(), z25.VnB());
1128 __ ext(z30.VnB(), z30.VnB(), z25.VnB(), 42);
1130 __ movprfx(z22.VnD(), p0.Merging(), z0.VnD());
1131 __ incd(z22.VnD(), SVE_MUL3);
1133 __ movprfx(z7.VnH(), p3.Merging(), z3.VnH());
1134 __ inch(z7.VnH(), SVE_VL2);
1136 __ movprfx(z9.VnD(), p1.Zeroing(), z28.VnD());
1137 __ incp(z9.VnD(), p1);
1139 __ movprfx(z30.VnS(), p3.Merging(), z4.VnS());
1140 __ incw(z30.VnS(), SVE_ALL);
1142 __ movprfx(z30.VnB(), p7.Zeroing(), z21.VnB());
1143 __ insr(z30.VnB(), w30);
1145 __ movprfx(z2.VnB(), p4.Zeroing(), z26.VnB());
1146 __ insr(z2.VnB(), b0);
1148 __ movprfx(z27.VnS(), p5.Zeroing(), z5.VnS());
1149 __ mul(z27.VnS(), z27.VnS(), 42);
1151 __ movprfx(z5.VnS(), p0.Merging(), z26.VnS());
1152 __ orn(z5.VnS(), z5.VnS(), 4);
1154 __ movprfx(z5.VnS(), p0.Merging(), z26.VnS());
1155 __ orn(z5.VnS(), z5.VnS(), 4);
1157 __ movprfx(z16.VnD(), p1.Merging(), z13.VnD());
1158 __ sdot(z16.VnD(), z11.VnH(), z7.VnH(), 1);
1160 __ movprfx(z27.VnD(), p5.Merging(), z18.VnD());
1161 __ sdot(z27.VnD(), z18.VnH(), z0.VnH());
1163 __ movprfx(z20.VnS(), p6.Merging(), z1.VnS());
1164 __ sdot(z20.VnS(), z10.VnB(), z1.VnB(), 1);
1166 __ movprfx(z19.VnD(), p0.Zeroing(), z7.VnD());
1167 __ smax(z19.VnD(), z19.VnD(), 42);
1169 __ movprfx(z15.VnD(), p1.Zeroing(), z7.VnD());
1170 __ smin(z15.VnD(), z15.VnD(), 42);
1172 __ movprfx(z15.VnB(), p5.Merging(), z3.VnB());
1173 __ splice(z15.VnB(), p5, z15.VnB(), z3.VnB());
1175 __ movprfx(z5.VnB(), p6.Zeroing(), z4.VnB());
1176 __ sqadd(z5.VnB(), z5.VnB(), 42);
1178 __ movprfx(z16.VnD(), p0.Zeroing(), z18.VnD());
1179 __ sqdecd(z16.VnD(), SVE_MUL3);
1181 __ movprfx(z7.VnH(), p3.Merging(), z28.VnH());
1182 __ sqdech(z7.VnH(), SVE_VL2);
1184 __ movprfx(z7.VnS(), p2.Merging(), z13.VnS());
1185 __ sqdecp(z7.VnS(), p2);
1187 __ movprfx(z22.VnS(), p7.Zeroing(), z20.VnS());
1188 __ sqdecw(z22.VnS(), SVE_ALL);
1190 __ movprfx(z26.VnD(), p1.Zeroing(), z0.VnD());
1191 __ sqincd(z26.VnD(), SVE_MUL3);
1193 __ movprfx(z15.VnH(), p7.Zeroing(), z27.VnH());
1194 __ sqinch(z15.VnH(), SVE_VL2);
1196 __ movprfx(z4.VnD(), p7.Merging(), z13.VnD());
1197 __ sqincp(z4.VnD(), p7);
1199 __ movprfx(z29.VnS(), p6.Merging(), z14.VnS());
1200 __ sqincw(z29.VnS(), SVE_ALL);
1202 __ movprfx(z17.VnB(), p1.Merging(), z24.VnB());
1203 __ sqsub(z17.VnB(), z17.VnB(), 42);
1205 __ movprfx(z26.VnS(), p5.Zeroing(), z19.VnS());
1206 __ sub(z26.VnS(), z26.VnS(), 42);
1208 __ movprfx(z15.VnD(), p1.Merging(), z3.VnD());
1209 __ subr(z15.VnD(), z15.VnD(), 42);
1211 __ movprfx(z4.VnD(), p2.Zeroing(), z14.VnD());
1212 __ udot(z4.VnD(), z15.VnH(), z7.VnH(), 1);
1214 __ movprfx(z29.VnD(), p4.Zeroing(), z28.VnD());
1215 __ udot(z29.VnD(), z2.VnH(), z17.VnH());
1217 __ movprfx(z7.VnS(), p6.Merging(), z3.VnS());
1218 __ udot(z7.VnS(), z14.VnB(), z1.VnB(), 1);
1220 __ movprfx(z14.VnB(), p3.Merging(), z5.VnB());
1221 __ umax(z14.VnB(), z14.VnB(), 42);
1223 __ movprfx(z4.VnD(), p1.Zeroing(), z2.VnD());
1224 __ umin(z4.VnD(), z4.VnD(), 42);
1226 __ movprfx(z19.VnB(), p0.Zeroing(), z27.VnB());
1227 __ uqadd(z19.VnB(), z19.VnB(), 42);
1229 __ movprfx(z24.VnD(), p7.Zeroing(), z11.VnD());
1230 __ uqdecd(z24.VnD(), SVE_MUL3);
1232 __ movprfx(z24.VnH(), p4.Zeroing(), z18.VnH());
1233 __ uqdech(z24.VnH(), SVE_VL2);
1235 __ movprfx(z31.VnS(), p5.Zeroing(), z2.VnS());
1236 __ uqdecp(z31.VnS(), p5);
1238 __ movprfx(z19.VnS(), p6.Merging(), z21.VnS());
1239 __ uqdecw(z19.VnS(), SVE_ALL);
1241 __ movprfx(z27.VnD(), p0.Merging(), z21.VnD());
1242 __ uqincd(z27.VnD(), SVE_MUL3);
1244 __ movprfx(z13.VnH(), p4.Zeroing(), z12.VnH());
1245 __ uqinch(z13.VnH(), SVE_VL2);
1247 __ movprfx(z0.VnD(), p4.Zeroing(), z1.VnD());
1248 __ uqincp(z0.VnD(), p4);
1250 __ movprfx(z12.VnS(), p4.Merging(), z21.VnS());
1251 __ uqincw(z12.VnS(), SVE_ALL);
1253 __ movprfx(z9.VnD(), p0.Zeroing(), z16.VnD());
1254 __ uqsub(z9.VnD(), z9.VnD(), 42);
1256 __ movprfx(z22.VnS(), p0.Zeroing(), z5.VnS());
1257 __ smmla(z22.VnS(), z21.VnB(), z0.VnB());
1259 __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS());
1260 __ ummla(z1.VnS(), z10.VnB(), z2.VnB());
1262 __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS());
1263 __ usmmla(z30.VnS(), z29.VnB(), z18.VnB());
1265 __ movprfx(z4.VnS(), p0.Zeroing(), z5.VnS());
1266 __ usdot(z4.VnS(), z3.VnB(), z4.VnB());
1268 __ movprfx(z10.VnS(), p0.Zeroing(), z5.VnS());
1269 __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0);
1271 __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS());
1272 __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1);
1292 __ movprfx(z10.VnH(), p3.Zeroing(), z3.VnH());
1293 __ fcmla(z10.VnH(), z22.VnH(), z3.VnH(), 2, 180);
1295 __ movprfx(z12.VnS(), p4.Merging(), z14.VnS());
1296 __ fcmla(z12.VnS(), z3.VnS(), z10.VnS(), 1, 270);
1298 __ movprfx(z16.VnD(), p3.Zeroing(), z24.VnD());
1299 __ fmla(z16.VnD(), z24.VnD(), z8.VnD(), 1);
1301 __ movprfx(z9.VnH(), p7.Zeroing(), z0.VnH());
1302 __ fmla(z9.VnH(), z8.VnH(), z0.VnH(), 7);
1304 __ movprfx(z23.VnS(), p5.Merging(), z5.VnS());
1305 __ fmla(z23.VnS(), z7.VnS(), z5.VnS(), 3);
1307 __ movprfx(z19.VnD(), p6.Zeroing(), z8.VnD());
1308 __ fmls(z19.VnD(), z27.VnD(), z13.VnD(), 1);
1310 __ movprfx(z25.VnH(), p7.Merging(), z24.VnH());
1311 __ fmls(z25.VnH(), z24.VnH(), z4.VnH(), 4);
1313 __ movprfx(z2.VnS(), p1.Zeroing(), z0.VnS());
1314 __ fmls(z2.VnS(), z9.VnS(), z0.VnS(), 3);
1317 __ movprfx(z22.VnD(), p6.Merging(), z16.VnD());
1318 __ ftmad(z22.VnD(), z22.VnD(), z20.VnD(), 2);
1320 __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS());
1321 __ fmmla(z30.VnS(), z29.VnS(), z18.VnS());
1323 __ movprfx(z31.VnD(), p1.Merging(), z5.VnD());
1324 __ fmmla(z31.VnD(), z30.VnD(), z18.VnD());
1340 __ movprfx(z17, z28);
1341 __ abs(z17.VnB(), p6.Merging(), z28.VnB());
1343 __ movprfx(z9, z7);
1344 __ add(z9.VnB(), p5.Merging(), z9.VnB(), z29.VnB());
1346 __ movprfx(z11, z0);
1347 __ add(z11.VnD(), z11.VnD(), 42);
1349 __ movprfx(z8.VnS(), p3.Zeroing(), z28.VnS());
1350 __ and_(z8.VnS(), p3.Merging(), z8.VnS(), z31.VnS());
1352 __ movprfx(z20, z23);
1353 __ and_(z20.VnS(), z20.VnS(), 4);
1355 __ movprfx(z24.VnD(), p5.Merging(), z11.VnD());
1356 __ asr(z24.VnD(), p5.Merging(), z24.VnD(), 3);
1358 __ movprfx(z1, z13);
1359 __ asr(z1.VnH(), p3.Merging(), z1.VnH(), z4.VnH());
1361 __ movprfx(z0.VnB(), p7.Zeroing(), z28.VnB());
1362 __ asr(z0.VnB(), p7.Merging(), z0.VnB(), z28.VnD());
1364 __ movprfx(z15, z5);
1365 __ asr(z15.VnD(), p3.Merging(), z15.VnD(), z5.VnD());
1367 __ movprfx(z24.VnH(), p3.Merging(), z22.VnH());
1368 __ asrd(z24.VnH(), p3.Merging(), z24.VnH(), 3);
1370 __ movprfx(z2.VnS(), p3.Zeroing(), z20.VnS());
1371 __ asrr(z2.VnS(), p3.Merging(), z2.VnS(), z15.VnS());
1373 __ movprfx(z17.VnB(), p7.Merging(), z6.VnB());
1374 __ bic(z17.VnB(), p7.Merging(), z17.VnB(), z25.VnB());
1376 __ movprfx(z31, z6);
1377 __ bic(z31.VnD(), z31.VnD(), 4);
1379 __ movprfx(z20, z2);
1380 __ clasta(z20.VnB(), p4, z20.VnB(), z15.VnB());
1382 __ movprfx(z27, z11);
1383 __ clastb(z27.VnB(), p5, z27.VnB(), z6.VnB());
1385 __ movprfx(z3.VnS(), p7.Zeroing(), z17.VnS());
1386 __ cls(z3.VnS(), p7.Merging(), z0.VnS());
1388 __ movprfx(z29.VnB(), p0.Zeroing(), z24.VnB());
1389 __ clz(z29.VnB(), p0.Merging(), z7.VnB());
1391 __ movprfx(z2.VnH(), p7.Zeroing(), z29.VnH());
1392 __ cnot(z2.VnH(), p7.Merging(), z28.VnH());
1394 __ movprfx(z23, z5);
1395 __ cnt(z23.VnH(), p0.Merging(), z12.VnH());
1397 __ movprfx(z5, z3);
1398 __ cpy(z5.VnD(), p1.Merging(), -42);
1400 __ movprfx(z0, z12);
1401 __ cpy(z0.VnB(), p1.Merging(), w0);
1403 __ movprfx(z27, z8);
1404 __ cpy(z27.VnB(), p0.Merging(), b0);
1406 __ movprfx(z20, z24);
1407 __ decd(z20.VnD(), SVE_MUL3);
1409 __ movprfx(z5, z28);
1410 __ dech(z5.VnH(), SVE_VL2);
1412 __ movprfx(z7, z3);
1413 __ decp(z7.VnD(), p2);
1415 __ movprfx(z4, z7);
1416 __ decw(z4.VnS(), SVE_ALL);
1418 __ movprfx(z3, z18);
1419 __ eon(z3.VnS(), z3.VnS(), 4);
1421 __ movprfx(z4.VnD(), p0.Merging(), z10.VnD());
1422 __ eor(z4.VnD(), p0.Merging(), z4.VnD(), z10.VnD());
1424 __ movprfx(z15, z18);
1425 __ eor(z15.VnH(), z15.VnH(), 4);
1427 __ movprfx(z17, z30);
1428 __ ext(z17.VnB(), z17.VnB(), z18.VnB(), 2);
1430 __ movprfx(z19, z28);
1431 __ incd(z19.VnD(), SVE_MUL3);
1433 __ movprfx(z13, z7);
1434 __ inch(z13.VnH(), SVE_VL2);
1436 __ movprfx(z14, z21);
1437 __ incp(z14.VnD(), p1);
1439 __ movprfx(z26, z12);
1440 __ incw(z26.VnS(), SVE_ALL);
1442 __ movprfx(z16, z2);
1443 __ insr(z16.VnB(), w16);
1445 __ movprfx(z20, z26);
1446 __ insr(z20.VnB(), b0);
1448 __ movprfx(z30.VnD(), p0.Merging(), z23.VnD());
1449 __ lsl(z30.VnD(), p0.Merging(), z30.VnD(), 3);
1451 __ movprfx(z28.VnS(), p2.Zeroing(), z6.VnS());
1452 __ lsl(z28.VnS(), p2.Merging(), z28.VnS(), z6.VnS());
1454 __ movprfx(z15.VnH(), p6.Zeroing(), z3.VnH());
1455 __ lsl(z15.VnH(), p6.Merging(), z15.VnH(), z3.VnD());
1457 __ movprfx(z13.VnD(), p4.Zeroing(), z14.VnD());
1458 __ lsl(z13.VnD(), p4.Merging(), z13.VnD(), z25.VnD());
1460 __ movprfx(z14, z5);
1461 __ lslr(z14.VnS(), p0.Merging(), z14.VnS(), z17.VnS());
1463 __ movprfx(z21, z1);
1464 __ lsr(z21.VnH(), p5.Merging(), z21.VnH(), 3);
1466 __ movprfx(z11.VnH(), p0.Zeroing(), z13.VnH());
1467 __ lsr(z11.VnH(), p0.Merging(), z11.VnH(), z9.VnH());
1469 __ movprfx(z24, z29);
1470 __ lsr(z24.VnS(), p4.Merging(), z24.VnS(), z1.VnD());
1472 __ movprfx(z1.VnD(), p6.Merging(), z9.VnD());
1473 __ lsr(z1.VnD(), p6.Merging(), z1.VnD(), z9.VnD());
1475 __ movprfx(z22, z3);
1476 __ lsrr(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB());
1478 __ movprfx(z24.VnB(), p2.Zeroing(), z5.VnB());
1479 __ mad(z24.VnB(), p2.Merging(), z5.VnB(), z10.VnB());
1481 __ movprfx(z8, z4);
1482 __ mla(z8.VnS(), p6.Merging(), z4.VnS(), z26.VnS());
1484 __ movprfx(z10, z8);
1485 __ mls(z10.VnS(), p4.Merging(), z23.VnS(), z16.VnS());
1488 __ movprfx(z4.VnH(), p5.Zeroing(), z2.VnH());
1489 __ mov(z4.VnH(), p5.Merging(), -42);
1491 __ movprfx(z2.VnB(), p3.Zeroing(), z24.VnB());
1492 __ mov(z2.VnB(), p3.Merging(), w2);
1494 __ movprfx(z27, z13);
1495 __ mov(z27.VnD(), p3.Merging(), d0);
1497 __ movprfx(z18.VnB(), p5.Zeroing(), z11.VnB());
1498 __ msb(z18.VnB(), p5.Merging(), z3.VnB(), z11.VnB());
1500 __ movprfx(z29, z16);
1501 __ mul(z29.VnS(), p6.Merging(), z29.VnS(), z9.VnS());
1503 __ movprfx(z21, z23);
1504 __ mul(z21.VnH(), z21.VnH(), 42);
1506 __ movprfx(z7.VnS(), p4.Merging(), z14.VnS());
1507 __ neg(z7.VnS(), p4.Merging(), z14.VnS());
1509 __ movprfx(z8.VnD(), p4.Zeroing(), z5.VnD());
1510 __ not_(z8.VnD(), p4.Merging(), z5.VnD());
1512 __ movprfx(z14, z13);
1513 __ orn(z14.VnS(), z14.VnS(), 4);
1515 __ movprfx(z14, z13);
1516 __ orn(z14.VnS(), z14.VnS(), 4);
1518 __ movprfx(z27, z17);
1519 __ orr(z27.VnD(), p2.Merging(), z27.VnD(), z17.VnD());
1521 __ movprfx(z13.VnH(), p2.Zeroing(), z27.VnH());
1522 __ rbit(z13.VnH(), p2.Merging(), z1.VnH());
1524 __ movprfx(z1, z29);
1525 __ revb(z1.VnS(), p4.Merging(), z6.VnS());
1527 __ movprfx(z18.VnD(), p2.Zeroing(), z10.VnD());
1528 __ revh(z18.VnD(), p2.Merging(), z16.VnD());
1530 __ movprfx(z2.VnD(), p1.Merging(), z10.VnD());
1531 __ revw(z2.VnD(), p1.Merging(), z1.VnD());
1533 __ movprfx(z28.VnS(), p7.Merging(), z11.VnS());
1534 __ sabd(z28.VnS(), p7.Merging(), z28.VnS(), z11.VnS());
1536 __ movprfx(z22.VnS(), p0.Merging(), z20.VnS());
1537 __ sdiv(z22.VnS(), p0.Merging(), z22.VnS(), z6.VnS());
1539 __ movprfx(z13.VnS(), p7.Merging(), z0.VnS());
1540 __ sdivr(z13.VnS(), p7.Merging(), z13.VnS(), z2.VnS());
1542 __ movprfx(z0, z12);
1543 __ sdot(z0.VnD(), z10.VnH(), z12.VnH(), 1);
1545 __ movprfx(z8, z15);
1546 __ sdot(z8.VnS(), z15.VnB(), z12.VnB());
1548 __ movprfx(z13, z0);
1549 __ sdot(z13.VnS(), z10.VnB(), z0.VnB(), 1);
1551 __ movprfx(z11, z13);
1552 __ smax(z11.VnB(), p5.Merging(), z11.VnB(), z24.VnB());
1554 __ movprfx(z3, z17);
1555 __ smax(z3.VnD(), z3.VnD(), 42);
1557 __ movprfx(z10, z29);
1558 __ smin(z10.VnD(), p4.Merging(), z10.VnD(), z29.VnD());
1560 __ movprfx(z13, z29);
1561 __ smin(z13.VnD(), z13.VnD(), 42);
1563 __ movprfx(z6, z17);
1564 __ smulh(z6.VnS(), p7.Merging(), z6.VnS(), z31.VnS());
1566 __ movprfx(z19, z20);
1567 __ splice(z19.VnB(), p3, z19.VnB(), z20.VnB());
1569 __ movprfx(z0, z3);
1570 __ sqadd(z0.VnD(), z0.VnD(), 42);
1572 __ movprfx(z29, z5);
1573 __ sqdecd(z29.VnD(), SVE_MUL3);
1575 __ movprfx(z25, z11);
1576 __ sqdech(z25.VnH(), SVE_VL2);
1578 __ movprfx(z16, z9);
1579 __ sqdecp(z16.VnS(), p1);
1581 __ movprfx(z8, z17);
1582 __ sqdecw(z8.VnS(), SVE_ALL);
1584 __ movprfx(z4, z5);
1585 __ sqincd(z4.VnD(), SVE_MUL3);
1587 __ movprfx(z0, z17);
1588 __ sqinch(z0.VnH(), SVE_VL2);
1590 __ movprfx(z7, z27);
1591 __ sqincp(z7.VnS(), p6);
1593 __ movprfx(z10, z9);
1594 __ sqincw(z10.VnS(), SVE_ALL);
1596 __ movprfx(z31, z22);
1597 __ sqsub(z31.VnB(), z31.VnB(), 42);
1599 __ movprfx(z12.VnH(), p7.Zeroing(), z23.VnH());
1600 __ sub(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnH());
1602 __ movprfx(z10, z1);
1603 __ sub(z10.VnH(), z10.VnH(), 42);
1605 __ movprfx(z15.VnB(), p0.Merging(), z0.VnB());
1606 __ subr(z15.VnB(), p0.Merging(), z15.VnB(), z0.VnB());
1608 __ movprfx(z17, z2);
1609 __ subr(z17.VnH(), z17.VnH(), 42);
1611 __ movprfx(z5, z3);
1612 __ sxtb(z5.VnD(), p6.Merging(), z20.VnD());
1614 __ movprfx(z11, z17);
1615 __ sxth(z11.VnD(), p6.Merging(), z25.VnD());
1617 __ movprfx(z26, z4);
1618 __ sxtw(z26.VnD(), p5.Merging(), z4.VnD());
1620 __ movprfx(z15.VnD(), p0.Zeroing(), z8.VnD());
1621 __ uabd(z15.VnD(), p0.Merging(), z15.VnD(), z20.VnD());
1623 __ movprfx(z21, z24);
1624 __ udiv(z21.VnD(), p3.Merging(), z21.VnD(), z24.VnD());
1626 __ movprfx(z22, z10);
1627 __ udivr(z22.VnD(), p7.Merging(), z22.VnD(), z27.VnD());
1629 __ movprfx(z27, z25);
1630 __ udot(z27.VnD(), z29.VnH(), z3.VnH(), 1);
1632 __ movprfx(z29, z10);
1633 __ udot(z29.VnS(), z10.VnB(), z21.VnB());
1635 __ movprfx(z18, z0);
1636 __ udot(z18.VnS(), z14.VnB(), z0.VnB(), 1);
1638 __ movprfx(z6, z30);
1639 __ umax(z6.VnS(), p2.Merging(), z6.VnS(), z27.VnS());
1641 __ movprfx(z31, z17);
1642 __ umax(z31.VnD(), z31.VnD(), 42);
1644 __ movprfx(z27.VnS(), p0.Merging(), z20.VnS());
1645 __ umin(z27.VnS(), p0.Merging(), z27.VnS(), z8.VnS());
1647 __ movprfx(z0, z11);
1648 __ umin(z0.VnH(), z0.VnH(), 42);
1650 __ movprfx(z21, z17);
1651 __ umulh(z21.VnB(), p0.Merging(), z21.VnB(), z30.VnB());
1653 __ movprfx(z9, z24);
1654 __ uqadd(z9.VnD(), z9.VnD(), 42);
1656 __ movprfx(z18, z13);
1657 __ uqdecd(z18.VnD(), SVE_MUL3);
1659 __ movprfx(z20, z23);
1660 __ uqdech(z20.VnH(), SVE_VL2);
1662 __ movprfx(z12, z29);
1663 __ uqdecp(z12.VnS(), p7);
1665 __ movprfx(z24, z25);
1666 __ uqdecw(z24.VnS(), SVE_ALL);
1668 __ movprfx(z13, z1);
1669 __ uqincd(z13.VnD(), SVE_MUL3);
1671 __ movprfx(z5, z19);
1672 __ uqinch(z5.VnH(), SVE_VL2);
1674 __ movprfx(z6, z25);
1675 __ uqincp(z6.VnS(), p5);
1677 __ movprfx(z12, z14);
1678 __ uqincw(z12.VnS(), SVE_ALL);
1680 __ movprfx(z13, z6);
1681 __ uqsub(z13.VnH(), z13.VnH(), 42);
1683 __ movprfx(z31, z3);
1684 __ uxtb(z31.VnS(), p0.Merging(), z3.VnS());
1686 __ movprfx(z18.VnD(), p4.Merging(), z25.VnD());
1687 __ uxth(z18.VnD(), p4.Merging(), z25.VnD());
1689 __ movprfx(z18.VnD(), p7.Merging(), z25.VnD());
1690 __ uxtw(z18.VnD(), p7.Merging(), z25.VnD());
1692 __ movprfx(z22, z5);
1693 __ smmla(z22.VnS(), z21.VnB(), z0.VnB());
1695 __ movprfx(z1, z5);
1696 __ ummla(z1.VnS(), z10.VnB(), z0.VnB());
1698 __ movprfx(z30, z5);
1699 __ usmmla(z30.VnS(), z31.VnB(), z18.VnB());
1701 __ movprfx(z4, z5);
1702 __ usdot(z4.VnS(), z3.VnB(), z3.VnB());
1704 __ movprfx(z10, z5);
1705 __ usdot(z10.VnS(), z9.VnB(), z0.VnB(), 0);
1707 __ movprfx(z1, z5);
1708 __ sudot(z1.VnS(), z10.VnB(), z2.VnB(), 1);
1726 __ movprfx(z18.VnS(), p6.Zeroing(), z20.VnS());
1727 __ fabd(z18.VnS(), p6.Merging(), z18.VnS(), z19.VnS());
1729 __ movprfx(z28.VnD(), p4.Zeroing(), z24.VnD());
1730 __ fabs(z28.VnD(), p4.Merging(), z24.VnD());
1732 __ movprfx(z12, z8);
1733 __ fadd(z12.VnS(), p2.Merging(), z12.VnS(), 0.5);
1735 __ movprfx(z0.VnS(), p1.Merging(), z9.VnS());
1736 __ fadd(z0.VnS(), p1.Merging(), z0.VnS(), z9.VnS());
1738 __ movprfx(z10.VnH(), p2.Merging(), z2.VnH());
1739 __ fcadd(z10.VnH(), p2.Merging(), z10.VnH(), z20.VnH(), 90);
1741 __ movprfx(z21, z6);
1742 __ fcmla(z21.VnH(), z31.VnH(), z6.VnH(), 2, 180);
1744 __ movprfx(z16, z6);
1745 __ fcmla(z16.VnS(), z11.VnS(), z6.VnS(), 1, 270);
1747 __ movprfx(z15.VnH(), p6.Merging(), z16.VnH());
1748 __ fcpy(z15.VnH(), p6.Merging(), 1.25);
1750 __ movprfx(z1, z14);
1751 __ fcvt(z1.VnD(), p2.Merging(), z4.VnH());
1753 __ movprfx(z25.VnD(), p6.Merging(), z1.VnD());
1754 __ fcvt(z25.VnD(), p6.Merging(), z1.VnS());
1756 __ movprfx(z18.VnS(), p2.Merging(), z2.VnS());
1757 __ fcvt(z18.VnH(), p2.Merging(), z7.VnS());
1759 __ movprfx(z21.VnD(), p5.Zeroing(), z26.VnD());
1760 __ fcvt(z21.VnH(), p5.Merging(), z26.VnD());
1762 __ movprfx(z12.VnD(), p1.Merging(), z18.VnD());
1763 __ fcvtzs(z12.VnD(), p1.Merging(), z18.VnH());
1765 __ movprfx(z3.VnS(), p2.Merging(), z0.VnS());
1766 __ fcvtzs(z3.VnS(), p2.Merging(), z26.VnS());
1768 __ movprfx(z21.VnS(), p4.Merging(), z7.VnS());
1769 __ fcvtzs(z21.VnS(), p4.Merging(), z7.VnH());
1771 __ movprfx(z16.VnD(), p3.Zeroing(), z4.VnD());
1772 __ fcvtzs(z16.VnS(), p3.Merging(), z28.VnD());
1774 __ movprfx(z31.VnD(), p4.Merging(), z1.VnD());
1775 __ fcvtzu(z31.VnD(), p4.Merging(), z1.VnH());
1777 __ movprfx(z23.VnH(), p0.Zeroing(), z28.VnH());
1778 __ fcvtzu(z23.VnH(), p0.Merging(), z28.VnH());
1780 __ movprfx(z2, z12);
1781 __ fcvtzu(z2.VnD(), p3.Merging(), z28.VnS());
1783 __ movprfx(z4, z7);
1784 __ fcvtzu(z4.VnS(), p7.Merging(), z16.VnD());
1786 __ movprfx(z13.VnS(), p3.Zeroing(), z23.VnS());
1787 __ fdiv(z13.VnS(), p3.Merging(), z13.VnS(), z23.VnS());
1789 __ movprfx(z6.VnD(), p1.Zeroing(), z16.VnD());
1790 __ fdivr(z6.VnD(), p1.Merging(), z6.VnD(), z5.VnD());
1792 __ movprfx(z31, z23);
1793 __ fmad(z31.VnS(), p5.Merging(), z23.VnS(), z11.VnS());
1795 __ movprfx(z14.VnH(), p7.Merging(), z21.VnH());
1796 __ fmax(z14.VnH(), p7.Merging(), z14.VnH(), 0.0);
1798 __ movprfx(z17.VnS(), p4.Merging(), z9.VnS());
1799 __ fmax(z17.VnS(), p4.Merging(), z17.VnS(), z9.VnS());
1801 __ movprfx(z1.VnS(), p3.Zeroing(), z30.VnS());
1802 __ fmaxnm(z1.VnS(), p3.Merging(), z1.VnS(), 0.0);
1804 __ movprfx(z10.VnD(), p1.Zeroing(), z17.VnD());
1805 __ fmaxnm(z10.VnD(), p1.Merging(), z10.VnD(), z17.VnD());
1807 __ movprfx(z3, z13);
1808 __ fmin(z3.VnS(), p0.Merging(), z3.VnS(), 0.0);
1810 __ movprfx(z15, z21);
1811 __ fmin(z15.VnS(), p4.Merging(), z15.VnS(), z21.VnS());
1813 __ movprfx(z30.VnH(), p7.Zeroing(), z25.VnH());
1814 __ fminnm(z30.VnH(), p7.Merging(), z30.VnH(), 0.0);
1816 __ movprfx(z31, z15);
1817 __ fminnm(z31.VnD(), p5.Merging(), z31.VnD(), z25.VnD());
1819 __ movprfx(z27, z28);
1820 __ fmla(z27.VnD(), z28.VnD(), z12.VnD(), 1);
1822 __ movprfx(z26.VnH(), p6.Zeroing(), z13.VnH());
1823 __ fmla(z26.VnH(), p6.Merging(), z13.VnH(), z7.VnH());
1825 __ movprfx(z26, z10);
1826 __ fmla(z26.VnH(), z10.VnH(), z1.VnH(), 7);
1828 __ movprfx(z0, z1);
1829 __ fmla(z0.VnS(), z25.VnS(), z1.VnS(), 3);
1831 __ movprfx(z7, z3);
1832 __ fmls(z7.VnD(), z30.VnD(), z3.VnD(), 1);
1834 __ movprfx(z1, z24);
1835 __ fmls(z1.VnD(), p5.Merging(), z20.VnD(), z24.VnD());
1837 __ movprfx(z19, z18);
1838 __ fmls(z19.VnH(), z18.VnH(), z7.VnH(), 4);
1840 __ movprfx(z0, z26);
1841 __ fmls(z0.VnS(), z17.VnS(), z4.VnS(), 3);
1843 __ movprfx(z19.VnS(), p7.Zeroing(), z6.VnS());
1844 __ fmov(z19.VnS(), p7.Merging(), 0.0);
1846 __ movprfx(z21, z15);
1847 __ fmov(z21.VnH(), p7.Merging(), 2.5);
1849 __ movprfx(z23, z18);
1850 __ fmsb(z23.VnS(), p4.Merging(), z1.VnS(), z7.VnS());
1852 __ movprfx(z8, z28);
1853 __ fmul(z8.VnS(), p4.Merging(), z8.VnS(), 2.0);
1855 __ movprfx(z6.VnD(), p6.Merging(), z27.VnD());
1856 __ fmul(z6.VnD(), p6.Merging(), z6.VnD(), z27.VnD());
1858 __ movprfx(z6.VnH(), p0.Merging(), z19.VnH());
1859 __ fmulx(z6.VnH(), p0.Merging(), z6.VnH(), z19.VnH());
1861 __ movprfx(z5.VnH(), p0.Merging(), z1.VnH());
1862 __ fneg(z5.VnH(), p0.Merging(), z1.VnH());
1864 __ movprfx(z22.VnD(), p4.Zeroing(), z24.VnD());
1865 __ fnmad(z22.VnD(), p4.Merging(), z24.VnD(), z12.VnD());
1867 __ movprfx(z5.VnS(), p0.Merging(), z29.VnS());
1868 __ fnmla(z5.VnS(), p0.Merging(), z17.VnS(), z29.VnS());
1870 __ movprfx(z5, z3);
1871 __ fnmls(z5.VnD(), p5.Merging(), z3.VnD(), z2.VnD());
1873 __ movprfx(z9.VnD(), p2.Zeroing(), z7.VnD());
1874 __ fnmsb(z9.VnD(), p2.Merging(), z7.VnD(), z23.VnD());
1877 __ movprfx(z12.VnH(), p1.Zeroing(), z17.VnH());
1878 __ frecpx(z12.VnH(), p1.Merging(), z4.VnH());
1880 __ movprfx(z28.VnS(), p4.Zeroing(), z27.VnS());
1881 __ frinta(z28.VnS(), p4.Merging(), z24.VnS());
1883 __ movprfx(z7.VnD(), p7.Merging(), z25.VnD());
1884 __ frinti(z7.VnD(), p7.Merging(), z25.VnD());
1886 __ movprfx(z10, z21);
1887 __ frintm(z10.VnD(), p5.Merging(), z26.VnD());
1889 __ movprfx(z25, z21);
1890 __ frintn(z25.VnH(), p4.Merging(), z1.VnH());
1892 __ movprfx(z25, z9);
1893 __ frintp(z25.VnH(), p1.Merging(), z9.VnH());
1895 __ movprfx(z30, z16);
1896 __ frintx(z30.VnS(), p1.Merging(), z16.VnS());
1898 __ movprfx(z0.VnD(), p5.Merging(), z9.VnD());
1899 __ frintz(z0.VnD(), p5.Merging(), z23.VnD());
1901 __ movprfx(z11.VnD(), p7.Merging(), z2.VnD());
1902 __ fscale(z11.VnD(), p7.Merging(), z11.VnD(), z2.VnD());
1904 __ movprfx(z23.VnS(), p4.Merging(), z17.VnS());
1905 __ fsqrt(z23.VnS(), p4.Merging(), z10.VnS());
1907 __ movprfx(z0.VnD(), p2.Merging(), z26.VnD());
1908 __ fsub(z0.VnD(), p2.Merging(), z0.VnD(), 1.0);
1910 __ movprfx(z28.VnD(), p1.Zeroing(), z16.VnD());
1911 __ fsub(z28.VnD(), p1.Merging(), z28.VnD(), z16.VnD());
1913 __ movprfx(z22, z27);
1914 __ fsubr(z22.VnD(), p4.Merging(), z22.VnD(), 1.0);
1916 __ movprfx(z4.VnS(), p2.Merging(), z26.VnS());
1917 __ fsubr(z4.VnS(), p2.Merging(), z4.VnS(), z26.VnS());
1920 __ movprfx(z10, z4);
1921 __ ftmad(z10.VnS(), z10.VnS(), z4.VnS(), 2);
1923 __ movprfx(z2, z16);
1924 __ scvtf(z2.VnD(), p1.Merging(), z16.VnS());
1926 __ movprfx(z10, z20);
1927 __ scvtf(z10.VnD(), p5.Merging(), z20.VnD());
1929 __ movprfx(z29, z28);
1930 __ scvtf(z29.VnS(), p0.Merging(), z31.VnD());
1932 __ movprfx(z26.VnD(), p3.Merging(), z13.VnD());
1933 __ scvtf(z26.VnH(), p3.Merging(), z5.VnD());
1935 __ movprfx(z7.VnD(), p3.Zeroing(), z26.VnD());
1936 __ ucvtf(z7.VnD(), p3.Merging(), z26.VnS());
1938 __ movprfx(z13, z17);
1939 __ ucvtf(z13.VnD(), p7.Merging(), z17.VnD());
1941 __ movprfx(z24.VnD(), p1.Merging(), z31.VnD());
1942 __ ucvtf(z24.VnS(), p1.Merging(), z18.VnD());
1944 __ movprfx(z17.VnD(), p4.Merging(), z22.VnD());
1945 __ ucvtf(z17.VnH(), p4.Merging(), z4.VnD());
1947 __ movprfx(z30, z5);
1948 __ fmmla(z30.VnS(), z29.VnS(), z18.VnS());
1950 __ movprfx(z31, z5);
1951 __ fmmla(z31.VnD(), z30.VnD(), z18.VnD());
1967 __ movprfx(z25, z26);
1968 __ adclb(z25.VnS(), z17.VnS(), z24.VnS());
1970 __ movprfx(z0, z1);
1971 __ adclt(z0.VnS(), z2.VnS(), z15.VnS());
1973 __ movprfx(z3, z4);
1974 __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB());
1976 __ movprfx(z6, z7);
1977 __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD());
1979 __ movprfx(z18, z19);
1980 __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD());
1982 __ movprfx(z7, z8);
1983 __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD());
1985 __ movprfx(z21, z22);
1986 __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD());
1988 __ movprfx(z5, z6);
1989 __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90);
1991 __ movprfx(z7, z8);
1992 __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0);
1994 __ movprfx(z7, z8);
1995 __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0);
1997 __ movprfx(z7, z8);
1998 __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0);
2000 __ movprfx(z19, z20);
2001 __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0);
2003 __ movprfx(z19, z20);
2004 __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0);
2006 __ movprfx(z19, z20);
2007 __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0);
2009 __ movprfx(z10, z11);
2010 __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD());
2012 __ movprfx(z3, z4);
2013 __ eorbt(z3.VnB(), z10.VnB(), z8.VnB());
2015 __ movprfx(z20, z22);
2016 __ eortb(z20.VnB(), z21.VnB(), z15.VnB());
2018 __ movprfx(z14, z15);
2019 __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD());
2021 __ movprfx(z14.VnD(), p4.Merging(), z15.VnD());
2022 __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD());
2024 __ movprfx(z15.VnH(), p0.Merging(), z16.VnH());
2025 __ flogb(z15.VnH(), p0.Merging(), z3.VnH());
2027 __ movprfx(z2, z3);
2028 __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD());
2030 __ movprfx(z22, z23);
2031 __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD());
2033 __ movprfx(z1, z2);
2034 __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD());
2036 __ movprfx(z16, z17);
2037 __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD());
2039 __ movprfx(z16, z17);
2040 __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH());
2042 __ movprfx(z16, z17);
2043 __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0);
2045 __ movprfx(z18, z19);
2046 __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH());
2048 __ movprfx(z18, z19);
2049 __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0);
2051 __ movprfx(z16, z17);
2052 __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH());
2054 __ movprfx(z16, z17);
2055 __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0);
2057 __ movprfx(z3, z4);
2058 __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH());
2060 __ movprfx(z3, z4);
2061 __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0);
2063 __ movprfx(z2, z3);
2064 __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0);
2066 __ movprfx(z2, z3);
2067 __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0);
2069 __ movprfx(z2, z3);
2070 __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0);
2072 __ movprfx(z2, z3);
2073 __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0);
2075 __ movprfx(z2, z3);
2076 __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0);
2078 __ movprfx(z2, z3);
2079 __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0);
2081 __ movprfx(z17, z18);
2082 __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD());
2084 __ movprfx(z13, z14);
2085 __ saba(z13.VnB(), z2.VnB(), z31.VnB());
2087 __ movprfx(z13, z14);
2088 __ sabalb(z13.VnD(), z20.VnS(), z26.VnS());
2090 __ movprfx(z14, z15);
2091 __ sabalt(z14.VnD(), z19.VnS(), z10.VnS());
2093 __ movprfx(z19.VnD(), p5.Merging(), z20.VnD());
2094 __ sadalp(z19.VnD(), p5.Merging(), z9.VnS());
2096 __ movprfx(z17, z18);
2097 __ sbclb(z17.VnS(), z10.VnS(), z8.VnS());
2099 __ movprfx(z20, z21);
2100 __ sbclt(z20.VnS(), z0.VnS(), z13.VnS());
2102 __ movprfx(z20.VnB(), p3.Merging(), z21.VnB());
2103 __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB());
2105 __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
2106 __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB());
2108 __ movprfx(z1.VnB(), p0.Merging(), z2.VnB());
2109 __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB());
2111 __ movprfx(z5, z6);
2112 __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB());
2114 __ movprfx(z27, z28);
2115 __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB());
2117 __ movprfx(z1, z2);
2118 __ smlalb(z1.VnD(), z3.VnS(), z23.VnS());
2120 __ movprfx(z1, z2);
2121 __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2123 __ movprfx(z1, z2);
2124 __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2126 __ movprfx(z1, z2);
2127 __ smlalt(z1.VnD(), z3.VnS(), z23.VnS());
2129 __ movprfx(z1, z2);
2130 __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2132 __ movprfx(z1, z2);
2133 __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2135 __ movprfx(z1, z2);
2136 __ smlslb(z1.VnD(), z3.VnS(), z23.VnS());
2138 __ movprfx(z1, z2);
2139 __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2141 __ movprfx(z1, z2);
2142 __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2144 __ movprfx(z1, z2);
2145 __ smlslt(z1.VnD(), z3.VnS(), z23.VnS());
2147 __ movprfx(z1, z2);
2148 __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2150 __ movprfx(z1, z2);
2151 __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2153 __ movprfx(z29.VnB(), p1.Merging(), z30.VnB());
2154 __ sqabs(z29.VnB(), p1.Merging(), z18.VnB());
2156 __ movprfx(z28.VnB(), p0.Merging(), z29.VnB());
2157 __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB());
2159 __ movprfx(z20, z21);
2160 __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90);
2162 __ movprfx(z6, z7);
2163 __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS());
2165 __ movprfx(z6, z7);
2166 __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0);
2168 __ movprfx(z6, z7);
2169 __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0);
2171 __ movprfx(z23, z24);
2172 __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS());
2174 __ movprfx(z11, z12);
2175 __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS());
2177 __ movprfx(z11, z12);
2178 __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0);
2180 __ movprfx(z11, z12);
2181 __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0);
2183 __ movprfx(z16, z17);
2184 __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS());
2186 __ movprfx(z16, z17);
2187 __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0);
2189 __ movprfx(z16, z17);
2190 __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0);
2192 __ movprfx(z26, z27);
2193 __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS());
2195 __ movprfx(z21, z22);
2196 __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS());
2198 __ movprfx(z21, z22);
2199 __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0);
2201 __ movprfx(z21, z22);
2202 __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0);
2204 __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
2205 __ sqneg(z21.VnB(), p0.Merging(), z17.VnB());
2207 __ movprfx(z31, z0);
2208 __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0);
2210 __ movprfx(z31, z0);
2211 __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0);
2213 __ movprfx(z31, z0);
2214 __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0);
2216 __ movprfx(z27, z28);
2217 __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB());
2219 __ movprfx(z27, z28);
2220 __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0);
2222 __ movprfx(z27, z28);
2223 __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0);
2225 __ movprfx(z27, z28);
2226 __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0);
2228 __ movprfx(z11, z12);
2229 __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB());
2231 __ movprfx(z11, z12);
2232 __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0);
2234 __ movprfx(z11, z12);
2235 __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0);
2237 __ movprfx(z11, z12);
2238 __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0);
2240 __ movprfx(z31.VnB(), p5.Merging(), z0.VnB());
2241 __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB());
2243 __ movprfx(z25.VnB(), p6.Merging(), z26.VnB());
2244 __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB());
2246 __ movprfx(z0.VnB(), p5.Merging(), z1.VnB());
2247 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0);
2249 __ movprfx(z0.VnB(), p5.Merging(), z1.VnB());
2250 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB());
2252 __ movprfx(z7.VnB(), p3.Merging(), z8.VnB());
2253 __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB());
2255 __ movprfx(z10.VnB(), p1.Merging(), z11.VnB());
2256 __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0);
2258 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
2259 __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
2261 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
2262 __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
2264 __ movprfx(z23.VnB(), p4.Merging(), z24.VnB());
2265 __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB());
2267 __ movprfx(z31.VnB(), p7.Merging(), z0.VnB());
2268 __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB());
2270 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
2271 __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB());
2273 __ movprfx(z12.VnB(), p0.Merging(), z13.VnB());
2274 __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1);
2276 __ movprfx(z0, z1);
2277 __ srsra(z0.VnB(), z8.VnB(), 1);
2279 __ movprfx(z0, z1);
2280 __ ssra(z0.VnB(), z8.VnB(), 1);
2282 __ movprfx(z26.VnB(), p2.Merging(), z27.VnB());
2283 __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB());
2285 __ movprfx(z23, z24);
2286 __ uaba(z23.VnB(), z22.VnB(), z20.VnB());
2288 __ movprfx(z11, z12);
2289 __ uabalb(z11.VnD(), z25.VnS(), z12.VnS());
2291 __ movprfx(z4, z5);
2292 __ uabalt(z4.VnD(), z2.VnS(), z31.VnS());
2294 __ movprfx(z20.VnD(), p4.Merging(), z21.VnD());
2295 __ uadalp(z20.VnD(), p4.Merging(), z5.VnS());
2297 __ movprfx(z21.VnB(), p2.Merging(), z22.VnB());
2298 __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB());
2300 __ movprfx(z1.VnB(), p4.Merging(), z2.VnB());
2301 __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB());
2303 __ movprfx(z18.VnB(), p0.Merging(), z19.VnB());
2304 __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB());
2306 __ movprfx(z7, z8);
2307 __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB());
2309 __ movprfx(z10, z11);
2310 __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB());
2312 __ movprfx(z31, z0);
2313 __ umlalb(z31.VnD(), z9.VnS(), z21.VnS());
2315 __ movprfx(z31, z0);
2316 __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0);
2318 __ movprfx(z31, z0);
2319 __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0);
2321 __ movprfx(z11, z12);
2322 __ umlalt(z11.VnD(), z5.VnS(), z22.VnS());
2324 __ movprfx(z11, z12);
2325 __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0);
2327 __ movprfx(z11, z12);
2328 __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0);
2330 __ movprfx(z28, z29);
2331 __ umlslb(z28.VnD(), z13.VnS(), z9.VnS());
2333 __ movprfx(z28, z29);
2334 __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0);
2336 __ movprfx(z28, z29);
2337 __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0);
2339 __ movprfx(z9, z10);
2340 __ umlslt(z9.VnD(), z12.VnS(), z30.VnS());
2342 __ movprfx(z9, z10);
2343 __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0);
2345 __ movprfx(z9, z10);
2346 __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0);
2348 __ movprfx(z24.VnB(), p7.Merging(), z25.VnB());
2349 __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB());  // z24 is both destination and first source; same predicate (p7) and lane size (B) as the movprfx above.
2351 __ movprfx(z20.VnB(), p1.Merging(), z21.VnB());
2352 __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB());
2354 __ movprfx(z8.VnB(), p5.Merging(), z9.VnB());
2355 __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB());
2357 __ movprfx(z29.VnB(), p7.Merging(), z30.VnB());
2358 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0);
2360 __ movprfx(z29.VnB(), p7.Merging(), z30.VnB());
2361 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB());
2363 __ movprfx(z12.VnB(), p1.Merging(), z13.VnB());
2364 __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB());
2366 __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
2367 __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
2369 __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
2370 __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
2372 __ movprfx(z25.VnS(), p7.Merging(), z26.VnS());
2373 __ urecpe(z25.VnS(), p7.Merging(), z2.VnS());
2375 __ movprfx(z29.VnB(), p4.Merging(), z30.VnB());
2376 __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB());
2378 __ movprfx(z15.VnB(), p2.Merging(), z16.VnB());
2379 __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB());
2381 __ movprfx(z27.VnB(), p1.Merging(), z28.VnB());
2382 __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB());
2384 __ movprfx(z31.VnB(), p2.Merging(), z0.VnB());
2385 __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1);
2387 __ movprfx(z4.VnS(), p3.Merging(), z5.VnS());
2388 __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS());
2390 __ movprfx(z0, z1);
2391 __ ursra(z0.VnB(), z8.VnB(), 1);
2393 __ movprfx(z25.VnB(), p4.Merging(), z26.VnB());
2394 __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB());
2396 __ movprfx(z0, z1);
2397 __ usra(z0.VnB(), z8.VnB(), 1);
2399 __ movprfx(z16, z17);
2400 __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1);
2418 __ movprfx(z29, z30);
2419 __ addhnb(z29.VnS(), z19.VnD(), z2.VnD());
2421 __ movprfx(z8, z9);
2422 __ addhnt(z8.VnS(), z12.VnD(), z6.VnD());
2424 __ movprfx(z18, z19);
2425 __ bdep(z18.VnB(), z10.VnB(), z0.VnB());
2427 __ movprfx(z6, z7);
2428 __ bext(z6.VnB(), z2.VnB(), z5.VnB());
2430 __ movprfx(z24, z25);
2431 __ bgrp(z24.VnB(), z9.VnB(), z5.VnB());
2433 __ movprfx(z1, z2);
2434 __ fcvtlt(z1.VnD(), p1.Merging(), z28.VnS());
2436 __ movprfx(z1, z2);
2437 __ fcvtlt(z1.VnS(), p1.Merging(), z28.VnH());
2439 __ movprfx(z4, z5);
2440 __ fcvtnt(z4.VnH(), p7.Merging(), z0.VnS());
2442 __ movprfx(z4, z5);
2443 __ fcvtnt(z4.VnS(), p7.Merging(), z0.VnD());
2445 __ movprfx(z27, z28);
2446 __ fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD());
2448 __ movprfx(z24, z25);
2449 __ histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS());
2451 __ movprfx(z22, z23);
2452 __ histseg(z22.VnB(), z14.VnB(), z8.VnB());
2454 __ movprfx(z21, z22);
2455 __ ldnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), x23));
2457 __ movprfx(z21, z22);
2458 __ ldnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23));
2460 __ movprfx(z10, z11);
2461 __ ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z23.VnD(), x6));
2463 __ movprfx(z30, z31);
2464 __ ldnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x11));
2466 __ movprfx(z30, z31);
2467 __ ldnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x11));
2469 __ movprfx(z7, z8);
2470 __ ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11));
2472 __ movprfx(z7, z8);
2473 __ ldnt1sb(z7.VnD(), p3.Zeroing(), SVEMemOperand(z18.VnD(), x11));
2475 __ movprfx(z17, z18);
2476 __ ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19));
2478 __ movprfx(z17, z18);
2479 __ ldnt1sh(z17.VnD(), p5.Zeroing(), SVEMemOperand(z31.VnD(), x19));
2481 __ movprfx(z3, z4);
2482 __ ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10));
2484 __ movprfx(z0, z1);
2485 __ ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1));
2487 __ movprfx(z0, z1);
2488 __ ldnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1));
2490 __ movprfx(z18, z19);
2491 __ match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB());
2493 __ movprfx(z15, z16);
2494 __ mul(z15.VnB(), z15.VnB(), z15.VnB());
2496 __ movprfx(z15, z16);
2497 __ mul(z15.VnH(), z15.VnH(), z1.VnH(), 0);
2499 __ movprfx(z15, z16);
2500 __ mul(z15.VnS(), z15.VnS(), z1.VnS(), 0);
2502 __ movprfx(z15, z16);
2503 __ mul(z15.VnD(), z15.VnD(), z1.VnD(), 0);
2505 __ movprfx(z20, z21);
2506 __ nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB());
2508 __ movprfx(z0, z1);
2509 __ pmul(z0.VnB(), z5.VnB(), z5.VnB());
2511 __ movprfx(z12, z13);
2512 __ pmullb(z12.VnD(), z21.VnS(), z12.VnS());
2514 __ movprfx(z31, z0);
2515 __ pmullt(z31.VnD(), z30.VnS(), z26.VnS());
2517 __ movprfx(z0, z1);
2518 __ raddhnb(z0.VnS(), z11.VnD(), z10.VnD());
2520 __ movprfx(z23, z24);
2521 __ raddhnt(z23.VnS(), z27.VnD(), z9.VnD());
2523 __ movprfx(z5, z6);
2524 __ rshrnb(z5.VnB(), z1.VnH(), 1);
2526 __ movprfx(z5, z6);
2527 __ rshrnt(z5.VnB(), z1.VnH(), 8);
2529 __ movprfx(z30, z31);
2530 __ rsubhnb(z30.VnS(), z29.VnD(), z11.VnD());
2532 __ movprfx(z25, z26);
2533 __ rsubhnt(z25.VnS(), z7.VnD(), z18.VnD());
2535 __ movprfx(z2, z3);
2536 __ sabdlb(z2.VnD(), z21.VnS(), z3.VnS());
2538 __ movprfx(z25, z26);
2539 __ sabdlt(z25.VnD(), z23.VnS(), z17.VnS());
2541 __ movprfx(z24, z25);
2542 __ saddlb(z24.VnD(), z30.VnS(), z16.VnS());
2544 __ movprfx(z15, z16);
2545 __ saddlbt(z15.VnD(), z6.VnS(), z18.VnS());
2547 __ movprfx(z21, z22);
2548 __ saddlt(z21.VnD(), z29.VnS(), z31.VnS());
2550 __ movprfx(z12, z13);
2551 __ saddwb(z12.VnD(), z8.VnD(), z8.VnS());
2553 __ movprfx(z24, z25);
2554 __ saddwt(z24.VnD(), z0.VnD(), z3.VnS());
2556 __ movprfx(z7, z8);
2557 __ shrnb(z7.VnB(), z4.VnH(), 1);
2559 __ movprfx(z21, z22);
2560 __ shrnt(z21.VnB(), z29.VnH(), 1);
2562 __ movprfx(z29, z30);
2563 __ sli(z29.VnB(), z7.VnB(), 0);
2565 __ movprfx(z23, z24);
2566 __ smulh(z23.VnB(), z23.VnB(), z3.VnB());
2568 __ movprfx(z10, z11);
2569 __ smullb(z10.VnD(), z4.VnS(), z4.VnS());
2571 __ movprfx(z10, z11);
2572 __ smullb(z10.VnS(), z4.VnH(), z4.VnH(), 0);
2574 __ movprfx(z10, z11);
2575 __ smullb(z10.VnD(), z4.VnS(), z4.VnS(), 0);
2577 __ movprfx(z31, z0);
2578 __ smullt(z31.VnD(), z26.VnS(), z5.VnS());
2580 __ movprfx(z31, z0);
2581 __ smullt(z31.VnS(), z26.VnH(), z5.VnH(), 0);
2583 __ movprfx(z31, z0);
2584 __ smullt(z31.VnD(), z26.VnS(), z5.VnS(), 0);
2586 __ movprfx(z4, z5);
2587 __ splice_con(z4.VnB(), p7.Merging(), z0.VnB(), z1.VnB());
2589 __ movprfx(z18, z19);
2590 __ sqdmulh(z18.VnB(), z25.VnB(), z1.VnB());
2592 __ movprfx(z18, z19);
2593 __ sqdmulh(z18.VnH(), z25.VnH(), z1.VnH(), 0);
2595 __ movprfx(z18, z19);
2596 __ sqdmulh(z18.VnS(), z25.VnS(), z1.VnS(), 0);
2598 __ movprfx(z18, z19);
2599 __ sqdmulh(z18.VnD(), z25.VnD(), z1.VnD(), 0);
2601 __ movprfx(z1, z2);
2602 __ sqdmullb(z1.VnD(), z31.VnS(), z21.VnS());
2604 __ movprfx(z1, z2);
2605 __ sqdmullb(z1.VnS(), z31.VnH(), z1.VnH(), 0);
2607 __ movprfx(z1, z2);
2608 __ sqdmullb(z1.VnD(), z31.VnS(), z1.VnS(), 0);
2610 __ movprfx(z2, z3);
2611 __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS());
2613 __ movprfx(z2, z3);
2614 __ sqdmullt(z2.VnS(), z1.VnH(), z5.VnH(), 0);
2616 __ movprfx(z2, z3);
2617 __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS(), 0);
2619 __ movprfx(z21, z22);
2620 __ sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB());
2622 __ movprfx(z21, z22);
2623 __ sqrdmulh(z21.VnH(), z21.VnH(), z2.VnH(), 0);
2625 __ movprfx(z21, z22);
2626 __ sqrdmulh(z21.VnS(), z21.VnS(), z2.VnS(), 0);
2628 __ movprfx(z21, z22);
2629 __ sqrdmulh(z21.VnD(), z21.VnD(), z2.VnD(), 0);
2631 __ movprfx(z1, z2);
2632 __ sqrshrnb(z1.VnB(), z1.VnH(), 1);
2634 __ movprfx(z24, z25);
2635 __ sqrshrnt(z24.VnB(), z19.VnH(), 8);
2637 __ movprfx(z23, z24);
2638 __ sqrshrunb(z23.VnB(), z28.VnH(), 1);
2640 __ movprfx(z9, z10);
2641 __ sqrshrunt(z9.VnB(), z15.VnH(), 8);
2643 __ movprfx(z25, z26);
2644 __ sqshrnb(z25.VnB(), z1.VnH(), 1);
2646 __ movprfx(z0, z1);
2647 __ sqshrnt(z0.VnB(), z25.VnH(), 8);
2649 __ movprfx(z25, z26);
2650 __ sqshrunb(z25.VnB(), z10.VnH(), 1);
2652 __ movprfx(z20, z21);
2653 __ sqshrunt(z20.VnB(), z3.VnH(), 8);
2655 __ movprfx(z2, z3);
2656 __ sqxtnb(z2.VnB(), z0.VnH());
2658 __ movprfx(z31, z0);
2659 __ sqxtnt(z31.VnB(), z18.VnH());
2661 __ movprfx(z28, z29);
2662 __ sqxtunb(z28.VnB(), z6.VnH());
2664 __ movprfx(z14, z15);
2665 __ sqxtunt(z14.VnB(), z31.VnH());
2667 __ movprfx(z6, z7);
2668 __ sri(z6.VnB(), z9.VnB(), 1);
2670 __ movprfx(z2, z3);
2671 __ sshllb(z2.VnH(), z20.VnB(), 0);
2673 __ movprfx(z27, z28);
2674 __ sshllt(z27.VnH(), z8.VnB(), 0);
2676 __ movprfx(z4, z5);
2677 __ ssublb(z4.VnD(), z23.VnS(), z7.VnS());
2679 __ movprfx(z6, z7);
2680 __ ssublbt(z6.VnD(), z28.VnS(), z12.VnS());
2682 __ movprfx(z12, z13);
2683 __ ssublt(z12.VnD(), z13.VnS(), z6.VnS());
2685 __ movprfx(z11, z12);
2686 __ ssubltb(z11.VnD(), z18.VnS(), z19.VnS());
2688 __ movprfx(z7, z8);
2689 __ ssubwb(z7.VnD(), z28.VnD(), z11.VnS());
2691 __ movprfx(z29, z30);
2692 __ ssubwt(z29.VnD(), z25.VnD(), z20.VnS());
2694 __ movprfx(z21, z22);
2695 __ stnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z1.VnS(), x23));
2697 __ movprfx(z21, z22);
2698 __ stnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23));
2700 __ movprfx(z10, z11);
2701 __ stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), x23));
2703 __ movprfx(z30, z31);
2704 __ stnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x6));
2706 __ movprfx(z30, z31);
2707 __ stnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x6));
2709 __ movprfx(z0, z1);
2710 __ stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1));
2712 __ movprfx(z0, z1);
2713 __ stnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1));
2715 __ movprfx(z31, z0);
2716 __ subhnb(z31.VnS(), z31.VnD(), z7.VnD());
2718 __ movprfx(z31, z0);
2719 __ subhnt(z31.VnS(), z22.VnD(), z27.VnD());
2721 __ movprfx(z24, z25);
2722 __ tbl(z24.VnB(), z29.VnB(), z30.VnB(), z0.VnB());
2724 __ movprfx(z22, z23);
2725 __ tbx(z22.VnB(), z15.VnB(), z19.VnB());
2727 __ movprfx(z1, z2);
2728 __ uabdlb(z1.VnD(), z26.VnS(), z12.VnS());
2730 __ movprfx(z25, z26);
2731 __ uabdlt(z25.VnD(), z29.VnS(), z14.VnS());
2733 __ movprfx(z3, z4);
2734 __ uaddlb(z3.VnD(), z5.VnS(), z2.VnS());
2736 __ movprfx(z15, z16);
2737 __ uaddlt(z15.VnD(), z28.VnS(), z20.VnS());
2739 __ movprfx(z31, z0);
2740 __ uaddwb(z31.VnD(), z8.VnD(), z25.VnS());
2742 __ movprfx(z17, z18);
2743 __ uaddwt(z17.VnD(), z15.VnD(), z2.VnS());
2745 __ movprfx(z12, z13);
2746 __ umulh(z12.VnB(), z12.VnB(), z17.VnB());
2748 __ movprfx(z12, z13);
2749 __ umullb(z12.VnD(), z5.VnS(), z2.VnS());
2751 __ movprfx(z12, z13);
2752 __ umullb(z12.VnS(), z5.VnH(), z2.VnH(), 0);
2754 __ movprfx(z12, z13);
2755 __ umullb(z12.VnD(), z5.VnS(), z2.VnS(), 0);
2757 __ movprfx(z24, z25);
2758 __ umullt(z24.VnD(), z6.VnS(), z6.VnS());
2760 __ movprfx(z24, z25);
2761 __ umullt(z24.VnS(), z6.VnH(), z1.VnH(), 0);
2763 __ movprfx(z24, z25);
2764 __ umullt(z24.VnD(), z6.VnS(), z1.VnS(), 0);
2766 __ movprfx(z30, z31);
2767 __ uqrshrnb(z30.VnB(), z25.VnH(), 1);
2769 __ movprfx(z3, z4);
2770 __ uqrshrnt(z3.VnB(), z25.VnH(), 8);
2772 __ movprfx(z17, z18);
2773 __ uqshrnb(z17.VnB(), z4.VnH(), 1);
2775 __ movprfx(z28, z29);
2776 __ uqshrnt(z28.VnB(), z18.VnH(), 8);
2778 __ movprfx(z28, z29);
2779 __ uqxtnb(z28.VnB(), z4.VnH());
2781 __ movprfx(z19, z20);
2782 __ uqxtnt(z19.VnB(), z7.VnH());
2784 __ movprfx(z8, z9);
2785 __ ushllb(z8.VnH(), z31.VnB(), 0);
2787 __ movprfx(z3, z4);
2788 __ ushllt(z3.VnH(), z21.VnB(), 0);
2790 __ movprfx(z25, z26);
2791 __ usublb(z25.VnD(), z9.VnS(), z17.VnS());
2793 __ movprfx(z5, z6);
2794 __ usublt(z5.VnD(), z11.VnS(), z15.VnS());
2796 __ movprfx(z10, z11);
2797 __ usubwb(z10.VnD(), z13.VnD(), z20.VnS());
2799 __ movprfx(z15, z16);
2800 __ usubwt(z15.VnD(), z8.VnD(), z23.VnS());
2802 __ movprfx(z20, z21);
2803 __ whilege(p0.VnB(), w20, w29);
2805 __ movprfx(z24, z25);
2806 __ whilegt(p11.VnB(), w24, w3);
2808 __ movprfx(z20, z21);
2809 __ whilehi(p2.VnB(), x20, x8);
2811 __ movprfx(z22, z23);
2812 __ whilehs(p4.VnB(), w22, w9);
2814 __ movprfx(z25, z26);
2815 __ whilerw(p7.VnB(), x25, x27);
2817 __ movprfx(z14, z15);
2818 __ whilewr(p8.VnB(), x14, x14);
2834 __ movprfx(z25.VnS(), p0.Zeroing(), z26.VnS());
2835 __ adclb(z25.VnS(), z17.VnS(), z24.VnS());
2837 __ movprfx(z0.VnS(), p0.Zeroing(), z1.VnS());
2838 __ adclt(z0.VnS(), z2.VnS(), z15.VnS());
2840 __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
2841 __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD());
2843 __ movprfx(z18.VnD(), p0.Zeroing(), z19.VnD());
2844 __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD());
2846 __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD());
2847 __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD());
2849 __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
2850 __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD());
2852 __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB());
2853 __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90);
2855 __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS());
2856 __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0);
2858 __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS());
2859 __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0);
2861 __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD());
2862 __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0);
2864 __ movprfx(z19.VnB(), p0.Zeroing(), z20.VnB());
2865 __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0);
2867 __ movprfx(z19.VnS(), p0.Zeroing(), z20.VnS());
2868 __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0);
2870 __ movprfx(z19.VnH(), p0.Zeroing(), z20.VnH());
2871 __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0);
2873 __ movprfx(z10.VnD(), p0.Zeroing(), z11.VnD());
2874 __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD());
2876 __ movprfx(z3.VnB(), p0.Zeroing(), z4.VnB());
2877 __ eorbt(z3.VnB(), z10.VnB(), z8.VnB());
2879 __ movprfx(z20.VnB(), p0.Zeroing(), z22.VnB());
2880 __ eortb(z20.VnB(), z21.VnB(), z15.VnB());
2882 __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD());
2883 __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD());
2885 __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
2886 __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD());
2888 __ movprfx(z22.VnD(), p0.Zeroing(), z23.VnD());
2889 __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD());
2891 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
2892 __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD());
2894 __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
2895 __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD());
2897 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
2898 __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH());
2900 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
2901 __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0);
2903 __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS());
2904 __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH());
2906 __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS());
2907 __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0);
2909 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
2910 __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH());
2912 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
2913 __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0);
2915 __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS());
2916 __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH());
2918 __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS());
2919 __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0);
2921 __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH());
2922 __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0);
2924 __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS());
2925 __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0);
2927 __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
2928 __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0);
2930 __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH());
2931 __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0);
2933 __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS());
2934 __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0);
2936 __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
2937 __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0);
2939 __ movprfx(z17.VnD(), p0.Zeroing(), z18.VnD());
2940 __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD());
2942 __ movprfx(z13.VnB(), p0.Zeroing(), z14.VnB());
2943 __ saba(z13.VnB(), z2.VnB(), z31.VnB());
2945 __ movprfx(z13.VnD(), p0.Zeroing(), z14.VnD());
2946 __ sabalb(z13.VnD(), z20.VnS(), z26.VnS());
2948 __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD());
2949 __ sabalt(z14.VnD(), z19.VnS(), z10.VnS());
2951 __ movprfx(z17.VnS(), p0.Zeroing(), z18.VnS());
2952 __ sbclb(z17.VnS(), z10.VnS(), z8.VnS());
2954 __ movprfx(z20.VnS(), p0.Zeroing(), z21.VnS());
2955 __ sbclt(z20.VnS(), z0.VnS(), z13.VnS());
2957 __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB());
2958 __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB());
2960 __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB());
2961 __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB());
2963 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
2964 __ smlalb(z1.VnD(), z3.VnS(), z23.VnS());
2966 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
2967 __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2969 __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
2970 __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2972 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
2973 __ smlalt(z1.VnD(), z3.VnS(), z23.VnS());
2975 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
2976 __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2978 __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
2979 __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2981 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
2982 __ smlslb(z1.VnD(), z3.VnS(), z23.VnS());
2984 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
2985 __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2987 __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
2988 __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2990 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
2991 __ smlslt(z1.VnD(), z3.VnS(), z23.VnS());
2993 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
2994 __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
2996 __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
2997 __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
2999 __ movprfx(z20.VnB(), p0.Zeroing(), z21.VnB());
3000 __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90);
3002 __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
3003 __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS());
3005 __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
3006 __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0);
3008 __ movprfx(z6.VnS(), p0.Zeroing(), z7.VnS());
3009 __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0);
3011 __ movprfx(z23.VnD(), p0.Zeroing(), z24.VnD());
3012 __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS());
3014 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
3015 __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS());
3017 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
3018 __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0);
3020 __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
3021 __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0);
3023 __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
3024 __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS());
3026 __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
3027 __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0);
3029 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
3030 __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0);
3032 __ movprfx(z26.VnD(), p0.Zeroing(), z27.VnD());
3033 __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS());
3035 __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
3036 __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS());
3038 __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
3039 __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0);
3041 __ movprfx(z21.VnS(), p0.Zeroing(), z22.VnS());
3042 __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0);
3044 __ movprfx(z31.VnB(), p0.Zeroing(), z0.VnB());
3045 __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0);
3047 __ movprfx(z31.VnH(), p0.Zeroing(), z0.VnH());
3048 __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0);
3050 __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS());
3051 __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0);
3053 __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB());
3054 __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB());
3056 __ movprfx(z27.VnH(), p0.Zeroing(), z28.VnH());
3057 __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0);
3059 __ movprfx(z27.VnS(), p0.Zeroing(), z28.VnS());
3060 __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0);
3062 __ movprfx(z27.VnD(), p0.Zeroing(), z28.VnD());
3063 __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0);
3065 __ movprfx(z11.VnB(), p0.Zeroing(), z12.VnB());
3066 __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB());
3068 __ movprfx(z11.VnH(), p0.Zeroing(), z12.VnH());
3069 __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0);
3071 __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
3072 __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0);
3074 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
3075 __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0);
3077 __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
3078 __ srsra(z0.VnB(), z8.VnB(), 1);
3080 __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
3081 __ ssra(z0.VnB(), z8.VnB(), 1);
3083 __ movprfx(z23.VnB(), p0.Zeroing(), z24.VnB());
3084 __ uaba(z23.VnB(), z22.VnB(), z20.VnB());
3086 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
3087 __ uabalb(z11.VnD(), z25.VnS(), z12.VnS());
3089 __ movprfx(z4.VnD(), p0.Zeroing(), z5.VnD());
3090 __ uabalt(z4.VnD(), z2.VnS(), z31.VnS());
3092 __ movprfx(z7.VnB(), p0.Zeroing(), z8.VnB());
3093 __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB());
3095 __ movprfx(z10.VnB(), p0.Zeroing(), z11.VnB());
3096 __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB());
3098 __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD());
3099 __ umlalb(z31.VnD(), z9.VnS(), z21.VnS());
3101 __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD());
3102 __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0);
3104 __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS());
3105 __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0);
3107 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
3108 __ umlalt(z11.VnD(), z5.VnS(), z22.VnS());
3110 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
3111 __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0);
3113 __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
3114 __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0);
3116 __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD());
3117 __ umlslb(z28.VnD(), z13.VnS(), z9.VnS());
3119 __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD());
3120 __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0);
3122 __ movprfx(z28.VnS(), p0.Zeroing(), z29.VnS());
3123 __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0);
3125 __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD());
3126 __ umlslt(z9.VnD(), z12.VnS(), z30.VnS());
3128 __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD());
3129 __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0);
3131 __ movprfx(z9.VnS(), p0.Zeroing(), z10.VnS());
3132 __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0);
3134 __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
3135 __ ursra(z0.VnB(), z8.VnB(), 1);
3137 __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
3138 __ usra(z0.VnB(), z8.VnB(), 1);
3140 __ movprfx(z16.VnB(), p0.Zeroing(), z17.VnB());
3141 __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1);
3157 __ movprfx(z25, z26);
3158 __ adclb(z25.VnS(), z17.VnS(), z25.VnS());
3160 __ movprfx(z0, z1);
3161 __ adclt(z0.VnS(), z2.VnS(), z0.VnS());
3163 __ movprfx(z3, z4);
3164 __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB());
3166 __ movprfx(z6, z7);
3167 __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z6.VnD());
3169 __ movprfx(z18, z19);
3170 __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z18.VnD());
3172 __ movprfx(z7, z8);
3173 __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z7.VnD());
3175 __ movprfx(z21, z22);
3176 __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z21.VnD());
3178 __ movprfx(z5, z6);
3179 __ cadd(z5.VnB(), z5.VnB(), z5.VnB(), 90);
3181 __ movprfx(z7, z8);
3182 __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0);
3184 __ movprfx(z7, z8);
3185 __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0, 0);
3187 __ movprfx(z7, z8);
3188 __ cdot(z7.VnD(), z7.VnH(), z0.VnH(), 0, 0);
3190 __ movprfx(z19, z20);
3191 __ cmla(z19.VnB(), z19.VnB(), z2.VnB(), 0);
3193 __ movprfx(z19, z20);
3194 __ cmla(z19.VnS(), z19.VnS(), z2.VnS(), 0, 0);
3196 __ movprfx(z1, z20);
3197 __ cmla(z1.VnH(), z7.VnH(), z1.VnH(), 0, 0);
3199 __ movprfx(z10, z11);
3200 __ eor3(z10.VnD(), z10.VnD(), z10.VnD(), z23.VnD());
3202 __ movprfx(z3, z4);
3203 __ eorbt(z3.VnB(), z10.VnB(), z3.VnB());
3205 __ movprfx(z20, z22);
3206 __ eortb(z20.VnB(), z21.VnB(), z20.VnB());
3208 __ movprfx(z14, z15);
3209 __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z14.VnD());
3211 __ movprfx(z14.VnD(), p4.Merging(), z15.VnD());
3212 __ fcvtx(z14.VnS(), p4.Merging(), z14.VnD());
3214 __ movprfx(z15.VnH(), p0.Merging(), z16.VnH());
3215 __ flogb(z15.VnH(), p0.Merging(), z15.VnH());
3217 __ movprfx(z2, z3);
3218 __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z2.VnD());
3220 __ movprfx(z22, z23);
3221 __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z22.VnD());
3223 __ movprfx(z1, z2);
3224 __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z1.VnD());
3226 __ movprfx(z16, z17);
3227 __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z16.VnD());
3229 __ movprfx(z16, z17);
3230 __ fmlalb(z16.VnS(), z18.VnH(), z16.VnH());
3232 __ movprfx(z16, z17);
3233 __ fmlalb(z16.VnS(), z16.VnH(), z2.VnH(), 0);
3235 __ movprfx(z18, z19);
3236 __ fmlalt(z18.VnS(), z13.VnH(), z18.VnH());
3238 __ movprfx(z18, z19);
3239 __ fmlalt(z18.VnS(), z18.VnH(), z5.VnH(), 0);
3241 __ movprfx(z16, z17);
3242 __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH());
3244 __ movprfx(z16, z17);
3245 __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH(), 0);
3247 __ movprfx(z3, z4);
3248 __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH());
3250 __ movprfx(z3, z4);
3251 __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH(), 0);
3253 __ movprfx(z2, z3);
3254 __ mla(z2.VnH(), z0.VnH(), z2.VnH(), 0);
3256 __ movprfx(z2, z3);
3257 __ mla(z2.VnS(), z0.VnS(), z2.VnS(), 0);
3259 __ movprfx(z2, z3);
3260 __ mla(z2.VnD(), z0.VnD(), z2.VnD(), 0);
3262 __ movprfx(z2, z3);
3263 __ mls(z2.VnH(), z0.VnH(), z2.VnH(), 0);
3265 __ movprfx(z2, z3);
3266 __ mls(z2.VnS(), z0.VnS(), z2.VnS(), 0);
3268 __ movprfx(z2, z3);
3269 __ mls(z2.VnD(), z0.VnD(), z2.VnD(), 0);
3271 __ movprfx(z17, z18);
3272 __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z17.VnD());
3274 __ movprfx(z13, z14);
3275 __ saba(z13.VnB(), z2.VnB(), z13.VnB());
3277 __ movprfx(z13, z14);
3278 __ sabalb(z13.VnD(), z13.VnS(), z26.VnS());
3280 __ movprfx(z14, z15);
3281 __ sabalt(z14.VnD(), z14.VnS(), z10.VnS());
3283 __ movprfx(z19.VnD(), p5.Merging(), z20.VnD());
3284 __ sadalp(z19.VnD(), p5.Merging(), z19.VnS());
3286 __ movprfx(z17, z18);
3287 __ sbclb(z17.VnS(), z17.VnS(), z8.VnS());
3289 __ movprfx(z20, z21);
3290 __ sbclt(z20.VnS(), z20.VnS(), z13.VnS());
3292 __ movprfx(z20.VnB(), p3.Merging(), z21.VnB());
3293 __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z20.VnB());
3295 __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
3296 __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z21.VnB());
3298 __ movprfx(z1.VnB(), p0.Merging(), z2.VnB());
3299 __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z1.VnB());
3301 __ movprfx(z5, z6);
3302 __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z5.VnB());
3304 __ movprfx(z27, z28);
3305 __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z27.VnB());
3307 __ movprfx(z1, z2);
3308 __ smlalb(z1.VnD(), z3.VnS(), z1.VnS());
3310 __ movprfx(z1, z2);
3311 __ smlalb(z1.VnD(), z3.VnS(), z1.VnS(), 0);
3313 __ movprfx(z1, z2);
3314 __ smlalb(z1.VnS(), z1.VnH(), z2.VnH(), 0);
3316 __ movprfx(z1, z2);
3317 __ smlalt(z1.VnD(), z1.VnS(), z23.VnS());
3319 __ movprfx(z1, z2);
3320 __ smlalt(z1.VnD(), z3.VnS(), z1.VnS(), 0);
3322 __ movprfx(z1, z2);
3323 __ smlalt(z1.VnS(), z1.VnH(), z2.VnH(), 0);
3325 __ movprfx(z1, z2);
3326 __ smlslb(z1.VnD(), z1.VnS(), z23.VnS());
3328 __ movprfx(z1, z2);
3329 __ smlslb(z1.VnD(), z3.VnS(), z1.VnS(), 0);
3331 __ movprfx(z1, z2);
3332 __ smlslb(z1.VnS(), z3.VnH(), z1.VnH(), 0);
3334 __ movprfx(z1, z2);
3335 __ smlslt(z1.VnD(), z1.VnS(), z23.VnS());
3337 __ movprfx(z1, z2);
3338 __ smlslt(z1.VnD(), z3.VnS(), z1.VnS(), 0);
3340 __ movprfx(z1, z2);
3341 __ smlslt(z1.VnS(), z1.VnH(), z2.VnH(), 0);
3343 __ movprfx(z29.VnB(), p1.Merging(), z30.VnB());
3344 __ sqabs(z29.VnB(), p1.Merging(), z29.VnB());
3346 __ movprfx(z28.VnB(), p0.Merging(), z29.VnB());
3347 __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB());
3349 __ movprfx(z20, z21);
3350 __ sqcadd(z20.VnB(), z20.VnB(), z20.VnB(), 90);
3352 __ movprfx(z6, z7);
3353 __ sqdmlalb(z6.VnD(), z6.VnS(), z25.VnS());
3355 __ movprfx(z6, z7);
3356 __ sqdmlalb(z6.VnD(), z6.VnS(), z2.VnS(), 0);
3358 __ movprfx(z6, z7);
3359 __ sqdmlalb(z6.VnS(), z6.VnH(), z2.VnH(), 0);
3361 __ movprfx(z23, z24);
3362 __ sqdmlalbt(z23.VnD(), z23.VnS(), z26.VnS());
3364 __ movprfx(z11, z12);
3365 __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS());
3367 __ movprfx(z11, z12);
3368 __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS(), 0);
3370 __ movprfx(z1, z12);
3371 __ sqdmlalt(z1.VnS(), z0.VnH(), z1.VnH(), 0);
3373 __ movprfx(z16, z17);
3374 __ sqdmlslb(z16.VnD(), z26.VnS(), z16.VnS());
3376 __ movprfx(z16, z17);
3377 __ sqdmlslb(z16.VnD(), z16.VnS(), z2.VnS(), 0);
3379 __ movprfx(z16, z17);
3380 __ sqdmlslb(z16.VnS(), z16.VnH(), z2.VnH(), 0);
3382 __ movprfx(z26, z27);
3383 __ sqdmlslbt(z26.VnD(), z26.VnS(), z4.VnS());
3385 __ movprfx(z21, z22);
3386 __ sqdmlslt(z21.VnD(), z23.VnS(), z21.VnS());
3388 __ movprfx(z21, z22);
3389 __ sqdmlslt(z21.VnD(), z21.VnS(), z0.VnS(), 0);
3391 __ movprfx(z1, z22);
3392 __ sqdmlslt(z1.VnS(), z23.VnH(), z1.VnH(), 0);  // zda is the movprfx destination (z1) and z1 is also the indexed zm source, as in the sibling sqdmlalt/umlalt pairs
3394 __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
3395 __ sqneg(z21.VnB(), p0.Merging(), z21.VnB());
3397 __ movprfx(z31, z0);
3398 __ sqrdcmlah(z31.VnB(), z15.VnB(), z31.VnB(), 0);
3400 __ movprfx(z31, z0);
3401 __ sqrdcmlah(z31.VnH(), z31.VnH(), z2.VnH(), 0, 0);
3403 __ movprfx(z31, z0);
3404 __ sqrdcmlah(z31.VnS(), z31.VnS(), z2.VnS(), 0, 0);
3406 __ movprfx(z27, z28);
3407 __ sqrdmlah(z27.VnB(), z27.VnB(), z19.VnB());
3409 __ movprfx(z27, z28);
3410 __ sqrdmlah(z27.VnH(), z27.VnH(), z1.VnH(), 0);
3412 __ movprfx(z27, z28);
3413 __ sqrdmlah(z27.VnS(), z27.VnS(), z1.VnS(), 0);
3415 __ movprfx(z27, z28);
3416 __ sqrdmlah(z27.VnD(), z27.VnD(), z1.VnD(), 0);
3418 __ movprfx(z11, z12);
3419 __ sqrdmlsh(z11.VnB(), z16.VnB(), z11.VnB());
3421 __ movprfx(z11, z12);
3422 __ sqrdmlsh(z11.VnH(), z11.VnH(), z1.VnH(), 0);
3424 __ movprfx(z11, z12);
3425 __ sqrdmlsh(z11.VnS(), z11.VnS(), z1.VnS(), 0);
3427 __ movprfx(z11, z12);
3428 __ sqrdmlsh(z11.VnD(), z11.VnD(), z1.VnD(), 0);
3430 __ movprfx(z31.VnB(), p5.Merging(), z0.VnB());
3431 __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z31.VnB());
3433 __ movprfx(z25.VnB(), p6.Merging(), z26.VnB());
3434 __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z25.VnB());
3436 __ movprfx(z0.VnB(), p5.Merging(), z1.VnB());
3437 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z0.VnB());
3439 __ movprfx(z7.VnB(), p3.Merging(), z8.VnB());
3440 __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z7.VnB());
3442 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
3443 __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB());
3445 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
3446 __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB());
3448 __ movprfx(z23.VnB(), p4.Merging(), z24.VnB());
3449 __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z23.VnB());
3451 __ movprfx(z31.VnB(), p7.Merging(), z0.VnB());
3452 __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z31.VnB());
3454 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
3455 __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB());
3457 __ movprfx(z0, z1);
3458 __ srsra(z0.VnB(), z0.VnB(), 1);
3460 __ movprfx(z0, z1);
3461 __ ssra(z0.VnB(), z0.VnB(), 1);
3463 __ movprfx(z26.VnB(), p2.Merging(), z27.VnB());
3464 __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z26.VnB());
3466 __ movprfx(z23, z24);
3467 __ uaba(z23.VnB(), z22.VnB(), z23.VnB());
3469 __ movprfx(z11, z12);
3470 __ uabalb(z11.VnD(), z25.VnS(), z11.VnS());
3472 __ movprfx(z4, z5);
3473 __ uabalt(z4.VnD(), z4.VnS(), z31.VnS());
3475 __ movprfx(z20.VnD(), p4.Merging(), z21.VnD());
3476 __ uadalp(z20.VnD(), p4.Merging(), z20.VnS());
3478 __ movprfx(z21.VnB(), p2.Merging(), z22.VnB());
3479 __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z21.VnB());
3481 __ movprfx(z1.VnB(), p4.Merging(), z2.VnB());
3482 __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z1.VnB());
3484 __ movprfx(z18.VnB(), p0.Merging(), z19.VnB());
3485 __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z18.VnB());
3487 __ movprfx(z7, z8);
3488 __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z7.VnB());
3490 __ movprfx(z10, z11);
3491 __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z10.VnB());
3493 __ movprfx(z31, z0);
3494 __ umlalb(z31.VnD(), z9.VnS(), z31.VnS());
3496 __ movprfx(z31, z0);
3497 __ umlalb(z31.VnD(), z31.VnS(), z1.VnS(), 0);
3499 __ movprfx(z31, z0);
3500 __ umlalb(z31.VnS(), z31.VnH(), z1.VnH(), 0);
3502 __ movprfx(z11, z12);
3503 __ umlalt(z11.VnD(), z11.VnS(), z22.VnS());
3505 __ movprfx(z11, z12);
3506 __ umlalt(z11.VnD(), z11.VnS(), z2.VnS(), 0);
3508 __ movprfx(z1, z12);
3509 __ umlalt(z1.VnS(), z5.VnH(), z1.VnH(), 0);
3511 __ movprfx(z28, z29);
3512 __ umlslb(z28.VnD(), z28.VnS(), z9.VnS());
3514 __ movprfx(z28, z29);
3515 __ umlslb(z28.VnD(), z28.VnS(), z1.VnS(), 0);
3517 __ movprfx(z28, z29);
3518 __ umlslb(z28.VnS(), z28.VnH(), z1.VnH(), 0);
3520 __ movprfx(z9, z10);
3521 __ umlslt(z9.VnD(), z9.VnS(), z30.VnS());
3523 __ movprfx(z9, z10);
3524 __ umlslt(z9.VnD(), z9.VnS(), z0.VnS(), 0);
3526 __ movprfx(z9, z10);
3527 __ umlslt(z9.VnS(), z9.VnH(), z0.VnH(), 0);
3529 __ movprfx(z24.VnB(), p7.Merging(), z25.VnB());
3530 __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z24.VnB());
3532 __ movprfx(z20.VnB(), p1.Merging(), z21.VnB());
3533 __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z20.VnB());
3535 __ movprfx(z8.VnB(), p5.Merging(), z9.VnB());
3536 __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z8.VnB());
3538 __ movprfx(z29.VnB(), p7.Merging(), z30.VnB());
3539 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z29.VnB());
3541 __ movprfx(z12.VnB(), p1.Merging(), z13.VnB());
3542 __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB());
3544 __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
3545 __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB());
3547 __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
3548 __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB());
3550 __ movprfx(z25.VnS(), p7.Merging(), z26.VnS());
3551 __ urecpe(z25.VnS(), p7.Merging(), z25.VnS());
3553 __ movprfx(z29.VnB(), p4.Merging(), z30.VnB());
3554 __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z29.VnB());
3556 __ movprfx(z15.VnB(), p2.Merging(), z16.VnB());
3557 __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z15.VnB());
3559 __ movprfx(z27.VnB(), p1.Merging(), z28.VnB());
3560 __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z27.VnB());
3562 __ movprfx(z4.VnS(), p3.Merging(), z5.VnS());
3563 __ ursqrte(z4.VnS(), p3.Merging(), z4.VnS());
3565 __ movprfx(z0, z1);
3566 __ ursra(z0.VnB(), z0.VnB(), 1);
3568 __ movprfx(z25.VnB(), p4.Merging(), z26.VnB());
3569 __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z25.VnB());
3571 __ movprfx(z0, z1);
3572 __ usra(z0.VnB(), z0.VnB(), 1);
3574 __ movprfx(z16, z17);
3575 __ xar(z16.VnB(), z16.VnB(), z16.VnB(), 1);
3591 __ movprfx(z14.VnS(), p4.Merging(), z15.VnS());
3592 __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD());
3594 __ movprfx(z15.VnS(), p0.Merging(), z16.VnS());
3595 __ flogb(z15.VnH(), p0.Merging(), z3.VnH());
3597 __ movprfx(z19.VnB(), p5.Merging(), z20.VnB());
3598 __ sadalp(z19.VnD(), p5.Merging(), z9.VnS());
3600 __ movprfx(z20.VnH(), p3.Merging(), z21.VnH());
3601 __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB());
3603 __ movprfx(z21.VnH(), p0.Merging(), z22.VnH());
3604 __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB());
3606 __ movprfx(z1.VnS(), p0.Merging(), z2.VnS());
3607 __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB());
3609 __ movprfx(z29.VnD(), p1.Merging(), z30.VnD());
3610 __ sqabs(z29.VnB(), p1.Merging(), z18.VnB());
3612 __ movprfx(z28.VnH(), p0.Merging(), z29.VnH());
3613 __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB());
3615 __ movprfx(z21.VnH(), p0.Merging(), z22.VnH());
3616 __ sqneg(z21.VnB(), p0.Merging(), z17.VnB());
3618 __ movprfx(z31.VnS(), p5.Merging(), z0.VnS());
3619 __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB());
3621 __ movprfx(z25.VnD(), p6.Merging(), z26.VnD());
3622 __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB());
3624 __ movprfx(z0.VnH(), p5.Merging(), z1.VnH());
3625 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0);
3627 __ movprfx(z0.VnS(), p5.Merging(), z1.VnS());
3628 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB());
3630 __ movprfx(z7.VnD(), p3.Merging(), z8.VnD());
3631 __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB());
3633 __ movprfx(z10.VnH(), p1.Merging(), z11.VnH());
3634 __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0);
3636 __ movprfx(z16.VnH(), p7.Merging(), z17.VnH());
3637 __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
3639 __ movprfx(z16.VnS(), p7.Merging(), z17.VnS());
3640 __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
3642 __ movprfx(z23.VnD(), p4.Merging(), z24.VnD());
3643 __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB());
3645 __ movprfx(z31.VnH(), p7.Merging(), z0.VnH());
3646 __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB());
3648 __ movprfx(z16.VnH(), p7.Merging(), z17.VnH());
3649 __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB());
3651 __ movprfx(z12.VnH(), p0.Merging(), z13.VnH());
3652 __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1);
3654 __ movprfx(z26.VnH(), p2.Merging(), z27.VnH());
3655 __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB());
3657 __ movprfx(z20.VnB(), p4.Merging(), z21.VnB());
3658 __ uadalp(z20.VnD(), p4.Merging(), z5.VnS());
3660 __ movprfx(z21.VnH(), p2.Merging(), z22.VnH());
3661 __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB());
3663 __ movprfx(z1.VnH(), p4.Merging(), z2.VnH());
3664 __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB());
3666 __ movprfx(z18.VnH(), p0.Merging(), z19.VnH());
3667 __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB());
3669 __ movprfx(z24.VnH(), p7.Merging(), z25.VnH());
3670 __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB());
3672 __ movprfx(z20.VnS(), p1.Merging(), z21.VnS());
3673 __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB());
3675 __ movprfx(z8.VnS(), p5.Merging(), z9.VnS());
3676 __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB());
3678 __ movprfx(z29.VnS(), p7.Merging(), z30.VnS());
3679 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0);
3681 __ movprfx(z29.VnS(), p7.Merging(), z30.VnS());
3682 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB());
3684 __ movprfx(z12.VnS(), p1.Merging(), z13.VnS());
3685 __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB());
3687 __ movprfx(z20.VnS(), p0.Merging(), z21.VnS());
3688 __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
3690 __ movprfx(z20.VnS(), p0.Merging(), z21.VnS());
3691 __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
3693 __ movprfx(z25.VnB(), p7.Merging(), z26.VnB());
3694 __ urecpe(z25.VnS(), p7.Merging(), z2.VnS());
3696 __ movprfx(z29.VnD(), p4.Merging(), z30.VnD());
3697 __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB());
3699 __ movprfx(z15.VnD(), p2.Merging(), z16.VnD());
3700 __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB());
3702 __ movprfx(z27.VnD(), p1.Merging(), z28.VnD());
3703 __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB());
3705 __ movprfx(z31.VnD(), p2.Merging(), z0.VnD());
3706 __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1);
3708 __ movprfx(z4.VnH(), p3.Merging(), z5.VnH());
3709 __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS());
3711 __ movprfx(z25.VnD(), p4.Merging(), z26.VnD());
3712 __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB());