Lines Matching refs:outq

475     movaps [outq +  0*mmsize], m0
476 movaps [outq + 4*mmsize], m1
477 movaps [outq + 8*mmsize], tx1_e0
478 movaps [outq + 12*mmsize], tx2_e0
482 movaps [outq + 2*mmsize], m2
483 movaps [outq + 6*mmsize], m3
484 movaps [outq + 10*mmsize], tx1_o0
485 movaps [outq + 14*mmsize], tx2_o0
490 movaps m0, [outq + 1*mmsize]
491 movaps m1, [outq + 3*mmsize]
492 movaps m2, [outq + 5*mmsize]
493 movaps m3, [outq + 7*mmsize]
498 movaps [outq + 1*mmsize], m0
499 movaps [outq + 3*mmsize], m1
500 movaps [outq + 5*mmsize], m2
501 movaps [outq + 7*mmsize], m3
503 movaps [outq + 9*mmsize], tx1_e1
504 movaps [outq + 11*mmsize], tx1_o1
505 movaps [outq + 13*mmsize], tx2_e1
506 movaps [outq + 15*mmsize], tx2_o1
517 movaps m0, [outq + (0 + %4)*mmsize + %6]
518 movaps m2, [outq + (2 + %4)*mmsize + %6]
519 movaps m1, [outq + %1 + (0 + %4)*mmsize + %6]
520 movaps m3, [outq + %1 + (2 + %4)*mmsize + %6]
522 movaps m4, [outq + %2 + (0 + %4)*mmsize + %6]
523 movaps m6, [outq + %2 + (2 + %4)*mmsize + %6]
524 movaps m5, [outq + %3 + (0 + %4)*mmsize + %6]
525 movaps m7, [outq + %3 + (2 + %4)*mmsize + %6]
532 movaps [outq + (0 + %4)*mmsize + %6], m0
533 movaps [outq + (2 + %4)*mmsize + %6], m2
534 movaps [outq + %1 + (0 + %4)*mmsize + %6], m1
535 movaps [outq + %1 + (2 + %4)*mmsize + %6], m3
537 movaps [outq + %2 + (0 + %4)*mmsize + %6], m4
538 movaps [outq + %2 + (2 + %4)*mmsize + %6], m6
539 movaps [outq + %3 + (0 + %4)*mmsize + %6], m5
540 movaps [outq + %3 + (2 + %4)*mmsize + %6], m7
574 movaps m0, [outq + (0 + 0 + %1)*mmsize + %6]
575 movaps m2, [outq + (2 + 0 + %1)*mmsize + %6]
576 movaps m1, [outq + %3 + (0 + 0 + %1)*mmsize + %6]
577 movaps m3, [outq + %3 + (2 + 0 + %1)*mmsize + %6]
579 movaps m4, [outq + %4 + (0 + 0 + %1)*mmsize + %6]
580 movaps m6, [outq + %4 + (2 + 0 + %1)*mmsize + %6]
581 movaps m5, [outq + %5 + (0 + 0 + %1)*mmsize + %6]
582 movaps m7, [outq + %5 + (2 + 0 + %1)*mmsize + %6]
598 vextractf128 [outq + (0 + 0 + %1)*mmsize + %6 + 0], m0, 0
599 vextractf128 [outq + (0 + 0 + %1)*mmsize + %6 + 16], m10, 0
600 vextractf128 [outq + %3 + (0 + 0 + %1)*mmsize + %6 + 0], m1, 0
601 vextractf128 [outq + %3 + (0 + 0 + %1)*mmsize + %6 + 16], m11, 0
603 vextractf128 [outq + %4 + (0 + 0 + %1)*mmsize + %6 + 0], m4, 0
604 vextractf128 [outq + %4 + (0 + 0 + %1)*mmsize + %6 + 16], m12, 0
605 vextractf128 [outq + %5 + (0 + 0 + %1)*mmsize + %6 + 0], m5, 0
606 vextractf128 [outq + %5 + (0 + 0 + %1)*mmsize + %6 + 16], m13, 0
616 movaps m0, [outq + (0 + 1 + %1)*mmsize + %6]
617 movaps m2, [outq + (2 + 1 + %1)*mmsize + %6]
618 movaps m1, [outq + %3 + (0 + 1 + %1)*mmsize + %6]
619 movaps m3, [outq + %3 + (2 + 1 + %1)*mmsize + %6]
621 movaps [outq + (0 + 1 + %1)*mmsize + %6], m10 ; m0 conflict
622 movaps [outq + %3 + (0 + 1 + %1)*mmsize + %6], m11 ; m1 conflict
624 movaps m4, [outq + %4 + (0 + 1 + %1)*mmsize + %6]
625 movaps m6, [outq + %4 + (2 + 1 + %1)*mmsize + %6]
626 movaps m5, [outq + %5 + (0 + 1 + %1)*mmsize + %6]
627 movaps m7, [outq + %5 + (2 + 1 + %1)*mmsize + %6]
629 movaps [outq + %4 + (0 + 1 + %1)*mmsize + %6], m12 ; m4 conflict
630 movaps [outq + %5 + (0 + 1 + %1)*mmsize + %6], m13 ; m5 conflict
646 vextractf128 [outq + (2 + 0 + %1)*mmsize + %6 + 0], m8, 0
647 vextractf128 [outq + (2 + 0 + %1)*mmsize + %6 + 16], m0, 0
648 vextractf128 [outq + (2 + 1 + %1)*mmsize + %6 + 0], m8, 1
649 vextractf128 [outq + (2 + 1 + %1)*mmsize + %6 + 16], m0, 1
651 vextractf128 [outq + %3 + (2 + 0 + %1)*mmsize + %6 + 0], m9, 0
652 vextractf128 [outq + %3 + (2 + 0 + %1)*mmsize + %6 + 16], m1, 0
653 vextractf128 [outq + %3 + (2 + 1 + %1)*mmsize + %6 + 0], m9, 1
654 vextractf128 [outq + %3 + (2 + 1 + %1)*mmsize + %6 + 16], m1, 1
656 vextractf128 [outq + %4 + (2 + 0 + %1)*mmsize + %6 + 0], m10, 0
657 vextractf128 [outq + %4 + (2 + 0 + %1)*mmsize + %6 + 16], m4, 0
658 vextractf128 [outq + %4 + (2 + 1 + %1)*mmsize + %6 + 0], m10, 1
659 vextractf128 [outq + %4 + (2 + 1 + %1)*mmsize + %6 + 16], m4, 1
661 vextractf128 [outq + %5 + (2 + 0 + %1)*mmsize + %6 + 0], m11, 0
662 vextractf128 [outq + %5 + (2 + 0 + %1)*mmsize + %6 + 16], m5, 0
663 vextractf128 [outq + %5 + (2 + 1 + %1)*mmsize + %6 + 0], m11, 1
664 vextractf128 [outq + %5 + (2 + 1 + %1)*mmsize + %6 + 16], m5, 1
681 movaps [outq], m0
701 movaps [outq + 0*mmsize], m2
702 movaps [outq + 1*mmsize], m0
733 movups [outq + 0*mmsize], m4
734 movups [outq + 1*mmsize], m0
735 movups [outq + 2*mmsize], m5
736 movups [outq + 3*mmsize], m1
762 vextractf128 [outq + 16*0], m2, 0
763 vextractf128 [outq + 16*1], m0, 0
764 vextractf128 [outq + 16*2], m2, 1
765 vextractf128 [outq + 16*3], m0, 1
796 vextractf128 [outq + 16*0], m4, 0
797 vextractf128 [outq + 16*1], m0, 0
798 vextractf128 [outq + 16*2], m4, 1
799 vextractf128 [outq + 16*3], m0, 1
800 vextractf128 [outq + 16*4], m5, 0
801 vextractf128 [outq + 16*5], m1, 0
802 vextractf128 [outq + 16*6], m5, 1
803 vextractf128 [outq + 16*7], m1, 1
860 vextractf128 [outq + 16* 0], m8, 0
861 vextractf128 [outq + 16* 1], m0, 0
862 vextractf128 [outq + 16* 2], m8, 1
863 vextractf128 [outq + 16* 3], m0, 1
864 vextractf128 [outq + 16* 4], m9, 0
865 vextractf128 [outq + 16* 5], m1, 0
866 vextractf128 [outq + 16* 6], m9, 1
867 vextractf128 [outq + 16* 7], m1, 1
869 vextractf128 [outq + 16* 8], m11, 0
870 vextractf128 [outq + 16* 9], m4, 0
871 vextractf128 [outq + 16*10], m11, 1
872 vextractf128 [outq + 16*11], m4, 1
873 vextractf128 [outq + 16*12], m10, 0
874 vextractf128 [outq + 16*13], m5, 0
875 vextractf128 [outq + 16*14], m10, 1
876 vextractf128 [outq + 16*15], m5, 1
894 add outq, (%1*4) - (%1/1)
897 add outq, (%1*2) - (%1/2) ; the synth loops also increment outq
901 sub outq, (%1*4) + (%1*2) + (%1/2)
914 add outq, 8*mmsize
970 movaps [outq + 1*mmsize], m1
971 movaps [outq + 3*mmsize], m3
972 movaps [outq + 5*mmsize], m5
973 movaps [outq + 7*mmsize], m7
983 movaps [outq + 0*mmsize], m0
984 movaps [outq + 2*mmsize], m2
985 movaps [outq + 4*mmsize], m4
986 movaps [outq + 6*mmsize], m6
1061 add outq, 16*mmsize
1064 add outq, 8*mmsize
1068 sub outq, 24*mmsize
1088 add outq, 32*mmsize
1091 add outq, 16*mmsize
1095 sub outq, 48*mmsize
1116 add outq, 64*mmsize
1119 add outq, 32*mmsize
1123 sub outq, 96*mmsize
1135 add outq, 8*mmsize
1151 add outq, 96*mmsize
1154 add outq, 64*mmsize
1158 sub outq, 192*mmsize
1170 add outq, 8*mmsize
1201 add outq, 8*mmsize
1223 vextractf128 [outq + 0*mmsize + 0], tmp1, 0
1224 vextractf128 [outq + 0*mmsize + 16], m0, 0
1225 vextractf128 [outq + 4*mmsize + 0], tmp2, 0
1226 vextractf128 [outq + 4*mmsize + 16], m1, 0
1228 vextractf128 [outq + 8*mmsize + 0], tw_o, 0
1229 vextractf128 [outq + 8*mmsize + 16], tx1_e0, 0
1230 vextractf128 [outq + 9*mmsize + 0], tw_o, 1
1231 vextractf128 [outq + 9*mmsize + 16], tx1_e0, 1
1236 vextractf128 [outq + 12*mmsize + 0], tw_e, 0
1237 vextractf128 [outq + 12*mmsize + 16], tx2_e0, 0
1238 vextractf128 [outq + 13*mmsize + 0], tw_e, 1
1239 vextractf128 [outq + 13*mmsize + 16], tx2_e0, 1
1244 movaps m0, [outq + 1*mmsize]
1245 movaps m1, [outq + 3*mmsize]
1246 movaps m2, [outq + 5*mmsize]
1247 movaps m3, [outq + 7*mmsize]
1249 movaps [outq + 1*mmsize], tmp1
1250 movaps [outq + 5*mmsize], tmp2
1264 vextractf128 [outq + 2*mmsize + 0], tmp1, 0
1265 vextractf128 [outq + 2*mmsize + 16], m0, 0
1266 vextractf128 [outq + 3*mmsize + 0], tmp1, 1
1267 vextractf128 [outq + 3*mmsize + 16], m0, 1
1269 vextractf128 [outq + 6*mmsize + 0], tmp2, 0
1270 vextractf128 [outq + 6*mmsize + 16], m2, 0
1271 vextractf128 [outq + 7*mmsize + 0], tmp2, 1
1272 vextractf128 [outq + 7*mmsize + 16], m2, 1
1274 vextractf128 [outq + 10*mmsize + 0], tw_e, 0
1275 vextractf128 [outq + 10*mmsize + 16], tx1_e1, 0
1276 vextractf128 [outq + 11*mmsize + 0], tw_e, 1
1277 vextractf128 [outq + 11*mmsize + 16], tx1_e1, 1
1279 vextractf128 [outq + 14*mmsize + 0], tw_o, 0
1280 vextractf128 [outq + 14*mmsize + 16], tx2_e1, 0
1281 vextractf128 [outq + 15*mmsize + 0], tw_o, 1
1282 vextractf128 [outq + 15*mmsize + 16], tx2_e1, 1