Lines Matching refs:src1
80 void SharedTurboAssembler::Movhps(XMMRegister dst, XMMRegister src1,
84 vmovhps(dst, src1, src2);
86 if (dst != src1) {
87 movaps(dst, src1);
93 void SharedTurboAssembler::Movlps(XMMRegister dst, XMMRegister src1,
97 vmovlps(dst, src1, src2);
99 if (dst != src1) {
100 movaps(dst, src1);
106 void SharedTurboAssembler::Pblendvb(XMMRegister dst, XMMRegister src1,
110 vpblendvb(dst, src1, src2, mask);
114 DCHECK_EQ(dst, src1);
119 void SharedTurboAssembler::Shufps(XMMRegister dst, XMMRegister src1,
123 vshufps(dst, src1, src2, imm8);
125 if (dst != src1) {
126 movaps(dst, src1);
400 void SharedTurboAssembler::I8x16Shl(XMMRegister dst, XMMRegister src1,
406 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
407 movaps(dst, src1);
408 src1 = dst;
412 Psllw(dst, src1, byte{shift});
422 void SharedTurboAssembler::I8x16Shl(XMMRegister dst, XMMRegister src1,
427 DCHECK(!AreAliased(src1, tmp2, tmp3));
438 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
439 movaps(dst, src1);
440 src1 = dst;
443 Pand(dst, src1, tmp2);
449 void SharedTurboAssembler::I8x16ShrS(XMMRegister dst, XMMRegister src1,
456 Punpckhbw(tmp, src1);
457 Punpcklbw(dst, src1);
463 void SharedTurboAssembler::I8x16ShrS(XMMRegister dst, XMMRegister src1,
468 DCHECK_NE(src1, tmp2);
471 Punpckhbw(tmp2, src1);
472 Punpcklbw(dst, src1);
484 void SharedTurboAssembler::I8x16ShrU(XMMRegister dst, XMMRegister src1,
489 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
490 movaps(dst, src1);
491 src1 = dst;
496 Psrlw(dst, src1, shift);
506 void SharedTurboAssembler::I8x16ShrU(XMMRegister dst, XMMRegister src1,
511 DCHECK_NE(src1, tmp2);
514 Punpckhbw(tmp2, src1);
515 Punpcklbw(dst, src1);
557 void SharedTurboAssembler::I16x8ExtMulLow(XMMRegister dst, XMMRegister src1,
561 is_signed ? Pmovsxbw(scratch, src1) : Pmovzxbw(scratch, src1);
566 void SharedTurboAssembler::I16x8ExtMulHighS(XMMRegister dst, XMMRegister src1,
572 vpunpckhbw(scratch, src1, src1);
578 if (dst != src1) {
579 movaps(dst, src1);
590 void SharedTurboAssembler::I16x8ExtMulHighU(XMMRegister dst, XMMRegister src1,
598 if (src1 == src2) {
600 vpunpckhbw(dst, src1, scratch);
604 // We overwrite dst, then use src2, so swap src1 and src2.
605 std::swap(src1, src2);
608 vpunpckhbw(dst, src1, scratch);
613 if (src1 == src2) {
615 if (dst != src1) {
616 movaps(dst, src1);
621 // When dst == src1, nothing special needs to be done.
622 // When dst == src2, swap src1 and src2, since we overwrite dst.
623 // When dst is unique, copy src1 to dst first.
625 std::swap(src1, src2);
626 // Now, dst == src1.
627 } else if (dst != src1) {
628 // dst != src1 && dst != src2.
629 movaps(dst, src1);
689 void SharedTurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
697 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
698 movaps(dst, src1);
699 src1 = dst;
702 Pmulhrsw(dst, src1, src2);
751 void SharedTurboAssembler::I32x4ExtMul(XMMRegister dst, XMMRegister src1,
757 vpmullw(scratch, src1, src2);
758 is_signed ? vpmulhw(dst, src1, src2) : vpmulhuw(dst, src1, src2);
761 DCHECK_EQ(dst, src1);
762 movaps(scratch, src1);
857 XMMRegister src1, XMMRegister scratch) {
861 vpcmpgtq(dst, src0, src1);
865 pcmpgtq(dst, src1);
866 } else if (dst == src1) {
868 pcmpgtq(scratch, src1);
872 pcmpgtq(dst, src1);
877 DCHECK_NE(dst, src1);
878 movaps(dst, src1);
881 pcmpeqd(scratch, src1);
884 pcmpgtd(scratch, src1);
891 XMMRegister src1, XMMRegister scratch) {
895 vpcmpgtq(dst, src1, src0);
901 if (dst != src1) {
902 movaps(dst, src1);
910 DCHECK_NE(dst, src1);
912 movaps(scratch, src1);
913 psubq(dst, src1);
916 movaps(scratch, src1);
1034 // 1. Unpack src0, src1 into even-number elements of scratch.
1035 // 2. Unpack src1, src0 into even-number elements of dst.
1038 void SharedTurboAssembler::I64x2ExtMul(XMMRegister dst, XMMRegister src1,
1045 vpunpckldq(scratch, src1, src1);
1048 vpunpckhdq(scratch, src1, src1);
1058 pshufd(scratch, src1, mask);
1122 XMMRegister src1, XMMRegister src2,
1130 vpand(dst, src1, mask);
1137 andps(dst, src1);
1217 if (dst == src1) { \
1220 vfmadd132##ps_or_pd(dst, src1, src3); \
1222 vfmadd213##ps_or_pd(dst, src2, src1); \
1225 vmovups(dst, src1); \
1231 vadd##ps_or_pd(dst, src1, tmp); \
1233 if (dst == src1) { \
1238 DCHECK_NE(src2, src1); \
1240 add##ps_or_pd(src2, src1); \
1242 DCHECK_NE(src3, src1); \
1244 add##ps_or_pd(src3, src1); \
1248 add##ps_or_pd(dst, src1); \
1257 if (dst == src1) { \
1260 vfnmadd132##ps_or_pd(dst, src1, src3); \
1262 vfnmadd213##ps_or_pd(dst, src2, src1); \
1265 vmovups(dst, src1); \
1271 vsub##ps_or_pd(dst, src1, tmp); \
1275 if (dst != src1) { \
1276 movaps(dst, src1); \
1281 void SharedTurboAssembler::F32x4Qfma(XMMRegister dst, XMMRegister src1,
1287 void SharedTurboAssembler::F32x4Qfms(XMMRegister dst, XMMRegister src1,
1293 void SharedTurboAssembler::F64x2Qfma(XMMRegister dst, XMMRegister src1,
1299 void SharedTurboAssembler::F64x2Qfms(XMMRegister dst, XMMRegister src1,