Lines Matching defs:scratch
176 XMMRegister rhs, XMMRegister scratch) {
182 vminps(scratch, lhs, rhs);
186 movaps(scratch, src);
187 minps(scratch, dst);
190 movaps(scratch, lhs);
191 minps(scratch, rhs);
196 Orps(scratch, dst);
198 Cmpunordps(dst, dst, scratch);
199 Orps(scratch, dst);
201 Andnps(dst, dst, scratch);
205 XMMRegister rhs, XMMRegister scratch) {
211 vmaxps(scratch, lhs, rhs);
215 movaps(scratch, src);
216 maxps(scratch, dst);
219 movaps(scratch, lhs);
220 maxps(scratch, rhs);
225 Xorps(dst, scratch);
227 Orps(scratch, dst);
229 Subps(scratch, scratch, dst);
231 Cmpunordps(dst, dst, scratch);
233 Andnps(dst, dst, scratch);
237 XMMRegister rhs, XMMRegister scratch) {
243 vminpd(scratch, lhs, rhs);
246 vorpd(scratch, scratch, dst);
248 vcmpunordpd(dst, dst, scratch);
249 vorpd(scratch, scratch, dst);
251 vandnpd(dst, dst, scratch);
253 // Compare lhs with rhs, and rhs with lhs, and have the results in scratch
257 movaps(scratch, src);
258 minpd(scratch, dst);
261 movaps(scratch, lhs);
263 minpd(scratch, rhs);
266 orpd(scratch, dst);
267 cmpunordpd(dst, scratch);
268 orpd(scratch, dst);
270 andnpd(dst, scratch);
275 XMMRegister rhs, XMMRegister scratch) {
281 vmaxpd(scratch, lhs, rhs);
284 vxorpd(dst, dst, scratch);
286 vorpd(scratch, scratch, dst);
288 vsubpd(scratch, scratch, dst);
290 vcmpunordpd(dst, dst, scratch);
292 vandnpd(dst, dst, scratch);
296 movaps(scratch, src);
297 maxpd(scratch, dst);
300 movaps(scratch, lhs);
302 maxpd(scratch, rhs);
305 xorpd(dst, scratch);
306 orpd(scratch, dst);
307 subpd(scratch, dst);
308 cmpunordpd(dst, scratch);
310 andnpd(dst, scratch);
367 XMMRegister scratch) {
372 Xorps(scratch, scratch);
373 Pshufb(dst, scratch);
377 XMMRegister scratch) {
381 Movd(scratch, src);
382 vpbroadcastb(dst, scratch);
384 I8x16SplatPreAvx2(dst, src, scratch);
389 XMMRegister scratch) {
396 I8x16SplatPreAvx2(dst, src, scratch);
558 XMMRegister src2, XMMRegister scratch,
561 is_signed ? Pmovsxbw(scratch, src1) : Pmovzxbw(scratch, src1);
563 Pmullw(dst, scratch);
568 XMMRegister scratch) {
572 vpunpckhbw(scratch, src1, src1);
573 vpsraw(scratch, scratch, 8);
576 vpmullw(dst, dst, scratch);
581 movaps(scratch, src2);
584 punpckhbw(scratch, scratch);
585 psraw(scratch, 8);
586 pmullw(dst, scratch);
592 XMMRegister scratch) {
599 vpxor(scratch, scratch, scratch);
600 vpunpckhbw(dst, src1, scratch);
607 vpxor(scratch, scratch, scratch);
608 vpunpckhbw(dst, src1, scratch);
609 vpunpckhbw(scratch, src2, scratch);
610 vpmullw(dst, dst, scratch);
614 xorps(scratch, scratch);
618 punpckhbw(dst, scratch);
619 pmullw(dst, scratch);
631 xorps(scratch, scratch);
632 punpckhbw(dst, scratch);
633 punpckhbw(scratch, src2);
634 psrlw(scratch, 8);
635 pmullw(dst, scratch);
665 XMMRegister scratch) {
672 XMMRegister tmp = dst == src ? scratch : dst;
679 xorps(scratch, scratch);
680 punpckhbw(dst, scratch);
691 XMMRegister scratch) {
694 Pcmpeqd(scratch, scratch);
695 Psllw(scratch, scratch, byte{15});
703 Pcmpeqw(scratch, dst);
704 Pxor(dst, scratch);
714 // scratch = |0|a|0|c|0|e|0|g|
748 // 1. Multiply low word into scratch.
750 // 3. Unpack and interleave scratch and dst into dst.
752 XMMRegister src2, XMMRegister scratch,
757 vpmullw(scratch, src1, src2);
759 low ? vpunpcklwd(dst, scratch, dst) : vpunpckhwd(dst, scratch, dst);
762 movaps(scratch, src1);
764 is_signed ? pmulhw(scratch, src2) : pmulhuw(scratch, src2);
765 low ? punpcklwd(dst, scratch) : punpckhwd(dst, scratch);
794 XMMRegister scratch) {
798 // scratch = |0|0|0|0|0|0|0|0|
801 XMMRegister tmp = dst == src ? scratch : dst;
807 xorps(scratch, scratch);
808 punpckhwd(dst, scratch);
819 XMMRegister scratch) {
823 vpxor(scratch, scratch, scratch);
824 vpsubq(dst, scratch, src);
827 movaps(scratch, src);
828 std::swap(src, scratch);
836 XMMRegister scratch) {
840 XMMRegister tmp = dst == src ? scratch : dst;
846 movshdup(scratch, src);
850 psrad(scratch, 31);
851 xorps(dst, scratch);
852 psubq(dst, scratch);
857 XMMRegister src1, XMMRegister scratch) {
867 movaps(scratch, src0);
868 pcmpgtq(scratch, src1);
869 movaps(dst, scratch);
879 movaps(scratch, src0);
881 pcmpeqd(scratch, src1);
882 andps(dst, scratch);
883 movaps(scratch, src0);
884 pcmpgtd(scratch, src1);
885 orps(dst, scratch);
891 XMMRegister src1, XMMRegister scratch) {
896 vpcmpeqd(scratch, scratch, scratch);
897 vpxor(dst, dst, scratch);
905 pcmpeqd(scratch, scratch);
906 xorps(dst, scratch);
912 movaps(scratch, src1);
914 pcmpeqd(scratch, src0);
915 andps(dst, scratch);
916 movaps(scratch, src1);
917 pcmpgtd(scratch, src0);
918 orps(dst, scratch);
920 pcmpeqd(scratch, scratch);
921 xorps(dst, scratch);
1034 // 1. Unpack src0, src1 into even-number elements of scratch.
1039 XMMRegister src2, XMMRegister scratch,
1045 vpunpckldq(scratch, src1, src1);
1048 vpunpckhdq(scratch, src1, src1);
1052 vpmuldq(dst, scratch, dst);
1054 vpmuludq(dst, scratch, dst);
1058 pshufd(scratch, src1, mask);
1062 pmuldq(dst, scratch);
1064 pmuludq(dst, scratch);
1089 XMMRegister scratch) {
1093 vpxor(scratch, scratch, scratch);
1094 vpunpckhdq(dst, src, scratch);
1098 xorps(scratch, scratch);
1099 punpckhdq(dst, scratch);
1110 XMMRegister scratch) {
1113 Pcmpeqd(scratch, scratch);
1114 Pxor(dst, scratch);
1123 XMMRegister scratch) {
1129 vpandn(scratch, mask, src2);
1131 vpor(dst, dst, scratch);
1135 movaps(scratch, mask);
1136 andnps(scratch, src2);
1138 orps(dst, scratch);
1143 XMMRegister scratch) {
1154 vpinsrb(dst, scratch, src, uint8_t{0});
1155 vpxor(scratch, scratch, scratch);
1156 vpshufb(dst, dst, scratch);
1160 xorps(scratch, scratch);
1161 pshufb(dst, scratch);
1166 XMMRegister scratch) {
1177 vpinsrw(dst, scratch, src, uint8_t{0});