Lines Matching refs:vmovdqu

187 vmovdqu 32*0-128($np), $ACC0
189 vmovdqu 32*1-128($np), $ACC1
190 vmovdqu 32*2-128($np), $ACC2
191 vmovdqu 32*3-128($np), $ACC3
192 vmovdqu 32*4-128($np), $ACC4
193 vmovdqu 32*5-128($np), $ACC5
194 vmovdqu 32*6-128($np), $ACC6
195 vmovdqu 32*7-128($np), $ACC7
196 vmovdqu 32*8-128($np), $ACC8
198 vmovdqu $ACC0, 32*0-128($np)
199 vmovdqu $ACC1, 32*1-128($np)
200 vmovdqu $ACC2, 32*2-128($np)
201 vmovdqu $ACC3, 32*3-128($np)
202 vmovdqu $ACC4, 32*4-128($np)
203 vmovdqu $ACC5, 32*5-128($np)
204 vmovdqu $ACC6, 32*6-128($np)
205 vmovdqu $ACC7, 32*7-128($np)
206 vmovdqu $ACC8, 32*8-128($np)
207 vmovdqu $ACC9, 32*9-128($np) # $ACC9 is zero
212 vmovdqu 32*1-128($ap), $ACC1
213 vmovdqu 32*2-128($ap), $ACC2
214 vmovdqu 32*3-128($ap), $ACC3
215 vmovdqu 32*4-128($ap), $ACC4
216 vmovdqu 32*5-128($ap), $ACC5
217 vmovdqu 32*6-128($ap), $ACC6
218 vmovdqu 32*7-128($ap), $ACC7
219 vmovdqu 32*8-128($ap), $ACC8
222 vmovdqu .Land_mask(%rip), $AND_MASK
254 vmovdqu $ACC9, 32*9-192($tp0) # zero upper half
256 vmovdqu $ACC9, 32*10-448($tp1)
258 vmovdqu $ACC9, 32*11-448($tp1)
260 vmovdqu $ACC9, 32*12-448($tp1)
262 vmovdqu $ACC9, 32*13-448($tp1)
264 vmovdqu $ACC9, 32*14-448($tp1)
266 vmovdqu $ACC9, 32*15-448($tp1)
268 vmovdqu $ACC9, 32*16-448($tp1)
271 vmovdqu $ACC9, 32*17-448($tp1)
303 vmovdqu $ACC0, 32*0-192($tp0)
304 vmovdqu $ACC1, 32*1-192($tp0)
324 vmovdqu $ACC2, 32*2-192($tp0)
325 vmovdqu $ACC3, 32*3-192($tp0)
343 vmovdqu $ACC4, 32*4-192($tp0)
344 vmovdqu $ACC5, 32*5-192($tp0)
360 vmovdqu $ACC6, 32*6-192($tp0)
361 vmovdqu $ACC7, 32*7-192($tp0)
375 vmovdqu $ACC8, 32*8-192($tp0)
376 vmovdqu $ACC0, 32*9-192($tp0)
389 vmovdqu $ACC1, 32*10-448($tp1)
390 vmovdqu $ACC2, 32*11-448($tp1)
401 vmovdqu $ACC3, 32*12-448($tp1)
402 vmovdqu $ACC4, 32*13-448($tp1)
411 vmovdqu $ACC5, 32*14-448($tp1)
413 vmovdqu $ACC6, 32*15-448($tp1)
414 vmovdqu $ACC7, 32*16-448($tp1)
427 vmovdqu 32*8(%rsp), $ACC8 # 32*8-192($tp0)
428 vmovdqu 32*9(%rsp), $ACC1 # 32*9-192($tp0)
429 vmovdqu 32*10(%rsp), $ACC2 # 32*10-192($tp0)
447 vmovdqu $ACC1, 32*9-192($tp0)
448 vmovdqu $ACC2, 32*10-192($tp0)
454 vmovdqu 32*1(%rsp), $ACC1
455 vmovdqu 32*2-192($tp0), $ACC2
456 vmovdqu 32*3-192($tp0), $ACC3
457 vmovdqu 32*4-192($tp0), $ACC4
458 vmovdqu 32*5-192($tp0), $ACC5
459 vmovdqu 32*6-192($tp0), $ACC6
460 vmovdqu 32*7-192($tp0), $ACC7
526 #vmovdqu 32*1-8-128($np), $TEMP2 # moved below
528 #vmovdqu 32*2-8-128($np), $TEMP0 # moved below
532 vmovdqu 32*3-8-128($np), $TEMP1
537 vmovdqu 32*4-8-128($np), $TEMP2
545 vmovdqu 32*5-8-128($np), $TEMP0
549 vmovdqu 32*6-8-128($np), $TEMP1
555 .byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 # vmovdqu 32*7-8-128($np), $TEMP2
559 vmovdqu 32*8-8-128($np), $TEMP0
562 vmovdqu 32*9-8-128($np), $ACC9
567 vmovdqu 32*1-16-128($np), $TEMP1
571 vmovdqu 32*2-16-128($np), $TEMP2
577 vmovdqu 32*1-24-128($np), $ACC0
579 vmovdqu 32*3-16-128($np), $TEMP0
583 .byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff # vmovdqu 32*4-16-128($np), $TEMP1
587 vmovdqu 32*5-16-128($np), $TEMP2
590 vmovdqu $ACC0, (%rsp) # transfer $r0-$r3
593 vmovdqu 32*6-16-128($np), $TEMP0
596 vmovdqu 32*7-16-128($np), $TEMP1
599 vmovdqu 32*8-16-128($np), $TEMP2
603 vmovdqu 32*9-16-128($np), $TEMP0
607 #vmovdqu 32*2-24-128($np), $TEMP1 # moved below
614 vmovdqu 32*3-24-128($np), $TEMP2
620 vmovdqu 32*4-24-128($np), $TEMP0
626 vmovdqu 32*5-24-128($np), $TEMP1
635 vmovdqu 32*6-24-128($np), $TEMP2
641 vmovdqu 32*7-24-128($np), $TEMP0
647 vmovdqu 32*8-24-128($np), $TEMP1
652 vmovdqu 32*9-24-128($np), $TEMP2
724 vmovdqu $ACC0, 32*0-128($rp)
727 vmovdqu $ACC1, 32*1-128($rp)
730 vmovdqu $ACC2, 32*2-128($rp)
732 vmovdqu $ACC3, 32*3-128($rp)
784 vmovdqu $ACC4, 32*4-128($rp)
787 vmovdqu $ACC5, 32*5-128($rp)
790 vmovdqu $ACC6, 32*6-128($rp)
792 vmovdqu $ACC7, 32*7-128($rp)
793 vmovdqu $ACC8, 32*8-128($rp)
954 vmovdqu 32*0-128($np), $ACC0
956 vmovdqu 32*1-128($np), $ACC1
957 vmovdqu 32*2-128($np), $ACC2
958 vmovdqu 32*3-128($np), $ACC3
959 vmovdqu 32*4-128($np), $ACC4
960 vmovdqu 32*5-128($np), $ACC5
961 vmovdqu 32*6-128($np), $ACC6
962 vmovdqu 32*7-128($np), $ACC7
963 vmovdqu 32*8-128($np), $ACC8
965 vmovdqu $ACC0, 32*0-128($np)
967 vmovdqu $ACC1, 32*1-128($np)
969 vmovdqu $ACC2, 32*2-128($np)
971 vmovdqu $ACC3, 32*3-128($np)
973 vmovdqu $ACC4, 32*4-128($np)
975 vmovdqu $ACC5, 32*5-128($np)
977 vmovdqu $ACC6, 32*6-128($np)
979 vmovdqu $ACC7, 32*7-128($np)
981 vmovdqu $ACC8, 32*8-128($np)
983 vmovdqu $ACC9, 32*9-128($np) # $ACC9 is zero after vzeroall
989 vmovdqu $ACC0, (%rsp) # clear top of stack
996 vmovdqu .Land_mask(%rip), $AND_MASK
998 vmovdqu $ACC9, 32*9-128($rp) # $ACC9 is zero after vzeroall
1081 vmovdqu -8+32*1-128($ap),$TEMP1
1085 vmovdqu -8+32*2-128($ap),$TEMP2
1097 vmovdqu -8+32*3-128($ap),$TEMP0
1101 vmovdqu -8+32*4-128($ap),$TEMP1
1104 vmovdqu -8+32*5-128($ap),$TEMP2
1107 vmovdqu -8+32*6-128($ap),$TEMP0
1110 vmovdqu -8+32*7-128($ap),$TEMP1
1113 vmovdqu -8+32*8-128($ap),$TEMP2
1116 vmovdqu -8+32*9-128($ap),$ACC9
1126 vmovdqu -8+32*1-128($np),$TEMP0
1130 vmovdqu -8+32*2-128($np),$TEMP1
1138 vmovdqu -8+32*3-128($np),$TEMP2
1141 vmovdqu -8+32*4-128($np),$TEMP0
1144 vmovdqu -8+32*5-128($np),$TEMP1
1147 vmovdqu -8+32*6-128($np),$TEMP2
1150 vmovdqu -8+32*7-128($np),$TEMP0
1153 vmovdqu -8+32*8-128($np),$TEMP1
1156 vmovdqu -8+32*9-128($np),$TEMP2
1163 vmovdqu -16+32*1-128($ap),$TEMP0
1168 vmovdqu -16+32*2-128($ap),$TEMP1
1177 vmovdqu -16+32*3-128($ap),$TEMP2
1181 vmovdqu -16+32*4-128($ap),$TEMP0
1184 vmovdqu -16+32*5-128($ap),$TEMP1
1187 vmovdqu -16+32*6-128($ap),$TEMP2
1190 vmovdqu -16+32*7-128($ap),$TEMP0
1193 vmovdqu -16+32*8-128($ap),$TEMP1
1196 vmovdqu -16+32*9-128($ap),$TEMP2
1204 vmovdqu -16+32*1-128($np),$TEMP0
1208 vmovdqu -16+32*2-128($np),$TEMP1
1215 vmovdqu -16+32*3-128($np),$TEMP2
1218 vmovdqu -16+32*4-128($np),$TEMP0
1221 vmovdqu -16+32*5-128($np),$TEMP1
1224 vmovdqu -16+32*6-128($np),$TEMP2
1227 vmovdqu -16+32*7-128($np),$TEMP0
1230 vmovdqu -16+32*8-128($np),$TEMP1
1233 vmovdqu -16+32*9-128($np),$TEMP2
1236 vmovdqu -24+32*1-128($ap),$TEMP0
1239 vmovdqu -24+32*2-128($ap),$TEMP1
1252 vmovdqu -24+32*3-128($ap),$TEMP2
1256 vmovdqu -24+32*4-128($ap),$TEMP0
1259 vmovdqu -24+32*5-128($ap),$TEMP1
1262 vmovdqu -24+32*6-128($ap),$TEMP2
1265 vmovdqu -24+32*7-128($ap),$TEMP0
1268 vmovdqu -24+32*8-128($ap),$TEMP1
1271 vmovdqu -24+32*9-128($ap),$TEMP2
1280 vmovdqu -24+32*1-128($np),$TEMP0
1285 vmovdqu -24+32*2-128($np),$TEMP1
1288 vmovdqu -24+32*3-128($np),$TEMP2
1291 vmovdqu $ACC0, (%rsp) # transfer $r0-$r3
1293 vmovdqu -24+32*4-128($np),$TEMP0
1295 vmovdqu -24+32*5-128($np),$TEMP1
1298 vmovdqu -24+32*6-128($np),$TEMP2
1301 vmovdqu -24+32*7-128($np),$TEMP0
1304 vmovdqu -24+32*8-128($np),$TEMP1
1307 vmovdqu -24+32*9-128($np),$TEMP2
1382 vmovdqu $ACC0, 0-128($rp)
1383 vmovdqu $ACC1, 32-128($rp)
1384 vmovdqu $ACC2, 64-128($rp)
1385 vmovdqu $ACC3, 96-128($rp)
1444 vmovdqu $ACC4, 128-128($rp)
1445 vmovdqu $ACC5, 160-128($rp)
1446 vmovdqu $ACC6, 192-128($rp)
1447 vmovdqu $ACC7, 224-128($rp)
1448 vmovdqu $ACC8, 256-128($rp)
1584 vmovdqu .Lscatter_permd(%rip),%ymm5
1592 vmovdqu ($inp),%ymm0
1595 vmovdqu %xmm0,($out)
1730 vmovdqu %ymm5,($out)
1736 vmovdqu %ymm0,($out)
1947 s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go or
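
The final match (source line 1947) is not an instruction but part of the perlasm post-processing filter: it rewrites the "%x%ymmN" pseudo-operand, which the perlasm source uses to name the lower 128-bit half of %ymmN, into a real "%xmmN" operand, demoting that vmovdqu to a 16-byte move. Below is a minimal standalone sketch of the substitution; the regex is verbatim from the matched line, while the input line and the printed result are illustrative assumptions, not taken from the file:

    #!/usr/bin/env perl
    use strict;
    use warnings;

    # Hypothetical input (not from the file): a vmovdqu whose operand
    # uses the "%x%ymmN" lower-half notation emitted after perlasm
    # variable interpolation.
    my $line = "vmovdqu\t%x%ymm0,(%rdi)";

    # Substitution copied verbatim from the matched line above.
    $line =~ s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go;

    print "$line\n";   # prints: vmovdqu %xmm0,(%rdi)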