Lines Matching refs:instr
84 aco::Instruction* instr) -> void
94 aco_print_instr(instr, memf);
114 for (aco_ptr<Instruction>& instr : block.instructions) {
117 Format base_format = instr->format;
128 if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
129 instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
130 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
131 instr->opcode == aco_opcode::v_interp_p2_f16) {
140 check(base_format == instr_info.format[(int)instr->opcode],
141 "Wrong base format for instruction", instr.get());
144 if (instr->isVOP3() && instr->format != Format::VOP3) {
147 "Format cannot have VOP3/VOP3B applied", instr.get());
151 if (instr->isSDWA()) {
154 "Format cannot have SDWA applied", instr.get());
156 check(program->gfx_level >= GFX8, "SDWA is GFX8 to GFX10.3 only", instr.get());
157 check(program->gfx_level < GFX11, "SDWA is GFX8 to GFX10.3 only", instr.get());
159 SDWA_instruction& sdwa = instr->sdwa();
161 instr.get());
164 "SDWA VOPC clamp only supported on GFX8", instr.get());
165 check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
167 "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
169 const Definition& def = instr->definitions[0];
171 instr.get());
173 "SDWA definition selection size must be at most definition size", instr.get());
176 "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
178 instr.get());
181 instr.get());
183 "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
186 for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
187 const Operand& op = instr->operands[i];
188 check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
190 "SDWA operand selection size must be at most operand size", instr.get());
192 "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
194 instr.get());
196 if (instr->operands.size() >= 3) {
197 check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
198 "3rd operand must be fixed to vcc with SDWA", instr.get());
200 if (instr->definitions.size() >= 2) {
201 check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
202 "2nd definition must be fixed to vcc with SDWA", instr.get());
206 instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
207 instr->opcode != aco_opcode::v_fmamk_f32 &&
208 instr->opcode != aco_opcode::v_fmaak_f32 &&
209 instr->opcode != aco_opcode::v_fmamk_f16 &&
210 instr->opcode != aco_opcode::v_fmaak_f16 &&
211 instr->opcode != aco_opcode::v_madmk_f32 &&
212 instr->opcode != aco_opcode::v_madak_f32 &&
213 instr->opcode != aco_opcode::v_madmk_f16 &&
214 instr->opcode != aco_opcode::v_madak_f16 &&
215 instr->opcode != aco_opcode::v_readfirstlane_b32 &&
216 instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
220 (instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
222 check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
226 if (instr->isVOP3()) {
227 VOP3_instruction& vop3 = instr->vop3();
229 instr.get());
232 if (i >= instr->operands.size() ||
233 (instr->operands[i].hasRegClass() &&
234 instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
235 check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
237 if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
239 instr.get());
240 } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
241 instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
242 instr->opcode == aco_opcode::v_fma_mix_f32) {
243 check(instr->definitions[0].regClass() ==
244 (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
245 "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
246 } else if (instr->isVOP3P()) {
247 VOP3P_instruction& vop3p = instr->vop3p();
248 for (unsigned i = 0; i < instr->operands.size(); i++) {
249 if (instr->operands[i].hasRegClass() &&
250 instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
252 "Unexpected opsel for subdword operand", instr.get());
254 check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition",
255 instr.get());
259 for (unsigned i = 0; i < instr->operands.size(); i++) {
260 if (instr->operands[i].isUndefined()) {
261 bool flat = instr->isFlatLike();
262 bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
263 instr->opcode == aco_opcode::p_create_vector ||
264 instr->opcode == aco_opcode::p_jump_to_epilog ||
265 (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
266 ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
267 (instr->isScratch() && i == 0);
268 check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
270 check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
271 instr->operands[i].isConstant(),
272 "Uninitialized Operand", instr.get());
277 for (unsigned i = 0; i < instr->definitions.size(); i++) {
278 if (instr->definitions[i].regClass().is_subdword())
279 check(instr->definitions[i].bytes() <= 4 || instr->isPseudo() || instr->isVMEM(),
281 instr.get());
284 if (instr->isSALU() || instr->isVALU()) {
287 for (unsigned i = 0; i < instr->operands.size(); i++) {
288 Operand op = instr->operands[i];
292 check(!instr->isDPP() && !instr->isSDWA() &&
293 (!instr->isVOP3() || program->gfx_level >= GFX10) &&
294 (!instr->isVOP3P() || program->gfx_level >= GFX10),
295 "Literal applied on wrong instruction format", instr.get());
299 "Only 1 Literal allowed", instr.get());
301 check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
302 "Wrong source position for Literal argument", instr.get());
306 if (instr->isVALU()) {
307 bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
308 instr->opcode == aco_opcode::v_lshrrev_b64 ||
309 instr->opcode == aco_opcode::v_ashrrev_i64;
314 uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
315 if (instr->isSDWA())
317 else if (instr->isDPP())
320 if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
321 instr->opcode == aco_opcode::v_readlane_b32 ||
322 instr->opcode == aco_opcode::v_readlane_b32_e64) {
323 check(instr->definitions[0].getTemp().type() == RegType::sgpr,
324 "Wrong Definition type for VALU instruction", instr.get());
326 check(instr->definitions[0].getTemp().type() == RegType::vgpr,
327 "Wrong Definition type for VALU instruction", instr.get());
332 for (unsigned i = 0; i < instr->operands.size(); i++) {
333 Operand op = instr->operands[i];
334 if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
335 instr->opcode == aco_opcode::v_readlane_b32 ||
336 instr->opcode == aco_opcode::v_readlane_b32_e64) {
339 "Must be a SGPR or a constant", instr.get());
342 "Wrong Operand type for VALU instruction", instr.get());
345 if (instr->opcode == aco_opcode::v_permlane16_b32 ||
346 instr->opcode == aco_opcode::v_permlanex16_b32) {
348 "Operand 0 of v_permlane must be VGPR", instr.get());
352 instr.get());
355 if (instr->opcode == aco_opcode::v_writelane_b32 ||
356 instr->opcode == aco_opcode::v_writelane_b32_e64) {
359 "Wrong Operand type for VALU instruction", instr.get());
362 "Must be a SGPR or a constant", instr.get());
365 if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {
367 instr.get());
377 instr.get());
380 "Too many SGPRs/literals", instr.get());
383 if (instr->isSOP1() || instr->isSOP2()) {
384 if (!instr->definitions.empty())
385 check(instr->definitions[0].getTemp().type() == RegType::sgpr,
386 "Wrong Definition type for SALU instruction", instr.get());
387 for (const Operand& op : instr->operands) {
389 "Wrong Operand type for SALU instruction", instr.get());
394 switch (instr->format) {
396 if (instr->opcode == aco_opcode::p_create_vector) {
398 for (const Operand& op : instr->operands) {
399 check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
402 check(size == instr->definitions[0].bytes(),
403 "Definition size does not match operand sizes", instr.get());
404 if (instr->definitions[0].getTemp().type() == RegType::sgpr) {
405 for (const Operand& op : instr->operands) {
407 "Wrong Operand type for scalar vector", instr.get());
410 } else if (instr->opcode == aco_opcode::p_extract_vector) {
411 check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(),
412 "Wrong Operand types", instr.get());
413 check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
414 instr->operands[0].bytes(),
415 "Index out of range", instr.get());
416 check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
417 instr->operands[0].regClass().type() == RegType::sgpr,
418 "Cannot extract SGPR value from VGPR vector", instr.get());
420 !instr->definitions[0].regClass().is_subdword() ||
421 instr->operands[0].regClass().type() == RegType::vgpr,
422 "Cannot extract subdword from SGPR before GFX9+", instr.get());
423 } else if (instr->opcode == aco_opcode::p_split_vector) {
424 check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get());
426 for (const Definition& def : instr->definitions) {
429 check(size == instr->operands[0].bytes(),
430 "Operand size does not match definition sizes", instr.get());
431 if (instr->operands[0].getTemp().type() == RegType::vgpr) {
432 for (const Definition& def : instr->definitions)
434 "Wrong Definition type for VGPR split_vector", instr.get());
436 for (const Definition& def : instr->definitions)
438 "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
440 } else if (instr->opcode == aco_opcode::p_parallelcopy) {
441 check(instr->definitions.size() == instr->operands.size(),
442 "Number of Operands does not match number of Definitions", instr.get());
443 for (unsigned i = 0; i < instr->operands.size(); i++) {
444 check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
445 "Operand and Definition size must match", instr.get());
446 if (instr->operands[i].isTemp()) {
447 check((instr->definitions[i].getTemp().type() ==
448 instr->operands[i].regClass().type()) ||
449 (instr->definitions[i].getTemp().type() == RegType::vgpr &&
450 instr->operands[i].regClass().type() == RegType::sgpr),
451 "Operand and Definition types do not match", instr.get());
452 check(instr->definitions[i].regClass().is_linear_vgpr() ==
453 instr->operands[i].regClass().is_linear_vgpr(),
454 "Operand and Definition types do not match", instr.get());
456 check(!instr->definitions[i].regClass().is_linear_vgpr(),
458 instr.get());
461 } else if (instr->opcode == aco_opcode::p_phi) {
462 check(instr->operands.size() == block.logical_preds.size(),
463 "Number of Operands does not match number of predecessors", instr.get());
464 check(instr->definitions[0].getTemp().type() == RegType::vgpr,
465 "Logical Phi Definition must be vgpr", instr.get());
466 for (const Operand& op : instr->operands)
467 check(instr->definitions[0].size() == op.size(),
468 "Operand sizes must match Definition size", instr.get());
469 } else if (instr->opcode == aco_opcode::p_linear_phi) {
470 for (const Operand& op : instr->operands) {
472 instr.get());
473 check(instr->definitions[0].size() == op.size(),
474 "Operand sizes must match Definition size", instr.get());
476 check(instr->operands.size() == block.linear_preds.size(),
477 "Number of Operands does not match number of predecessors", instr.get());
478 } else if (instr->opcode == aco_opcode::p_extract ||
479 instr->opcode == aco_opcode::p_insert) {
480 check(instr->operands[0].isTemp(), "Data operand must be temporary", instr.get());
481 check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
482 if (instr->opcode == aco_opcode::p_extract)
483 check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
484 instr.get());
486 check(instr->definitions[0].getTemp().type() != RegType::sgpr ||
487 instr->operands[0].getTemp().type() == RegType::sgpr,
488 "Can't extract/insert VGPR to SGPR", instr.get());
490 if (instr->opcode == aco_opcode::p_insert)
491 check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
492 "Sizes of p_insert data operand and definition must match", instr.get());
494 if (instr->definitions[0].getTemp().type() == RegType::sgpr)
495 check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
496 instr->definitions[1].physReg() == scc,
497 "SGPR extract/insert needs an SCC definition", instr.get());
499 unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u;
500 unsigned op_bits = instr->operands[2].constantValue();
502 if (instr->opcode == aco_opcode::p_insert) {
503 check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get());
504 check(op_bits < data_bits, "Size must be smaller than source", instr.get());
505 } else if (instr->opcode == aco_opcode::p_extract) {
507 "Size must be 8 or 16 or 32", instr.get());
509 instr.get());
513 check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
514 instr.get());
515 } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
516 check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
517 instr.get());
518 check(instr->operands.size() > 0 &&
519 instr->operands[0].getTemp().type() == RegType::sgpr &&
520 instr->operands[0].getTemp().size() == 2,
521 "First operand of p_jump_to_epilog must be a SGPR", instr.get());
522 for (unsigned i = 1; i < instr->operands.size(); i++) {
523 check(instr->operands[i].getTemp().type() == RegType::vgpr ||
524 instr->operands[i].isUndefined(),
525 "Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
531 for (const Operand& op : instr->operands)
534 instr.get());
536 if (instr->opcode == aco_opcode::p_reduce &&
537 instr->reduction().cluster_size == program->wave_size)
538 check(instr->definitions[0].regClass().type() == RegType::sgpr ||
540 "The result of unclustered reductions must go into an SGPR.", instr.get());
542 check(instr->definitions[0].regClass().type() == RegType::vgpr,
544 instr.get());
549 if (instr->operands.size() >= 1)
550 check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) ||
551 (instr->operands[0].isTemp() &&
552 instr->operands[0].regClass().type() == RegType::sgpr),
553 "SMEM operands must be sgpr", instr.get());
554 if (instr->operands.size() >= 2)
555 check(instr->operands[1].isConstant() ||
556 (instr->operands[1].isTemp() &&
557 instr->operands[1].regClass().type() == RegType::sgpr),
558 "SMEM offset must be constant or sgpr", instr.get());
559 if (!instr->definitions.empty())
560 check(instr->definitions[0].getTemp().type() == RegType::sgpr,
561 "SMEM result must be sgpr", instr.get());
566 check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
567 instr.get());
568 check(instr->operands[1].hasRegClass() &&
569 instr->operands[1].regClass().type() == RegType::vgpr,
570 "VADDR must be in vgpr for VMEM instructions", instr.get());
572 instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr,
573 "VMEM resource constant must be sgpr", instr.get());
574 check(instr->operands.size() < 4 ||
575 (instr->operands[3].isTemp() &&
576 instr->operands[3].regClass().type() == RegType::vgpr),
577 "VMEM write data must be vgpr", instr.get());
579 const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
580 instr->opcode == aco_opcode::buffer_load_ubyte ||
581 instr->opcode == aco_opcode::buffer_load_sbyte ||
582 instr->opcode == aco_opcode::buffer_load_ushort ||
583 instr->opcode == aco_opcode::buffer_load_sshort ||
584 instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
585 instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
586 instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
587 instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
588 instr->opcode == aco_opcode::buffer_load_short_d16 ||
589 instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
590 instr->opcode == aco_opcode::buffer_load_format_d16_x ||
591 instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
592 instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
593 instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
594 instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
595 instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
596 instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
597 instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
598 instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
599 if (instr->definitions.size()) {
600 check(instr->definitions[0].isTemp() &&
601 instr->definitions[0].regClass().type() == RegType::vgpr,
602 "VMEM definitions[0] (VDATA) must be VGPR", instr.get());
603 check(d16 || !instr->definitions[0].regClass().is_subdword(),
604 "Only D16 opcodes can load subdword values.", instr.get());
605 check(instr->definitions[0].bytes() <= 8 || !d16,
606 "D16 opcodes can only load up to 8 bytes.", instr.get());
611 check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
612 instr.get());
613 check(instr->operands[0].hasRegClass() &&
614 (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
615 "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
616 if (instr->operands[1].hasRegClass())
617 check(instr->operands[1].regClass() == s4,
618 "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
619 if (!instr->operands[2].isUndefined()) {
620 bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
621 instr->opcode == aco_opcode::image_atomic_fcmpswap;
622 check(instr->definitions.empty() ||
623 (instr->definitions[0].regClass() == instr->operands[2].regClass() ||
627 instr.get());
629 check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
630 "NSA is only supported on GFX10+", instr.get());
631 for (unsigned i = 3; i < instr->operands.size(); i++) {
632 if (instr->operands.size() == 4) {
633 check(instr->operands[i].hasRegClass() &&
634 instr->operands[i].regClass().type() == RegType::vgpr,
635 "MIMG operands[3] (VADDR) must be VGPR", instr.get());
637 check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used",
638 instr.get());
642 if (instr->definitions.size()) {
643 check(instr->definitions[0].isTemp() &&
644 instr->definitions[0].regClass().type() == RegType::vgpr,
645 "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
646 check(instr->mimg().d16 || !instr->definitions[0].regClass().is_subdword(),
647 "Only D16 MIMG instructions can load subdword values.", instr.get());
648 check(instr->definitions[0].bytes() <= 8 || !instr->mimg().d16,
649 "D16 MIMG instructions can only load up to 8 bytes.", instr.get());
654 for (const Operand& op : instr->operands) {
656 "Only VGPRs are valid DS instruction operands", instr.get());
658 if (!instr->definitions.empty())
659 check(instr->definitions[0].getTemp().type() == RegType::vgpr,
660 "DS instruction must return VGPR", instr.get());
665 check(instr->operands[i].hasRegClass() &&
666 instr->operands[i].regClass().type() == RegType::vgpr,
667 "Only VGPRs are valid Export arguments", instr.get());
671 check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
672 instr.get());
676 instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr,
677 "FLAT/GLOBAL address must be vgpr", instr.get());
680 check(instr->operands[0].hasRegClass() &&
681 instr->operands[0].regClass().type() == RegType::vgpr,
682 "FLAT/GLOBAL/SCRATCH address must be undefined or vgpr", instr.get());
683 check(instr->operands[1].hasRegClass() &&
684 instr->operands[1].regClass().type() == RegType::sgpr,
685 "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
686 if (instr->format == Format::SCRATCH && program->gfx_level < GFX10_3)
687 check(instr->operands[0].isTemp() || instr->operands[1].isTemp(),
688 "SCRATCH must have either SADDR or ADDR operand", instr.get());
689 if (!instr->definitions.empty())
690 check(instr->definitions[0].getTemp().type() == RegType::vgpr,
691 "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
693 check(instr->operands[2].regClass().type() == RegType::vgpr,
694 "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
739 Location() : block(NULL), instr(NULL) {}
742 Instruction* instr; // NULL if it's the block's live-in
768 if (loc.instr) {
769 aco_print_instr(loc.instr, memf);
776 aco_print_instr(loc2.instr, memf);
788 validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
791 Operand op = instr->operands[index];
794 if (instr->opcode == aco_opcode::p_as_uniform)
796 if (instr->isPseudo() && gfx_level >= GFX8)
798 if (instr->isSDWA())
799 return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
800 byte % instr->sdwa().sel[index].size() == 0;
801 if (instr->isVOP3P()) {
802 bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
803 instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
804 instr->opcode == aco_opcode::v_fma_mix_f32;
805 return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
806 ((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
808 if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
811 switch (instr->opcode) {
851 validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
853 Definition def = instr->definitions[0];
856 if (instr->isPseudo() && gfx_level >= GFX8)
858 if (instr->isSDWA())
859 return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
860 byte % instr->sdwa().dst_sel.size() == 0;
861 if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
864 switch (instr->opcode) {
885 get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
888 Definition def = instr->definitions[index];
890 if (instr->isPseudo())
892 if (instr->isVALU()) {
894 if (instr->isSDWA())
895 return instr->sdwa().dst_sel.size();
897 if (instr_is_16bit(gfx_level, instr->opcode))
903 if (instr->isMIMG()) {
904 assert(instr->mimg().d16);
908 switch (instr->opcode) {
943 aco_ptr<Instruction>& instr)
947 for (unsigned i = 0; i < instr->definitions.size(); i++) {
948 Definition& def = instr->definitions[i];
962 unsigned written = get_subdword_bytes_written(program, instr, i);
976 for (const Definition& def : instr->definitions) {
1005 for (aco_ptr<Instruction>& instr : block.instructions) {
1006 if (instr->opcode == aco_opcode::p_phi) {
1007 for (unsigned i = 0; i < instr->operands.size(); i++) {
1008 if (instr->operands[i].isTemp() &&
1009 instr->operands[i].getTemp().type() == RegType::sgpr &&
1010 instr->operands[i].isFirstKill())
1011 phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
1015 loc.instr = instr.get();
1016 for (unsigned i = 0; i < instr->operands.size(); i++) {
1017 Operand& op = instr->operands[i];
1037 !validate_subdword_operand(program->gfx_level, instr, i))
1047 for (unsigned i = 0; i < instr->definitions.size(); i++) {
1048 Definition& def = instr->definitions[i];
1068 !validate_subdword_definition(program->gfx_level, instr))
1107 aco_ptr<Instruction>& instr = *it;
1110 if (instr->opcode == aco_opcode::p_logical_end) {
1124 for (const Definition& def : instr->definitions) {
1132 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1133 for (const Operand& op : instr->operands) {
1148 for (aco_ptr<Instruction>& instr : block.instructions) {
1149 loc.instr = instr.get();
1152 if (instr->opcode == aco_opcode::p_logical_end) {
1160 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1161 for (const Operand& op : instr->operands) {
1171 if (!instr->isBranch() || block.linear_succs.size() != 1)
1172 err |= validate_instr_defs(program, regs, assignments, loc, instr);
1174 if (!is_phi(instr)) {
1175 for (const Operand& op : instr->operands) {