Lines Matching refs:inst
409 foreach_inst_in_block_safe(vec4_instruction, inst, block) {
418 if (inst->opcode == BRW_OPCODE_MOV &&
419 inst->src[0].file == IMM &&
420 inst->predicate == BRW_PREDICATE_NONE &&
421 inst->dst.writemask != WRITEMASK_XYZW &&
422 type_sz(inst->src[0].type) < 8 &&
423 (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
425 vf = brw_float_to_vf(inst->src[0].d);
429 vf = brw_float_to_vf(inst->src[0].f);
440 if (last_reg != inst->dst.nr ||
441 last_offset != inst->dst.offset ||
442 last_reg_file != inst->dst.file ||
451 inst->insert_before(block, mov);
472 if ((inst->dst.writemask & WRITEMASK_X) != 0)
474 if ((inst->dst.writemask & WRITEMASK_Y) != 0)
476 if ((inst->dst.writemask & WRITEMASK_Z) != 0)
478 if ((inst->dst.writemask & WRITEMASK_W) != 0)
481 writemask |= inst->dst.writemask;
482 imm_inst[inst_count++] = inst;
484 last_reg = inst->dst.nr;
485 last_offset = inst->dst.offset;
486 last_reg_file = inst->dst.file;
517 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
518 if (inst->dst.file == BAD_FILE ||
519 inst->dst.file == ARF ||
520 inst->dst.file == FIXED_GRF ||
521 inst->is_send_from_grf())
527 switch (inst->opcode) {
554 swizzle = brw_swizzle_for_mask(inst->dst.writemask);
560 if (inst->src[i].file != VGRF &&
561 inst->src[i].file != ATTR &&
562 inst->src[i].file != UNIFORM)
566 brw_compose_swizzle(swizzle, inst->src[i].swizzle);
567 if (inst->src[i].swizzle != new_swizzle) {
568 inst->src[i].swizzle = new_swizzle;
590 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
592 if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
595 assert(!inst->src[i].reladdr);
597 inst->src[i].nr += inst->src[i].offset / 16;
598 inst->src[i].offset %= 16;
620 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
621 switch (inst->opcode) {
623 if (inst->src[0].file != IMM)
626 if (inst->saturate) {
634 if (inst->dst.type != inst->src[0].type &&
635 inst->dst.type != BRW_REGISTER_TYPE_DF &&
636 inst->src[0].type != BRW_REGISTER_TYPE_F)
639 if (brw_saturate_immediate(inst->src[0].type,
640 &inst->src[0].as_brw_reg())) {
641 inst->saturate = false;
648 if (inst->src[1].is_zero()) {
649 inst->opcode = BRW_OPCODE_MOV;
650 inst->src[1] = src_reg();
656 if (inst->src[0].file != UNIFORM) {
657 inst->opcode = BRW_OPCODE_MOV;
663 if (inst->src[1].is_zero()) {
664 inst->opcode = BRW_OPCODE_MOV;
665 inst->src[1] = src_reg();
671 if (inst->src[1].is_zero()) {
672 inst->opcode = BRW_OPCODE_MOV;
673 switch (inst->src[0].type) {
675 inst->src[0] = brw_imm_f(0.0f);
678 inst->src[0] = brw_imm_d(0);
681 inst->src[0] = brw_imm_ud(0u);
686 inst->src[1] = src_reg();
688 } else if (inst->src[1].is_one()) {
689 inst->opcode = BRW_OPCODE_MOV;
690 inst->src[1] = src_reg();
692 } else if (inst->src[1].is_negative_one()) {
693 inst->opcode = BRW_OPCODE_MOV;
694 inst->src[0].negate = !inst->src[0].negate;
695 inst->src[1] = src_reg();
700 if (is_uniform(inst->src[0]) ||
701 inst->src[1].is_zero()) {
702 inst->opcode = BRW_OPCODE_MOV;
703 inst->src[1] = src_reg();
704 inst->force_writemask_all = true;
723 vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
732 if (IS_64BIT(inst->dst) || IS_64BIT(inst->src[0]) ||
733 IS_64BIT(inst->src[1]) || IS_64BIT(inst->src[2]))
759 return (inst->mlen || inst->predicate || inst->is_math());
792 foreach_inst_in_block (vec4_instruction, inst, block) {
797 int reg = inst->src[i].nr + inst->src[i].offset / REG_SIZE;
798 if (inst->src[i].file == VGRF) {
800 } else if (inst->src[i].file == FIXED_GRF) {
804 assert(inst->src[i].file != MRF);
807 if (is_dep_ctrl_unsafe(inst)) {
816 int reg = inst->dst.nr + inst->dst.offset / REG_SIZE;
817 if (inst->dst.file == VGRF || inst->dst.file == FIXED_GRF) {
819 last_grf_write[reg]->dst.offset == inst->dst.offset &&
820 !(inst->dst.writemask & grf_channels_written[reg])) {
822 inst->no_dd_check = true;
827 last_grf_write[reg] = inst;
828 grf_channels_written[reg] |= inst->dst.writemask;
829 } else if (inst->dst.file == MRF) {
831 last_mrf_write[reg]->dst.offset == inst->dst.offset &&
832 !(inst->dst.writemask & mrf_channels_written[reg])) {
834 inst->no_dd_check = true;
839 last_mrf_write[reg] = inst;
840 mrf_channels_written[reg] |= inst->dst.writemask;
956 foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) {
960 if (inst->opcode != BRW_OPCODE_MOV ||
961 (inst->dst.file != VGRF && inst->dst.file != MRF) ||
962 inst->predicate ||
963 inst->src[0].file != VGRF ||
964 inst->dst.type != inst->src[0].type ||
965 inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
969 if (inst->dst.file == inst->src[0].file &&
970 inst->dst.nr == inst->src[0].nr &&
971 inst->dst.offset == inst->src[0].offset) {
975 if ((inst->dst.writemask & (1 << c)) == 0)
978 if (BRW_GET_SWZ(inst->src[0].swizzle, c) != c) {
985 inst->remove(block);
991 bool to_mrf = (inst->dst.file == MRF);
996 if (live.var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 8) > ip)
1005 brw_apply_inv_swizzle_to_mask(inst->src[0].swizzle,
1006 inst->dst.writemask);
1013 vec4_instruction *_scan_inst = (vec4_instruction *)inst->prev;
1015 inst) {
1018 if (regions_overlap(inst->src[0], inst->size_read(0),
1050 if (inst->saturate &&
1051 inst->dst.type != scan_inst->dst.type &&
1060 if (type_sz(inst->src[0].type) != type_sz(scan_inst->src[0].type))
1067 if (scan_inst->size_written != inst->size_written)
1071 if (!scan_inst->can_reswizzle(devinfo, inst->dst.writemask,
1072 inst->src[0].swizzle,
1083 scan_inst->dst.offset != inst->src[0].offset)
1101 if (regions_overlap(inst->src[0], inst->size_read(0),
1111 if (regions_overlap(inst->dst, inst->size_written,
1113 (inst->dst.writemask & scan_inst->dst.writemask) != 0) {
1125 if (inst->dst.nr >= start && inst->dst.nr < end) {
1130 if (regions_overlap(inst->dst, inst->size_written,
1146 while (scan_inst != inst) {
1148 scan_inst->dst.nr == inst->src[0].nr &&
1149 scan_inst->dst.offset == inst->src[0].offset) {
1150 scan_inst->reswizzle(inst->dst.writemask,
1151 inst->src[0].swizzle);
1152 scan_inst->dst.file = inst->dst.file;
1153 scan_inst->dst.nr = inst->dst.nr;
1154 scan_inst->dst.offset = inst->dst.offset;
1155 if (inst->saturate &&
1156 inst->dst.type != scan_inst->dst.type) {
1159 * to match the ones in inst. Otherwise, we could have an
1162 scan_inst->dst.type = inst->dst.type;
1163 scan_inst->src[0].type = inst->src[0].type;
1165 scan_inst->saturate |= inst->saturate;
1169 inst->remove(block);
1199 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1200 switch (inst->opcode) {
1213 inst->opcode = BRW_OPCODE_MOV;
1214 inst->src[0] = brw_imm_d(0);
1215 inst->force_writemask_all = true;
1260 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
1261 if (inst->dst.file == VGRF && regs_written(inst) > 1)
1262 split_grf[inst->dst.nr] = false;
1265 if (inst->src[i].file == VGRF && regs_read(inst, i) > 1)
1266 split_grf[inst->src[i].nr] = false;
1286 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
1287 if (inst->dst.file == VGRF && split_grf[inst->dst.nr] &&
1288 inst->dst.offset / REG_SIZE != 0) {
1289 inst->dst.nr = (new_virtual_grf[inst->dst.nr] +
1290 inst->dst.offset / REG_SIZE - 1);
1291 inst->dst.offset %= REG_SIZE;
1294 if (inst->src[i].file == VGRF && split_grf[inst->src[i].nr] &&
1295 inst->src[i].offset / REG_SIZE != 0) {
1296 inst->src[i].nr = (new_virtual_grf[inst->src[i].nr] +
1297 inst->src[i].offset / REG_SIZE - 1);
1298 inst->src[i].offset %= REG_SIZE;
1314 const vec4_instruction *inst = (const vec4_instruction *)be_inst;
1316 if (inst->predicate) {
1318 inst->predicate_inverse ? '-' : '+',
1319 inst->flag_subreg / 2,
1320 inst->flag_subreg % 2,
1321 pred_ctrl_align16[inst->predicate]);
1324 fprintf(file, "%s(%d)", brw_instruction_name(&compiler->isa, inst->opcode),
1325 inst->exec_size);
1326 if (inst->saturate)
1328 if (inst->conditional_mod) {
1329 fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);
1330 if (!inst->predicate &&
1331 (devinfo->ver < 5 || (inst->opcode != BRW_OPCODE_SEL &&
1332 inst->opcode != BRW_OPCODE_CSEL &&
1333 inst->opcode != BRW_OPCODE_IF &&
1334 inst->opcode != BRW_OPCODE_WHILE))) {
1335 fprintf(file, ".f%d.%d", inst->flag_subreg / 2, inst->flag_subreg % 2);
1340 switch (inst->dst.file) {
1342 fprintf(file, "vgrf%d", inst->dst.nr);
1345 fprintf(file, "g%d", inst->dst.nr);
1348 fprintf(file, "m%d", inst->dst.nr);
1351 switch (inst->dst.nr) {
1356 fprintf(file, "a0.%d", inst->dst.subnr);
1359 fprintf(file, "acc%d", inst->dst.subnr);
1362 fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
1365 fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
1377 if (inst->dst.offset ||
1378 (inst->dst.file == VGRF &&
1379 alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) {
1380 const unsigned reg_size = (inst->dst.file == UNIFORM ? 16 : REG_SIZE);
1381 fprintf(file, "+%d.%d", inst->dst.offset / reg_size,
1382 inst->dst.offset % reg_size);
1384 if (inst->dst.writemask != WRITEMASK_XYZW) {
1386 if (inst->dst.writemask & 1)
1388 if (inst->dst.writemask & 2)
1390 if (inst->dst.writemask & 4)
1392 if (inst->dst.writemask & 8)
1395 fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type));
1397 if (inst->src[0].file != BAD_FILE)
1400 for (int i = 0; i < 3 && inst->src[i].file != BAD_FILE; i++) {
1401 if (inst->src[i].negate)
1403 if (inst->src[i].abs)
1405 switch (inst->src[i].file) {
1407 fprintf(file, "vgrf%d", inst->src[i].nr);
1410 fprintf(file, "g%d.%d", inst->src[i].nr, inst->src[i].subnr);
1413 fprintf(file, "attr%d", inst->src[i].nr);
1416 fprintf(file, "u%d", inst->src[i].nr);
1419 switch (inst->src[i].type) {
1421 fprintf(file, "%fF", inst->src[i].f);
1424 fprintf(file, "%fDF", inst->src[i].df);
1427 fprintf(file, "%dD", inst->src[i].d);
1430 fprintf(file, "%uU", inst->src[i].ud);
1434 brw_vf_to_float((inst->src[i].ud >> 0) & 0xff),
1435 brw_vf_to_float((inst->src[i].ud >> 8) & 0xff),
1436 brw_vf_to_float((inst->src[i].ud >> 16) & 0xff),
1437 brw_vf_to_float((inst->src[i].ud >> 24) & 0xff));
1445 switch (inst->src[i].nr) {
1450 fprintf(file, "a0.%d", inst->src[i].subnr);
1453 fprintf(file, "acc%d", inst->src[i].subnr);
1456 fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
1459 fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
1470 if (inst->src[i].offset ||
1471 (inst->src[i].file == VGRF &&
1472 alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) {
1473 const unsigned reg_size = (inst->src[i].file == UNIFORM ? 16 : REG_SIZE);
1474 fprintf(file, "+%d.%d", inst->src[i].offset / reg_size,
1475 inst->src[i].offset % reg_size);
1478 if (inst->src[i].file != IMM) {
1482 fprintf(file, "%s", chans[BRW_GET_SWZ(inst->src[i].swizzle, c)]);
1486 if (inst->src[i].abs)
1489 if (inst->src[i].file != IMM) {
1490 fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type));
1493 if (i < 2 && inst->src[i + 1].file != BAD_FILE)
1497 if (inst->force_writemask_all)
1500 if (inst->exec_size != 8)
1501 fprintf(file, " group%d", inst->group);
1510 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
1512 if (inst->src[i].file == ATTR) {
1513 assert(inst->src[i].offset % REG_SIZE == 0);
1514 int grf = payload_reg + inst->src[i].nr +
1515 inst->src[i].offset / REG_SIZE;
1518 reg.swizzle = inst->src[i].swizzle;
1519 reg.type = inst->src[i].type;
1520 reg.abs = inst->src[i].abs;
1521 reg.negate = inst->src[i].negate;
1522 inst->src[i] = reg;
1613 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1614 const vec4_builder ibld(this, block, inst);
1616 if (inst->opcode == BRW_OPCODE_SEL &&
1617 inst->predicate == BRW_PREDICATE_NONE) {
1623 if (inst->src[1].type != BRW_REGISTER_TYPE_F ||
1624 (inst->src[1].file == IMM && !isnan(inst->src[1].f))) {
1625 ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
1626 inst->conditional_mod);
1628 ibld.CMPN(ibld.null_reg_d(), inst->src[0], inst->src[1],
1629 inst->conditional_mod);
1631 inst->predicate = BRW_PREDICATE_NORMAL;
1632 inst->conditional_mod = BRW_CONDITIONAL_NONE;
1673 is_align1_df(vec4_instruction *inst)
1675 switch (inst->opcode) {
1699 foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) {
1700 if (inst->is_3src(compiler) && inst->dst.is_null()) {
1701 const unsigned size_written = type_sz(inst->dst.type);
1704 inst->dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
1705 inst->dst.type);
1718 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
1720 class src_reg &src = inst->src[i];
1775 apply_logical_swizzle(&reg, inst, i);
1789 if (is_align1_df(inst) && (cvt(inst->exec_size) - 1) == src.width)
1793 if (inst->is_3src(compiler)) {
1800 if (inst->src[i].vstride == BRW_VERTICAL_STRIDE_0 &&
1801 type_sz(inst->src[i].type) < 8) {
1802 assert(brw_is_single_value_swizzle(inst->src[i].swizzle));
1803 inst->src[i].subnr += 4 * BRW_GET_SWZ(inst->src[i].swizzle, 0);
1808 dst_reg &dst = inst->dst;
1811 switch (inst->dst.file) {
1861 * \p inst. The instruction will be left untouched by
1868 unsigned stage, const vec4_instruction *inst)
1871 switch (inst->opcode) {
1874 return inst->exec_size;
1879 unsigned lowered_width = MIN2(16, inst->exec_size);
1885 if (devinfo->ver == 7 && inst->size_written > REG_SIZE) {
1889 if (inst->opcode == BRW_OPCODE_SEL && type_sz(inst->dst.type) == 8)
1899 if (inst->src[i].file == BAD_FILE)
1901 if (inst->size_read(i) <= REG_SIZE)
1908 if (inst->src[i].file == ATTR &&
1921 (get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8))
1928 dst_src_regions_overlap(vec4_instruction *inst)
1930 if (inst->size_written == 0)
1933 unsigned dst_start = inst->dst.offset;
1934 unsigned dst_end = dst_start + inst->size_written - 1;
1936 if (inst->src[i].file == BAD_FILE)
1939 if (inst->dst.file != inst->src[i].file ||
1940 inst->dst.nr != inst->src[i].nr)
1943 unsigned src_start = inst->src[i].offset;
1944 unsigned src_end = src_start + inst->size_read(i) - 1;
1961 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1963 get_lowered_simd_width(devinfo, prog_data->dispatch_mode, stage, inst);
1964 assert(lowered_width <= inst->exec_size);
1965 if (lowered_width == inst->exec_size)
1980 bool needs_temp = dst_src_regions_overlap(inst);
1981 for (unsigned n = 0; n < inst->exec_size / lowered_width; n++) {
1984 unsigned size_written = lowered_width * type_sz(inst->dst.type);
1990 vec4_instruction *linst = new(mem_ctx) vec4_instruction(*inst);
2000 inst->dst.type);
2001 if (inst->is_align1_partial_write()) {
2002 vec4_instruction *copy = MOV(dst, src_reg(inst->dst));
2006 inst->insert_before(block, copy);
2009 dst = horiz_offset(inst->dst, channel_offset);
2027 inst->insert_before(block, linst);
2034 MOV(offset(inst->dst, lowered_width, n), src_reg(dst));
2038 mov->predicate = inst->predicate;
2039 inst->insert_before(block, mov);
2043 inst->remove(block);
2077 is_gfx7_supported_64bit_swizzle(vec4_instruction *inst, unsigned arg)
2079 switch (inst->src[arg].swizzle) {
2107 vec4_visitor::is_supported_64bit_region(vec4_instruction *inst, unsigned arg)
2109 const src_reg &src = inst->src[arg];
2131 return devinfo->ver == 7 && is_gfx7_supported_64bit_swizzle(inst, arg);
2140 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
2142 if (is_align1_df(inst))
2146 bool is_double = type_sz(inst->dst.type) == 8;
2148 is_double = inst->src[arg].file != BAD_FILE &&
2149 type_sz(inst->src[arg].type) == 8;
2163 if (inst->dst.writemask == WRITEMASK_XY ||
2164 inst->dst.writemask == WRITEMASK_ZW) {
2168 if (inst->src[i].file == BAD_FILE || type_sz(inst->src[i].type) < 8)
2170 skip_lowering = skip_lowering && is_supported_64bit_region(inst, i);
2180 if (!(inst->dst.writemask & chan_mask))
2183 vec4_instruction *scalar_inst = new(mem_ctx) vec4_instruction(*inst);
2186 unsigned swz = BRW_GET_SWZ(inst->src[i].swizzle, chan);
2192 if (inst->predicate != BRW_PREDICATE_NONE) {
2194 scalarize_predicate(inst->predicate, chan_mask);
2197 inst->insert_before(block, scalar_inst);
2200 inst->remove(block);
2215 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
2216 if (inst->opcode != BRW_OPCODE_MAD)
2219 if (type_sz(inst->dst.type) != 8)
2227 vec4_instruction *mul = new(mem_ctx) vec4_instruction(*inst);
2230 mul->src[0] = inst->src[1];
2231 mul->src[1] = inst->src[2];
2234 vec4_instruction *add = new(mem_ctx) vec4_instruction(*inst);
2237 add->src[1] = inst->src[0];
2240 inst->insert_before(block, mul);
2241 inst->insert_before(block, add);
2242 inst->remove(block);
2257 * @inst and @arg identify the original vec4 IR source operand we need to
2267 vec4_instruction *inst, int arg)
2269 src_reg reg = inst->src[arg];
2277 if(type_sz(reg.type) < 8 || is_align1_df(inst)) {
2284 is_supported_64bit_region(inst, arg));
2291 if (is_supported_64bit_region(inst, arg) &&
2292 !is_gfx7_supported_64bit_swizzle(inst, arg)) {
2325 if (devinfo->ver == 7 && is_gfx7_supported_64bit_swizzle(inst, arg))