Lines Matching refs:ctx
34 #define SMEM_WINDOW_SIZE (350 - ctx.num_waves * 35)
35 #define VMEM_WINDOW_SIZE (1024 - ctx.num_waves * 64)
37 #define SMEM_MAX_MOVES (64 - ctx.num_waves * 4)
38 #define VMEM_MAX_MOVES (256 - ctx.num_waves * 16)
40 #define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 2)
644 schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
661 DownwardsCursor cursor = ctx.mv.downwards_init(idx, false, false);
671 idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
672 if (can_stall_prev_smem && ctx.last_SMEM_stall >= 0)
681 (cursor.insert_idx - cursor.source_idx > (ctx.num_waves * 4) ||
703 ctx.mv.downwards_skip(cursor);
707 MoveResult res = ctx.mv.downwards_move(cursor, false);
710 ctx.mv.downwards_skip(cursor);
716 if (candidate_idx < ctx.last_SMEM_dep_idx)
717 ctx.last_SMEM_stall++;
722 UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, false);
736 bool is_dependency = !found_dependency && !ctx.mv.upwards_check_deps(up_cursor);
753 ctx.mv.upwards_update_insert_idx(up_cursor);
764 ctx.mv.upwards_skip(up_cursor);
768 MoveResult res = ctx.mv.upwards_move(up_cursor);
774 ctx.mv.upwards_skip(up_cursor);
782 ctx.last_SMEM_dep_idx = found_dependency ? up_cursor.insert_idx : 0;
783 ctx.last_SMEM_stall = 10 - ctx.num_waves - k;
787 schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
804 DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
819 idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
820 if (can_stall_prev_smem && ctx.last_SMEM_stall >= 0)
865 ctx.mv.downwards_skip(cursor);
870 MoveResult res = ctx.mv.downwards_move(cursor, part_of_clause);
876 ctx.mv.downwards_skip(cursor);
884 ctx.mv.downwards_skip(cursor);
891 if (candidate_idx < ctx.last_SMEM_dep_idx)
892 ctx.last_SMEM_stall++;
896 UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, true);
922 is_dependency |= !found_dependency && !ctx.mv.upwards_check_deps(up_cursor);
925 ctx.mv.upwards_update_insert_idx(up_cursor);
933 ctx.mv.depends_on[def.tempId()] = true;
942 ctx.mv.upwards_skip(up_cursor);
946 MoveResult res = ctx.mv.upwards_move(up_cursor);
949 ctx.mv.upwards_skip(up_cursor);
959 schedule_position_export(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
963 int window_size = POS_EXP_WINDOW_SIZE / ctx.schedule_pos_export_div;
964 int max_moves = POS_EXP_MAX_MOVES / ctx.schedule_pos_export_div;
967 DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
989 ctx.mv.downwards_skip(cursor);
993 MoveResult res = ctx.mv.downwards_move(cursor, false);
996 ctx.mv.downwards_skip(cursor);
1006 schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars)
1008 ctx.last_SMEM_dep_idx = 0;
1009 ctx.last_SMEM_stall = INT16_MIN;
1010 ctx.mv.block = block;
1011 ctx.mv.register_demand = live_vars.register_demand[block->index].data();
1017 if (block->kind & block_kind_export_end && current->isEXP() && ctx.schedule_pos_exports) {
1020 ctx.mv.current = current;
1021 schedule_position_export(ctx, block, live_vars.register_demand[block->index], current,
1030 ctx.mv.current = current;
1031 schedule_VMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
1035 ctx.mv.current = current;
1036 schedule_SMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
1056 sched_ctx ctx;
1057 ctx.mv.depends_on.resize(program->peekAllocationId());
1058 ctx.mv.RAR_dependencies.resize(program->peekAllocationId());
1059 ctx.mv.RAR_dependencies_clause.resize(program->peekAllocationId());
1066 ctx.num_waves = program->num_waves;
1068 ctx.num_waves = 5 * wave_fac;
1070 ctx.num_waves = 6 * wave_fac;
1072 ctx.num_waves = 7 * wave_fac;
1073 ctx.num_waves = std::max<uint16_t>(ctx.num_waves, program->min_waves);
1074 ctx.num_waves = std::min<uint16_t>(ctx.num_waves, program->num_waves);
1075 ctx.num_waves = max_suitable_waves(program, ctx.num_waves);
1078 ctx.num_waves = std::max<uint16_t>(ctx.num_waves / wave_fac, 1);
1080 assert(ctx.num_waves > 0);
1081 ctx.mv.max_registers = {int16_t(get_addr_vgpr_from_waves(program, ctx.num_waves * wave_fac) - 2),
1082 int16_t(get_addr_sgpr_from_waves(program, ctx.num_waves * wave_fac))};
1090 ctx.schedule_pos_exports = false;
1092 ctx.schedule_pos_export_div = 4;
1096 schedule_block(ctx, program, &block, live_vars);