/*
 * Copyright (c) 2017 Lima Project
 * Copyright (c) 2013 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */

#ifndef LIMA_IR_GP_GPIR_H
#define LIMA_IR_GP_GPIR_H

#include "util/list.h"
#include "util/u_math.h"

#include "ir/lima_ir.h"

/* List of operations that a node can do. */
typedef enum {
   gpir_op_unsupported = 0,
   gpir_op_mov,

   /* mul ops */
   gpir_op_mul,
   gpir_op_select,
   gpir_op_complex1,
   gpir_op_complex2,

   /* add ops */
   gpir_op_add,
   gpir_op_floor,
   gpir_op_sign,
   gpir_op_ge,
   gpir_op_lt,
   gpir_op_min,
   gpir_op_max,
   gpir_op_abs,
   gpir_op_not,

   /* mul/add ops */
   gpir_op_neg,

   /* passthrough ops */
   gpir_op_clamp_const,
   gpir_op_preexp2,
   gpir_op_postlog2,

   /* complex ops */
   gpir_op_exp2_impl,
   gpir_op_log2_impl,
   gpir_op_rcp_impl,
   gpir_op_rsqrt_impl,

   /* load/store ops */
   gpir_op_load_uniform,
   gpir_op_load_temp,
   gpir_op_load_attribute,
   gpir_op_load_reg,
   gpir_op_store_temp,
   gpir_op_store_reg,
   gpir_op_store_varying,
   gpir_op_store_temp_load_off0,
   gpir_op_store_temp_load_off1,
   gpir_op_store_temp_load_off2,

   /* branch */
   gpir_op_branch_cond,

   /* const (emulated) */
   gpir_op_const,

   /* emulated ops (lowered to sequences of real ops before scheduling) */
   gpir_op_exp2,
   gpir_op_log2,
   gpir_op_rcp,
   gpir_op_rsqrt,
   gpir_op_ceil,
   gpir_op_exp,
   gpir_op_log,
   gpir_op_sin,
   gpir_op_cos,
   gpir_op_tan,
   gpir_op_branch_uncond,
   gpir_op_eq,
   gpir_op_ne,

   /* auxiliary ops */
   gpir_op_dummy_f,
   gpir_op_dummy_m,

   gpir_op_num,
} gpir_op;

/* Broad category of a node; selects which gpir_*_node subtype it is. */
typedef enum {
   gpir_node_type_alu,
   gpir_node_type_const,
   gpir_node_type_load,
   gpir_node_type_store,
   gpir_node_type_branch,
} gpir_node_type;

/* Static per-op description; one entry per gpir_op in gpir_op_infos[]. */
typedef struct {
   /* printable name of the op */
   char *name;
   /* whether the hardware can negate the destination of this op */
   bool dest_neg;
   /* whether the hardware can negate each source operand */
   bool src_neg[4];
   /* candidate instruction slots for this op (presumably terminated by
    * GPIR_INSTR_SLOT_END — confirm against the table definition) */
   int *slots;
   gpir_node_type type;
   bool spillless;
   bool schedule_first;
   bool may_consume_two_slots;
} gpir_op_info;

extern const gpir_op_info gpir_op_infos[];

/* A dependency edge between two nodes in the per-block DAG. */
typedef struct {
   enum {
      GPIR_DEP_INPUT,     /* def is the input of use */
      GPIR_DEP_OFFSET,    /* def is the offset of use (i.e. temp store) */
      GPIR_DEP_READ_AFTER_WRITE,
      GPIR_DEP_WRITE_AFTER_READ,
   } type;

   /* node that executes before succ */
   struct gpir_node *pred;
   /* node that executes after pred */
   struct gpir_node *succ;

   /* link for pred's succ_list */
   struct list_head pred_link;
   /* link for succ's pred_list */
   struct list_head succ_link;
} gpir_dep;

struct gpir_instr;
struct gpir_store_node;

/* Base type of every IR node; subtypes embed this as their first member. */
typedef struct gpir_node {
   struct list_head list;
   gpir_op op;
   gpir_node_type type;
   int index;
   char name[16];
   bool printed;
   struct gpir_block *block;

   /* for nodes relationship */
   /* for node who uses this node (successor) */
   struct list_head succ_list;
   /* for node this node uses (predecessor) */
   struct list_head pred_list;

   /* for scheduler and regalloc */
   int value_reg;
   union {
      /* state for the final (post-regalloc) scheduler */
      struct {
         struct gpir_instr *instr;
         struct gpir_store_node *physreg_store;
         int pos;
         int dist;
         int index;
         bool ready;
         bool inserted;
         bool max_node, next_max_node;
         bool complex_allowed;
      } sched;
      /* state for the register-pressure-reducing pre-scheduler */
      struct {
         int parent_index;
         float reg_pressure;
         int est;
         bool scheduled;
      } rsched;
      /* state for value-register allocation */
      struct {
         float index;
         struct gpir_node *last;
      } vreg;
      /* state for physical-register allocation */
      struct {
         int index;
      } preg;
   };
} gpir_node;

typedef struct {
   gpir_node node;

   gpir_node *children[3];
   bool children_negate[3];
   int num_child;

   bool dest_negate;
} gpir_alu_node;

typedef struct {
   gpir_node node;
   /* constant value, accessible as either float or int bits */
   union fi value;
} gpir_const_node;

/* A virtual register; lives on gpir_compiler::reg_list. */
typedef struct {
   int index;
   struct list_head list;
} gpir_reg;

typedef struct {
   gpir_node node;

   unsigned index;
   unsigned component;

   gpir_reg *reg;
   struct list_head reg_link;
} gpir_load_node;

typedef struct gpir_store_node {
   gpir_node node;

   unsigned index;
   unsigned component;
   /* the node whose value is being stored */
   gpir_node *child;

   gpir_reg *reg;
} gpir_store_node;

/* Slots available in one hardware instruction word. */
enum gpir_instr_slot {
   GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_MUL1,
   GPIR_INSTR_SLOT_ADD0,
   GPIR_INSTR_SLOT_ADD1,
   GPIR_INSTR_SLOT_PASS,
   GPIR_INSTR_SLOT_COMPLEX,
   GPIR_INSTR_SLOT_REG0_LOAD0,
   GPIR_INSTR_SLOT_REG0_LOAD1,
   GPIR_INSTR_SLOT_REG0_LOAD2,
   GPIR_INSTR_SLOT_REG0_LOAD3,
   GPIR_INSTR_SLOT_REG1_LOAD0,
   GPIR_INSTR_SLOT_REG1_LOAD1,
   GPIR_INSTR_SLOT_REG1_LOAD2,
   GPIR_INSTR_SLOT_REG1_LOAD3,
   GPIR_INSTR_SLOT_MEM_LOAD0,
   GPIR_INSTR_SLOT_MEM_LOAD1,
   GPIR_INSTR_SLOT_MEM_LOAD2,
   GPIR_INSTR_SLOT_MEM_LOAD3,
   GPIR_INSTR_SLOT_STORE0,
   GPIR_INSTR_SLOT_STORE1,
   GPIR_INSTR_SLOT_STORE2,
   GPIR_INSTR_SLOT_STORE3,
   GPIR_INSTR_SLOT_NUM,
   GPIR_INSTR_SLOT_END,
   /* aliases marking slot ranges */
   GPIR_INSTR_SLOT_ALU_BEGIN      = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_ALU_END        = GPIR_INSTR_SLOT_COMPLEX,
   GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_DIST_TWO_END   = GPIR_INSTR_SLOT_PASS,
};

/* One scheduled hardware instruction word, plus scheduler bookkeeping. */
typedef struct gpir_instr {
   int index;
   struct list_head list;

   gpir_node *slots[GPIR_INSTR_SLOT_NUM];

   /* The number of ALU slots free for moves. */
   int alu_num_slot_free;

   /* The number of ALU slots free for moves, except for the complex slot. */
   int alu_non_cplx_slot_free;

   /* We need to make sure that we can insert moves in the following cases:
    * (1) There was a use of a value two cycles ago.
    * (2) There were more than 5 uses of a value 1 cycle ago (or else we can't
    *     possibly satisfy (1) for the next cycle).
    * (3) There is a store instruction scheduled, but not its child.
    *
    * The complex slot cannot be used for a move in case (1), since it only
    * has a FIFO depth of 1, but it can be used for (2) as well as (3) as long
    * as the uses aren't in certain slots. It turns out that we don't have to
    * worry about nodes that can't use the complex slot for (2), since there
    * are at most 4 uses 1 cycle ago that can't use the complex slot, but we
    * do have to worry about (3). This means tracking stores whose children
    * cannot be in the complex slot. In order to ensure that we have enough
    * space for all three, we maintain the following invariants:
    *
    * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
    *       alu_num_slot_needed_by_max +
    *       max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
    * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
    *       alu_num_slot_needed_by_non_cplx_store
    *
    * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
    * nodes for the next instruction) but when there is a complex1 node in
    * this instruction it reduces to 4 to reserve a slot for complex2 in the
    * next instruction.
    */
   int alu_num_slot_needed_by_store;
   int alu_num_slot_needed_by_non_cplx_store;
   int alu_num_slot_needed_by_max;
   int alu_num_unscheduled_next_max;
   int alu_max_allowed_next_max;

   /* Used to communicate to the scheduler how many slots need to be cleared
    * up in order to satisfy the invariants.
    */
   int slot_difference;
   int non_cplx_slot_difference;

   int reg0_use_count;
   bool reg0_is_attr;
   int reg0_index;

   int reg1_use_count;
   int reg1_index;

   int mem_use_count;
   bool mem_is_temp;
   int mem_index;

   /* what each of the two store units is writing this cycle */
   enum {
      GPIR_INSTR_STORE_NONE,
      GPIR_INSTR_STORE_VARYING,
      GPIR_INSTR_STORE_REG,
      GPIR_INSTR_STORE_TEMP,
   } store_content[2];
   int store_index[2];
} gpir_instr;

/* A basic block: a list of nodes (pre-schedule) and instructions
 * (post-schedule), with CFG links and liveness sets for regalloc.
 */
typedef struct gpir_block {
   struct list_head list;
   struct list_head node_list;
   struct list_head instr_list;
   struct gpir_compiler *comp;

   struct gpir_block *successors[2];
   struct list_head predecessors;
   struct list_head predecessors_node;

   /* for regalloc */

   /* The set of live registers, i.e. registers whose value may be used
    * eventually, at the beginning of the block.
    */
   BITSET_WORD *live_in;

   /* Set of live registers at the end of the block. */
   BITSET_WORD *live_out;

   /* Set of registers that may have a value defined at the end of the
    * block.
    */
   BITSET_WORD *def_out;

   /* After register allocation, the set of live physical registers at the end
    * of the block. Needed for scheduling.
    */
   uint64_t live_out_phys;

   /* For codegen, the offset in the final program. */
   unsigned instr_offset;

   /* for scheduler */
   union {
      struct {
         int instr_index;
      } sched;
      struct {
         int node_index;
      } rsched;
   };
} gpir_block;

typedef struct {
   gpir_node node;
   /* branch target; the fall-through successor is implicit */
   gpir_block *dest;
   /* condition node, used by gpir_op_branch_cond */
   gpir_node *cond;
} gpir_branch_node;

struct lima_vs_compiled_shader;

/* indices into gpir_compiler::vector_ssa[] */
#define GPIR_VECTOR_SSA_VIEWPORT_SCALE  0
#define GPIR_VECTOR_SSA_VIEWPORT_OFFSET 1
#define GPIR_VECTOR_SSA_NUM 2

/* Per-shader compilation context. */
typedef struct gpir_compiler {
   struct list_head block_list;
   int cur_index;

   /* Find the gpir node for a given NIR SSA def. */
   gpir_node **node_for_ssa;

   /* Find the gpir node for a given NIR register. */
   gpir_node **node_for_reg;

   /* Find the gpir register for a given NIR SSA def. */
   gpir_reg **reg_for_ssa;

   /* Find the gpir register for a given NIR register. */
   gpir_reg **reg_for_reg;

   /* gpir block for NIR block. */
   gpir_block **blocks;

   /* for physical reg */
   struct list_head reg_list;
   int cur_reg;

   /* lookup for vector ssa */
   struct {
      int ssa;
      gpir_node *nodes[4];
   } vector_ssa[GPIR_VECTOR_SSA_NUM];

   struct lima_vs_compiled_shader *prog;
   int constant_base;

   /* shaderdb */
   int num_instr;
   int num_loops;
   int num_spills;
   int num_fills;
} gpir_compiler;

#define GPIR_VALUE_REG_NUM 11
#define GPIR_PHYSICAL_REG_NUM 64

void *gpir_node_create(gpir_block *block, gpir_op op);
gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type);
void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred);
void gpir_node_replace_succ(gpir_node *dst, gpir_node *src);
void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred);
void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child);
void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child);
void gpir_node_delete(gpir_node *node);
void gpir_node_print_prog_dep(gpir_compiler *comp);
void gpir_node_print_prog_seq(gpir_compiler *comp);

/* iterate the dependency edges of a node */
#define gpir_node_foreach_succ(node, dep) \
   list_for_each_entry(gpir_dep, dep, &node->succ_list, succ_link)
#define gpir_node_foreach_succ_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &node->succ_list, succ_link)
#define gpir_node_foreach_pred(node, dep) \
   list_for_each_entry(gpir_dep, dep, &node->pred_list, pred_link)
#define gpir_node_foreach_pred_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &node->pred_list, pred_link)

/* A root has no successors: no other node consumes its value. */
static inline bool gpir_node_is_root(gpir_node *node)
{
   return list_is_empty(&node->succ_list);
}

/* A leaf has no predecessors: it depends on no other node. */
static inline bool gpir_node_is_leaf(gpir_node *node)
{
   return list_is_empty(&node->pred_list);
}

/* downcasts from the embedded base gpir_node to the concrete subtype;
 * valid only when node->type matches */
#define gpir_node_to_alu(node) ((gpir_alu_node *)(node))
#define gpir_node_to_const(node) ((gpir_const_node *)(node))
#define gpir_node_to_load(node) ((gpir_load_node *)(node))
#define gpir_node_to_store(node) ((gpir_store_node *)(node))
#define gpir_node_to_branch(node) ((gpir_branch_node *)(node))

gpir_instr *gpir_instr_create(gpir_block *block);
bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
void gpir_instr_print_prog(gpir_compiler *comp);

bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);

/* compiler passes, in roughly the order they run */
bool gpir_optimize(gpir_compiler *comp);
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
bool gpir_regalloc_prog(gpir_compiler *comp);
bool gpir_schedule_prog(gpir_compiler *comp);
bool gpir_codegen_prog(gpir_compiler *comp);

gpir_reg *gpir_create_reg(gpir_compiler *comp);

#endif