1/* 2 * Copyright (c) 2017 Lima Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sub license, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the 12 * next paragraph) shall be included in all copies or substantial portions 13 * of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25#include "util/u_math.h" 26#include "util/ralloc.h" 27 28#include "gpir.h" 29 30const gpir_op_info gpir_op_infos[] = { 31 [gpir_op_unsupported] = { 32 .name = "unsupported", 33 }, 34 [gpir_op_mov] = { 35 .name = "mov", 36 .slots = (int []) { 37 GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1, 38 GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0, 39 GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_COMPLEX, 40 GPIR_INSTR_SLOT_END 41 }, 42 }, 43 [gpir_op_mul] = { 44 .name = "mul", 45 .dest_neg = true, 46 .slots = (int []) { GPIR_INSTR_SLOT_MUL1, GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END }, 47 }, 48 [gpir_op_select] = { 49 .name = "select", 50 .dest_neg = true, 51 .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END }, 52 .may_consume_two_slots = true, 53 }, 54 [gpir_op_complex1] = { 55 .name = "complex1", 56 .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END }, 57 .spillless = true, 58 .may_consume_two_slots = true, 59 }, 60 [gpir_op_complex2] = { 61 .name = "complex2", 62 .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END }, 63 .spillless = true, 64 .schedule_first = true, 65 }, 66 [gpir_op_add] = { 67 .name = "add", 68 .src_neg = {true, true, false, false}, 69 .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, 70 }, 71 [gpir_op_floor] = { 72 .name = "floor", 73 .src_neg = {true, false, false, false}, 74 .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, 75 .spillless = true, 76 .may_consume_two_slots = true, 77 }, 78 [gpir_op_sign] = { 79 .name = "sign", 80 .src_neg = {true, false, false, false}, 81 .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, 82 .spillless = true, 83 .may_consume_two_slots = true, 84 }, 85 [gpir_op_ge] = { 86 .name = "ge", 87 .src_neg = {true, true, false, false}, 88 .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, 89 .spillless = true, 90 .may_consume_two_slots = true, 91 }, 92 [gpir_op_lt] = { 93 .name = "lt", 94 .src_neg = {true, true, false, false}, 95 .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, 96 .spillless = true, 97 .may_consume_two_slots = true, 98 }, 99 [gpir_op_min] = { 100 .name = "min", 101 .src_neg = {true, true, false, false}, 102 .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, 103 .spillless = true, 104 .may_consume_two_slots = true, 105 }, 106 [gpir_op_max] = { 107 .name = "max", 108 .src_neg = {true, true, false, false}, 109 .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, 110 .spillless = true, 111 .may_consume_two_slots = true, 112 }, 113 [gpir_op_abs] = { 114 .name = "abs", 115 .src_neg = {true, true, false, false}, 116 }, 117 [gpir_op_neg] = { 118 .name = "neg", 119 .slots = (int []) { 120 GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1, 121 GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0, 122 GPIR_INSTR_SLOT_END 123 }, 124 }, 125 [gpir_op_not] = { 126 .name = "not", 127 .src_neg = {true, true, false, false}, 128 .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END }, 129 }, 130 [gpir_op_eq] = { 131 .name = "eq", 132 .slots = (int []) { 133 GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END 134 }, 135 }, 136 [gpir_op_ne] = { 137 .name = "ne", 138 .slots = (int []) { 139 GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END 140 }, 141 }, 142 [gpir_op_clamp_const] = { 143 .name = "clamp_const", 144 }, 145 [gpir_op_preexp2] = { 146 .name = "preexp2", 147 .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END }, 148 .spillless = true, 149 .schedule_first = true, 150 }, 151 [gpir_op_postlog2] = { 152 .name = "postlog2", 153 .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END }, 154 }, 155 [gpir_op_exp2_impl] = { 156 .name = "exp2_impl", 157 .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END }, 158 .spillless = true, 159 .schedule_first = true, 160 }, 161 [gpir_op_log2_impl] = { 162 .name = "log2_impl", 163 .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END }, 164 .spillless = true, 165 .schedule_first = true, 166 }, 167 [gpir_op_rcp_impl] = { 168 .name = "rcp_impl", 169 .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END }, 170 .spillless = true, 171 .schedule_first = true, 172 }, 173 [gpir_op_rsqrt_impl] = { 174 .name = "rsqrt_impl", 175 .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END }, 176 .spillless = true, 177 .schedule_first = true, 178 }, 179 [gpir_op_load_uniform] = { 180 .name = "ld_uni", 181 .slots = (int []) { 182 GPIR_INSTR_SLOT_MEM_LOAD0, GPIR_INSTR_SLOT_MEM_LOAD1, 183 GPIR_INSTR_SLOT_MEM_LOAD2, GPIR_INSTR_SLOT_MEM_LOAD3, 184 GPIR_INSTR_SLOT_END 185 }, 186 .type = gpir_node_type_load, 187 }, 188 [gpir_op_load_temp] = { 189 .name = "ld_tmp", 190 .type = gpir_node_type_load, 191 }, 192 [gpir_op_load_attribute] = { 193 .name = "ld_att", 194 .slots = (int []) { 195 GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1, 196 GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3, 197 GPIR_INSTR_SLOT_END 198 }, 199 .type = gpir_node_type_load, 200 }, 201 [gpir_op_load_reg] = { 202 .name = "ld_reg", 203 .slots = (int []) { 204 GPIR_INSTR_SLOT_REG1_LOAD0, GPIR_INSTR_SLOT_REG1_LOAD1, 205 GPIR_INSTR_SLOT_REG1_LOAD2, GPIR_INSTR_SLOT_REG1_LOAD3, 206 GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1, 207 GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3, 208 GPIR_INSTR_SLOT_END 209 }, 210 .type = gpir_node_type_load, 211 .spillless = true, 212 }, 213 [gpir_op_store_temp] = { 214 .name = "st_tmp", 215 .type = gpir_node_type_store, 216 }, 217 [gpir_op_store_reg] = { 218 .name = "st_reg", 219 .slots = (int []) { 220 GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1, 221 GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3, 222 GPIR_INSTR_SLOT_END 223 }, 224 .type = gpir_node_type_store, 225 .spillless = true, 226 }, 227 [gpir_op_store_varying] = { 228 .name = "st_var", 229 .slots = (int []) { 230 GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1, 231 GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3, 232 GPIR_INSTR_SLOT_END 233 }, 234 .type = gpir_node_type_store, 235 .spillless = true, 236 }, 237 [gpir_op_store_temp_load_off0] = { 238 .name = "st_of0", 239 .type = gpir_node_type_store, 240 }, 241 [gpir_op_store_temp_load_off1] = { 242 .name = "st_of1", 243 .type = gpir_node_type_store, 244 }, 245 [gpir_op_store_temp_load_off2] = { 246 .name = "st_of2", 247 .type = gpir_node_type_store, 248 }, 249 [gpir_op_branch_cond] = { 250 .name = "branch_cond", 251 .type = gpir_node_type_branch, 252 .schedule_first = true, 253 .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END }, 254 }, 255 [gpir_op_const] = { 256 .name = "const", 257 .type = gpir_node_type_const, 258 }, 259 [gpir_op_exp2] = { 260 .name = "exp2", 261 }, 262 [gpir_op_log2] = { 263 .name = "log2", 264 }, 265 [gpir_op_rcp] = { 266 .name = "rcp", 267 }, 268 [gpir_op_rsqrt] = { 269 .name = "rsqrt", 270 }, 271 [gpir_op_ceil] = { 272 .name = "ceil", 273 }, 274 [gpir_op_exp] = { 275 .name = "exp", 276 }, 277 [gpir_op_log] = { 278 .name = "log", 279 }, 280 [gpir_op_sin] = { 281 .name = "sin", 282 }, 283 [gpir_op_cos] = { 284 .name = "cos", 285 }, 286 [gpir_op_tan] = { 287 .name = "tan", 288 }, 289 [gpir_op_dummy_f] = { 290 .name = "dummy_f", 291 .type = gpir_node_type_alu, 292 .spillless = true, 293 }, 294 [gpir_op_dummy_m] = { 295 .name = "dummy_m", 296 .type = gpir_node_type_alu, 297 }, 298 [gpir_op_branch_uncond] = { 299 .name = "branch_uncond", 300 .type = gpir_node_type_branch, 301 }, 302}; 303 304void *gpir_node_create(gpir_block *block, gpir_op op) 305{ 306 static const int node_size[] = { 307 [gpir_node_type_alu] = sizeof(gpir_alu_node), 308 [gpir_node_type_const] = sizeof(gpir_const_node), 309 [gpir_node_type_load] = sizeof(gpir_load_node), 310 [gpir_node_type_store] = sizeof(gpir_store_node), 311 [gpir_node_type_branch] = sizeof(gpir_branch_node), 312 }; 313 314 gpir_node_type type = gpir_op_infos[op].type; 315 int size = node_size[type]; 316 gpir_node *node = rzalloc_size(block, size); 317 if (unlikely(!node)) 318 return NULL; 319 320 snprintf(node->name, sizeof(node->name), "new"); 321 322 list_inithead(&node->succ_list); 323 list_inithead(&node->pred_list); 324 325 node->op = op; 326 node->type = type; 327 node->index = block->comp->cur_index++; 328 node->block = block; 329 330 return node; 331} 332 333gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type) 334{ 335 /* don't add dep for two nodes from different block */ 336 if (succ->block != pred->block) 337 return NULL; 338 339 /* don't add self loop dep */ 340 if (succ == pred) 341 return NULL; 342 343 /* don't add duplicated dep */ 344 gpir_node_foreach_pred(succ, dep) { 345 if (dep->pred == pred) { 346 /* use stronger dependency */ 347 if (dep->type > type) 348 dep->type = type; 349 return dep; 350 } 351 } 352 353 gpir_dep *dep = ralloc(succ, gpir_dep); 354 dep->type = type; 355 dep->pred = pred; 356 dep->succ = succ; 357 list_addtail(&dep->pred_link, &succ->pred_list); 358 list_addtail(&dep->succ_link, &pred->succ_list); 359 return dep; 360} 361 362void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred) 363{ 364 gpir_node_foreach_pred(succ, dep) { 365 if (dep->pred == pred) { 366 list_del(&dep->succ_link); 367 list_del(&dep->pred_link); 368 ralloc_free(dep); 369 return; 370 } 371 } 372} 373 374void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, 375 gpir_node *new_child) 376{ 377 if (parent->type == gpir_node_type_alu) { 378 gpir_alu_node *alu = gpir_node_to_alu(parent); 379 for (int i = 0; i < alu->num_child; i++) { 380 if (alu->children[i] == old_child) 381 alu->children[i] = new_child; 382 } 383 } 384 else if (parent->type == gpir_node_type_store) { 385 gpir_store_node *store = gpir_node_to_store(parent); 386 if (store->child == old_child) 387 store->child = new_child; 388 } else if (parent->type == gpir_node_type_branch) { 389 gpir_branch_node *branch = gpir_node_to_branch(parent); 390 if (branch->cond == old_child) 391 branch->cond = new_child; 392 } 393} 394 395void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred) 396{ 397 list_del(&dep->succ_link); 398 dep->pred = new_pred; 399 list_addtail(&dep->succ_link, &new_pred->succ_list); 400} 401 402void gpir_node_replace_succ(gpir_node *dst, gpir_node *src) 403{ 404 gpir_node_foreach_succ_safe(src, dep) { 405 if (dep->type != GPIR_DEP_INPUT) 406 continue; 407 408 gpir_node_replace_pred(dep, dst); 409 gpir_node_replace_child(dep->succ, src, dst); 410 } 411} 412 413void gpir_node_insert_child(gpir_node *parent, gpir_node *child, 414 gpir_node *insert_child) 415{ 416 gpir_node_foreach_pred(parent, dep) { 417 if (dep->pred == child) { 418 gpir_node_replace_pred(dep, insert_child); 419 gpir_node_replace_child(parent, child, insert_child); 420 break; 421 } 422 } 423} 424 425void gpir_node_delete(gpir_node *node) 426{ 427 gpir_node_foreach_succ_safe(node, dep) { 428 list_del(&dep->succ_link); 429 list_del(&dep->pred_link); 430 ralloc_free(dep); 431 } 432 433 gpir_node_foreach_pred_safe(node, dep) { 434 list_del(&dep->succ_link); 435 list_del(&dep->pred_link); 436 ralloc_free(dep); 437 } 438 439 list_del(&node->list); 440 ralloc_free(node); 441} 442 443static void gpir_node_print_node(gpir_node *node, int type, int space) 444{ 445 static char *dep_name[] = { 446 [GPIR_DEP_INPUT] = "input", 447 [GPIR_DEP_OFFSET] = "offset", 448 [GPIR_DEP_READ_AFTER_WRITE] = "RaW", 449 [GPIR_DEP_WRITE_AFTER_READ] = "WaR", 450 }; 451 452 for (int i = 0; i < space; i++) 453 printf(" "); 454 printf("%s%s %d %s %s\n", node->printed && !gpir_node_is_leaf(node) ? "+" : "", 455 gpir_op_infos[node->op].name, node->index, node->name, dep_name[type]); 456 457 if (!node->printed) { 458 gpir_node_foreach_pred(node, dep) { 459 gpir_node_print_node(dep->pred, dep->type, space + 2); 460 } 461 462 node->printed = true; 463 } 464} 465 466void gpir_node_print_prog_dep(gpir_compiler *comp) 467{ 468 if (!(lima_debug & LIMA_DEBUG_GP)) 469 return; 470 471 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 472 list_for_each_entry(gpir_node, node, &block->node_list, list) { 473 node->printed = false; 474 } 475 } 476 477 printf("======== node prog dep ========\n"); 478 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 479 list_for_each_entry(gpir_node, node, &block->node_list, list) { 480 if (gpir_node_is_root(node)) 481 gpir_node_print_node(node, GPIR_DEP_INPUT, 0); 482 } 483 printf("----------------------------\n"); 484 } 485} 486 487void gpir_node_print_prog_seq(gpir_compiler *comp) 488{ 489 if (!(lima_debug & LIMA_DEBUG_GP)) 490 return; 491 492 int index = 0; 493 printf("======== node prog seq ========\n"); 494 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 495 list_for_each_entry(gpir_node, node, &block->node_list, list) { 496 printf("%03d: %s %d %s pred", index++, gpir_op_infos[node->op].name, 497 node->index, node->name); 498 gpir_node_foreach_pred(node, dep) { 499 printf(" %d", dep->pred->index); 500 } 501 printf(" succ"); 502 gpir_node_foreach_succ(node, dep) { 503 printf(" %d", dep->succ->index); 504 } 505 printf("\n"); 506 } 507 printf("----------------------------\n"); 508 } 509} 510