/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
22 * 23 */ 24 25#include "util/ralloc.h" 26 27#include "gpir.h" 28#include "lima_context.h" 29 30static bool gpir_lower_const(gpir_compiler *comp) 31{ 32 int num_constant = 0; 33 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 34 list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { 35 if (node->op == gpir_op_const) { 36 if (gpir_node_is_root(node)) 37 gpir_node_delete(node); 38 else 39 num_constant++; 40 } 41 } 42 } 43 44 if (num_constant) { 45 union fi *constant = ralloc_array(comp->prog, union fi, num_constant); 46 if (!constant) 47 return false; 48 49 comp->prog->constant = constant; 50 comp->prog->state.constant_size = num_constant * sizeof(union fi); 51 52 int index = 0; 53 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 54 list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { 55 if (node->op == gpir_op_const) { 56 gpir_const_node *c = gpir_node_to_const(node); 57 58 if (!gpir_node_is_root(node)) { 59 gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform); 60 if (unlikely(!load)) 61 return false; 62 63 load->index = comp->constant_base + (index >> 2); 64 load->component = index % 4; 65 constant[index++] = c->value; 66 67 gpir_node_replace_succ(&load->node, node); 68 69 list_addtail(&load->node.list, &node->list); 70 71 gpir_debug("lower const create uniform %d for const %d\n", 72 load->node.index, node->index); 73 } 74 75 gpir_node_delete(node); 76 } 77 } 78 } 79 } 80 81 return true; 82} 83 84/* duplicate load to all its successors */ 85static bool gpir_lower_load(gpir_compiler *comp) 86{ 87 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 88 list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { 89 if (node->type == gpir_node_type_load) { 90 gpir_load_node *load = gpir_node_to_load(node); 91 92 bool first = true; 93 gpir_node_foreach_succ_safe(node, dep) { 94 gpir_node *succ = dep->succ; 95 96 if (first) { 97 first = false; 98 continue; 99 } 100 101 
gpir_node *new = gpir_node_create(succ->block, node->op); 102 if (unlikely(!new)) 103 return false; 104 list_addtail(&new->list, &succ->list); 105 106 gpir_debug("lower load create %d from %d for succ %d\n", 107 new->index, node->index, succ->index); 108 109 gpir_load_node *nload = gpir_node_to_load(new); 110 nload->index = load->index; 111 nload->component = load->component; 112 nload->reg = load->reg; 113 114 gpir_node_replace_pred(dep, new); 115 gpir_node_replace_child(succ, node, new); 116 } 117 } 118 } 119 } 120 121 return true; 122} 123 124static bool gpir_lower_neg(gpir_block *block, gpir_node *node) 125{ 126 gpir_alu_node *neg = gpir_node_to_alu(node); 127 gpir_node *child = neg->children[0]; 128 129 /* check if child can dest negate */ 130 if (child->type == gpir_node_type_alu) { 131 /* negate must be its only successor */ 132 if (list_is_singular(&child->succ_list) && 133 gpir_op_infos[child->op].dest_neg) { 134 gpir_alu_node *alu = gpir_node_to_alu(child); 135 alu->dest_negate = !alu->dest_negate; 136 137 gpir_node_replace_succ(child, node); 138 gpir_node_delete(node); 139 return true; 140 } 141 } 142 143 /* check if child can src negate */ 144 gpir_node_foreach_succ_safe(node, dep) { 145 gpir_node *succ = dep->succ; 146 if (succ->type != gpir_node_type_alu) 147 continue; 148 149 bool success = true; 150 gpir_alu_node *alu = gpir_node_to_alu(dep->succ); 151 for (int i = 0; i < alu->num_child; i++) { 152 if (alu->children[i] == node) { 153 if (gpir_op_infos[succ->op].src_neg[i]) { 154 alu->children_negate[i] = !alu->children_negate[i]; 155 alu->children[i] = child; 156 } 157 else 158 success = false; 159 } 160 } 161 162 if (success) 163 gpir_node_replace_pred(dep, child); 164 } 165 166 if (gpir_node_is_root(node)) 167 gpir_node_delete(node); 168 169 return true; 170} 171 172static bool gpir_lower_complex(gpir_block *block, gpir_node *node) 173{ 174 gpir_alu_node *alu = gpir_node_to_alu(node); 175 gpir_node *child = alu->children[0]; 176 177 if (node->op 
== gpir_op_exp2) { 178 gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2); 179 if (unlikely(!preexp2)) 180 return false; 181 182 preexp2->children[0] = child; 183 preexp2->num_child = 1; 184 gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT); 185 list_addtail(&preexp2->node.list, &node->list); 186 187 child = &preexp2->node; 188 } 189 190 gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2); 191 if (unlikely(!complex2)) 192 return false; 193 194 complex2->children[0] = child; 195 complex2->num_child = 1; 196 gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT); 197 list_addtail(&complex2->node.list, &node->list); 198 199 int impl_op = 0; 200 switch (node->op) { 201 case gpir_op_rcp: 202 impl_op = gpir_op_rcp_impl; 203 break; 204 case gpir_op_rsqrt: 205 impl_op = gpir_op_rsqrt_impl; 206 break; 207 case gpir_op_exp2: 208 impl_op = gpir_op_exp2_impl; 209 break; 210 case gpir_op_log2: 211 impl_op = gpir_op_log2_impl; 212 break; 213 default: 214 assert(0); 215 } 216 217 gpir_alu_node *impl = gpir_node_create(block, impl_op); 218 if (unlikely(!impl)) 219 return false; 220 221 impl->children[0] = child; 222 impl->num_child = 1; 223 gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT); 224 list_addtail(&impl->node.list, &node->list); 225 226 gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1); 227 complex1->children[0] = &impl->node; 228 complex1->children[1] = &complex2->node; 229 complex1->children[2] = child; 230 complex1->num_child = 3; 231 gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT); 232 gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT); 233 gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT); 234 list_addtail(&complex1->node.list, &node->list); 235 236 gpir_node *result = &complex1->node; 237 238 if (node->op == gpir_op_log2) { 239 gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2); 240 if (unlikely(!postlog2)) 241 return false; 242 243 
postlog2->children[0] = result; 244 postlog2->num_child = 1; 245 gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT); 246 list_addtail(&postlog2->node.list, &node->list); 247 248 result = &postlog2->node; 249 } 250 251 gpir_node_replace_succ(result, node); 252 gpir_node_delete(node); 253 254 return true; 255} 256 257static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp) 258{ 259 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 260 list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { 261 if (gpir_op_infos[node->op].may_consume_two_slots) { 262 /* dummy_f/m are auxiliary nodes for value reg alloc: 263 * 1. before reg alloc, create fake nodes dummy_f, dummy_m, 264 * so the tree become: (dummy_m (node dummy_f)) 265 * dummy_m can be spilled, but other nodes in the tree can't 266 * be spilled. 267 * 2. After reg allocation and fake dep add, merge all deps of 268 * dummy_m and dummy_f to node and remove dummy_m & dummy_f 269 * 270 * We may also not use dummy_f/m, but alloc two value reg for 271 * node. But that means we need to make sure there're 2 free 272 * slot after the node successors, but we just need one slot 273 * after to be able to schedule it because we can use one move for 274 * the two slot node. It's also not easy to handle the spill case 275 * for the alloc 2 value method. 276 * 277 * With the dummy_f/m method, there's no such requirement, the 278 * node can be scheduled only when there's two slots for it, 279 * otherwise a move. And the node can be spilled with one reg. 
280 */ 281 gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m); 282 if (unlikely(!dummy_m)) 283 return false; 284 list_add(&dummy_m->list, &node->list); 285 286 gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f); 287 if (unlikely(!dummy_f)) 288 return false; 289 list_add(&dummy_f->list, &node->list); 290 291 gpir_alu_node *alu = gpir_node_to_alu(dummy_m); 292 alu->children[0] = node; 293 alu->children[1] = dummy_f; 294 alu->num_child = 2; 295 296 gpir_node_replace_succ(dummy_m, node); 297 gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT); 298 gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT); 299 300 } 301 } 302 } 303 304 return true; 305} 306 307/* 308 * There are no 'equal' or 'not-equal' opcodes. 309 * eq (a == b) is lowered to and(a >= b, b >= a) 310 * ne (a != b) is lowered to or(a < b, b < a) 311 */ 312static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node) 313{ 314 gpir_op cmp_node_op; 315 gpir_op node_new_op; 316 switch (node->op) { 317 case gpir_op_eq: 318 cmp_node_op = gpir_op_ge; 319 node_new_op = gpir_op_min; /* and */ 320 break; 321 case gpir_op_ne: 322 cmp_node_op = gpir_op_lt; 323 node_new_op = gpir_op_max; /* or */ 324 break; 325 default: 326 unreachable("bad node op"); 327 } 328 329 gpir_alu_node *e = gpir_node_to_alu(node); 330 331 gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op); 332 list_addtail(&cmp1->node.list, &node->list); 333 gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op); 334 list_addtail(&cmp2->node.list, &node->list); 335 336 cmp1->children[0] = e->children[0]; 337 cmp1->children[1] = e->children[1]; 338 cmp1->num_child = 2; 339 340 cmp2->children[0] = e->children[1]; 341 cmp2->children[1] = e->children[0]; 342 cmp2->num_child = 2; 343 344 gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT); 345 gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT); 346 347 gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT); 348 gpir_node_add_dep(&cmp2->node, 
e->children[1], GPIR_DEP_INPUT); 349 350 gpir_node_foreach_pred_safe(node, dep) { 351 gpir_node_remove_dep(node, dep->pred); 352 } 353 354 gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT); 355 gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT); 356 357 node->op = node_new_op; 358 e->children[0] = &cmp1->node; 359 e->children[1] = &cmp2->node; 360 e->num_child = 2; 361 362 return true; 363} 364 365/* 366 * There is no 'abs' opcode. 367 * abs(a) is lowered to max(a, -a) 368 */ 369static bool gpir_lower_abs(gpir_block *block, gpir_node *node) 370{ 371 gpir_alu_node *alu = gpir_node_to_alu(node); 372 373 assert(node->op == gpir_op_abs); 374 375 node->op = gpir_op_max; 376 377 alu->children[1] = alu->children[0]; 378 alu->children_negate[1] = true; 379 alu->num_child = 2; 380 381 return true; 382} 383 384/* 385 * There is no 'not' opcode. 386 * not(a) is lowered to add(1, -a) 387 */ 388static bool gpir_lower_not(gpir_block *block, gpir_node *node) 389{ 390 gpir_alu_node *alu = gpir_node_to_alu(node); 391 392 assert(alu->node.op == gpir_op_not); 393 394 node->op = gpir_op_add; 395 396 gpir_node *node_const = gpir_node_create(block, gpir_op_const); 397 gpir_const_node *c = gpir_node_to_const(node_const); 398 399 assert(c->node.op == gpir_op_const); 400 401 list_addtail(&c->node.list, &node->list); 402 c->value.f = 1.0f; 403 gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT); 404 405 alu->children_negate[1] = !alu->children_negate[0]; 406 alu->children[1] = alu->children[0]; 407 alu->children[0] = &c->node; 408 alu->num_child = 2; 409 410 return true; 411} 412 413/* There is no unconditional branch instruction, so we have to lower it to a 414 * conditional branch with a condition of 1.0. 
415 */ 416 417static bool gpir_lower_branch_uncond(gpir_block *block, gpir_node *node) 418{ 419 gpir_branch_node *branch = gpir_node_to_branch(node); 420 421 gpir_node *node_const = gpir_node_create(block, gpir_op_const); 422 gpir_const_node *c = gpir_node_to_const(node_const); 423 424 list_addtail(&c->node.list, &node->list); 425 c->value.f = 1.0f; 426 gpir_node_add_dep(&branch->node, &c->node, GPIR_DEP_INPUT); 427 428 branch->node.op = gpir_op_branch_cond; 429 branch->cond = node_const; 430 431 return true; 432} 433 434static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = { 435 [gpir_op_not] = gpir_lower_not, 436 [gpir_op_neg] = gpir_lower_neg, 437 [gpir_op_rcp] = gpir_lower_complex, 438 [gpir_op_rsqrt] = gpir_lower_complex, 439 [gpir_op_exp2] = gpir_lower_complex, 440 [gpir_op_log2] = gpir_lower_complex, 441 [gpir_op_eq] = gpir_lower_eq_ne, 442 [gpir_op_ne] = gpir_lower_eq_ne, 443 [gpir_op_abs] = gpir_lower_abs, 444 [gpir_op_branch_uncond] = gpir_lower_branch_uncond, 445}; 446 447bool gpir_pre_rsched_lower_prog(gpir_compiler *comp) 448{ 449 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 450 list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { 451 if (gpir_pre_rsched_lower_funcs[node->op] && 452 !gpir_pre_rsched_lower_funcs[node->op](block, node)) 453 return false; 454 } 455 } 456 457 if (!gpir_lower_const(comp)) 458 return false; 459 460 if (!gpir_lower_load(comp)) 461 return false; 462 463 if (!gpir_lower_node_may_consume_two_slots(comp)) 464 return false; 465 466 gpir_debug("pre rsched lower prog\n"); 467 gpir_node_print_prog_seq(comp); 468 return true; 469} 470 471