1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (c) 2017 Lima Project 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 12bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 13bf215546Sopenharmony_ci * of the Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include <string.h> 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "util/ralloc.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "gpir.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cigpir_instr *gpir_instr_create(gpir_block *block) 32bf215546Sopenharmony_ci{ 33bf215546Sopenharmony_ci gpir_instr *instr = rzalloc(block, gpir_instr); 34bf215546Sopenharmony_ci if (unlikely(!instr)) 35bf215546Sopenharmony_ci return NULL; 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci block->comp->num_instr++; 38bf215546Sopenharmony_ci if (block->comp->num_instr > 512) { 39bf215546Sopenharmony_ci gpir_error("shader exceeds limit of 512 instructions\n"); 40bf215546Sopenharmony_ci return NULL; 41bf215546Sopenharmony_ci } 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci instr->index = block->sched.instr_index++; 44bf215546Sopenharmony_ci instr->alu_num_slot_free = 6; 45bf215546Sopenharmony_ci instr->alu_non_cplx_slot_free = 5; 46bf215546Sopenharmony_ci instr->alu_max_allowed_next_max = 5; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci list_add(&instr->list, &block->instr_list); 49bf215546Sopenharmony_ci return instr; 50bf215546Sopenharmony_ci} 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_cistatic gpir_node *gpir_instr_get_the_other_acc_node(gpir_instr *instr, int slot) 53bf215546Sopenharmony_ci{ 54bf215546Sopenharmony_ci if (slot == GPIR_INSTR_SLOT_ADD0) 55bf215546Sopenharmony_ci return instr->slots[GPIR_INSTR_SLOT_ADD1]; 56bf215546Sopenharmony_ci else if (slot == GPIR_INSTR_SLOT_ADD1) 57bf215546Sopenharmony_ci return instr->slots[GPIR_INSTR_SLOT_ADD0]; 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci return NULL; 60bf215546Sopenharmony_ci} 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_cistatic bool gpir_instr_check_acc_same_op(gpir_instr *instr, gpir_node *node, int slot) 63bf215546Sopenharmony_ci{ 64bf215546Sopenharmony_ci /* two ACC slots must share the same op code */ 65bf215546Sopenharmony_ci gpir_node *acc_node = gpir_instr_get_the_other_acc_node(instr, slot); 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci /* spill move case may get acc_node == node */ 68bf215546Sopenharmony_ci if (acc_node && acc_node != node && 69bf215546Sopenharmony_ci !gpir_codegen_acc_same_op(node->op, acc_node->op)) 70bf215546Sopenharmony_ci return false; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci return true; 73bf215546Sopenharmony_ci} 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_cistatic int gpir_instr_get_consume_slot(gpir_instr *instr, gpir_node *node) 76bf215546Sopenharmony_ci{ 77bf215546Sopenharmony_ci if (gpir_op_infos[node->op].may_consume_two_slots) { 78bf215546Sopenharmony_ci gpir_node *acc_node = gpir_instr_get_the_other_acc_node(instr, node->sched.pos); 79bf215546Sopenharmony_ci if (acc_node) 80bf215546Sopenharmony_ci /* at this point node must have the same acc op with acc_node, 81bf215546Sopenharmony_ci * so it just consumes the extra slot acc_node consumed */ 82bf215546Sopenharmony_ci return 0; 83bf215546Sopenharmony_ci else 84bf215546Sopenharmony_ci return 2; 85bf215546Sopenharmony_ci } 86bf215546Sopenharmony_ci else 87bf215546Sopenharmony_ci return 1; 88bf215546Sopenharmony_ci} 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_cistatic bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node) 91bf215546Sopenharmony_ci{ 92bf215546Sopenharmony_ci if (!gpir_instr_check_acc_same_op(instr, node, node->sched.pos)) 93bf215546Sopenharmony_ci return false; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci if (node->sched.next_max_node && !node->sched.complex_allowed && 96bf215546Sopenharmony_ci node->sched.pos == GPIR_INSTR_SLOT_COMPLEX) 97bf215546Sopenharmony_ci return false; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci int consume_slot = gpir_instr_get_consume_slot(instr, node); 100bf215546Sopenharmony_ci int non_cplx_consume_slot = 101bf215546Sopenharmony_ci node->sched.pos == GPIR_INSTR_SLOT_COMPLEX ? 0 : consume_slot; 102bf215546Sopenharmony_ci int store_reduce_slot = 0; 103bf215546Sopenharmony_ci int non_cplx_store_reduce_slot = 0; 104bf215546Sopenharmony_ci int max_reduce_slot = node->sched.max_node ? 1 : 0; 105bf215546Sopenharmony_ci int next_max_reduce_slot = node->sched.next_max_node ? 1 : 0; 106bf215546Sopenharmony_ci int alu_new_max_allowed_next_max = 107bf215546Sopenharmony_ci node->op == gpir_op_complex1 ? 4 : instr->alu_max_allowed_next_max; 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci /* check if this node is child of one store node. 110bf215546Sopenharmony_ci * complex1 won't be any of this instr's store node's child, 111bf215546Sopenharmony_ci * because it has two instr latency before store can use it. 112bf215546Sopenharmony_ci */ 113bf215546Sopenharmony_ci for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) { 114bf215546Sopenharmony_ci gpir_store_node *s = gpir_node_to_store(instr->slots[i]); 115bf215546Sopenharmony_ci if (s && s->child == node) { 116bf215546Sopenharmony_ci store_reduce_slot = 1; 117bf215546Sopenharmony_ci if (node->sched.next_max_node && !node->sched.complex_allowed) 118bf215546Sopenharmony_ci non_cplx_store_reduce_slot = 1; 119bf215546Sopenharmony_ci break; 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci } 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci /* Check that the invariants will be maintained after we adjust everything 124bf215546Sopenharmony_ci */ 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci int slot_difference = 127bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_store - store_reduce_slot + 128bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_max - max_reduce_slot + 129bf215546Sopenharmony_ci MAX2(instr->alu_num_unscheduled_next_max - next_max_reduce_slot - 130bf215546Sopenharmony_ci alu_new_max_allowed_next_max, 0) - 131bf215546Sopenharmony_ci (instr->alu_num_slot_free - consume_slot); 132bf215546Sopenharmony_ci if (slot_difference > 0) { 133bf215546Sopenharmony_ci gpir_debug("failed %d because of alu slot\n", node->index); 134bf215546Sopenharmony_ci instr->slot_difference = slot_difference; 135bf215546Sopenharmony_ci } 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci int non_cplx_slot_difference = 138bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_max - max_reduce_slot + 139bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_non_cplx_store - non_cplx_store_reduce_slot - 140bf215546Sopenharmony_ci (instr->alu_non_cplx_slot_free - non_cplx_consume_slot); 141bf215546Sopenharmony_ci if (non_cplx_slot_difference > 0) { 142bf215546Sopenharmony_ci gpir_debug("failed %d because of alu slot\n", node->index); 143bf215546Sopenharmony_ci instr->non_cplx_slot_difference = non_cplx_slot_difference; 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci if (slot_difference > 0 || non_cplx_slot_difference > 0) 147bf215546Sopenharmony_ci return false; 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci instr->alu_num_slot_free -= consume_slot; 150bf215546Sopenharmony_ci instr->alu_non_cplx_slot_free -= non_cplx_consume_slot; 151bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_store -= store_reduce_slot; 152bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_non_cplx_store -= non_cplx_store_reduce_slot; 153bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_max -= max_reduce_slot; 154bf215546Sopenharmony_ci instr->alu_num_unscheduled_next_max -= next_max_reduce_slot; 155bf215546Sopenharmony_ci instr->alu_max_allowed_next_max = alu_new_max_allowed_next_max; 156bf215546Sopenharmony_ci return true; 157bf215546Sopenharmony_ci} 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_cistatic void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node) 160bf215546Sopenharmony_ci{ 161bf215546Sopenharmony_ci int consume_slot = gpir_instr_get_consume_slot(instr, node); 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) { 164bf215546Sopenharmony_ci gpir_store_node *s = gpir_node_to_store(instr->slots[i]); 165bf215546Sopenharmony_ci if (s && s->child == node) { 166bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_store++; 167bf215546Sopenharmony_ci if (node->sched.next_max_node && !node->sched.complex_allowed) 168bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_non_cplx_store++; 169bf215546Sopenharmony_ci break; 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci } 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci instr->alu_num_slot_free += consume_slot; 174bf215546Sopenharmony_ci if (node->sched.pos != GPIR_INSTR_SLOT_COMPLEX) 175bf215546Sopenharmony_ci instr->alu_non_cplx_slot_free += consume_slot; 176bf215546Sopenharmony_ci if (node->sched.max_node) 177bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_max++; 178bf215546Sopenharmony_ci if (node->sched.next_max_node) 179bf215546Sopenharmony_ci instr->alu_num_unscheduled_next_max++; 180bf215546Sopenharmony_ci if (node->op == gpir_op_complex1) 181bf215546Sopenharmony_ci instr->alu_max_allowed_next_max = 5; 182bf215546Sopenharmony_ci} 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_cistatic bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node) 185bf215546Sopenharmony_ci{ 186bf215546Sopenharmony_ci gpir_load_node *load = gpir_node_to_load(node); 187bf215546Sopenharmony_ci int i = node->sched.pos - GPIR_INSTR_SLOT_REG0_LOAD0; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci if (load->component != i) 190bf215546Sopenharmony_ci return false; 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci if (instr->reg0_is_attr && node->op != gpir_op_load_attribute) 193bf215546Sopenharmony_ci return false; 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci if (instr->reg0_use_count) { 196bf215546Sopenharmony_ci if (instr->reg0_index != load->index) 197bf215546Sopenharmony_ci return false; 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci else { 200bf215546Sopenharmony_ci instr->reg0_is_attr = node->op == gpir_op_load_attribute; 201bf215546Sopenharmony_ci instr->reg0_index = load->index; 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci instr->reg0_use_count++; 205bf215546Sopenharmony_ci return true; 206bf215546Sopenharmony_ci} 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_cistatic void gpir_instr_remove_reg0(gpir_instr *instr, gpir_node *node) 209bf215546Sopenharmony_ci{ 210bf215546Sopenharmony_ci instr->reg0_use_count--; 211bf215546Sopenharmony_ci if (!instr->reg0_use_count) 212bf215546Sopenharmony_ci instr->reg0_is_attr = false; 213bf215546Sopenharmony_ci} 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_cistatic bool gpir_instr_insert_reg1_check(gpir_instr *instr, gpir_node *node) 216bf215546Sopenharmony_ci{ 217bf215546Sopenharmony_ci gpir_load_node *load = gpir_node_to_load(node); 218bf215546Sopenharmony_ci int i = node->sched.pos - GPIR_INSTR_SLOT_REG1_LOAD0; 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci if (load->component != i) 221bf215546Sopenharmony_ci return false; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci if (instr->reg1_use_count) { 224bf215546Sopenharmony_ci if (instr->reg1_index != load->index) 225bf215546Sopenharmony_ci return false; 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci else 228bf215546Sopenharmony_ci instr->reg1_index = load->index; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci instr->reg1_use_count++; 231bf215546Sopenharmony_ci return true; 232bf215546Sopenharmony_ci} 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_cistatic void gpir_instr_remove_reg1(gpir_instr *instr, gpir_node *node) 235bf215546Sopenharmony_ci{ 236bf215546Sopenharmony_ci instr->reg1_use_count--; 237bf215546Sopenharmony_ci} 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_cistatic bool gpir_instr_insert_mem_check(gpir_instr *instr, gpir_node *node) 240bf215546Sopenharmony_ci{ 241bf215546Sopenharmony_ci gpir_load_node *load = gpir_node_to_load(node); 242bf215546Sopenharmony_ci int i = node->sched.pos - GPIR_INSTR_SLOT_MEM_LOAD0; 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci if (load->component != i) 245bf215546Sopenharmony_ci return false; 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci if (instr->mem_is_temp && node->op != gpir_op_load_temp) 248bf215546Sopenharmony_ci return false; 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci if (instr->mem_use_count) { 251bf215546Sopenharmony_ci if (instr->mem_index != load->index) 252bf215546Sopenharmony_ci return false; 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci else { 255bf215546Sopenharmony_ci instr->mem_is_temp = node->op == gpir_op_load_temp; 256bf215546Sopenharmony_ci instr->mem_index = load->index; 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci instr->mem_use_count++; 260bf215546Sopenharmony_ci return true; 261bf215546Sopenharmony_ci} 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_cistatic void gpir_instr_remove_mem(gpir_instr *instr, gpir_node *node) 264bf215546Sopenharmony_ci{ 265bf215546Sopenharmony_ci instr->mem_use_count--; 266bf215546Sopenharmony_ci if (!instr->mem_use_count) 267bf215546Sopenharmony_ci instr->mem_is_temp = false; 268bf215546Sopenharmony_ci} 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_cistatic bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node) 271bf215546Sopenharmony_ci{ 272bf215546Sopenharmony_ci gpir_store_node *store = gpir_node_to_store(node); 273bf215546Sopenharmony_ci int i = node->sched.pos - GPIR_INSTR_SLOT_STORE0; 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci if (store->component != i) 276bf215546Sopenharmony_ci return false; 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci i >>= 1; 279bf215546Sopenharmony_ci switch (instr->store_content[i]) { 280bf215546Sopenharmony_ci case GPIR_INSTR_STORE_NONE: 281bf215546Sopenharmony_ci /* store temp has only one address reg for two store unit */ 282bf215546Sopenharmony_ci if (node->op == gpir_op_store_temp && 283bf215546Sopenharmony_ci instr->store_content[!i] == GPIR_INSTR_STORE_TEMP && 284bf215546Sopenharmony_ci instr->store_index[!i] != store->index) 285bf215546Sopenharmony_ci return false; 286bf215546Sopenharmony_ci break; 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci case GPIR_INSTR_STORE_VARYING: 289bf215546Sopenharmony_ci if (node->op != gpir_op_store_varying || 290bf215546Sopenharmony_ci instr->store_index[i] != store->index) 291bf215546Sopenharmony_ci return false; 292bf215546Sopenharmony_ci break; 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci case GPIR_INSTR_STORE_REG: 295bf215546Sopenharmony_ci if (node->op != gpir_op_store_reg || 296bf215546Sopenharmony_ci instr->store_index[i] != store->index) 297bf215546Sopenharmony_ci return false; 298bf215546Sopenharmony_ci break; 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci case GPIR_INSTR_STORE_TEMP: 301bf215546Sopenharmony_ci if (node->op != gpir_op_store_temp || 302bf215546Sopenharmony_ci instr->store_index[i] != store->index) 303bf215546Sopenharmony_ci return false; 304bf215546Sopenharmony_ci break; 305bf215546Sopenharmony_ci } 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci /* check if any store node has the same child as this node */ 308bf215546Sopenharmony_ci for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) { 309bf215546Sopenharmony_ci gpir_store_node *s = gpir_node_to_store(instr->slots[j]); 310bf215546Sopenharmony_ci if (s && s->child == store->child) 311bf215546Sopenharmony_ci goto out; 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci /* check if the child is already in this instr's alu slot, 315bf215546Sopenharmony_ci * this may happen when store an scheduled alu node to reg 316bf215546Sopenharmony_ci */ 317bf215546Sopenharmony_ci for (int j = GPIR_INSTR_SLOT_ALU_BEGIN; j <= GPIR_INSTR_SLOT_ALU_END; j++) { 318bf215546Sopenharmony_ci if (store->child == instr->slots[j]) 319bf215546Sopenharmony_ci goto out; 320bf215546Sopenharmony_ci } 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci /* Check the invariants documented in gpir.h, similar to the ALU case. 323bf215546Sopenharmony_ci * When the only thing that changes is alu_num_slot_needed_by_store, we 324bf215546Sopenharmony_ci * can get away with just checking the first one. 325bf215546Sopenharmony_ci */ 326bf215546Sopenharmony_ci int slot_difference = instr->alu_num_slot_needed_by_store + 1 327bf215546Sopenharmony_ci + instr->alu_num_slot_needed_by_max + 328bf215546Sopenharmony_ci MAX2(instr->alu_num_unscheduled_next_max - instr->alu_max_allowed_next_max, 0) - 329bf215546Sopenharmony_ci instr->alu_num_slot_free; 330bf215546Sopenharmony_ci if (slot_difference > 0) { 331bf215546Sopenharmony_ci instr->slot_difference = slot_difference; 332bf215546Sopenharmony_ci return false; 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci if (store->child->sched.next_max_node && 336bf215546Sopenharmony_ci !store->child->sched.complex_allowed) { 337bf215546Sopenharmony_ci /* The child of the store is already partially ready, and has a use one 338bf215546Sopenharmony_ci * cycle ago that disqualifies it (or a move replacing it) from being 339bf215546Sopenharmony_ci * put in the complex slot. Therefore we have to check the non-complex 340bf215546Sopenharmony_ci * invariant. 341bf215546Sopenharmony_ci */ 342bf215546Sopenharmony_ci int non_cplx_slot_difference = 343bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_max + 344bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_non_cplx_store + 1 - 345bf215546Sopenharmony_ci instr->alu_non_cplx_slot_free; 346bf215546Sopenharmony_ci if (non_cplx_slot_difference > 0) { 347bf215546Sopenharmony_ci instr->non_cplx_slot_difference = non_cplx_slot_difference; 348bf215546Sopenharmony_ci return false; 349bf215546Sopenharmony_ci } 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_non_cplx_store++; 352bf215546Sopenharmony_ci } 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_store++; 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ciout: 357bf215546Sopenharmony_ci if (instr->store_content[i] == GPIR_INSTR_STORE_NONE) { 358bf215546Sopenharmony_ci if (node->op == gpir_op_store_varying) 359bf215546Sopenharmony_ci instr->store_content[i] = GPIR_INSTR_STORE_VARYING; 360bf215546Sopenharmony_ci else if (node->op == gpir_op_store_reg) 361bf215546Sopenharmony_ci instr->store_content[i] = GPIR_INSTR_STORE_REG; 362bf215546Sopenharmony_ci else 363bf215546Sopenharmony_ci instr->store_content[i] = GPIR_INSTR_STORE_TEMP; 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci instr->store_index[i] = store->index; 366bf215546Sopenharmony_ci } 367bf215546Sopenharmony_ci return true; 368bf215546Sopenharmony_ci} 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_cistatic void gpir_instr_remove_store(gpir_instr *instr, gpir_node *node) 371bf215546Sopenharmony_ci{ 372bf215546Sopenharmony_ci gpir_store_node *store = gpir_node_to_store(node); 373bf215546Sopenharmony_ci int component = node->sched.pos - GPIR_INSTR_SLOT_STORE0; 374bf215546Sopenharmony_ci int other_slot = GPIR_INSTR_SLOT_STORE0 + (component ^ 1); 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) { 377bf215546Sopenharmony_ci if (j == node->sched.pos) 378bf215546Sopenharmony_ci continue; 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci gpir_store_node *s = gpir_node_to_store(instr->slots[j]); 381bf215546Sopenharmony_ci if (s && s->child == store->child) 382bf215546Sopenharmony_ci goto out; 383bf215546Sopenharmony_ci } 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci for (int j = GPIR_INSTR_SLOT_ALU_BEGIN; j <= GPIR_INSTR_SLOT_ALU_END; j++) { 386bf215546Sopenharmony_ci if (store->child == instr->slots[j]) 387bf215546Sopenharmony_ci goto out; 388bf215546Sopenharmony_ci } 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_store--; 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci if (store->child->sched.next_max_node && 393bf215546Sopenharmony_ci !store->child->sched.complex_allowed) { 394bf215546Sopenharmony_ci instr->alu_num_slot_needed_by_non_cplx_store--; 395bf215546Sopenharmony_ci } 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ciout: 398bf215546Sopenharmony_ci if (!instr->slots[other_slot]) 399bf215546Sopenharmony_ci instr->store_content[component >> 1] = GPIR_INSTR_STORE_NONE; 400bf215546Sopenharmony_ci} 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_cistatic bool gpir_instr_spill_move(gpir_instr *instr, int slot, int spill_to_start) 403bf215546Sopenharmony_ci{ 404bf215546Sopenharmony_ci gpir_node *node = instr->slots[slot]; 405bf215546Sopenharmony_ci if (!node) 406bf215546Sopenharmony_ci return true; 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci if (node->op != gpir_op_mov) 409bf215546Sopenharmony_ci return false; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci for (int i = spill_to_start; i <= GPIR_INSTR_SLOT_DIST_TWO_END; i++) { 412bf215546Sopenharmony_ci if (i != slot && !instr->slots[i] && 413bf215546Sopenharmony_ci gpir_instr_check_acc_same_op(instr, node, i)) { 414bf215546Sopenharmony_ci instr->slots[i] = node; 415bf215546Sopenharmony_ci instr->slots[slot] = NULL; 416bf215546Sopenharmony_ci node->sched.pos = i; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci gpir_debug("instr %d spill move %d from slot %d to %d\n", 419bf215546Sopenharmony_ci instr->index, node->index, slot, i); 420bf215546Sopenharmony_ci return true; 421bf215546Sopenharmony_ci } 422bf215546Sopenharmony_ci } 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci return false; 425bf215546Sopenharmony_ci} 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_cistatic bool gpir_instr_slot_free(gpir_instr *instr, gpir_node *node) 428bf215546Sopenharmony_ci{ 429bf215546Sopenharmony_ci if (node->op == gpir_op_mov || 430bf215546Sopenharmony_ci node->sched.pos > GPIR_INSTR_SLOT_DIST_TWO_END) { 431bf215546Sopenharmony_ci if (instr->slots[node->sched.pos]) 432bf215546Sopenharmony_ci return false; 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci else { 435bf215546Sopenharmony_ci /* for node needs dist two slot, if the slot has a move, we can 436bf215546Sopenharmony_ci * spill it to other dist two slot without any side effect */ 437bf215546Sopenharmony_ci int spill_to_start = GPIR_INSTR_SLOT_MUL0; 438bf215546Sopenharmony_ci if (node->op == gpir_op_complex1 || node->op == gpir_op_select) 439bf215546Sopenharmony_ci spill_to_start = GPIR_INSTR_SLOT_ADD0; 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci if (!gpir_instr_spill_move(instr, node->sched.pos, spill_to_start)) 442bf215546Sopenharmony_ci return false; 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci if (node->op == gpir_op_complex1 || node->op == gpir_op_select) { 445bf215546Sopenharmony_ci if (!gpir_instr_spill_move(instr, GPIR_INSTR_SLOT_MUL1, spill_to_start)) 446bf215546Sopenharmony_ci return false; 447bf215546Sopenharmony_ci } 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci return true; 451bf215546Sopenharmony_ci} 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_cibool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node) 454bf215546Sopenharmony_ci{ 455bf215546Sopenharmony_ci instr->slot_difference = 0; 456bf215546Sopenharmony_ci instr->non_cplx_slot_difference = 0; 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_ci if (!gpir_instr_slot_free(instr, node)) 459bf215546Sopenharmony_ci return false; 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci if (node->sched.pos >= GPIR_INSTR_SLOT_ALU_BEGIN && 462bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_ALU_END) { 463bf215546Sopenharmony_ci if (!gpir_instr_insert_alu_check(instr, node)) 464bf215546Sopenharmony_ci return false; 465bf215546Sopenharmony_ci } 466bf215546Sopenharmony_ci else if (node->sched.pos >= GPIR_INSTR_SLOT_REG0_LOAD0 && 467bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_REG0_LOAD3) { 468bf215546Sopenharmony_ci if (!gpir_instr_insert_reg0_check(instr, node)) 469bf215546Sopenharmony_ci return false; 470bf215546Sopenharmony_ci } 471bf215546Sopenharmony_ci else if (node->sched.pos >= GPIR_INSTR_SLOT_REG1_LOAD0 && 472bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_REG1_LOAD3) { 473bf215546Sopenharmony_ci if (!gpir_instr_insert_reg1_check(instr, node)) 474bf215546Sopenharmony_ci return false; 475bf215546Sopenharmony_ci } 476bf215546Sopenharmony_ci else if (node->sched.pos >= GPIR_INSTR_SLOT_MEM_LOAD0 && 477bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_MEM_LOAD3) { 478bf215546Sopenharmony_ci if (!gpir_instr_insert_mem_check(instr, node)) 479bf215546Sopenharmony_ci return false; 480bf215546Sopenharmony_ci } 481bf215546Sopenharmony_ci else if (node->sched.pos >= GPIR_INSTR_SLOT_STORE0 && 482bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_STORE3) { 483bf215546Sopenharmony_ci if (!gpir_instr_insert_store_check(instr, node)) 484bf215546Sopenharmony_ci return false; 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci instr->slots[node->sched.pos] = node; 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci if (node->op == gpir_op_complex1 || node->op == gpir_op_select) 490bf215546Sopenharmony_ci instr->slots[GPIR_INSTR_SLOT_MUL1] = node; 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci return true; 493bf215546Sopenharmony_ci} 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_civoid gpir_instr_remove_node(gpir_instr *instr, gpir_node *node) 496bf215546Sopenharmony_ci{ 497bf215546Sopenharmony_ci assert(node->sched.pos >= 0); 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci /* This can happen if we merge duplicate loads in the scheduler. */ 500bf215546Sopenharmony_ci if (instr->slots[node->sched.pos] != node) { 501bf215546Sopenharmony_ci node->sched.pos = -1; 502bf215546Sopenharmony_ci node->sched.instr = NULL; 503bf215546Sopenharmony_ci return; 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci if (node->sched.pos >= GPIR_INSTR_SLOT_ALU_BEGIN && 507bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_ALU_END) 508bf215546Sopenharmony_ci gpir_instr_remove_alu(instr, node); 509bf215546Sopenharmony_ci else if (node->sched.pos >= GPIR_INSTR_SLOT_REG0_LOAD0 && 510bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_REG0_LOAD3) 511bf215546Sopenharmony_ci gpir_instr_remove_reg0(instr, node); 512bf215546Sopenharmony_ci else if (node->sched.pos >= GPIR_INSTR_SLOT_REG1_LOAD0 && 513bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_REG1_LOAD3) 514bf215546Sopenharmony_ci gpir_instr_remove_reg1(instr, node); 515bf215546Sopenharmony_ci else if (node->sched.pos >= GPIR_INSTR_SLOT_MEM_LOAD0 && 516bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_MEM_LOAD3) 517bf215546Sopenharmony_ci gpir_instr_remove_mem(instr, node); 518bf215546Sopenharmony_ci else if (node->sched.pos >= GPIR_INSTR_SLOT_STORE0 && 519bf215546Sopenharmony_ci node->sched.pos <= GPIR_INSTR_SLOT_STORE3) 520bf215546Sopenharmony_ci gpir_instr_remove_store(instr, node); 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_ci instr->slots[node->sched.pos] = NULL; 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci if (node->op == gpir_op_complex1 || node->op == gpir_op_select) 525bf215546Sopenharmony_ci instr->slots[GPIR_INSTR_SLOT_MUL1] = NULL; 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci node->sched.pos = -1; 528bf215546Sopenharmony_ci node->sched.instr = NULL; 529bf215546Sopenharmony_ci} 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_civoid gpir_instr_print_prog(gpir_compiler *comp) 532bf215546Sopenharmony_ci{ 533bf215546Sopenharmony_ci struct { 534bf215546Sopenharmony_ci int len; 535bf215546Sopenharmony_ci char *name; 536bf215546Sopenharmony_ci } fields[] = { 537bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_MUL0] = { 4, "mul0" }, 538bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_MUL1] = { 4, "mul1" }, 539bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_ADD0] = { 4, "add0" }, 540bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_ADD1] = { 4, "add1" }, 541bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_REG0_LOAD3] = { 15, "load0" }, 542bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_REG1_LOAD3] = { 15, "load1" }, 543bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_MEM_LOAD3] = { 15, "load2" }, 544bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_STORE3] = { 15, "store" }, 545bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_COMPLEX] = { 4, "cmpl" }, 546bf215546Sopenharmony_ci [GPIR_INSTR_SLOT_PASS] = { 4, "pass" }, 547bf215546Sopenharmony_ci }; 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci printf("========prog instr========\n"); 550bf215546Sopenharmony_ci printf(" "); 551bf215546Sopenharmony_ci for (int i = 0; i < GPIR_INSTR_SLOT_NUM; i++) { 552bf215546Sopenharmony_ci if (fields[i].len) 553bf215546Sopenharmony_ci printf("%-*s ", fields[i].len, fields[i].name); 554bf215546Sopenharmony_ci } 555bf215546Sopenharmony_ci printf("\n"); 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci int index = 0; 558bf215546Sopenharmony_ci list_for_each_entry(gpir_block, block, &comp->block_list, list) { 559bf215546Sopenharmony_ci list_for_each_entry(gpir_instr, instr, &block->instr_list, list) { 560bf215546Sopenharmony_ci printf("%03d: ", index++); 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci char buff[16] = "null"; 563bf215546Sopenharmony_ci int start = 0; 564bf215546Sopenharmony_ci for (int j = 0; j < GPIR_INSTR_SLOT_NUM; j++) { 565bf215546Sopenharmony_ci gpir_node *node = instr->slots[j]; 566bf215546Sopenharmony_ci if (fields[j].len) { 567bf215546Sopenharmony_ci if (node) 568bf215546Sopenharmony_ci snprintf(buff + start, sizeof(buff) - start, "%d", node->index); 569bf215546Sopenharmony_ci printf("%-*s ", fields[j].len, buff); 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci strcpy(buff, "null"); 572bf215546Sopenharmony_ci start = 0; 573bf215546Sopenharmony_ci } 574bf215546Sopenharmony_ci else { 575bf215546Sopenharmony_ci if (node) 576bf215546Sopenharmony_ci start += snprintf(buff + start, sizeof(buff) - start, "%d", node->index); 577bf215546Sopenharmony_ci start += snprintf(buff + start, sizeof(buff) - start, "|"); 578bf215546Sopenharmony_ci } 579bf215546Sopenharmony_ci } 580bf215546Sopenharmony_ci printf("\n"); 581bf215546Sopenharmony_ci } 582bf215546Sopenharmony_ci printf("-----------------------\n"); 583bf215546Sopenharmony_ci } 584bf215546Sopenharmony_ci printf("==========================\n"); 585bf215546Sopenharmony_ci} 586