/*
 * Copyright (c) 2019 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "gpir.h"

/* Here we perform a few optimizations that can't currently be done in NIR:
 *
 * - Optimize the result of a conditional break/continue. In NIR, something
 *   like:
 *
 * loop {
 *    ...
 *    if (cond)
 *       continue;
 *
 * would get lowered to:
 *
 * block_0:
 * ...
 * block_1:
 * branch_cond !cond block_3
 * block_2:
 * branch_uncond block_0
 * block_3:
 * ...
 *
 *   We recognize the conditional branch skipping over the unconditional
 *   branch and turn it into:
 *
 * block_0:
 * ...
 * block_1:
 * branch_cond cond block_0
 * block_2:
 * block_3:
 * ...
 *
 * - Optimize away nots of comparisons as produced by lowering ifs to
 *   branches, and nots of nots produced by the above optimization. A
 *   worked example follows this comment.
 *
 * - Dead code elimination (DCE).
 */

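/* As a sketch of the second optimization (the condition and block names
 * here are illustrative, not taken from a real shader): lowering
 *
 * if (a < b)
 *    ...
 *
 * produces a branch on the inverted condition,
 *
 * branch_cond (not (lt a, b)) block_n
 *
 * which optimize_not() below rewrites to
 *
 * branch_cond (ge a, b) block_n
 *
 * The original not (and the lt, if nothing else reads it) is then left
 * for dead_code_eliminate() to remove.
 */
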
static void
optimize_branches(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      /* Look for a block whose only node is an unconditional branch. */
      if (!list_is_singular(&block->node_list))
         continue;

      gpir_node *node = list_first_entry(&block->node_list, gpir_node, list);
      if (node->op != gpir_op_branch_uncond)
         continue;

      gpir_block *target = gpir_node_to_branch(node)->dest;

      /* Find the previous block, if any */
      if (block->list.prev == &comp->block_list)
         continue;

      gpir_block *prev_block = list_entry(block->list.prev, gpir_block, list);
      if (list_is_empty(&prev_block->node_list))
         continue;

      /* The previous block must end with a conditional branch */
      gpir_node *prev_block_last =
         list_last_entry(&prev_block->node_list, gpir_node, list);
      if (prev_block_last->op != gpir_op_branch_cond)
         continue;

      /* That branch must target the block immediately after this one */
      gpir_branch_node *prev_branch = gpir_node_to_branch(prev_block_last);
      gpir_block *prev_target = prev_branch->dest;

      if (&prev_target->list != block->list.next)
         continue;

      /* Hooray! Invert the conditional branch and change the target */
      gpir_alu_node *cond = gpir_node_create(prev_block, gpir_op_not);
      cond->children[0] = prev_branch->cond;
      cond->num_child = 1;
      gpir_node_add_dep(&cond->node, cond->children[0], GPIR_DEP_INPUT);
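      /* Place the not right before the branch in the node list and make
       * the branch consume it instead of the original condition.
       */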
      list_addtail(&cond->node.list, &prev_block_last->list);
      gpir_node_insert_child(prev_block_last, prev_branch->cond, &cond->node);
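      /* Retarget the branch at the unconditional branch's destination and
       * keep the CFG successor in sync.
       */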
      prev_branch->dest = target;
      prev_block->successors[1] = target;

      /* Delete the branch */
      list_del(&node->list);
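      /* The now-empty block just falls through to the next block */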
      block->successors[0] = list_entry(block->list.next, gpir_block, list);
   }
}

static void
optimize_not(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_not)
            continue;

         gpir_alu_node *alu = gpir_node_to_alu(node);

         gpir_node *replace = NULL;
         if (alu->children[0]->op == gpir_op_not) {
            /* (not (not a)) -> a */
            gpir_alu_node *child = gpir_node_to_alu(alu->children[0]);
            replace = child->children[0];
         } else if (alu->children[0]->op == gpir_op_ge ||
                    alu->children[0]->op == gpir_op_lt) {
            /* (not (ge a, b)) -> (lt a, b) and
             * (not (lt a, b)) -> (ge a, b)
             */
            gpir_alu_node *child = gpir_node_to_alu(alu->children[0]);
            gpir_op op = alu->children[0]->op == gpir_op_ge ?
               gpir_op_lt : gpir_op_ge;
            gpir_alu_node *new = gpir_node_create(block, op);
            new->children[0] = child->children[0];
            new->children[1] = child->children[1];
            new->num_child = 2;
            gpir_node_add_dep(&new->node, new->children[0], GPIR_DEP_INPUT);
            gpir_node_add_dep(&new->node, new->children[1], GPIR_DEP_INPUT);
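            /* Link the new comparison into the node list right before the
             * comparison it replaces.
             */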
            list_addtail(&new->node.list, &alu->children[0]->list);
            replace = &new->node;
         }

         if (replace) {
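            /* Rewrite every user of the not to read the replacement
             * instead; the not itself is now dead and will be removed by
             * DCE below.
             */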
            gpir_node_replace_succ(replace, node);
         }
      }
   }
}

/* Does what it says. In addition to removing nodes left unused by the not
 * optimization above, we need this to remove unused load_const nodes which
 * were created from store_output intrinsics in NIR, since we ignore the
 * offset.
 */

static void
dead_code_eliminate(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe_rev(gpir_node, node, &block->node_list, list) {
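         /* Stores and branches have side effects; any other node with no
          * successors is dead.
          */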
         if (node->type != gpir_node_type_store &&
             node->type != gpir_node_type_branch &&
             list_is_empty(&node->succ_list)) {
            gpir_node_delete(node);
         }
      }
   }

   /* Kill all the writes to regs that are never read. All the known
    * instances of these come from the cycle-breaking register created in
    * out-of-SSA; see resolve_parallel_copy() in nir_from_ssa.c. Since we
    * kill redundant movs when translating NIR into gpir, that register
    * ends up written but never read.
    */
   BITSET_WORD *regs =
      rzalloc_array(comp, BITSET_WORD, BITSET_WORDS(comp->cur_reg));
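   /* First pass: mark every register that is actually read */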
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_load_reg)
            continue;
         gpir_load_node *load = gpir_node_to_load(node);
         BITSET_SET(regs, load->reg->index);
      }
   }

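   /* Second pass: delete stores to registers that were never marked */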
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_store_reg)
            continue;
         gpir_store_node *store = gpir_node_to_store(node);
         if (!BITSET_TEST(regs, store->reg->index))
            gpir_node_delete(node);
      }
   }

   ralloc_free(regs);
}

bool
gpir_optimize(gpir_compiler *comp)
{
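   /* Order matters: optimize_branches() introduces new nots,
    * optimize_not() leaves its replaced nodes unused, and DCE cleans up
    * after both.
    */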
   optimize_branches(comp);
   optimize_not(comp);
   dead_code_eliminate(comp);

   gpir_debug("after optimization\n");
   gpir_node_print_prog_seq(comp);

   return true;
}