1/* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27#define PPH_DEBUG 0 28 29#if PPH_DEBUG 30#define PPH_DUMP(q) do { q } while (0) 31#else 32#define PPH_DUMP(q) 33#endif 34 35#include "sb_shader.h" 36#include "sb_pass.h" 37 38namespace r600_sb { 39 40int peephole::run() { 41 42 run_on(sh.root); 43 44 return 0; 45} 46 47void peephole::run_on(container_node* c) { 48 49 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { 50 node *n = *I; 51 52 if (n->is_container()) 53 run_on(static_cast<container_node*>(n)); 54 else { 55 if (n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS)) { 56 fetch_node *f = static_cast<fetch_node*>(n); 57 bool has_dst = false; 58 59 for(vvec::iterator I = f->dst.begin(), E = f->dst.end(); I != E; ++I) { 60 value *v = *I; 61 if (v) 62 has_dst = true; 63 } 64 if (!has_dst) 65 if (f->bc.op >= FETCH_OP_GDS_ADD_RET && f->bc.op <= FETCH_OP_GDS_USHORT_READ_RET) 66 f->bc.set_op(f->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD); 67 } 68 if (n->is_alu_inst()) { 69 alu_node *a = static_cast<alu_node*>(n); 70 71 if (a->bc.op_ptr->flags & AF_LDS) { 72 if (!a->dst[0]) { 73 if (a->bc.op >= LDS_OP2_LDS_ADD_RET && a->bc.op <= LDS_OP3_LDS_MSKOR_RET) 74 a->bc.set_op(a->bc.op - LDS_OP2_LDS_ADD_RET + LDS_OP2_LDS_ADD); 75 if (a->bc.op == LDS_OP1_LDS_READ_RET) 76 a->src[0] = sh.get_undef_value(); 77 } 78 } else if (a->bc.op_ptr->flags & 79 (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) { 80 optimize_cc_op(a); 81 } else if (a->bc.op == ALU_OP1_FLT_TO_INT) { 82 83 alu_node *s = a; 84 if (get_bool_flt_to_int_source(s)) { 85 convert_float_setcc(a, s); 86 } 87 } 88 } 89 } 90 } 91} 92 93void peephole::optimize_cc_op(alu_node* a) { 94 unsigned aflags = a->bc.op_ptr->flags; 95 96 if (aflags & (AF_PRED | AF_SET | AF_KILL)) { 97 optimize_cc_op2(a); 98 } else if (aflags & AF_CMOV) { 99 optimize_CNDcc_op(a); 100 } 101} 102 103void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) { 104 alu_node *ns = sh.clone(s); 105 106 ns->dst[0] = f2i->dst[0]; 107 ns->dst[0]->def = ns; 108 ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE)); 109 f2i->insert_after(ns); 110 f2i->remove(); 111} 112 113void peephole::optimize_cc_op2(alu_node* a) { 114 115 unsigned flags = a->bc.op_ptr->flags; 116 unsigned cc = flags & AF_CC_MASK; 117 118 if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred) 119 return; 120 121 unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 122 unsigned dst_type = flags & AF_DST_TYPE_MASK; 123 124 int op_kind = (flags & AF_PRED) ? 1 : 125 (flags & AF_SET) ? 2 : 126 (flags & AF_KILL) ? 3 : 0; 127 128 bool swapped = false; 129 130 if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) { 131 std::swap(a->src[0],a->src[1]); 132 swapped = true; 133 // clear modifiers 134 a->bc.src[0].clear(); 135 a->bc.src[1].clear(); 136 } 137 138 if (swapped || (a->src[1]->is_const() && 139 a->src[1]->literal_value == literal(0))) { 140 141 value *s = a->src[0]; 142 143 bool_op_info bop = {}; 144 145 PPH_DUMP( 146 sblog << "cc_op2: "; 147 dump::dump_op(a); 148 sblog << "\n"; 149 ); 150 151 if (!get_bool_op_info(s, bop)) 152 return; 153 154 if (cc == AF_CC_E) 155 bop.invert = !bop.invert; 156 157 bool swap_args = false; 158 159 cc = bop.n->bc.op_ptr->flags & AF_CC_MASK; 160 161 if (bop.invert) 162 cc = invert_setcc_condition(cc, swap_args); 163 164 if (bop.int_cvt) { 165 assert(cmp_type != AF_FLOAT_CMP); 166 cmp_type = AF_FLOAT_CMP; 167 } 168 169 PPH_DUMP( 170 sblog << "boi node: "; 171 dump::dump_op(bop.n); 172 sblog << " invert: " << bop.invert << " int_cvt: " << bop.int_cvt; 173 sblog <<"\n"; 174 ); 175 176 unsigned newop; 177 178 switch(op_kind) { 179 case 1: 180 newop = get_predsetcc_op(cc, cmp_type); 181 break; 182 case 2: 183 newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST); 184 break; 185 case 3: 186 newop = get_killcc_op(cc, cmp_type); 187 break; 188 default: 189 newop = ALU_OP0_NOP; 190 assert(!"invalid op kind"); 191 break; 192 } 193 194 a->bc.set_op(newop); 195 196 if (swap_args) { 197 a->src[0] = bop.n->src[1]; 198 a->src[1] = bop.n->src[0]; 199 a->bc.src[0] = bop.n->bc.src[1]; 200 a->bc.src[1] = bop.n->bc.src[0]; 201 202 } else { 203 a->src[0] = bop.n->src[0]; 204 a->src[1] = bop.n->src[1]; 205 a->bc.src[0] = bop.n->bc.src[0]; 206 a->bc.src[1] = bop.n->bc.src[1]; 207 } 208 } 209} 210 211void peephole::optimize_CNDcc_op(alu_node* a) { 212 unsigned flags = a->bc.op_ptr->flags; 213 unsigned cc = flags & AF_CC_MASK; 214 unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 215 bool swap = false; 216 217 if (cc == AF_CC_E) { 218 swap = !swap; 219 cc = AF_CC_NE; 220 } else if (cc != AF_CC_NE) 221 return; 222 223 value *s = a->src[0]; 224 225 bool_op_info bop = {}; 226 227 PPH_DUMP( 228 sblog << "cndcc: "; 229 dump::dump_op(a); 230 sblog << "\n"; 231 ); 232 233 if (!get_bool_op_info(s, bop)) 234 return; 235 236 alu_node *d = bop.n; 237 238 if (d->bc.omod) 239 return; 240 241 PPH_DUMP( 242 sblog << "cndcc def: "; 243 dump::dump_op(d); 244 sblog << "\n"; 245 ); 246 247 248 unsigned dflags = d->bc.op_ptr->flags; 249 unsigned dcc = dflags & AF_CC_MASK; 250 unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK; 251 unsigned ddst_type = dflags & AF_DST_TYPE_MASK; 252 int nds; 253 254 // TODO we can handle some of these cases, 255 // though probably this shouldn't happen 256 if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST) 257 return; 258 259 if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0)) 260 nds = 1; 261 else if ((d->src[1]->is_const() && 262 d->src[1]->literal_value == literal(0))) 263 nds = 0; 264 else 265 return; 266 267 // can't propagate ABS modifier to CNDcc because it's OP3 268 if (d->bc.src[nds].abs) 269 return; 270 271 // Don't create an instruction that uses three kcache values 272 // chances are high that it can't be scheduled 273 if (d->src[0]->is_kcache() && a->src[1]->is_kcache() && 274 a->src[2]->is_kcache()) 275 return; 276 277 // TODO we can handle some cases for uint comparison 278 if (dcmp_type == AF_UINT_CMP) 279 return; 280 281 if (dcc == AF_CC_NE) { 282 dcc = AF_CC_E; 283 swap = !swap; 284 } 285 286 if (nds == 1) { 287 switch (dcc) { 288 case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break; 289 case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break; 290 default: break; 291 } 292 } 293 294 a->src[0] = d->src[nds]; 295 a->bc.src[0] = d->bc.src[nds]; 296 297 if (swap) { 298 std::swap(a->src[1], a->src[2]); 299 std::swap(a->bc.src[1], a->bc.src[2]); 300 } 301 302 a->bc.set_op(get_cndcc_op(dcc, dcmp_type)); 303 304} 305 306bool peephole::get_bool_flt_to_int_source(alu_node* &a) { 307 308 if (a->bc.op == ALU_OP1_FLT_TO_INT) { 309 310 if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel) 311 return false; 312 313 value *s = a->src[0]; 314 if (!s || !s->def || !s->def->is_alu_inst()) 315 return false; 316 317 alu_node *dn = static_cast<alu_node*>(s->def); 318 319 if (dn->is_alu_op(ALU_OP1_TRUNC)) { 320 s = dn->src[0]; 321 if (!s || !s->def || !s->def->is_alu_inst()) 322 return false; 323 324 if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 || 325 dn->bc.src[0].rel != 0) { 326 return false; 327 } 328 329 dn = static_cast<alu_node*>(s->def); 330 331 } 332 333 if (dn->bc.op_ptr->flags & AF_SET) { 334 a = dn; 335 return true; 336 } 337 } 338 return false; 339} 340 341bool peephole::get_bool_op_info(value* b, bool_op_info& bop) { 342 343 node *d = b->def; 344 345 if (!d || !d->is_alu_inst()) 346 return false; 347 348 alu_node *dn = static_cast<alu_node*>(d); 349 350 if (dn->bc.op_ptr->flags & AF_SET) { 351 bop.n = dn; 352 353 if (dn->bc.op_ptr->flags & AF_DX10) 354 bop.int_cvt = true; 355 356 return true; 357 } 358 359 if (get_bool_flt_to_int_source(dn)) { 360 bop.n = dn; 361 bop.int_cvt = true; 362 return true; 363 } 364 365 return false; 366} 367 368} // namespace r600_sb 369