1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Vadim Girlin 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include <cmath> 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "sb_shader.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cinamespace r600_sb { 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_civalue* get_select_value_for_em(shader& sh, value* em) { 34bf215546Sopenharmony_ci if (!em->def) 35bf215546Sopenharmony_ci return NULL; 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci node *predset = em->def; 38bf215546Sopenharmony_ci if (!predset->is_pred_set()) 39bf215546Sopenharmony_ci return NULL; 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_ci alu_node *s = sh.clone(static_cast<alu_node*>(predset)); 42bf215546Sopenharmony_ci convert_predset_to_set(sh, s); 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci predset->insert_after(s); 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci value* &d0 = s->dst[0]; 47bf215546Sopenharmony_ci d0 = sh.create_temp_value(); 48bf215546Sopenharmony_ci d0->def = s; 49bf215546Sopenharmony_ci return d0; 50bf215546Sopenharmony_ci} 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_civoid convert_to_mov(alu_node &n, value *src, bool neg, bool abs) { 53bf215546Sopenharmony_ci n.src.resize(1); 54bf215546Sopenharmony_ci n.src[0] = src; 55bf215546Sopenharmony_ci n.bc.src[0].abs = abs; 56bf215546Sopenharmony_ci n.bc.src[0].neg = neg; 57bf215546Sopenharmony_ci n.bc.set_op(ALU_OP1_MOV); 58bf215546Sopenharmony_ci} 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ciexpr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {} 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_civalue * expr_handler::get_const(const literal &l) { 63bf215546Sopenharmony_ci value *v = sh.get_const_value(l); 64bf215546Sopenharmony_ci if (!v->gvn_source) 65bf215546Sopenharmony_ci vt.add_value(v); 66bf215546Sopenharmony_ci return v; 67bf215546Sopenharmony_ci} 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_civoid expr_handler::assign_source(value *dst, value *src) { 70bf215546Sopenharmony_ci dst->gvn_source = src->gvn_source; 71bf215546Sopenharmony_ci} 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_cibool expr_handler::equal(value *l, value *r) { 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci assert(l != r); 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci if (l->is_lds_access() || r->is_lds_access()) 78bf215546Sopenharmony_ci return false; 79bf215546Sopenharmony_ci if (l->gvalue() == r->gvalue()) 80bf215546Sopenharmony_ci return true; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci if (l->def && r->def) 83bf215546Sopenharmony_ci return defs_equal(l, r); 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci if (l->is_rel() && r->is_rel()) 86bf215546Sopenharmony_ci return ivars_equal(l, r); 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci return false; 89bf215546Sopenharmony_ci} 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_cibool expr_handler::ivars_equal(value* l, value* r) { 92bf215546Sopenharmony_ci if (l->rel->gvalue() == r->rel->gvalue() 93bf215546Sopenharmony_ci && l->select == r->select) { 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci vvec &lv = l->mdef.empty() ? l->muse : l->mdef; 96bf215546Sopenharmony_ci vvec &rv = r->mdef.empty() ? r->muse : r->mdef; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci // FIXME: replace this with more precise aliasing test 99bf215546Sopenharmony_ci return lv == rv; 100bf215546Sopenharmony_ci } 101bf215546Sopenharmony_ci return false; 102bf215546Sopenharmony_ci} 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_cibool expr_handler::defs_equal(value* l, value* r) { 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci node *d1 = l->def; 107bf215546Sopenharmony_ci node *d2 = r->def; 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci if (d1->type != d2->type || d1->subtype != d2->subtype) 110bf215546Sopenharmony_ci return false; 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci if (d1->is_pred_set() || d2->is_pred_set()) 113bf215546Sopenharmony_ci return false; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci if (d1->type == NT_OP) { 116bf215546Sopenharmony_ci switch (d1->subtype) { 117bf215546Sopenharmony_ci case NST_ALU_INST: 118bf215546Sopenharmony_ci return ops_equal( 119bf215546Sopenharmony_ci static_cast<alu_node*>(d1), 120bf215546Sopenharmony_ci static_cast<alu_node*>(d2)); 121bf215546Sopenharmony_ci// case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1), 122bf215546Sopenharmony_ci// static_cast<fetch_node*>(d2); 123bf215546Sopenharmony_ci// case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1), 124bf215546Sopenharmony_ci// static_cast<cf_node*>(d2); 125bf215546Sopenharmony_ci default: 126bf215546Sopenharmony_ci break; 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci } 129bf215546Sopenharmony_ci return false; 130bf215546Sopenharmony_ci} 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_cibool expr_handler::try_fold(value* v) { 133bf215546Sopenharmony_ci assert(!v->gvn_source); 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci if (v->def) 136bf215546Sopenharmony_ci try_fold(v->def); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci if (v->gvn_source) 139bf215546Sopenharmony_ci return true; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci return false; 142bf215546Sopenharmony_ci} 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_cibool expr_handler::try_fold(node* n) { 145bf215546Sopenharmony_ci return n->fold_dispatch(this); 146bf215546Sopenharmony_ci} 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_cibool expr_handler::fold(node& n) { 149bf215546Sopenharmony_ci if (n.subtype == NST_PHI) { 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci value *s = n.src[0]; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci // FIXME disabling phi folding for registers for now, otherwise we lose 154bf215546Sopenharmony_ci // control flow information in some cases 155bf215546Sopenharmony_ci // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test) 156bf215546Sopenharmony_ci // probably control flow transformation is required to enable it 157bf215546Sopenharmony_ci if (s->is_sgpr()) 158bf215546Sopenharmony_ci return false; 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) { 161bf215546Sopenharmony_ci value *v = *I; 162bf215546Sopenharmony_ci if (!s->v_equal(v)) 163bf215546Sopenharmony_ci return false; 164bf215546Sopenharmony_ci } 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci assign_source(n.dst[0], s); 167bf215546Sopenharmony_ci } else { 168bf215546Sopenharmony_ci assert(n.subtype == NST_PSI); 169bf215546Sopenharmony_ci assert(n.src.size() >= 6); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci value *s = n.src[2]; 172bf215546Sopenharmony_ci assert(s->gvn_source); 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) { 175bf215546Sopenharmony_ci value *v = *(I+2); 176bf215546Sopenharmony_ci if (!s->v_equal(v)) 177bf215546Sopenharmony_ci return false; 178bf215546Sopenharmony_ci } 179bf215546Sopenharmony_ci assign_source(n.dst[0], s); 180bf215546Sopenharmony_ci } 181bf215546Sopenharmony_ci return true; 182bf215546Sopenharmony_ci} 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_cibool expr_handler::fold(container_node& n) { 185bf215546Sopenharmony_ci return false; 186bf215546Sopenharmony_ci} 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_cibool expr_handler::fold_setcc(alu_node &n) { 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci value* v0 = n.src[0]->gvalue(); 191bf215546Sopenharmony_ci value* v1 = n.src[1]->gvalue(); 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci assert(v0 && v1 && n.dst[0]); 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci unsigned flags = n.bc.op_ptr->flags; 196bf215546Sopenharmony_ci unsigned cc = flags & AF_CC_MASK; 197bf215546Sopenharmony_ci unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 198bf215546Sopenharmony_ci unsigned dst_type = flags & AF_DST_TYPE_MASK; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci bool cond_result; 201bf215546Sopenharmony_ci bool have_result = false; 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci bool isc0 = v0->is_const(); 204bf215546Sopenharmony_ci bool isc1 = v1->is_const(); 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci literal dv, cv0, cv1; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci if (isc0) { 209bf215546Sopenharmony_ci cv0 = v0->get_const_value(); 210bf215546Sopenharmony_ci apply_alu_src_mod(n.bc, 0, cv0); 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci if (isc1) { 214bf215546Sopenharmony_ci cv1 = v1->get_const_value(); 215bf215546Sopenharmony_ci apply_alu_src_mod(n.bc, 1, cv1); 216bf215546Sopenharmony_ci } 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci if (isc0 && isc1) { 219bf215546Sopenharmony_ci cond_result = evaluate_condition(flags, cv0, cv1); 220bf215546Sopenharmony_ci have_result = true; 221bf215546Sopenharmony_ci } else if (isc1) { 222bf215546Sopenharmony_ci if (cmp_type == AF_FLOAT_CMP) { 223bf215546Sopenharmony_ci if (n.bc.src[0].abs && !n.bc.src[0].neg) { 224bf215546Sopenharmony_ci if (cv1.f < 0.0f && cc == AF_CC_NE) { 225bf215546Sopenharmony_ci cond_result = true; 226bf215546Sopenharmony_ci have_result = true; 227bf215546Sopenharmony_ci } 228bf215546Sopenharmony_ci } else if (n.bc.src[0].abs && n.bc.src[0].neg) { 229bf215546Sopenharmony_ci if (cv1.f > 0.0f && cc == AF_CC_E) { 230bf215546Sopenharmony_ci cond_result = false; 231bf215546Sopenharmony_ci have_result = true; 232bf215546Sopenharmony_ci } 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci } else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) { 235bf215546Sopenharmony_ci cond_result = true; 236bf215546Sopenharmony_ci have_result = true; 237bf215546Sopenharmony_ci } 238bf215546Sopenharmony_ci } else if (isc0) { 239bf215546Sopenharmony_ci if (cmp_type == AF_FLOAT_CMP) { 240bf215546Sopenharmony_ci if (n.bc.src[1].abs && !n.bc.src[1].neg) { 241bf215546Sopenharmony_ci if (cv0.f < 0.0f && (cc == AF_CC_E)) { 242bf215546Sopenharmony_ci cond_result = false; 243bf215546Sopenharmony_ci have_result = true; 244bf215546Sopenharmony_ci } 245bf215546Sopenharmony_ci } else if (n.bc.src[1].abs && n.bc.src[1].neg) { 246bf215546Sopenharmony_ci if (cv0.f > 0.0f && cc == AF_CC_NE) { 247bf215546Sopenharmony_ci cond_result = true; 248bf215546Sopenharmony_ci have_result = true; 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci } 251bf215546Sopenharmony_ci } else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) { 252bf215546Sopenharmony_ci cond_result = false; 253bf215546Sopenharmony_ci have_result = true; 254bf215546Sopenharmony_ci } 255bf215546Sopenharmony_ci } else if (v0 == v1) { 256bf215546Sopenharmony_ci bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1]; 257bf215546Sopenharmony_ci if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) { 258bf215546Sopenharmony_ci // NOTE can't handle float comparisons here because of NaNs 259bf215546Sopenharmony_ci cond_result = (cc == AF_CC_E || cc == AF_CC_GE); 260bf215546Sopenharmony_ci have_result = true; 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci } 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci if (have_result) { 265bf215546Sopenharmony_ci literal result; 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci if (cond_result) 268bf215546Sopenharmony_ci result = dst_type != AF_FLOAT_DST ? 269bf215546Sopenharmony_ci literal(0xFFFFFFFFu) : literal(1.0f); 270bf215546Sopenharmony_ci else 271bf215546Sopenharmony_ci result = literal(0); 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci convert_to_mov(n, sh.get_const_value(result)); 274bf215546Sopenharmony_ci return fold_alu_op1(n); 275bf215546Sopenharmony_ci } 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci return false; 278bf215546Sopenharmony_ci} 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_cibool expr_handler::fold(alu_node& n) { 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci switch (n.bc.op_ptr->src_count) { 283bf215546Sopenharmony_ci case 1: return fold_alu_op1(n); 284bf215546Sopenharmony_ci case 2: return fold_alu_op2(n); 285bf215546Sopenharmony_ci case 3: return fold_alu_op3(n); 286bf215546Sopenharmony_ci default: 287bf215546Sopenharmony_ci assert(0); 288bf215546Sopenharmony_ci } 289bf215546Sopenharmony_ci return false; 290bf215546Sopenharmony_ci} 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_cibool expr_handler::fold(fetch_node& n) { 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci unsigned chan = 0; 295bf215546Sopenharmony_ci for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) { 296bf215546Sopenharmony_ci value* &v = *I; 297bf215546Sopenharmony_ci if (v) { 298bf215546Sopenharmony_ci if (n.bc.dst_sel[chan] == SEL_0) 299bf215546Sopenharmony_ci assign_source(*I, get_const(0.0f)); 300bf215546Sopenharmony_ci else if (n.bc.dst_sel[chan] == SEL_1) 301bf215546Sopenharmony_ci assign_source(*I, get_const(1.0f)); 302bf215546Sopenharmony_ci } 303bf215546Sopenharmony_ci ++chan; 304bf215546Sopenharmony_ci } 305bf215546Sopenharmony_ci return false; 306bf215546Sopenharmony_ci} 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_cibool expr_handler::fold(cf_node& n) { 309bf215546Sopenharmony_ci return false; 310bf215546Sopenharmony_ci} 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_civoid expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src, 313bf215546Sopenharmony_ci literal &v) { 314bf215546Sopenharmony_ci const bc_alu_src &s = bc.src[src]; 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci if (s.abs) 317bf215546Sopenharmony_ci v = fabsf(v.f); 318bf215546Sopenharmony_ci if (s.neg) 319bf215546Sopenharmony_ci v = -v.f; 320bf215546Sopenharmony_ci} 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_civoid expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) { 323bf215546Sopenharmony_ci const float omod_coeff[] = {2.0f, 4.0, 0.5f}; 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci if (bc.omod) 326bf215546Sopenharmony_ci v = v.f * omod_coeff[bc.omod - 1]; 327bf215546Sopenharmony_ci if (bc.clamp) 328bf215546Sopenharmony_ci v = float_clamp(v.f); 329bf215546Sopenharmony_ci} 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_cibool expr_handler::args_equal(const vvec &l, const vvec &r) { 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci assert(l.size() == r.size()); 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci int s = l.size(); 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci for (int k = 0; k < s; ++k) { 338bf215546Sopenharmony_ci if (!l[k]->v_equal(r[k])) 339bf215546Sopenharmony_ci return false; 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci return true; 343bf215546Sopenharmony_ci} 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_cibool expr_handler::ops_equal(const alu_node *l, const alu_node* r) { 346bf215546Sopenharmony_ci const bc_alu &b0 = l->bc; 347bf215546Sopenharmony_ci const bc_alu &b1 = r->bc; 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci if (b0.op != b1.op) 350bf215546Sopenharmony_ci return false; 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci unsigned src_count = b0.op_ptr->src_count; 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci if (b0.index_mode != b1.index_mode) 355bf215546Sopenharmony_ci return false; 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci if (b0.clamp != b1.clamp || b0.omod != b1.omod) 358bf215546Sopenharmony_ci return false; 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci for (unsigned s = 0; s < src_count; ++s) { 361bf215546Sopenharmony_ci const bc_alu_src &s0 = b0.src[s]; 362bf215546Sopenharmony_ci const bc_alu_src &s1 = b1.src[s]; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci if (s0.abs != s1.abs || s0.neg != s1.neg) 365bf215546Sopenharmony_ci return false; 366bf215546Sopenharmony_ci } 367bf215546Sopenharmony_ci return args_equal(l->src, r->src); 368bf215546Sopenharmony_ci} 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_cibool expr_handler::fold_alu_op1(alu_node& n) { 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci assert(!n.src.empty()); 373bf215546Sopenharmony_ci if (n.src.empty()) 374bf215546Sopenharmony_ci return false; 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci /* don't fold LDS instructions */ 377bf215546Sopenharmony_ci if (n.bc.op_ptr->flags & AF_LDS) 378bf215546Sopenharmony_ci return false; 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci value* v0 = n.src[0]->gvalue(); 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci if (v0->is_lds_oq() || v0->is_lds_access()) 383bf215546Sopenharmony_ci return false; 384bf215546Sopenharmony_ci assert(v0 && n.dst[0]); 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci if (!v0->is_const()) { 387bf215546Sopenharmony_ci // handle (MOV -(MOV -x)) => (MOV x) 388bf215546Sopenharmony_ci if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs 389bf215546Sopenharmony_ci && v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) { 390bf215546Sopenharmony_ci alu_node *sd = static_cast<alu_node*>(v0->def); 391bf215546Sopenharmony_ci if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs && 392bf215546Sopenharmony_ci sd->bc.src[0].neg) { 393bf215546Sopenharmony_ci n.src[0] = sd->src[0]; 394bf215546Sopenharmony_ci n.bc.src[0].neg = 0; 395bf215546Sopenharmony_ci v0 = n.src[0]->gvalue(); 396bf215546Sopenharmony_ci } 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT || 400bf215546Sopenharmony_ci n.bc.op == ALU_OP1_MOVA_GPR_INT) 401bf215546Sopenharmony_ci && n.bc.clamp == 0 && n.bc.omod == 0 402bf215546Sopenharmony_ci && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 && 403bf215546Sopenharmony_ci n.src.size() == 1 /* RIM/SIM can be appended as additional values */ 404bf215546Sopenharmony_ci && n.dst[0]->no_reladdr_conflict_with(v0)) { 405bf215546Sopenharmony_ci assign_source(n.dst[0], v0); 406bf215546Sopenharmony_ci return true; 407bf215546Sopenharmony_ci } 408bf215546Sopenharmony_ci return false; 409bf215546Sopenharmony_ci } 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci literal dv, cv = v0->get_const_value(); 412bf215546Sopenharmony_ci apply_alu_src_mod(n.bc, 0, cv); 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci switch (n.bc.op) { 415bf215546Sopenharmony_ci case ALU_OP1_CEIL: dv = ceilf(cv.f); break; 416bf215546Sopenharmony_ci case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break; 417bf215546Sopenharmony_ci case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break; 418bf215546Sopenharmony_ci case ALU_OP1_FLOOR: dv = floorf(cv.f); break; 419bf215546Sopenharmony_ci case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ???? 420bf215546Sopenharmony_ci case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break; 421bf215546Sopenharmony_ci case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break; 422bf215546Sopenharmony_ci case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break; 423bf215546Sopenharmony_ci case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break; 424bf215546Sopenharmony_ci case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break; 425bf215546Sopenharmony_ci case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break; 426bf215546Sopenharmony_ci case ALU_OP1_LOG_CLAMPED: 427bf215546Sopenharmony_ci case ALU_OP1_LOG_IEEE: 428bf215546Sopenharmony_ci if (cv.f != 0.0f) 429bf215546Sopenharmony_ci dv = log2f(cv.f); 430bf215546Sopenharmony_ci else 431bf215546Sopenharmony_ci // don't fold to NAN, let the GPU handle it for now 432bf215546Sopenharmony_ci // (prevents degenerate LIT tests from failing) 433bf215546Sopenharmony_ci return false; 434bf215546Sopenharmony_ci break; 435bf215546Sopenharmony_ci case ALU_OP1_MOV: dv = cv; break; 436bf215546Sopenharmony_ci case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ??? 437bf215546Sopenharmony_ci// case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break; 438bf215546Sopenharmony_ci// case ALU_OP1_MOVA_GPR_INT: 439bf215546Sopenharmony_ci case ALU_OP1_NOT_INT: dv = ~cv.i; break; 440bf215546Sopenharmony_ci case ALU_OP1_PRED_SET_INV: 441bf215546Sopenharmony_ci dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break; 442bf215546Sopenharmony_ci case ALU_OP1_PRED_SET_RESTORE: dv = cv; break; 443bf215546Sopenharmony_ci case ALU_OP1_RECIPSQRT_CLAMPED: 444bf215546Sopenharmony_ci case ALU_OP1_RECIPSQRT_FF: 445bf215546Sopenharmony_ci case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break; 446bf215546Sopenharmony_ci case ALU_OP1_RECIP_CLAMPED: 447bf215546Sopenharmony_ci case ALU_OP1_RECIP_FF: 448bf215546Sopenharmony_ci case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break; 449bf215546Sopenharmony_ci// case ALU_OP1_RECIP_INT: 450bf215546Sopenharmony_ci case ALU_OP1_RECIP_UINT: { 451bf215546Sopenharmony_ci if (!cv.u) 452bf215546Sopenharmony_ci return false; 453bf215546Sopenharmony_ci dv.u = (1ull << 32) / cv.u; 454bf215546Sopenharmony_ci break; 455bf215546Sopenharmony_ci } 456bf215546Sopenharmony_ci // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break; 457bf215546Sopenharmony_ci case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break; 458bf215546Sopenharmony_ci case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break; 459bf215546Sopenharmony_ci case ALU_OP1_TRUNC: dv = truncf(cv.f); break; 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci default: 462bf215546Sopenharmony_ci return false; 463bf215546Sopenharmony_ci } 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci apply_alu_dst_mod(n.bc, dv); 466bf215546Sopenharmony_ci assign_source(n.dst[0], get_const(dv)); 467bf215546Sopenharmony_ci return true; 468bf215546Sopenharmony_ci} 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_cibool expr_handler::fold_mul_add(alu_node *n) { 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci bool ieee; 473bf215546Sopenharmony_ci value* v0 = n->src[0]->gvalue(); 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ? 476bf215546Sopenharmony_ci static_cast<alu_node*>(v0->def) : NULL; 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci if (d0) { 479bf215546Sopenharmony_ci if (d0->is_alu_op(ALU_OP2_MUL_IEEE)) 480bf215546Sopenharmony_ci ieee = true; 481bf215546Sopenharmony_ci else if (d0->is_alu_op(ALU_OP2_MUL)) 482bf215546Sopenharmony_ci ieee = false; 483bf215546Sopenharmony_ci else 484bf215546Sopenharmony_ci return false; 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci if (!d0->bc.src[0].abs && !d0->bc.src[1].abs && 487bf215546Sopenharmony_ci !n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod && 488bf215546Sopenharmony_ci !d0->bc.clamp && !n->bc.omod && 489bf215546Sopenharmony_ci (!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() || 490bf215546Sopenharmony_ci !n->src[1]->is_kcache())) { 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci bool mul_neg = n->bc.src[0].neg; 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci n->src.resize(3); 495bf215546Sopenharmony_ci n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD); 496bf215546Sopenharmony_ci n->src[2] = n->src[1]; 497bf215546Sopenharmony_ci n->bc.src[2] = n->bc.src[1]; 498bf215546Sopenharmony_ci n->src[0] = d0->src[0]; 499bf215546Sopenharmony_ci n->bc.src[0] = d0->bc.src[0]; 500bf215546Sopenharmony_ci n->src[1] = d0->src[1]; 501bf215546Sopenharmony_ci n->bc.src[1] = d0->bc.src[1]; 502bf215546Sopenharmony_ci 503bf215546Sopenharmony_ci n->bc.src[0].neg ^= mul_neg; 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci fold_alu_op3(*n); 506bf215546Sopenharmony_ci return true; 507bf215546Sopenharmony_ci } 508bf215546Sopenharmony_ci } 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci value* v1 = n->src[1]->gvalue(); 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ? 513bf215546Sopenharmony_ci static_cast<alu_node*>(v1->def) : NULL; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci if (d1) { 516bf215546Sopenharmony_ci if (d1->is_alu_op(ALU_OP2_MUL_IEEE)) 517bf215546Sopenharmony_ci ieee = true; 518bf215546Sopenharmony_ci else if (d1->is_alu_op(ALU_OP2_MUL)) 519bf215546Sopenharmony_ci ieee = false; 520bf215546Sopenharmony_ci else 521bf215546Sopenharmony_ci return false; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci if (!d1->bc.src[1].abs && !d1->bc.src[0].abs && 524bf215546Sopenharmony_ci !n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod && 525bf215546Sopenharmony_ci !d1->bc.clamp && !n->bc.omod && 526bf215546Sopenharmony_ci (!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() || 527bf215546Sopenharmony_ci !n->src[0]->is_kcache())) { 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci bool mul_neg = n->bc.src[1].neg; 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci n->src.resize(3); 532bf215546Sopenharmony_ci n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD); 533bf215546Sopenharmony_ci n->src[2] = n->src[0]; 534bf215546Sopenharmony_ci n->bc.src[2] = n->bc.src[0]; 535bf215546Sopenharmony_ci n->src[1] = d1->src[1]; 536bf215546Sopenharmony_ci n->bc.src[1] = d1->bc.src[1]; 537bf215546Sopenharmony_ci n->src[0] = d1->src[0]; 538bf215546Sopenharmony_ci n->bc.src[0] = d1->bc.src[0]; 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci n->bc.src[1].neg ^= mul_neg; 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci fold_alu_op3(*n); 543bf215546Sopenharmony_ci return true; 544bf215546Sopenharmony_ci } 545bf215546Sopenharmony_ci } 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci return false; 548bf215546Sopenharmony_ci} 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_cibool expr_handler::eval_const_op(unsigned op, literal &r, 551bf215546Sopenharmony_ci literal cv0, literal cv1) { 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci switch (op) { 554bf215546Sopenharmony_ci case ALU_OP2_ADD: r = cv0.f + cv1.f; break; 555bf215546Sopenharmony_ci case ALU_OP2_ADDC_UINT: 556bf215546Sopenharmony_ci r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break; 557bf215546Sopenharmony_ci case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break; 558bf215546Sopenharmony_ci case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break; 559bf215546Sopenharmony_ci case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break; 560bf215546Sopenharmony_ci case ALU_OP2_BFM_INT: 561bf215546Sopenharmony_ci r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break; 562bf215546Sopenharmony_ci case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break; 563bf215546Sopenharmony_ci case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break; 564bf215546Sopenharmony_ci case ALU_OP2_MAX: 565bf215546Sopenharmony_ci case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break; 566bf215546Sopenharmony_ci case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break; 567bf215546Sopenharmony_ci case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break; 568bf215546Sopenharmony_ci case ALU_OP2_MIN: 569bf215546Sopenharmony_ci case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break; 570bf215546Sopenharmony_ci case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break; 571bf215546Sopenharmony_ci case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break; 572bf215546Sopenharmony_ci case ALU_OP2_MUL: 573bf215546Sopenharmony_ci case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break; 574bf215546Sopenharmony_ci case ALU_OP2_MULHI_INT: 575bf215546Sopenharmony_ci r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break; 576bf215546Sopenharmony_ci case ALU_OP2_MULHI_UINT: 577bf215546Sopenharmony_ci r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break; 578bf215546Sopenharmony_ci case ALU_OP2_MULLO_INT: 579bf215546Sopenharmony_ci r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break; 580bf215546Sopenharmony_ci case ALU_OP2_MULLO_UINT: 581bf215546Sopenharmony_ci r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break; 582bf215546Sopenharmony_ci case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break; 583bf215546Sopenharmony_ci case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break; 584bf215546Sopenharmony_ci case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break; 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci default: 587bf215546Sopenharmony_ci return false; 588bf215546Sopenharmony_ci } 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci return true; 591bf215546Sopenharmony_ci} 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5) 594bf215546Sopenharmony_cibool expr_handler::fold_assoc(alu_node *n) { 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci alu_node *a = n; 597bf215546Sopenharmony_ci literal cr; 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci int last_arg = -3; 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci unsigned op = n->bc.op; 602bf215546Sopenharmony_ci bool allow_neg = false, cur_neg = false; 603bf215546Sopenharmony_ci bool distribute_neg = false; 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci switch(op) { 606bf215546Sopenharmony_ci case ALU_OP2_ADD: 607bf215546Sopenharmony_ci distribute_neg = true; 608bf215546Sopenharmony_ci allow_neg = true; 609bf215546Sopenharmony_ci break; 610bf215546Sopenharmony_ci case ALU_OP2_MUL: 611bf215546Sopenharmony_ci case ALU_OP2_MUL_IEEE: 612bf215546Sopenharmony_ci allow_neg = true; 613bf215546Sopenharmony_ci break; 614bf215546Sopenharmony_ci case ALU_OP3_MULADD: 615bf215546Sopenharmony_ci allow_neg = true; 616bf215546Sopenharmony_ci op = ALU_OP2_MUL; 617bf215546Sopenharmony_ci break; 618bf215546Sopenharmony_ci case ALU_OP3_MULADD_IEEE: 619bf215546Sopenharmony_ci allow_neg = true; 620bf215546Sopenharmony_ci op = ALU_OP2_MUL_IEEE; 621bf215546Sopenharmony_ci break; 622bf215546Sopenharmony_ci default: 623bf215546Sopenharmony_ci if (n->bc.op_ptr->src_count != 2) 624bf215546Sopenharmony_ci return false; 625bf215546Sopenharmony_ci } 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci // check if we can evaluate the op 628bf215546Sopenharmony_ci if (!eval_const_op(op, cr, literal(0), literal(0))) 629bf215546Sopenharmony_ci return false; 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci while (true) { 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci value *v0 = a->src[0]->gvalue(); 634bf215546Sopenharmony_ci value *v1 = a->src[1]->gvalue(); 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci last_arg = -2; 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci if (v1->is_const()) { 639bf215546Sopenharmony_ci literal arg = v1->get_const_value(); 640bf215546Sopenharmony_ci apply_alu_src_mod(a->bc, 1, arg); 641bf215546Sopenharmony_ci if (cur_neg && distribute_neg) 642bf215546Sopenharmony_ci arg.f = -arg.f; 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_ci if (a == n) 645bf215546Sopenharmony_ci cr = arg; 646bf215546Sopenharmony_ci else 647bf215546Sopenharmony_ci eval_const_op(op, cr, cr, arg); 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci if (v0->def) { 650bf215546Sopenharmony_ci alu_node *d0 = static_cast<alu_node*>(v0->def); 651bf215546Sopenharmony_ci if ((d0->is_alu_op(op) || 652bf215546Sopenharmony_ci (op == ALU_OP2_MUL_IEEE && 653bf215546Sopenharmony_ci d0->is_alu_op(ALU_OP2_MUL))) && 654bf215546Sopenharmony_ci !d0->bc.omod && !d0->bc.clamp && 655bf215546Sopenharmony_ci !a->bc.src[0].abs && 656bf215546Sopenharmony_ci (!a->bc.src[0].neg || allow_neg)) { 657bf215546Sopenharmony_ci cur_neg ^= a->bc.src[0].neg; 658bf215546Sopenharmony_ci a = d0; 659bf215546Sopenharmony_ci continue; 660bf215546Sopenharmony_ci } 661bf215546Sopenharmony_ci } 662bf215546Sopenharmony_ci last_arg = 0; 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci } 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci if (v0->is_const()) { 667bf215546Sopenharmony_ci literal arg = v0->get_const_value(); 668bf215546Sopenharmony_ci apply_alu_src_mod(a->bc, 0, arg); 669bf215546Sopenharmony_ci if (cur_neg && distribute_neg) 670bf215546Sopenharmony_ci arg.f = -arg.f; 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci if (last_arg == 0) { 673bf215546Sopenharmony_ci eval_const_op(op, cr, cr, arg); 674bf215546Sopenharmony_ci last_arg = -1; 675bf215546Sopenharmony_ci break; 676bf215546Sopenharmony_ci } 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci if (a == n) 679bf215546Sopenharmony_ci cr = arg; 680bf215546Sopenharmony_ci else 681bf215546Sopenharmony_ci eval_const_op(op, cr, cr, arg); 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci if (v1->def) { 684bf215546Sopenharmony_ci alu_node *d1 = static_cast<alu_node*>(v1->def); 685bf215546Sopenharmony_ci if ((d1->is_alu_op(op) || 686bf215546Sopenharmony_ci (op == ALU_OP2_MUL_IEEE && 687bf215546Sopenharmony_ci d1->is_alu_op(ALU_OP2_MUL))) && 688bf215546Sopenharmony_ci !d1->bc.omod && !d1->bc.clamp && 689bf215546Sopenharmony_ci !a->bc.src[1].abs && 690bf215546Sopenharmony_ci (!a->bc.src[1].neg || allow_neg)) { 691bf215546Sopenharmony_ci cur_neg ^= a->bc.src[1].neg; 692bf215546Sopenharmony_ci a = d1; 693bf215546Sopenharmony_ci continue; 694bf215546Sopenharmony_ci } 695bf215546Sopenharmony_ci } 696bf215546Sopenharmony_ci 697bf215546Sopenharmony_ci last_arg = 1; 698bf215546Sopenharmony_ci } 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci break; 701bf215546Sopenharmony_ci }; 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci if (last_arg == -1) { 704bf215546Sopenharmony_ci // result is const 705bf215546Sopenharmony_ci apply_alu_dst_mod(n->bc, cr); 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci if (n->bc.op == op) { 708bf215546Sopenharmony_ci convert_to_mov(*n, sh.get_const_value(cr)); 709bf215546Sopenharmony_ci fold_alu_op1(*n); 710bf215546Sopenharmony_ci return true; 711bf215546Sopenharmony_ci } else { // MULADD => ADD 712bf215546Sopenharmony_ci n->src[0] = n->src[2]; 713bf215546Sopenharmony_ci n->bc.src[0] = n->bc.src[2]; 714bf215546Sopenharmony_ci n->src[1] = sh.get_const_value(cr); 715bf215546Sopenharmony_ci n->bc.src[1].clear(); 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci n->src.resize(2); 718bf215546Sopenharmony_ci n->bc.set_op(ALU_OP2_ADD); 719bf215546Sopenharmony_ci } 720bf215546Sopenharmony_ci } else if (last_arg >= 0) { 721bf215546Sopenharmony_ci n->src[0] = a->src[last_arg]; 722bf215546Sopenharmony_ci n->bc.src[0] = a->bc.src[last_arg]; 723bf215546Sopenharmony_ci n->bc.src[0].neg ^= cur_neg; 724bf215546Sopenharmony_ci n->src[1] = sh.get_const_value(cr); 725bf215546Sopenharmony_ci n->bc.src[1].clear(); 726bf215546Sopenharmony_ci } 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci return false; 729bf215546Sopenharmony_ci} 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_cibool expr_handler::fold_alu_op2(alu_node& n) { 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci if (n.src.size() < 2) 734bf215546Sopenharmony_ci return false; 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci unsigned flags = n.bc.op_ptr->flags; 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci if (flags & AF_SET) { 739bf215546Sopenharmony_ci return fold_setcc(n); 740bf215546Sopenharmony_ci } 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci if (!sh.safe_math && (flags & AF_M_ASSOC)) { 743bf215546Sopenharmony_ci if (fold_assoc(&n)) 744bf215546Sopenharmony_ci return true; 745bf215546Sopenharmony_ci } 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci value* v0 = n.src[0]->gvalue(); 748bf215546Sopenharmony_ci value* v1 = n.src[1]->gvalue(); 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci assert(v0 && v1); 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci // handle some operations with equal args, e.g. x + x => x * 2 753bf215546Sopenharmony_ci if (v0 == v1) { 754bf215546Sopenharmony_ci if (n.bc.src[0].neg == n.bc.src[1].neg && 755bf215546Sopenharmony_ci n.bc.src[0].abs == n.bc.src[1].abs) { 756bf215546Sopenharmony_ci switch (n.bc.op) { 757bf215546Sopenharmony_ci case ALU_OP2_MIN: // (MIN x, x) => (MOV x) 758bf215546Sopenharmony_ci case ALU_OP2_MIN_DX10: 759bf215546Sopenharmony_ci case ALU_OP2_MAX: 760bf215546Sopenharmony_ci case ALU_OP2_MAX_DX10: 761bf215546Sopenharmony_ci convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs); 762bf215546Sopenharmony_ci return fold_alu_op1(n); 763bf215546Sopenharmony_ci case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) 764bf215546Sopenharmony_ci if (!sh.safe_math) { 765bf215546Sopenharmony_ci n.src[1] = sh.get_const_value(2.0f); 766bf215546Sopenharmony_ci n.bc.src[1].clear(); 767bf215546Sopenharmony_ci n.bc.set_op(ALU_OP2_MUL); 768bf215546Sopenharmony_ci return fold_alu_op2(n); 769bf215546Sopenharmony_ci } 770bf215546Sopenharmony_ci break; 771bf215546Sopenharmony_ci } 772bf215546Sopenharmony_ci } 773bf215546Sopenharmony_ci if (n.bc.src[0].neg != n.bc.src[1].neg && 774bf215546Sopenharmony_ci n.bc.src[0].abs == n.bc.src[1].abs) { 775bf215546Sopenharmony_ci switch (n.bc.op) { 776bf215546Sopenharmony_ci case ALU_OP2_ADD: // (ADD x, -x) => (MOV 0) 777bf215546Sopenharmony_ci if (!sh.safe_math) { 778bf215546Sopenharmony_ci convert_to_mov(n, sh.get_const_value(literal(0))); 779bf215546Sopenharmony_ci return fold_alu_op1(n); 780bf215546Sopenharmony_ci } 781bf215546Sopenharmony_ci break; 782bf215546Sopenharmony_ci } 783bf215546Sopenharmony_ci } 784bf215546Sopenharmony_ci } 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci if (n.bc.op == ALU_OP2_ADD) { 787bf215546Sopenharmony_ci if (fold_mul_add(&n)) 788bf215546Sopenharmony_ci return true; 789bf215546Sopenharmony_ci } 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci bool isc0 = v0->is_const(); 792bf215546Sopenharmony_ci bool isc1 = v1->is_const(); 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci if (!isc0 && !isc1) 795bf215546Sopenharmony_ci return false; 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_ci literal dv, cv0, cv1; 798bf215546Sopenharmony_ci 799bf215546Sopenharmony_ci if (isc0) { 800bf215546Sopenharmony_ci cv0 = v0->get_const_value(); 801bf215546Sopenharmony_ci apply_alu_src_mod(n.bc, 0, cv0); 802bf215546Sopenharmony_ci } 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_ci if (isc1) { 805bf215546Sopenharmony_ci cv1 = v1->get_const_value(); 806bf215546Sopenharmony_ci apply_alu_src_mod(n.bc, 1, cv1); 807bf215546Sopenharmony_ci } 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci if (isc0 && isc1) { 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci if (!eval_const_op(n.bc.op, dv, cv0, cv1)) 812bf215546Sopenharmony_ci return false; 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci } else { // one source is const 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci if (isc0 && cv0 == literal(0)) { 817bf215546Sopenharmony_ci switch (n.bc.op) { 818bf215546Sopenharmony_ci case ALU_OP2_ADD: 819bf215546Sopenharmony_ci case ALU_OP2_ADD_INT: 820bf215546Sopenharmony_ci case ALU_OP2_MAX_UINT: 821bf215546Sopenharmony_ci case ALU_OP2_OR_INT: 822bf215546Sopenharmony_ci case ALU_OP2_XOR_INT: 823bf215546Sopenharmony_ci convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs); 824bf215546Sopenharmony_ci return fold_alu_op1(n); 825bf215546Sopenharmony_ci case ALU_OP2_AND_INT: 826bf215546Sopenharmony_ci case ALU_OP2_ASHR_INT: 827bf215546Sopenharmony_ci case ALU_OP2_LSHL_INT: 828bf215546Sopenharmony_ci case ALU_OP2_LSHR_INT: 829bf215546Sopenharmony_ci case ALU_OP2_MIN_UINT: 830bf215546Sopenharmony_ci case ALU_OP2_MUL: 831bf215546Sopenharmony_ci case ALU_OP2_MULHI_UINT: 832bf215546Sopenharmony_ci case ALU_OP2_MULLO_UINT: 833bf215546Sopenharmony_ci convert_to_mov(n, sh.get_const_value(literal(0))); 834bf215546Sopenharmony_ci return fold_alu_op1(n); 835bf215546Sopenharmony_ci } 836bf215546Sopenharmony_ci } else if (isc1 && cv1 == literal(0)) { 837bf215546Sopenharmony_ci switch (n.bc.op) { 838bf215546Sopenharmony_ci case ALU_OP2_ADD: 839bf215546Sopenharmony_ci case ALU_OP2_ADD_INT: 840bf215546Sopenharmony_ci case ALU_OP2_ASHR_INT: 841bf215546Sopenharmony_ci case ALU_OP2_LSHL_INT: 842bf215546Sopenharmony_ci case ALU_OP2_LSHR_INT: 843bf215546Sopenharmony_ci case ALU_OP2_MAX_UINT: 844bf215546Sopenharmony_ci case ALU_OP2_OR_INT: 845bf215546Sopenharmony_ci case ALU_OP2_SUB_INT: 846bf215546Sopenharmony_ci case ALU_OP2_XOR_INT: 847bf215546Sopenharmony_ci convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs); 848bf215546Sopenharmony_ci return fold_alu_op1(n); 849bf215546Sopenharmony_ci case ALU_OP2_AND_INT: 850bf215546Sopenharmony_ci case ALU_OP2_MIN_UINT: 851bf215546Sopenharmony_ci case ALU_OP2_MUL: 852bf215546Sopenharmony_ci case ALU_OP2_MULHI_UINT: 853bf215546Sopenharmony_ci case ALU_OP2_MULLO_UINT: 854bf215546Sopenharmony_ci convert_to_mov(n, sh.get_const_value(literal(0))); 855bf215546Sopenharmony_ci return fold_alu_op1(n); 856bf215546Sopenharmony_ci } 857bf215546Sopenharmony_ci } else if (isc0 && cv0 == literal(1.0f)) { 858bf215546Sopenharmony_ci switch (n.bc.op) { 859bf215546Sopenharmony_ci case ALU_OP2_MUL: 860bf215546Sopenharmony_ci case ALU_OP2_MUL_IEEE: 861bf215546Sopenharmony_ci convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs); 862bf215546Sopenharmony_ci return fold_alu_op1(n); 863bf215546Sopenharmony_ci } 864bf215546Sopenharmony_ci } else if (isc1 && cv1 == literal(1.0f)) { 865bf215546Sopenharmony_ci switch (n.bc.op) { 866bf215546Sopenharmony_ci case ALU_OP2_MUL: 867bf215546Sopenharmony_ci case ALU_OP2_MUL_IEEE: 868bf215546Sopenharmony_ci convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs); 869bf215546Sopenharmony_ci return fold_alu_op1(n); 870bf215546Sopenharmony_ci } 871bf215546Sopenharmony_ci } 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_ci return false; 874bf215546Sopenharmony_ci } 875bf215546Sopenharmony_ci 876bf215546Sopenharmony_ci apply_alu_dst_mod(n.bc, dv); 877bf215546Sopenharmony_ci assign_source(n.dst[0], get_const(dv)); 878bf215546Sopenharmony_ci return true; 879bf215546Sopenharmony_ci} 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_cibool expr_handler::evaluate_condition(unsigned alu_cnd_flags, 882bf215546Sopenharmony_ci literal s1, literal s2) { 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK; 885bf215546Sopenharmony_ci unsigned cc = alu_cnd_flags & AF_CC_MASK; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci switch (cmp_type) { 888bf215546Sopenharmony_ci case AF_FLOAT_CMP: { 889bf215546Sopenharmony_ci switch (cc) { 890bf215546Sopenharmony_ci case AF_CC_E : return s1.f == s2.f; 891bf215546Sopenharmony_ci case AF_CC_GT: return s1.f > s2.f; 892bf215546Sopenharmony_ci case AF_CC_GE: return s1.f >= s2.f; 893bf215546Sopenharmony_ci case AF_CC_NE: return s1.f != s2.f; 894bf215546Sopenharmony_ci case AF_CC_LT: return s1.f < s2.f; 895bf215546Sopenharmony_ci case AF_CC_LE: return s1.f <= s2.f; 896bf215546Sopenharmony_ci default: 897bf215546Sopenharmony_ci assert(!"invalid condition code"); 898bf215546Sopenharmony_ci return false; 899bf215546Sopenharmony_ci } 900bf215546Sopenharmony_ci } 901bf215546Sopenharmony_ci case AF_INT_CMP: { 902bf215546Sopenharmony_ci switch (cc) { 903bf215546Sopenharmony_ci case AF_CC_E : return s1.i == s2.i; 904bf215546Sopenharmony_ci case AF_CC_GT: return s1.i > s2.i; 905bf215546Sopenharmony_ci case AF_CC_GE: return s1.i >= s2.i; 906bf215546Sopenharmony_ci case AF_CC_NE: return s1.i != s2.i; 907bf215546Sopenharmony_ci case AF_CC_LT: return s1.i < s2.i; 908bf215546Sopenharmony_ci case AF_CC_LE: return s1.i <= s2.i; 909bf215546Sopenharmony_ci default: 910bf215546Sopenharmony_ci assert(!"invalid condition code"); 911bf215546Sopenharmony_ci return false; 912bf215546Sopenharmony_ci } 913bf215546Sopenharmony_ci } 914bf215546Sopenharmony_ci case AF_UINT_CMP: { 915bf215546Sopenharmony_ci switch (cc) { 916bf215546Sopenharmony_ci case AF_CC_E : return s1.u == s2.u; 917bf215546Sopenharmony_ci case AF_CC_GT: return s1.u > s2.u; 918bf215546Sopenharmony_ci case AF_CC_GE: return s1.u >= s2.u; 919bf215546Sopenharmony_ci case AF_CC_NE: return s1.u != s2.u; 920bf215546Sopenharmony_ci case AF_CC_LT: return s1.u < s2.u; 921bf215546Sopenharmony_ci case AF_CC_LE: return s1.u <= s2.u; 922bf215546Sopenharmony_ci default: 923bf215546Sopenharmony_ci assert(!"invalid condition code"); 924bf215546Sopenharmony_ci return false; 925bf215546Sopenharmony_ci } 926bf215546Sopenharmony_ci } 927bf215546Sopenharmony_ci default: 928bf215546Sopenharmony_ci assert(!"invalid cmp_type"); 929bf215546Sopenharmony_ci return false; 930bf215546Sopenharmony_ci } 931bf215546Sopenharmony_ci} 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_cibool expr_handler::fold_alu_op3(alu_node& n) { 934bf215546Sopenharmony_ci 935bf215546Sopenharmony_ci if (n.src.size() < 3) 936bf215546Sopenharmony_ci return false; 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ci if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) { 939bf215546Sopenharmony_ci if (fold_assoc(&n)) 940bf215546Sopenharmony_ci return true; 941bf215546Sopenharmony_ci if (n.src.size() < 3) 942bf215546Sopenharmony_ci return fold_alu_op2(n); 943bf215546Sopenharmony_ci } 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci value* v0 = n.src[0]->gvalue(); 946bf215546Sopenharmony_ci value* v1 = n.src[1]->gvalue(); 947bf215546Sopenharmony_ci value* v2 = n.src[2]->gvalue(); 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci /* LDS instructions look like op3 with no dst - don't fold. */ 950bf215546Sopenharmony_ci if (!n.dst[0]) 951bf215546Sopenharmony_ci return false; 952bf215546Sopenharmony_ci assert(v0 && v1 && v2 && n.dst[0]); 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_ci bool isc0 = v0->is_const(); 955bf215546Sopenharmony_ci bool isc1 = v1->is_const(); 956bf215546Sopenharmony_ci bool isc2 = v2->is_const(); 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci literal dv, cv0, cv1, cv2; 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_ci if (isc0) { 961bf215546Sopenharmony_ci cv0 = v0->get_const_value(); 962bf215546Sopenharmony_ci apply_alu_src_mod(n.bc, 0, cv0); 963bf215546Sopenharmony_ci } 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci if (isc1) { 966bf215546Sopenharmony_ci cv1 = v1->get_const_value(); 967bf215546Sopenharmony_ci apply_alu_src_mod(n.bc, 1, cv1); 968bf215546Sopenharmony_ci } 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci if (isc2) { 971bf215546Sopenharmony_ci cv2 = v2->get_const_value(); 972bf215546Sopenharmony_ci apply_alu_src_mod(n.bc, 2, cv2); 973bf215546Sopenharmony_ci } 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci unsigned flags = n.bc.op_ptr->flags; 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci if (flags & AF_CMOV) { 978bf215546Sopenharmony_ci int src = 0; 979bf215546Sopenharmony_ci 980bf215546Sopenharmony_ci if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) { 981bf215546Sopenharmony_ci // result doesn't depend on condition, convert to MOV 982bf215546Sopenharmony_ci src = 1; 983bf215546Sopenharmony_ci } else if (isc0) { 984bf215546Sopenharmony_ci // src0 is const, condition can be evaluated, convert to MOV 985bf215546Sopenharmony_ci bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK | 986bf215546Sopenharmony_ci AF_CMP_TYPE_MASK), cv0, literal(0)); 987bf215546Sopenharmony_ci src = cond ? 1 : 2; 988bf215546Sopenharmony_ci } 989bf215546Sopenharmony_ci 990bf215546Sopenharmony_ci if (src) { 991bf215546Sopenharmony_ci // if src is selected, convert to MOV 992bf215546Sopenharmony_ci convert_to_mov(n, n.src[src], n.bc.src[src].neg); 993bf215546Sopenharmony_ci return fold_alu_op1(n); 994bf215546Sopenharmony_ci } 995bf215546Sopenharmony_ci } 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_ci // handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b)) 998bf215546Sopenharmony_ci if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD || 999bf215546Sopenharmony_ci n.bc.op == ALU_OP3_MULADD_IEEE)) { 1000bf215546Sopenharmony_ci 1001bf215546Sopenharmony_ci unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ? 1002bf215546Sopenharmony_ci ALU_OP2_MUL_IEEE : ALU_OP2_MUL; 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci if (!isc2 && v2->def && v2->def->is_alu_op(op)) { 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_ci alu_node *md = static_cast<alu_node*>(v2->def); 1007bf215546Sopenharmony_ci value *mv0 = md->src[0]->gvalue(); 1008bf215546Sopenharmony_ci value *mv1 = md->src[1]->gvalue(); 1009bf215546Sopenharmony_ci 1010bf215546Sopenharmony_ci int es0 = -1, es1 = -1; 1011bf215546Sopenharmony_ci 1012bf215546Sopenharmony_ci if (v0 == mv0) { 1013bf215546Sopenharmony_ci es0 = 0; 1014bf215546Sopenharmony_ci es1 = 0; 1015bf215546Sopenharmony_ci } else if (v0 == mv1) { 1016bf215546Sopenharmony_ci es0 = 0; 1017bf215546Sopenharmony_ci es1 = 1; 1018bf215546Sopenharmony_ci } else if (v1 == mv0) { 1019bf215546Sopenharmony_ci es0 = 1; 1020bf215546Sopenharmony_ci es1 = 0; 1021bf215546Sopenharmony_ci } else if (v1 == mv1) { 1022bf215546Sopenharmony_ci es0 = 1; 1023bf215546Sopenharmony_ci es1 = 1; 1024bf215546Sopenharmony_ci } 1025bf215546Sopenharmony_ci 1026bf215546Sopenharmony_ci value *va0 = es0 == 0 ? v1 : v0; 1027bf215546Sopenharmony_ci value *va1 = es1 == 0 ? mv1 : mv0; 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci /* Don't fold if no equal multipliers were found. 1030bf215546Sopenharmony_ci * Also don#t fold if the operands of the to be created ADD are both 1031bf215546Sopenharmony_ci * relatively accessed with different AR values because that would 1032bf215546Sopenharmony_ci * create impossible code. 1033bf215546Sopenharmony_ci */ 1034bf215546Sopenharmony_ci if (es0 != -1 && 1035bf215546Sopenharmony_ci (!va0->is_rel() || !va1->is_rel() || 1036bf215546Sopenharmony_ci (va0->rel == va1->rel))) { 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_ci alu_node *add = sh.create_alu(); 1039bf215546Sopenharmony_ci add->bc.set_op(ALU_OP2_ADD); 1040bf215546Sopenharmony_ci 1041bf215546Sopenharmony_ci add->dst.resize(1); 1042bf215546Sopenharmony_ci add->src.resize(2); 1043bf215546Sopenharmony_ci 1044bf215546Sopenharmony_ci value *t = sh.create_temp_value(); 1045bf215546Sopenharmony_ci t->def = add; 1046bf215546Sopenharmony_ci add->dst[0] = t; 1047bf215546Sopenharmony_ci add->src[0] = va0; 1048bf215546Sopenharmony_ci add->src[1] = va1; 1049bf215546Sopenharmony_ci add->bc.src[0] = n.bc.src[!es0]; 1050bf215546Sopenharmony_ci add->bc.src[1] = md->bc.src[!es1]; 1051bf215546Sopenharmony_ci 1052bf215546Sopenharmony_ci add->bc.src[1].neg ^= n.bc.src[2].neg ^ 1053bf215546Sopenharmony_ci (n.bc.src[es0].neg != md->bc.src[es1].neg); 1054bf215546Sopenharmony_ci 1055bf215546Sopenharmony_ci n.insert_before(add); 1056bf215546Sopenharmony_ci vt.add_value(t); 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci t = t->gvalue(); 1059bf215546Sopenharmony_ci 1060bf215546Sopenharmony_ci if (es0 == 1) { 1061bf215546Sopenharmony_ci n.src[0] = n.src[1]; 1062bf215546Sopenharmony_ci n.bc.src[0] = n.bc.src[1]; 1063bf215546Sopenharmony_ci } 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci n.src[1] = t; 1066bf215546Sopenharmony_ci n.bc.src[1].clear(); 1067bf215546Sopenharmony_ci 1068bf215546Sopenharmony_ci n.src.resize(2); 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci n.bc.set_op(op); 1071bf215546Sopenharmony_ci return fold_alu_op2(n); 1072bf215546Sopenharmony_ci } 1073bf215546Sopenharmony_ci } 1074bf215546Sopenharmony_ci } 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_ci if (!isc0 && !isc1 && !isc2) 1077bf215546Sopenharmony_ci return false; 1078bf215546Sopenharmony_ci 1079bf215546Sopenharmony_ci if (isc0 && isc1 && isc2) { 1080bf215546Sopenharmony_ci switch (n.bc.op) { 1081bf215546Sopenharmony_ci case ALU_OP3_MULADD_IEEE: 1082bf215546Sopenharmony_ci case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break; 1083bf215546Sopenharmony_ci 1084bf215546Sopenharmony_ci // TODO 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci default: 1087bf215546Sopenharmony_ci return false; 1088bf215546Sopenharmony_ci } 1089bf215546Sopenharmony_ci } else { 1090bf215546Sopenharmony_ci if (isc0 && isc1) { 1091bf215546Sopenharmony_ci switch (n.bc.op) { 1092bf215546Sopenharmony_ci case ALU_OP3_MULADD: 1093bf215546Sopenharmony_ci case ALU_OP3_MULADD_IEEE: 1094bf215546Sopenharmony_ci dv = cv0.f * cv1.f; 1095bf215546Sopenharmony_ci n.bc.set_op(ALU_OP2_ADD); 1096bf215546Sopenharmony_ci n.src[0] = sh.get_const_value(dv); 1097bf215546Sopenharmony_ci n.bc.src[0].clear(); 1098bf215546Sopenharmony_ci n.src[1] = n.src[2]; 1099bf215546Sopenharmony_ci n.bc.src[1] = n.bc.src[2]; 1100bf215546Sopenharmony_ci n.src.resize(2); 1101bf215546Sopenharmony_ci return fold_alu_op2(n); 1102bf215546Sopenharmony_ci } 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci if (n.bc.op == ALU_OP3_MULADD) { 1106bf215546Sopenharmony_ci if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) { 1107bf215546Sopenharmony_ci convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs); 1108bf215546Sopenharmony_ci return fold_alu_op1(n); 1109bf215546Sopenharmony_ci } 1110bf215546Sopenharmony_ci } 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) { 1113bf215546Sopenharmony_ci unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ? 1114bf215546Sopenharmony_ci ALU_OP2_MUL_IEEE : ALU_OP2_MUL; 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci if (isc1 && v0 == v2) { 1117bf215546Sopenharmony_ci cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f); 1118bf215546Sopenharmony_ci n.src[1] = sh.get_const_value(cv1); 1119bf215546Sopenharmony_ci n.bc.src[1].neg = 0; 1120bf215546Sopenharmony_ci n.bc.src[1].abs = 0; 1121bf215546Sopenharmony_ci n.bc.set_op(op); 1122bf215546Sopenharmony_ci n.src.resize(2); 1123bf215546Sopenharmony_ci return fold_alu_op2(n); 1124bf215546Sopenharmony_ci } else if (isc0 && v1 == v2) { 1125bf215546Sopenharmony_ci cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f); 1126bf215546Sopenharmony_ci n.src[0] = sh.get_const_value(cv0); 1127bf215546Sopenharmony_ci n.bc.src[0].neg = 0; 1128bf215546Sopenharmony_ci n.bc.src[0].abs = 0; 1129bf215546Sopenharmony_ci n.bc.set_op(op); 1130bf215546Sopenharmony_ci n.src.resize(2); 1131bf215546Sopenharmony_ci return fold_alu_op2(n); 1132bf215546Sopenharmony_ci } 1133bf215546Sopenharmony_ci } 1134bf215546Sopenharmony_ci 1135bf215546Sopenharmony_ci return false; 1136bf215546Sopenharmony_ci } 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci apply_alu_dst_mod(n.bc, dv); 1139bf215546Sopenharmony_ci assign_source(n.dst[0], get_const(dv)); 1140bf215546Sopenharmony_ci return true; 1141bf215546Sopenharmony_ci} 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ciunsigned invert_setcc_condition(unsigned cc, bool &swap_args) { 1144bf215546Sopenharmony_ci unsigned ncc = 0; 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci switch (cc) { 1147bf215546Sopenharmony_ci case AF_CC_E: ncc = AF_CC_NE; break; 1148bf215546Sopenharmony_ci case AF_CC_NE: ncc = AF_CC_E; break; 1149bf215546Sopenharmony_ci case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break; 1150bf215546Sopenharmony_ci case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break; 1151bf215546Sopenharmony_ci default: 1152bf215546Sopenharmony_ci assert(!"unexpected condition code"); 1153bf215546Sopenharmony_ci break; 1154bf215546Sopenharmony_ci } 1155bf215546Sopenharmony_ci return ncc; 1156bf215546Sopenharmony_ci} 1157bf215546Sopenharmony_ci 1158bf215546Sopenharmony_ciunsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) { 1159bf215546Sopenharmony_ci 1160bf215546Sopenharmony_ci if (int_dst && cmp_type == AF_FLOAT_CMP) { 1161bf215546Sopenharmony_ci switch (cc) { 1162bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_SETE_DX10; 1163bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_SETNE_DX10; 1164bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_SETGT_DX10; 1165bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_SETGE_DX10; 1166bf215546Sopenharmony_ci } 1167bf215546Sopenharmony_ci } else { 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_ci switch(cmp_type) { 1170bf215546Sopenharmony_ci case AF_FLOAT_CMP: { 1171bf215546Sopenharmony_ci switch (cc) { 1172bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_SETE; 1173bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_SETNE; 1174bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_SETGT; 1175bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_SETGE; 1176bf215546Sopenharmony_ci } 1177bf215546Sopenharmony_ci break; 1178bf215546Sopenharmony_ci } 1179bf215546Sopenharmony_ci case AF_INT_CMP: { 1180bf215546Sopenharmony_ci switch (cc) { 1181bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_SETE_INT; 1182bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_SETNE_INT; 1183bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_SETGT_INT; 1184bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_SETGE_INT; 1185bf215546Sopenharmony_ci } 1186bf215546Sopenharmony_ci break; 1187bf215546Sopenharmony_ci } 1188bf215546Sopenharmony_ci case AF_UINT_CMP: { 1189bf215546Sopenharmony_ci switch (cc) { 1190bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_SETE_INT; 1191bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_SETNE_INT; 1192bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_SETGT_UINT; 1193bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_SETGE_UINT; 1194bf215546Sopenharmony_ci } 1195bf215546Sopenharmony_ci break; 1196bf215546Sopenharmony_ci } 1197bf215546Sopenharmony_ci } 1198bf215546Sopenharmony_ci } 1199bf215546Sopenharmony_ci 1200bf215546Sopenharmony_ci assert(!"unexpected cc&cmp_type combination"); 1201bf215546Sopenharmony_ci return ~0u; 1202bf215546Sopenharmony_ci} 1203bf215546Sopenharmony_ci 1204bf215546Sopenharmony_ciunsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) { 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci switch(cmp_type) { 1207bf215546Sopenharmony_ci case AF_FLOAT_CMP: { 1208bf215546Sopenharmony_ci switch (cc) { 1209bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_PRED_SETE; 1210bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_PRED_SETNE; 1211bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_PRED_SETGT; 1212bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_PRED_SETGE; 1213bf215546Sopenharmony_ci } 1214bf215546Sopenharmony_ci break; 1215bf215546Sopenharmony_ci } 1216bf215546Sopenharmony_ci case AF_INT_CMP: { 1217bf215546Sopenharmony_ci switch (cc) { 1218bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_PRED_SETE_INT; 1219bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT; 1220bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT; 1221bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT; 1222bf215546Sopenharmony_ci } 1223bf215546Sopenharmony_ci break; 1224bf215546Sopenharmony_ci } 1225bf215546Sopenharmony_ci case AF_UINT_CMP: { 1226bf215546Sopenharmony_ci switch (cc) { 1227bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_PRED_SETE_INT; 1228bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT; 1229bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT; 1230bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT; 1231bf215546Sopenharmony_ci } 1232bf215546Sopenharmony_ci break; 1233bf215546Sopenharmony_ci } 1234bf215546Sopenharmony_ci } 1235bf215546Sopenharmony_ci 1236bf215546Sopenharmony_ci assert(!"unexpected cc&cmp_type combination"); 1237bf215546Sopenharmony_ci return ~0u; 1238bf215546Sopenharmony_ci} 1239bf215546Sopenharmony_ci 1240bf215546Sopenharmony_ciunsigned get_killcc_op(unsigned cc, unsigned cmp_type) { 1241bf215546Sopenharmony_ci 1242bf215546Sopenharmony_ci switch(cmp_type) { 1243bf215546Sopenharmony_ci case AF_FLOAT_CMP: { 1244bf215546Sopenharmony_ci switch (cc) { 1245bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_KILLE; 1246bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_KILLNE; 1247bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_KILLGT; 1248bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_KILLGE; 1249bf215546Sopenharmony_ci } 1250bf215546Sopenharmony_ci break; 1251bf215546Sopenharmony_ci } 1252bf215546Sopenharmony_ci case AF_INT_CMP: { 1253bf215546Sopenharmony_ci switch (cc) { 1254bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_KILLE_INT; 1255bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_KILLNE_INT; 1256bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_KILLGT_INT; 1257bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_KILLGE_INT; 1258bf215546Sopenharmony_ci } 1259bf215546Sopenharmony_ci break; 1260bf215546Sopenharmony_ci } 1261bf215546Sopenharmony_ci case AF_UINT_CMP: { 1262bf215546Sopenharmony_ci switch (cc) { 1263bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP2_KILLE_INT; 1264bf215546Sopenharmony_ci case AF_CC_NE: return ALU_OP2_KILLNE_INT; 1265bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP2_KILLGT_UINT; 1266bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP2_KILLGE_UINT; 1267bf215546Sopenharmony_ci } 1268bf215546Sopenharmony_ci break; 1269bf215546Sopenharmony_ci } 1270bf215546Sopenharmony_ci } 1271bf215546Sopenharmony_ci 1272bf215546Sopenharmony_ci assert(!"unexpected cc&cmp_type combination"); 1273bf215546Sopenharmony_ci return ~0u; 1274bf215546Sopenharmony_ci} 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_ciunsigned get_cndcc_op(unsigned cc, unsigned cmp_type) { 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci switch(cmp_type) { 1279bf215546Sopenharmony_ci case AF_FLOAT_CMP: { 1280bf215546Sopenharmony_ci switch (cc) { 1281bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP3_CNDE; 1282bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP3_CNDGT; 1283bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP3_CNDGE; 1284bf215546Sopenharmony_ci } 1285bf215546Sopenharmony_ci break; 1286bf215546Sopenharmony_ci } 1287bf215546Sopenharmony_ci case AF_INT_CMP: { 1288bf215546Sopenharmony_ci switch (cc) { 1289bf215546Sopenharmony_ci case AF_CC_E: return ALU_OP3_CNDE_INT; 1290bf215546Sopenharmony_ci case AF_CC_GT: return ALU_OP3_CNDGT_INT; 1291bf215546Sopenharmony_ci case AF_CC_GE: return ALU_OP3_CNDGE_INT; 1292bf215546Sopenharmony_ci } 1293bf215546Sopenharmony_ci break; 1294bf215546Sopenharmony_ci } 1295bf215546Sopenharmony_ci } 1296bf215546Sopenharmony_ci 1297bf215546Sopenharmony_ci assert(!"unexpected cc&cmp_type combination"); 1298bf215546Sopenharmony_ci return ~0u; 1299bf215546Sopenharmony_ci} 1300bf215546Sopenharmony_ci 1301bf215546Sopenharmony_ci 1302bf215546Sopenharmony_civoid convert_predset_to_set(shader& sh, alu_node* a) { 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci unsigned flags = a->bc.op_ptr->flags; 1305bf215546Sopenharmony_ci unsigned cc = flags & AF_CC_MASK; 1306bf215546Sopenharmony_ci unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 1307bf215546Sopenharmony_ci 1308bf215546Sopenharmony_ci bool swap_args = false; 1309bf215546Sopenharmony_ci 1310bf215546Sopenharmony_ci cc = invert_setcc_condition(cc, swap_args); 1311bf215546Sopenharmony_ci 1312bf215546Sopenharmony_ci unsigned newop = get_setcc_op(cc, cmp_type, true); 1313bf215546Sopenharmony_ci 1314bf215546Sopenharmony_ci a->dst.resize(1); 1315bf215546Sopenharmony_ci a->bc.set_op(newop); 1316bf215546Sopenharmony_ci 1317bf215546Sopenharmony_ci if (swap_args) { 1318bf215546Sopenharmony_ci std::swap(a->src[0], a->src[1]); 1319bf215546Sopenharmony_ci std::swap(a->bc.src[0], a->bc.src[1]); 1320bf215546Sopenharmony_ci } 1321bf215546Sopenharmony_ci 1322bf215546Sopenharmony_ci a->bc.update_exec_mask = 0; 1323bf215546Sopenharmony_ci a->bc.update_pred = 0; 1324bf215546Sopenharmony_ci} 1325bf215546Sopenharmony_ci 1326bf215546Sopenharmony_ci} // namespace r600_sb 1327