1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2010 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci/** 25bf215546Sopenharmony_ci * \file lower_instructions.cpp 26bf215546Sopenharmony_ci * 27bf215546Sopenharmony_ci * Many GPUs lack native instructions for certain expression operations, and 28bf215546Sopenharmony_ci * must replace them with some other expression tree. This pass lowers some 29bf215546Sopenharmony_ci * of the most common cases, allowing the lowering code to be implemented once 30bf215546Sopenharmony_ci * rather than in each driver backend. 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * Currently supported transformations: 33bf215546Sopenharmony_ci * - SUB_TO_ADD_NEG 34bf215546Sopenharmony_ci * - LDEXP_TO_ARITH 35bf215546Sopenharmony_ci * - CARRY_TO_ARITH 36bf215546Sopenharmony_ci * - BORROW_TO_ARITH 37bf215546Sopenharmony_ci * - DOPS_TO_DFRAC 38bf215546Sopenharmony_ci * 39bf215546Sopenharmony_ci * SUB_TO_ADD_NEG: 40bf215546Sopenharmony_ci * --------------- 41bf215546Sopenharmony_ci * Breaks an ir_binop_sub expression down to add(op0, neg(op1)) 42bf215546Sopenharmony_ci * 43bf215546Sopenharmony_ci * This simplifies expression reassociation, and for many backends 44bf215546Sopenharmony_ci * there is no subtract operation separate from adding the negation. 45bf215546Sopenharmony_ci * For backends with native subtract operations, they will probably 46bf215546Sopenharmony_ci * want to recognize add(op0, neg(op1)) or the other way around to 47bf215546Sopenharmony_ci * produce a subtract anyway. 48bf215546Sopenharmony_ci * 49bf215546Sopenharmony_ci * LDEXP_TO_ARITH: 50bf215546Sopenharmony_ci * ------------- 51bf215546Sopenharmony_ci * Converts ir_binop_ldexp to arithmetic and bit operations for float sources. 52bf215546Sopenharmony_ci * 53bf215546Sopenharmony_ci * DFREXP_DLDEXP_TO_ARITH: 54bf215546Sopenharmony_ci * --------------- 55bf215546Sopenharmony_ci * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to 56bf215546Sopenharmony_ci * arithmetic and bit ops for double arguments. 57bf215546Sopenharmony_ci * 58bf215546Sopenharmony_ci * CARRY_TO_ARITH: 59bf215546Sopenharmony_ci * --------------- 60bf215546Sopenharmony_ci * Converts ir_carry into (x + y) < x. 61bf215546Sopenharmony_ci * 62bf215546Sopenharmony_ci * BORROW_TO_ARITH: 63bf215546Sopenharmony_ci * ---------------- 64bf215546Sopenharmony_ci * Converts ir_borrow into (x < y). 65bf215546Sopenharmony_ci * 66bf215546Sopenharmony_ci * DOPS_TO_DFRAC: 67bf215546Sopenharmony_ci * -------------- 68bf215546Sopenharmony_ci * Converts double trunc, ceil, floor, round to fract 69bf215546Sopenharmony_ci */ 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci#include "program/prog_instruction.h" /* for swizzle */ 72bf215546Sopenharmony_ci#include "compiler/glsl_types.h" 73bf215546Sopenharmony_ci#include "ir.h" 74bf215546Sopenharmony_ci#include "ir_builder.h" 75bf215546Sopenharmony_ci#include "ir_optimization.h" 76bf215546Sopenharmony_ci#include "util/half_float.h" 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci#include <math.h> 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ciusing namespace ir_builder; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_cinamespace { 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ciclass lower_instructions_visitor : public ir_hierarchical_visitor { 85bf215546Sopenharmony_cipublic: 86bf215546Sopenharmony_ci lower_instructions_visitor(unsigned lower) 87bf215546Sopenharmony_ci : progress(false), lower(lower) { } 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci ir_visitor_status visit_leave(ir_expression *); 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci bool progress; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ciprivate: 94bf215546Sopenharmony_ci unsigned lower; /** Bitfield of which operations to lower */ 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci void sub_to_add_neg(ir_expression *); 97bf215546Sopenharmony_ci void ldexp_to_arith(ir_expression *); 98bf215546Sopenharmony_ci void dldexp_to_arith(ir_expression *); 99bf215546Sopenharmony_ci void dfrexp_sig_to_arith(ir_expression *); 100bf215546Sopenharmony_ci void dfrexp_exp_to_arith(ir_expression *); 101bf215546Sopenharmony_ci void carry_to_arith(ir_expression *); 102bf215546Sopenharmony_ci void borrow_to_arith(ir_expression *); 103bf215546Sopenharmony_ci void double_dot_to_fma(ir_expression *); 104bf215546Sopenharmony_ci void double_lrp(ir_expression *); 105bf215546Sopenharmony_ci void dceil_to_dfrac(ir_expression *); 106bf215546Sopenharmony_ci void dfloor_to_dfrac(ir_expression *); 107bf215546Sopenharmony_ci void dround_even_to_dfrac(ir_expression *); 108bf215546Sopenharmony_ci void dtrunc_to_dfrac(ir_expression *); 109bf215546Sopenharmony_ci void dsign_to_csel(ir_expression *); 110bf215546Sopenharmony_ci void bit_count_to_math(ir_expression *); 111bf215546Sopenharmony_ci void extract_to_shifts(ir_expression *); 112bf215546Sopenharmony_ci void insert_to_shifts(ir_expression *); 113bf215546Sopenharmony_ci void reverse_to_shifts(ir_expression *ir); 114bf215546Sopenharmony_ci void find_lsb_to_float_cast(ir_expression *ir); 115bf215546Sopenharmony_ci void find_msb_to_float_cast(ir_expression *ir); 116bf215546Sopenharmony_ci void imul_high_to_mul(ir_expression *ir); 117bf215546Sopenharmony_ci void sqrt_to_abs_sqrt(ir_expression *ir); 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci ir_expression *_carry(operand a, operand b); 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci static ir_constant *_imm_fp(void *mem_ctx, 122bf215546Sopenharmony_ci const glsl_type *type, 123bf215546Sopenharmony_ci double f, 124bf215546Sopenharmony_ci unsigned vector_elements=1); 125bf215546Sopenharmony_ci}; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci} /* anonymous namespace */ 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci/** 130bf215546Sopenharmony_ci * Determine if a particular type of lowering should occur 131bf215546Sopenharmony_ci */ 132bf215546Sopenharmony_ci#define lowering(x) (this->lower & x) 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_cibool 135bf215546Sopenharmony_cilower_instructions(exec_list *instructions, unsigned what_to_lower) 136bf215546Sopenharmony_ci{ 137bf215546Sopenharmony_ci lower_instructions_visitor v(what_to_lower); 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci visit_list_elements(&v, instructions); 140bf215546Sopenharmony_ci return v.progress; 141bf215546Sopenharmony_ci} 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_civoid 144bf215546Sopenharmony_cilower_instructions_visitor::sub_to_add_neg(ir_expression *ir) 145bf215546Sopenharmony_ci{ 146bf215546Sopenharmony_ci ir->operation = ir_binop_add; 147bf215546Sopenharmony_ci ir->init_num_operands(); 148bf215546Sopenharmony_ci ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type, 149bf215546Sopenharmony_ci ir->operands[1], NULL); 150bf215546Sopenharmony_ci this->progress = true; 151bf215546Sopenharmony_ci} 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_civoid 154bf215546Sopenharmony_cilower_instructions_visitor::ldexp_to_arith(ir_expression *ir) 155bf215546Sopenharmony_ci{ 156bf215546Sopenharmony_ci /* Translates 157bf215546Sopenharmony_ci * ir_binop_ldexp x exp 158bf215546Sopenharmony_ci * into 159bf215546Sopenharmony_ci * 160bf215546Sopenharmony_ci * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); 161bf215546Sopenharmony_ci * resulting_biased_exp = min(extracted_biased_exp + exp, 255); 162bf215546Sopenharmony_ci * 163bf215546Sopenharmony_ci * if (extracted_biased_exp >= 255) 164bf215546Sopenharmony_ci * return x; // +/-inf, NaN 165bf215546Sopenharmony_ci * 166bf215546Sopenharmony_ci * sign_mantissa = bitcast_f2u(x) & sign_mantissa_mask; 167bf215546Sopenharmony_ci * 168bf215546Sopenharmony_ci * if (min(resulting_biased_exp, extracted_biased_exp) < 1) 169bf215546Sopenharmony_ci * resulting_biased_exp = 0; 170bf215546Sopenharmony_ci * if (resulting_biased_exp >= 255 || 171bf215546Sopenharmony_ci * min(resulting_biased_exp, extracted_biased_exp) < 1) { 172bf215546Sopenharmony_ci * sign_mantissa &= sign_mask; 173bf215546Sopenharmony_ci * } 174bf215546Sopenharmony_ci * 175bf215546Sopenharmony_ci * return bitcast_u2f(sign_mantissa | 176bf215546Sopenharmony_ci * lshift(i2u(resulting_biased_exp), exp_shift)); 177bf215546Sopenharmony_ci * 178bf215546Sopenharmony_ci * which we can't actually implement as such, since the GLSL IR doesn't 179bf215546Sopenharmony_ci * have vectorized if-statements. We actually implement it without branches 180bf215546Sopenharmony_ci * using conditional-select: 181bf215546Sopenharmony_ci * 182bf215546Sopenharmony_ci * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); 183bf215546Sopenharmony_ci * resulting_biased_exp = min(extracted_biased_exp + exp, 255); 184bf215546Sopenharmony_ci * 185bf215546Sopenharmony_ci * sign_mantissa = bitcast_f2u(x) & sign_mantissa_mask; 186bf215546Sopenharmony_ci * 187bf215546Sopenharmony_ci * flush_to_zero = lequal(min(resulting_biased_exp, extracted_biased_exp), 0); 188bf215546Sopenharmony_ci * resulting_biased_exp = csel(flush_to_zero, 0, resulting_biased_exp) 189bf215546Sopenharmony_ci * zero_mantissa = logic_or(flush_to_zero, 190bf215546Sopenharmony_ci * gequal(resulting_biased_exp, 255)); 191bf215546Sopenharmony_ci * sign_mantissa = csel(zero_mantissa, sign_mantissa & sign_mask, sign_mantissa); 192bf215546Sopenharmony_ci * 193bf215546Sopenharmony_ci * result = sign_mantissa | 194bf215546Sopenharmony_ci * lshift(i2u(resulting_biased_exp), exp_shift)); 195bf215546Sopenharmony_ci * 196bf215546Sopenharmony_ci * return csel(extracted_biased_exp >= 255, x, bitcast_u2f(result)); 197bf215546Sopenharmony_ci * 198bf215546Sopenharmony_ci * The definition of ldexp in the GLSL spec says: 199bf215546Sopenharmony_ci * 200bf215546Sopenharmony_ci * "If this product is too large to be represented in the 201bf215546Sopenharmony_ci * floating-point type, the result is undefined." 202bf215546Sopenharmony_ci * 203bf215546Sopenharmony_ci * However, the definition of ldexp in the GLSL ES spec does not contain 204bf215546Sopenharmony_ci * this sentence, so we do need to handle overflow correctly. 205bf215546Sopenharmony_ci * 206bf215546Sopenharmony_ci * There is additional language limiting the defined range of exp, but this 207bf215546Sopenharmony_ci * is merely to allow implementations that store 2^exp in a temporary 208bf215546Sopenharmony_ci * variable. 209bf215546Sopenharmony_ci */ 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci const unsigned vec_elem = ir->type->vector_elements; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci /* Types */ 214bf215546Sopenharmony_ci const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1); 215bf215546Sopenharmony_ci const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1); 216bf215546Sopenharmony_ci const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci /* Temporary variables */ 219bf215546Sopenharmony_ci ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); 220bf215546Sopenharmony_ci ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary); 221bf215546Sopenharmony_ci ir_variable *result = new(ir) ir_variable(uvec, "result", ir_var_temporary); 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci ir_variable *extracted_biased_exp = 224bf215546Sopenharmony_ci new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary); 225bf215546Sopenharmony_ci ir_variable *resulting_biased_exp = 226bf215546Sopenharmony_ci new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary); 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci ir_variable *sign_mantissa = 229bf215546Sopenharmony_ci new(ir) ir_variable(uvec, "sign_mantissa", ir_var_temporary); 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci ir_variable *flush_to_zero = 232bf215546Sopenharmony_ci new(ir) ir_variable(bvec, "flush_to_zero", ir_var_temporary); 233bf215546Sopenharmony_ci ir_variable *zero_mantissa = 234bf215546Sopenharmony_ci new(ir) ir_variable(bvec, "zero_mantissa", ir_var_temporary); 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci /* Copy <x> and <exp> arguments. */ 239bf215546Sopenharmony_ci i.insert_before(x); 240bf215546Sopenharmony_ci i.insert_before(assign(x, ir->operands[0])); 241bf215546Sopenharmony_ci i.insert_before(exp); 242bf215546Sopenharmony_ci i.insert_before(assign(exp, ir->operands[1])); 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci /* Extract the biased exponent from <x>. */ 245bf215546Sopenharmony_ci i.insert_before(extracted_biased_exp); 246bf215546Sopenharmony_ci i.insert_before(assign(extracted_biased_exp, 247bf215546Sopenharmony_ci rshift(bitcast_f2i(abs(x)), 248bf215546Sopenharmony_ci new(ir) ir_constant(23, vec_elem)))); 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci /* The definition of ldexp in the GLSL 4.60 spec says: 251bf215546Sopenharmony_ci * 252bf215546Sopenharmony_ci * "If exp is greater than +128 (single-precision) or +1024 253bf215546Sopenharmony_ci * (double-precision), the value returned is undefined. If exp is less 254bf215546Sopenharmony_ci * than -126 (single-precision) or -1022 (double-precision), the value 255bf215546Sopenharmony_ci * returned may be flushed to zero." 256bf215546Sopenharmony_ci * 257bf215546Sopenharmony_ci * So we do not have to guard against the possibility of addition overflow, 258bf215546Sopenharmony_ci * which could happen when exp is close to INT_MAX. Addition underflow 259bf215546Sopenharmony_ci * cannot happen (the worst case is 0 + (-INT_MAX)). 260bf215546Sopenharmony_ci */ 261bf215546Sopenharmony_ci i.insert_before(resulting_biased_exp); 262bf215546Sopenharmony_ci i.insert_before(assign(resulting_biased_exp, 263bf215546Sopenharmony_ci min2(add(extracted_biased_exp, exp), 264bf215546Sopenharmony_ci new(ir) ir_constant(255, vec_elem)))); 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci i.insert_before(sign_mantissa); 267bf215546Sopenharmony_ci i.insert_before(assign(sign_mantissa, 268bf215546Sopenharmony_ci bit_and(bitcast_f2u(x), 269bf215546Sopenharmony_ci new(ir) ir_constant(0x807fffffu, vec_elem)))); 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci /* We flush to zero if the original or resulting biased exponent is 0, 272bf215546Sopenharmony_ci * indicating a +/-0.0 or subnormal input or output. 273bf215546Sopenharmony_ci * 274bf215546Sopenharmony_ci * The mantissa is set to 0 if the resulting biased exponent is 255, since 275bf215546Sopenharmony_ci * an overflow should produce a +/-inf result. 276bf215546Sopenharmony_ci * 277bf215546Sopenharmony_ci * Note that NaN inputs are handled separately. 278bf215546Sopenharmony_ci */ 279bf215546Sopenharmony_ci i.insert_before(flush_to_zero); 280bf215546Sopenharmony_ci i.insert_before(assign(flush_to_zero, 281bf215546Sopenharmony_ci lequal(min2(resulting_biased_exp, 282bf215546Sopenharmony_ci extracted_biased_exp), 283bf215546Sopenharmony_ci ir_constant::zero(ir, ivec)))); 284bf215546Sopenharmony_ci i.insert_before(assign(resulting_biased_exp, 285bf215546Sopenharmony_ci csel(flush_to_zero, 286bf215546Sopenharmony_ci ir_constant::zero(ir, ivec), 287bf215546Sopenharmony_ci resulting_biased_exp))); 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci i.insert_before(zero_mantissa); 290bf215546Sopenharmony_ci i.insert_before(assign(zero_mantissa, 291bf215546Sopenharmony_ci logic_or(flush_to_zero, 292bf215546Sopenharmony_ci equal(resulting_biased_exp, 293bf215546Sopenharmony_ci new(ir) ir_constant(255, vec_elem))))); 294bf215546Sopenharmony_ci i.insert_before(assign(sign_mantissa, 295bf215546Sopenharmony_ci csel(zero_mantissa, 296bf215546Sopenharmony_ci bit_and(sign_mantissa, 297bf215546Sopenharmony_ci new(ir) ir_constant(0x80000000u, vec_elem)), 298bf215546Sopenharmony_ci sign_mantissa))); 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci /* Don't generate new IR that would need to be lowered in an additional 301bf215546Sopenharmony_ci * pass. 302bf215546Sopenharmony_ci */ 303bf215546Sopenharmony_ci i.insert_before(result); 304bf215546Sopenharmony_ci if (!lowering(INSERT_TO_SHIFTS)) { 305bf215546Sopenharmony_ci i.insert_before(assign(result, 306bf215546Sopenharmony_ci bitfield_insert(sign_mantissa, 307bf215546Sopenharmony_ci i2u(resulting_biased_exp), 308bf215546Sopenharmony_ci new(ir) ir_constant(23u, vec_elem), 309bf215546Sopenharmony_ci new(ir) ir_constant(8u, vec_elem)))); 310bf215546Sopenharmony_ci } else { 311bf215546Sopenharmony_ci i.insert_before(assign(result, 312bf215546Sopenharmony_ci bit_or(sign_mantissa, 313bf215546Sopenharmony_ci lshift(i2u(resulting_biased_exp), 314bf215546Sopenharmony_ci new(ir) ir_constant(23, vec_elem))))); 315bf215546Sopenharmony_ci } 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci ir->operation = ir_triop_csel; 318bf215546Sopenharmony_ci ir->init_num_operands(); 319bf215546Sopenharmony_ci ir->operands[0] = gequal(extracted_biased_exp, 320bf215546Sopenharmony_ci new(ir) ir_constant(255, vec_elem)); 321bf215546Sopenharmony_ci ir->operands[1] = new(ir) ir_dereference_variable(x); 322bf215546Sopenharmony_ci ir->operands[2] = bitcast_u2f(result); 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci this->progress = true; 325bf215546Sopenharmony_ci} 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_civoid 328bf215546Sopenharmony_cilower_instructions_visitor::dldexp_to_arith(ir_expression *ir) 329bf215546Sopenharmony_ci{ 330bf215546Sopenharmony_ci /* See ldexp_to_arith for structure. Uses frexp_exp to extract the exponent 331bf215546Sopenharmony_ci * from the significand. 332bf215546Sopenharmony_ci */ 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci const unsigned vec_elem = ir->type->vector_elements; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci /* Types */ 337bf215546Sopenharmony_ci const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1); 338bf215546Sopenharmony_ci const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci /* Constants */ 341bf215546Sopenharmony_ci ir_constant *zeroi = ir_constant::zero(ir, ivec); 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci ir_constant *sign_mask = new(ir) ir_constant(0x80000000u); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci ir_constant *exp_shift = new(ir) ir_constant(20u); 346bf215546Sopenharmony_ci ir_constant *exp_width = new(ir) ir_constant(11u); 347bf215546Sopenharmony_ci ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem); 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci /* Temporary variables */ 350bf215546Sopenharmony_ci ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); 351bf215546Sopenharmony_ci ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary); 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x", 354bf215546Sopenharmony_ci ir_var_temporary); 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci ir_variable *extracted_biased_exp = 357bf215546Sopenharmony_ci new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary); 358bf215546Sopenharmony_ci ir_variable *resulting_biased_exp = 359bf215546Sopenharmony_ci new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary); 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci ir_variable *is_not_zero_or_underflow = 362bf215546Sopenharmony_ci new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary); 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci /* Copy <x> and <exp> arguments. */ 367bf215546Sopenharmony_ci i.insert_before(x); 368bf215546Sopenharmony_ci i.insert_before(assign(x, ir->operands[0])); 369bf215546Sopenharmony_ci i.insert_before(exp); 370bf215546Sopenharmony_ci i.insert_before(assign(exp, ir->operands[1])); 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x); 373bf215546Sopenharmony_ci if (lowering(DFREXP_DLDEXP_TO_ARITH)) 374bf215546Sopenharmony_ci dfrexp_exp_to_arith(frexp_exp); 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci /* Extract the biased exponent from <x>. */ 377bf215546Sopenharmony_ci i.insert_before(extracted_biased_exp); 378bf215546Sopenharmony_ci i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias))); 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci i.insert_before(resulting_biased_exp); 381bf215546Sopenharmony_ci i.insert_before(assign(resulting_biased_exp, 382bf215546Sopenharmony_ci add(extracted_biased_exp, exp))); 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci /* Test if result is ±0.0, subnormal, or underflow by checking if the 385bf215546Sopenharmony_ci * resulting biased exponent would be less than 0x1. If so, the result is 386bf215546Sopenharmony_ci * 0.0 with the sign of x. (Actually, invert the conditions so that 387bf215546Sopenharmony_ci * immediate values are the second arguments, which is better for i965) 388bf215546Sopenharmony_ci * TODO: Implement in a vector fashion. 389bf215546Sopenharmony_ci */ 390bf215546Sopenharmony_ci i.insert_before(zero_sign_x); 391bf215546Sopenharmony_ci for (unsigned elem = 0; elem < vec_elem; elem++) { 392bf215546Sopenharmony_ci ir_variable *unpacked = 393bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); 394bf215546Sopenharmony_ci i.insert_before(unpacked); 395bf215546Sopenharmony_ci i.insert_before( 396bf215546Sopenharmony_ci assign(unpacked, 397bf215546Sopenharmony_ci expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); 398bf215546Sopenharmony_ci i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)), 399bf215546Sopenharmony_ci WRITEMASK_Y)); 400bf215546Sopenharmony_ci i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X)); 401bf215546Sopenharmony_ci i.insert_before(assign(zero_sign_x, 402bf215546Sopenharmony_ci expr(ir_unop_pack_double_2x32, unpacked), 403bf215546Sopenharmony_ci 1 << elem)); 404bf215546Sopenharmony_ci } 405bf215546Sopenharmony_ci i.insert_before(is_not_zero_or_underflow); 406bf215546Sopenharmony_ci i.insert_before(assign(is_not_zero_or_underflow, 407bf215546Sopenharmony_ci gequal(resulting_biased_exp, 408bf215546Sopenharmony_ci new(ir) ir_constant(0x1, vec_elem)))); 409bf215546Sopenharmony_ci i.insert_before(assign(x, csel(is_not_zero_or_underflow, 410bf215546Sopenharmony_ci x, zero_sign_x))); 411bf215546Sopenharmony_ci i.insert_before(assign(resulting_biased_exp, 412bf215546Sopenharmony_ci csel(is_not_zero_or_underflow, 413bf215546Sopenharmony_ci resulting_biased_exp, zeroi))); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci /* We could test for overflows by checking if the resulting biased exponent 416bf215546Sopenharmony_ci * would be greater than 0xFE. Turns out we don't need to because the GLSL 417bf215546Sopenharmony_ci * spec says: 418bf215546Sopenharmony_ci * 419bf215546Sopenharmony_ci * "If this product is too large to be represented in the 420bf215546Sopenharmony_ci * floating-point type, the result is undefined." 421bf215546Sopenharmony_ci */ 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci ir_rvalue *results[4] = {NULL}; 424bf215546Sopenharmony_ci for (unsigned elem = 0; elem < vec_elem; elem++) { 425bf215546Sopenharmony_ci ir_variable *unpacked = 426bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); 427bf215546Sopenharmony_ci i.insert_before(unpacked); 428bf215546Sopenharmony_ci i.insert_before( 429bf215546Sopenharmony_ci assign(unpacked, 430bf215546Sopenharmony_ci expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci ir_expression *bfi = bitfield_insert( 433bf215546Sopenharmony_ci swizzle_y(unpacked), 434bf215546Sopenharmony_ci i2u(swizzle(resulting_biased_exp, elem, 1)), 435bf215546Sopenharmony_ci exp_shift->clone(ir, NULL), 436bf215546Sopenharmony_ci exp_width->clone(ir, NULL)); 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci i.insert_before(assign(unpacked, bfi, WRITEMASK_Y)); 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci results[elem] = expr(ir_unop_pack_double_2x32, unpacked); 441bf215546Sopenharmony_ci } 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci ir->operation = ir_quadop_vector; 444bf215546Sopenharmony_ci ir->init_num_operands(); 445bf215546Sopenharmony_ci ir->operands[0] = results[0]; 446bf215546Sopenharmony_ci ir->operands[1] = results[1]; 447bf215546Sopenharmony_ci ir->operands[2] = results[2]; 448bf215546Sopenharmony_ci ir->operands[3] = results[3]; 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci /* Don't generate new IR that would need to be lowered in an additional 451bf215546Sopenharmony_ci * pass. 452bf215546Sopenharmony_ci */ 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci this->progress = true; 455bf215546Sopenharmony_ci} 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_civoid 458bf215546Sopenharmony_cilower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir) 459bf215546Sopenharmony_ci{ 460bf215546Sopenharmony_ci const unsigned vec_elem = ir->type->vector_elements; 461bf215546Sopenharmony_ci const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci /* Double-precision floating-point values are stored as 464bf215546Sopenharmony_ci * 1 sign bit; 465bf215546Sopenharmony_ci * 11 exponent bits; 466bf215546Sopenharmony_ci * 52 mantissa bits. 467bf215546Sopenharmony_ci * 468bf215546Sopenharmony_ci * We're just extracting the significand here, so we only need to modify 469bf215546Sopenharmony_ci * the upper 32-bit uint. Unfortunately we must extract each double 470bf215546Sopenharmony_ci * independently as there is no vector version of unpackDouble. 471bf215546Sopenharmony_ci */ 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci ir_variable *is_not_zero = 476bf215546Sopenharmony_ci new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); 477bf215546Sopenharmony_ci ir_rvalue *results[4] = {NULL}; 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); 480bf215546Sopenharmony_ci i.insert_before(is_not_zero); 481bf215546Sopenharmony_ci i.insert_before( 482bf215546Sopenharmony_ci assign(is_not_zero, 483bf215546Sopenharmony_ci nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero))); 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci /* TODO: Remake this as more vector-friendly when int64 support is 486bf215546Sopenharmony_ci * available. 487bf215546Sopenharmony_ci */ 488bf215546Sopenharmony_ci for (unsigned elem = 0; elem < vec_elem; elem++) { 489bf215546Sopenharmony_ci ir_constant *zero = new(ir) ir_constant(0u, 1); 490bf215546Sopenharmony_ci ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1); 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci /* Exponent of double floating-point values in the range [0.5, 1.0). */ 493bf215546Sopenharmony_ci ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1); 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci ir_variable *bits = 496bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary); 497bf215546Sopenharmony_ci ir_variable *unpacked = 498bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1); 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci i.insert_before(bits); 503bf215546Sopenharmony_ci i.insert_before(unpacked); 504bf215546Sopenharmony_ci i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x))); 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci /* Manipulate the high uint to remove the exponent and replace it with 507bf215546Sopenharmony_ci * either the default exponent or zero. 508bf215546Sopenharmony_ci */ 509bf215546Sopenharmony_ci i.insert_before(assign(bits, swizzle_y(unpacked))); 510bf215546Sopenharmony_ci i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask))); 511bf215546Sopenharmony_ci i.insert_before(assign(bits, bit_or(bits, 512bf215546Sopenharmony_ci csel(swizzle(is_not_zero, elem, 1), 513bf215546Sopenharmony_ci exponent_value, 514bf215546Sopenharmony_ci zero)))); 515bf215546Sopenharmony_ci i.insert_before(assign(unpacked, bits, WRITEMASK_Y)); 516bf215546Sopenharmony_ci results[elem] = expr(ir_unop_pack_double_2x32, unpacked); 517bf215546Sopenharmony_ci } 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci /* Put the dvec back together */ 520bf215546Sopenharmony_ci ir->operation = ir_quadop_vector; 521bf215546Sopenharmony_ci ir->init_num_operands(); 522bf215546Sopenharmony_ci ir->operands[0] = results[0]; 523bf215546Sopenharmony_ci ir->operands[1] = results[1]; 524bf215546Sopenharmony_ci ir->operands[2] = results[2]; 525bf215546Sopenharmony_ci ir->operands[3] = results[3]; 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci this->progress = true; 528bf215546Sopenharmony_ci} 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_civoid 531bf215546Sopenharmony_cilower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir) 532bf215546Sopenharmony_ci{ 533bf215546Sopenharmony_ci const unsigned vec_elem = ir->type->vector_elements; 534bf215546Sopenharmony_ci const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); 535bf215546Sopenharmony_ci const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1); 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci /* Double-precision floating-point values are stored as 538bf215546Sopenharmony_ci * 1 sign bit; 539bf215546Sopenharmony_ci * 11 exponent bits; 540bf215546Sopenharmony_ci * 52 mantissa bits. 541bf215546Sopenharmony_ci * 542bf215546Sopenharmony_ci * We're just extracting the exponent here, so we only care about the upper 543bf215546Sopenharmony_ci * 32-bit uint. 544bf215546Sopenharmony_ci */ 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci ir_variable *is_not_zero = 549bf215546Sopenharmony_ci new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); 550bf215546Sopenharmony_ci ir_variable *high_words = 551bf215546Sopenharmony_ci new(ir) ir_variable(uvec, "high_words", ir_var_temporary); 552bf215546Sopenharmony_ci ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); 553bf215546Sopenharmony_ci ir_constant *izero = new(ir) ir_constant(0, vec_elem); 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci ir_rvalue *absval = abs(ir->operands[0]); 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci i.insert_before(is_not_zero); 558bf215546Sopenharmony_ci i.insert_before(high_words); 559bf215546Sopenharmony_ci i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero))); 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci /* Extract all of the upper uints. */ 562bf215546Sopenharmony_ci for (unsigned elem = 0; elem < vec_elem; elem++) { 563bf215546Sopenharmony_ci ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1); 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci i.insert_before(assign(high_words, 566bf215546Sopenharmony_ci swizzle_y(expr(ir_unop_unpack_double_2x32, x)), 567bf215546Sopenharmony_ci 1 << elem)); 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci } 570bf215546Sopenharmony_ci ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem); 571bf215546Sopenharmony_ci ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem); 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci /* For non-zero inputs, shift the exponent down and apply bias. */ 574bf215546Sopenharmony_ci ir->operation = ir_triop_csel; 575bf215546Sopenharmony_ci ir->init_num_operands(); 576bf215546Sopenharmony_ci ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero); 577bf215546Sopenharmony_ci ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift))); 578bf215546Sopenharmony_ci ir->operands[2] = izero; 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci this->progress = true; 581bf215546Sopenharmony_ci} 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_civoid 584bf215546Sopenharmony_cilower_instructions_visitor::carry_to_arith(ir_expression *ir) 585bf215546Sopenharmony_ci{ 586bf215546Sopenharmony_ci /* Translates 587bf215546Sopenharmony_ci * ir_binop_carry x y 588bf215546Sopenharmony_ci * into 589bf215546Sopenharmony_ci * sum = ir_binop_add x y 590bf215546Sopenharmony_ci * bcarry = ir_binop_less sum x 591bf215546Sopenharmony_ci * carry = ir_unop_b2i bcarry 592bf215546Sopenharmony_ci */ 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL); 595bf215546Sopenharmony_ci ir->operation = ir_unop_i2u; 596bf215546Sopenharmony_ci ir->init_num_operands(); 597bf215546Sopenharmony_ci ir->operands[0] = b2i(less(add(ir->operands[0], ir->operands[1]), x_clone)); 598bf215546Sopenharmony_ci ir->operands[1] = NULL; 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci this->progress = true; 601bf215546Sopenharmony_ci} 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_civoid 604bf215546Sopenharmony_cilower_instructions_visitor::borrow_to_arith(ir_expression *ir) 605bf215546Sopenharmony_ci{ 606bf215546Sopenharmony_ci /* Translates 607bf215546Sopenharmony_ci * ir_binop_borrow x y 608bf215546Sopenharmony_ci * into 609bf215546Sopenharmony_ci * bcarry = ir_binop_less x y 610bf215546Sopenharmony_ci * carry = ir_unop_b2i bcarry 611bf215546Sopenharmony_ci */ 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_ci ir->operation = ir_unop_i2u; 614bf215546Sopenharmony_ci ir->init_num_operands(); 615bf215546Sopenharmony_ci ir->operands[0] = b2i(less(ir->operands[0], ir->operands[1])); 616bf215546Sopenharmony_ci ir->operands[1] = NULL; 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci this->progress = true; 619bf215546Sopenharmony_ci} 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_civoid 622bf215546Sopenharmony_cilower_instructions_visitor::double_dot_to_fma(ir_expression *ir) 623bf215546Sopenharmony_ci{ 624bf215546Sopenharmony_ci ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res", 625bf215546Sopenharmony_ci ir_var_temporary); 626bf215546Sopenharmony_ci this->base_ir->insert_before(temp); 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci int nc = ir->operands[0]->type->components(); 629bf215546Sopenharmony_ci for (int i = nc - 1; i >= 1; i--) { 630bf215546Sopenharmony_ci ir_assignment *assig; 631bf215546Sopenharmony_ci if (i == (nc - 1)) { 632bf215546Sopenharmony_ci assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1), 633bf215546Sopenharmony_ci swizzle(ir->operands[1]->clone(ir, NULL), i, 1))); 634bf215546Sopenharmony_ci } else { 635bf215546Sopenharmony_ci assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, NULL), i, 1), 636bf215546Sopenharmony_ci swizzle(ir->operands[1]->clone(ir, NULL), i, 1), 637bf215546Sopenharmony_ci temp)); 638bf215546Sopenharmony_ci } 639bf215546Sopenharmony_ci this->base_ir->insert_before(assig); 640bf215546Sopenharmony_ci } 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci ir->operation = ir_triop_fma; 643bf215546Sopenharmony_ci ir->init_num_operands(); 644bf215546Sopenharmony_ci ir->operands[0] = swizzle(ir->operands[0], 0, 1); 645bf215546Sopenharmony_ci ir->operands[1] = swizzle(ir->operands[1], 0, 1); 646bf215546Sopenharmony_ci ir->operands[2] = new(ir) ir_dereference_variable(temp); 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci this->progress = true; 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci} 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_civoid 653bf215546Sopenharmony_cilower_instructions_visitor::double_lrp(ir_expression *ir) 654bf215546Sopenharmony_ci{ 655bf215546Sopenharmony_ci int swizval; 656bf215546Sopenharmony_ci ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2]; 657bf215546Sopenharmony_ci ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements); 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci switch (op2->type->vector_elements) { 660bf215546Sopenharmony_ci case 1: 661bf215546Sopenharmony_ci swizval = SWIZZLE_XXXX; 662bf215546Sopenharmony_ci break; 663bf215546Sopenharmony_ci default: 664bf215546Sopenharmony_ci assert(op0->type->vector_elements == op2->type->vector_elements); 665bf215546Sopenharmony_ci swizval = SWIZZLE_XYZW; 666bf215546Sopenharmony_ci break; 667bf215546Sopenharmony_ci } 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ci ir->operation = ir_triop_fma; 670bf215546Sopenharmony_ci ir->init_num_operands(); 671bf215546Sopenharmony_ci ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements); 672bf215546Sopenharmony_ci ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0); 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci this->progress = true; 675bf215546Sopenharmony_ci} 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_civoid 678bf215546Sopenharmony_cilower_instructions_visitor::dceil_to_dfrac(ir_expression *ir) 679bf215546Sopenharmony_ci{ 680bf215546Sopenharmony_ci /* 681bf215546Sopenharmony_ci * frtemp = frac(x); 682bf215546Sopenharmony_ci * temp = sub(x, frtemp); 683bf215546Sopenharmony_ci * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0); 684bf215546Sopenharmony_ci */ 685bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 686bf215546Sopenharmony_ci ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); 687bf215546Sopenharmony_ci ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); 688bf215546Sopenharmony_ci ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", 689bf215546Sopenharmony_ci ir_var_temporary); 690bf215546Sopenharmony_ci 691bf215546Sopenharmony_ci i.insert_before(frtemp); 692bf215546Sopenharmony_ci i.insert_before(assign(frtemp, fract(ir->operands[0]))); 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci ir->operation = ir_binop_add; 695bf215546Sopenharmony_ci ir->init_num_operands(); 696bf215546Sopenharmony_ci ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp); 697bf215546Sopenharmony_ci ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL)); 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci this->progress = true; 700bf215546Sopenharmony_ci} 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_civoid 703bf215546Sopenharmony_cilower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir) 704bf215546Sopenharmony_ci{ 705bf215546Sopenharmony_ci /* 706bf215546Sopenharmony_ci * frtemp = frac(x); 707bf215546Sopenharmony_ci * result = sub(x, frtemp); 708bf215546Sopenharmony_ci */ 709bf215546Sopenharmony_ci ir->operation = ir_binop_sub; 710bf215546Sopenharmony_ci ir->init_num_operands(); 711bf215546Sopenharmony_ci ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL)); 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci this->progress = true; 714bf215546Sopenharmony_ci} 715bf215546Sopenharmony_civoid 716bf215546Sopenharmony_cilower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir) 717bf215546Sopenharmony_ci{ 718bf215546Sopenharmony_ci /* 719bf215546Sopenharmony_ci * insane but works 720bf215546Sopenharmony_ci * temp = x + 0.5; 721bf215546Sopenharmony_ci * frtemp = frac(temp); 722bf215546Sopenharmony_ci * t2 = sub(temp, frtemp); 723bf215546Sopenharmony_ci * if (frac(x) == 0.5) 724bf215546Sopenharmony_ci * result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1; 725bf215546Sopenharmony_ci * else 726bf215546Sopenharmony_ci * result = t2; 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci */ 729bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 730bf215546Sopenharmony_ci ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", 731bf215546Sopenharmony_ci ir_var_temporary); 732bf215546Sopenharmony_ci ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", 733bf215546Sopenharmony_ci ir_var_temporary); 734bf215546Sopenharmony_ci ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2", 735bf215546Sopenharmony_ci ir_var_temporary); 736bf215546Sopenharmony_ci ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements); 737bf215546Sopenharmony_ci ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); 738bf215546Sopenharmony_ci ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci i.insert_before(temp); 741bf215546Sopenharmony_ci i.insert_before(assign(temp, add(ir->operands[0], p5))); 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci i.insert_before(frtemp); 744bf215546Sopenharmony_ci i.insert_before(assign(frtemp, fract(temp))); 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci i.insert_before(t2); 747bf215546Sopenharmony_ci i.insert_before(assign(t2, sub(temp, frtemp))); 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci ir->operation = ir_triop_csel; 750bf215546Sopenharmony_ci ir->init_num_operands(); 751bf215546Sopenharmony_ci ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)), 752bf215546Sopenharmony_ci p5->clone(ir, NULL)); 753bf215546Sopenharmony_ci ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))), 754bf215546Sopenharmony_ci zero), 755bf215546Sopenharmony_ci t2, 756bf215546Sopenharmony_ci sub(t2, one)); 757bf215546Sopenharmony_ci ir->operands[2] = new(ir) ir_dereference_variable(t2); 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci this->progress = true; 760bf215546Sopenharmony_ci} 761bf215546Sopenharmony_ci 762bf215546Sopenharmony_civoid 763bf215546Sopenharmony_cilower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir) 764bf215546Sopenharmony_ci{ 765bf215546Sopenharmony_ci /* 766bf215546Sopenharmony_ci * frtemp = frac(x); 767bf215546Sopenharmony_ci * temp = sub(x, frtemp); 768bf215546Sopenharmony_ci * result = x >= 0 ? temp : temp + (frtemp == 0.0) ? 0 : 1; 769bf215546Sopenharmony_ci */ 770bf215546Sopenharmony_ci ir_rvalue *arg = ir->operands[0]; 771bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); 774bf215546Sopenharmony_ci ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); 775bf215546Sopenharmony_ci ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp", 776bf215546Sopenharmony_ci ir_var_temporary); 777bf215546Sopenharmony_ci ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", 778bf215546Sopenharmony_ci ir_var_temporary); 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci i.insert_before(frtemp); 781bf215546Sopenharmony_ci i.insert_before(assign(frtemp, fract(arg))); 782bf215546Sopenharmony_ci i.insert_before(temp); 783bf215546Sopenharmony_ci i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp))); 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci ir->operation = ir_triop_csel; 786bf215546Sopenharmony_ci ir->init_num_operands(); 787bf215546Sopenharmony_ci ir->operands[0] = gequal(arg->clone(ir, NULL), zero); 788bf215546Sopenharmony_ci ir->operands[1] = new (ir) ir_dereference_variable(temp); 789bf215546Sopenharmony_ci ir->operands[2] = add(temp, 790bf215546Sopenharmony_ci csel(equal(frtemp, zero->clone(ir, NULL)), 791bf215546Sopenharmony_ci zero->clone(ir, NULL), 792bf215546Sopenharmony_ci one)); 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci this->progress = true; 795bf215546Sopenharmony_ci} 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_civoid 798bf215546Sopenharmony_cilower_instructions_visitor::dsign_to_csel(ir_expression *ir) 799bf215546Sopenharmony_ci{ 800bf215546Sopenharmony_ci /* 801bf215546Sopenharmony_ci * temp = x > 0.0 ? 1.0 : 0.0; 802bf215546Sopenharmony_ci * result = x < 0.0 ? -1.0 : temp; 803bf215546Sopenharmony_ci */ 804bf215546Sopenharmony_ci ir_rvalue *arg = ir->operands[0]; 805bf215546Sopenharmony_ci ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); 806bf215546Sopenharmony_ci ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); 807bf215546Sopenharmony_ci ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements); 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci ir->operation = ir_triop_csel; 810bf215546Sopenharmony_ci ir->init_num_operands(); 811bf215546Sopenharmony_ci ir->operands[0] = less(arg->clone(ir, NULL), 812bf215546Sopenharmony_ci zero->clone(ir, NULL)); 813bf215546Sopenharmony_ci ir->operands[1] = neg_one; 814bf215546Sopenharmony_ci ir->operands[2] = csel(greater(arg, zero), 815bf215546Sopenharmony_ci one, 816bf215546Sopenharmony_ci zero->clone(ir, NULL)); 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci this->progress = true; 819bf215546Sopenharmony_ci} 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_civoid 822bf215546Sopenharmony_cilower_instructions_visitor::bit_count_to_math(ir_expression *ir) 823bf215546Sopenharmony_ci{ 824bf215546Sopenharmony_ci /* For more details, see: 825bf215546Sopenharmony_ci * 826bf215546Sopenharmony_ci * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetPaallel 827bf215546Sopenharmony_ci */ 828bf215546Sopenharmony_ci const unsigned elements = ir->operands[0]->type->vector_elements; 829bf215546Sopenharmony_ci ir_variable *temp = new(ir) ir_variable(glsl_type::uvec(elements), "temp", 830bf215546Sopenharmony_ci ir_var_temporary); 831bf215546Sopenharmony_ci ir_constant *c55555555 = new(ir) ir_constant(0x55555555u); 832bf215546Sopenharmony_ci ir_constant *c33333333 = new(ir) ir_constant(0x33333333u); 833bf215546Sopenharmony_ci ir_constant *c0F0F0F0F = new(ir) ir_constant(0x0F0F0F0Fu); 834bf215546Sopenharmony_ci ir_constant *c01010101 = new(ir) ir_constant(0x01010101u); 835bf215546Sopenharmony_ci ir_constant *c1 = new(ir) ir_constant(1u); 836bf215546Sopenharmony_ci ir_constant *c2 = new(ir) ir_constant(2u); 837bf215546Sopenharmony_ci ir_constant *c4 = new(ir) ir_constant(4u); 838bf215546Sopenharmony_ci ir_constant *c24 = new(ir) ir_constant(24u); 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci base_ir->insert_before(temp); 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 843bf215546Sopenharmony_ci base_ir->insert_before(assign(temp, ir->operands[0])); 844bf215546Sopenharmony_ci } else { 845bf215546Sopenharmony_ci assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 846bf215546Sopenharmony_ci base_ir->insert_before(assign(temp, i2u(ir->operands[0]))); 847bf215546Sopenharmony_ci } 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci /* temp = temp - ((temp >> 1) & 0x55555555u); */ 850bf215546Sopenharmony_ci base_ir->insert_before(assign(temp, sub(temp, bit_and(rshift(temp, c1), 851bf215546Sopenharmony_ci c55555555)))); 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci /* temp = (temp & 0x33333333u) + ((temp >> 2) & 0x33333333u); */ 854bf215546Sopenharmony_ci base_ir->insert_before(assign(temp, add(bit_and(temp, c33333333), 855bf215546Sopenharmony_ci bit_and(rshift(temp, c2), 856bf215546Sopenharmony_ci c33333333->clone(ir, NULL))))); 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci /* int(((temp + (temp >> 4) & 0xF0F0F0Fu) * 0x1010101u) >> 24); */ 859bf215546Sopenharmony_ci ir->operation = ir_unop_u2i; 860bf215546Sopenharmony_ci ir->init_num_operands(); 861bf215546Sopenharmony_ci ir->operands[0] = rshift(mul(bit_and(add(temp, rshift(temp, c4)), c0F0F0F0F), 862bf215546Sopenharmony_ci c01010101), 863bf215546Sopenharmony_ci c24); 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci this->progress = true; 866bf215546Sopenharmony_ci} 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_civoid 869bf215546Sopenharmony_cilower_instructions_visitor::extract_to_shifts(ir_expression *ir) 870bf215546Sopenharmony_ci{ 871bf215546Sopenharmony_ci ir_variable *bits = 872bf215546Sopenharmony_ci new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary); 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci base_ir->insert_before(bits); 875bf215546Sopenharmony_ci base_ir->insert_before(assign(bits, ir->operands[2])); 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 878bf215546Sopenharmony_ci ir_constant *c1 = 879bf215546Sopenharmony_ci new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements); 880bf215546Sopenharmony_ci ir_constant *c32 = 881bf215546Sopenharmony_ci new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements); 882bf215546Sopenharmony_ci ir_constant *cFFFFFFFF = 883bf215546Sopenharmony_ci new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements); 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci /* At least some hardware treats (x << y) as (x << (y%32)). This means 886bf215546Sopenharmony_ci * we'd get a mask of 0 when bits is 32. Special case it. 887bf215546Sopenharmony_ci * 888bf215546Sopenharmony_ci * mask = bits == 32 ? 0xffffffff : (1u << bits) - 1u; 889bf215546Sopenharmony_ci */ 890bf215546Sopenharmony_ci ir_expression *mask = csel(equal(bits, c32), 891bf215546Sopenharmony_ci cFFFFFFFF, 892bf215546Sopenharmony_ci sub(lshift(c1, bits), c1->clone(ir, NULL))); 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci /* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 895bf215546Sopenharmony_ci * 896bf215546Sopenharmony_ci * If bits is zero, the result will be zero. 897bf215546Sopenharmony_ci * 898bf215546Sopenharmony_ci * Since (1 << 0) - 1 == 0, we don't need to bother with the conditional 899bf215546Sopenharmony_ci * select as in the signed integer case. 900bf215546Sopenharmony_ci * 901bf215546Sopenharmony_ci * (value >> offset) & mask; 902bf215546Sopenharmony_ci */ 903bf215546Sopenharmony_ci ir->operation = ir_binop_bit_and; 904bf215546Sopenharmony_ci ir->init_num_operands(); 905bf215546Sopenharmony_ci ir->operands[0] = rshift(ir->operands[0], ir->operands[1]); 906bf215546Sopenharmony_ci ir->operands[1] = mask; 907bf215546Sopenharmony_ci ir->operands[2] = NULL; 908bf215546Sopenharmony_ci } else { 909bf215546Sopenharmony_ci ir_constant *c0 = 910bf215546Sopenharmony_ci new(ir) ir_constant(int(0), ir->operands[0]->type->vector_elements); 911bf215546Sopenharmony_ci ir_constant *c32 = 912bf215546Sopenharmony_ci new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements); 913bf215546Sopenharmony_ci ir_variable *temp = 914bf215546Sopenharmony_ci new(ir) ir_variable(ir->operands[0]->type, "temp", ir_var_temporary); 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci /* temp = 32 - bits; */ 917bf215546Sopenharmony_ci base_ir->insert_before(temp); 918bf215546Sopenharmony_ci base_ir->insert_before(assign(temp, sub(c32, bits))); 919bf215546Sopenharmony_ci 920bf215546Sopenharmony_ci /* expr = value << (temp - offset)) >> temp; */ 921bf215546Sopenharmony_ci ir_expression *expr = 922bf215546Sopenharmony_ci rshift(lshift(ir->operands[0], sub(temp, ir->operands[1])), temp); 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci /* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 925bf215546Sopenharmony_ci * 926bf215546Sopenharmony_ci * If bits is zero, the result will be zero. 927bf215546Sopenharmony_ci * 928bf215546Sopenharmony_ci * Due to the (x << (y%32)) behavior mentioned before, the (value << 929bf215546Sopenharmony_ci * (32-0)) doesn't "erase" all of the data as we would like, so finish 930bf215546Sopenharmony_ci * up with: 931bf215546Sopenharmony_ci * 932bf215546Sopenharmony_ci * (bits == 0) ? 0 : e; 933bf215546Sopenharmony_ci */ 934bf215546Sopenharmony_ci ir->operation = ir_triop_csel; 935bf215546Sopenharmony_ci ir->init_num_operands(); 936bf215546Sopenharmony_ci ir->operands[0] = equal(c0, bits); 937bf215546Sopenharmony_ci ir->operands[1] = c0->clone(ir, NULL); 938bf215546Sopenharmony_ci ir->operands[2] = expr; 939bf215546Sopenharmony_ci } 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci this->progress = true; 942bf215546Sopenharmony_ci} 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_civoid 945bf215546Sopenharmony_cilower_instructions_visitor::insert_to_shifts(ir_expression *ir) 946bf215546Sopenharmony_ci{ 947bf215546Sopenharmony_ci ir_constant *c1; 948bf215546Sopenharmony_ci ir_constant *c32; 949bf215546Sopenharmony_ci ir_constant *cFFFFFFFF; 950bf215546Sopenharmony_ci ir_variable *offset = 951bf215546Sopenharmony_ci new(ir) ir_variable(ir->operands[0]->type, "offset", ir_var_temporary); 952bf215546Sopenharmony_ci ir_variable *bits = 953bf215546Sopenharmony_ci new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary); 954bf215546Sopenharmony_ci ir_variable *mask = 955bf215546Sopenharmony_ci new(ir) ir_variable(ir->operands[0]->type, "mask", ir_var_temporary); 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_ci if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) { 958bf215546Sopenharmony_ci c1 = new(ir) ir_constant(int(1), ir->operands[0]->type->vector_elements); 959bf215546Sopenharmony_ci c32 = new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements); 960bf215546Sopenharmony_ci cFFFFFFFF = new(ir) ir_constant(int(0xFFFFFFFF), ir->operands[0]->type->vector_elements); 961bf215546Sopenharmony_ci } else { 962bf215546Sopenharmony_ci assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); 963bf215546Sopenharmony_ci 964bf215546Sopenharmony_ci c1 = new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements); 965bf215546Sopenharmony_ci c32 = new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements); 966bf215546Sopenharmony_ci cFFFFFFFF = new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements); 967bf215546Sopenharmony_ci } 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci base_ir->insert_before(offset); 970bf215546Sopenharmony_ci base_ir->insert_before(assign(offset, ir->operands[2])); 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci base_ir->insert_before(bits); 973bf215546Sopenharmony_ci base_ir->insert_before(assign(bits, ir->operands[3])); 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci /* At least some hardware treats (x << y) as (x << (y%32)). This means 976bf215546Sopenharmony_ci * we'd get a mask of 0 when bits is 32. Special case it. 977bf215546Sopenharmony_ci * 978bf215546Sopenharmony_ci * mask = (bits == 32 ? 0xffffffff : (1u << bits) - 1u) << offset; 979bf215546Sopenharmony_ci * 980bf215546Sopenharmony_ci * Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 981bf215546Sopenharmony_ci * 982bf215546Sopenharmony_ci * The result will be undefined if offset or bits is negative, or if the 983bf215546Sopenharmony_ci * sum of offset and bits is greater than the number of bits used to 984bf215546Sopenharmony_ci * store the operand. 985bf215546Sopenharmony_ci * 986bf215546Sopenharmony_ci * Since it's undefined, there are a couple other ways this could be 987bf215546Sopenharmony_ci * implemented. The other way that was considered was to put the csel 988bf215546Sopenharmony_ci * around the whole thing: 989bf215546Sopenharmony_ci * 990bf215546Sopenharmony_ci * final_result = bits == 32 ? insert : ... ; 991bf215546Sopenharmony_ci */ 992bf215546Sopenharmony_ci base_ir->insert_before(mask); 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_ci base_ir->insert_before(assign(mask, csel(equal(bits, c32), 995bf215546Sopenharmony_ci cFFFFFFFF, 996bf215546Sopenharmony_ci lshift(sub(lshift(c1, bits), 997bf215546Sopenharmony_ci c1->clone(ir, NULL)), 998bf215546Sopenharmony_ci offset)))); 999bf215546Sopenharmony_ci 1000bf215546Sopenharmony_ci /* (base & ~mask) | ((insert << offset) & mask) */ 1001bf215546Sopenharmony_ci ir->operation = ir_binop_bit_or; 1002bf215546Sopenharmony_ci ir->init_num_operands(); 1003bf215546Sopenharmony_ci ir->operands[0] = bit_and(ir->operands[0], bit_not(mask)); 1004bf215546Sopenharmony_ci ir->operands[1] = bit_and(lshift(ir->operands[1], offset), mask); 1005bf215546Sopenharmony_ci ir->operands[2] = NULL; 1006bf215546Sopenharmony_ci ir->operands[3] = NULL; 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci this->progress = true; 1009bf215546Sopenharmony_ci} 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_civoid 1012bf215546Sopenharmony_cilower_instructions_visitor::reverse_to_shifts(ir_expression *ir) 1013bf215546Sopenharmony_ci{ 1014bf215546Sopenharmony_ci /* For more details, see: 1015bf215546Sopenharmony_ci * 1016bf215546Sopenharmony_ci * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel 1017bf215546Sopenharmony_ci */ 1018bf215546Sopenharmony_ci ir_constant *c1 = 1019bf215546Sopenharmony_ci new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements); 1020bf215546Sopenharmony_ci ir_constant *c2 = 1021bf215546Sopenharmony_ci new(ir) ir_constant(2u, ir->operands[0]->type->vector_elements); 1022bf215546Sopenharmony_ci ir_constant *c4 = 1023bf215546Sopenharmony_ci new(ir) ir_constant(4u, ir->operands[0]->type->vector_elements); 1024bf215546Sopenharmony_ci ir_constant *c8 = 1025bf215546Sopenharmony_ci new(ir) ir_constant(8u, ir->operands[0]->type->vector_elements); 1026bf215546Sopenharmony_ci ir_constant *c16 = 1027bf215546Sopenharmony_ci new(ir) ir_constant(16u, ir->operands[0]->type->vector_elements); 1028bf215546Sopenharmony_ci ir_constant *c33333333 = 1029bf215546Sopenharmony_ci new(ir) ir_constant(0x33333333u, ir->operands[0]->type->vector_elements); 1030bf215546Sopenharmony_ci ir_constant *c55555555 = 1031bf215546Sopenharmony_ci new(ir) ir_constant(0x55555555u, ir->operands[0]->type->vector_elements); 1032bf215546Sopenharmony_ci ir_constant *c0F0F0F0F = 1033bf215546Sopenharmony_ci new(ir) ir_constant(0x0F0F0F0Fu, ir->operands[0]->type->vector_elements); 1034bf215546Sopenharmony_ci ir_constant *c00FF00FF = 1035bf215546Sopenharmony_ci new(ir) ir_constant(0x00FF00FFu, ir->operands[0]->type->vector_elements); 1036bf215546Sopenharmony_ci ir_variable *temp = 1037bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(ir->operands[0]->type->vector_elements), 1038bf215546Sopenharmony_ci "temp", ir_var_temporary); 1039bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 1040bf215546Sopenharmony_ci 1041bf215546Sopenharmony_ci i.insert_before(temp); 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1044bf215546Sopenharmony_ci i.insert_before(assign(temp, ir->operands[0])); 1045bf215546Sopenharmony_ci } else { 1046bf215546Sopenharmony_ci assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 1047bf215546Sopenharmony_ci i.insert_before(assign(temp, i2u(ir->operands[0]))); 1048bf215546Sopenharmony_ci } 1049bf215546Sopenharmony_ci 1050bf215546Sopenharmony_ci /* Swap odd and even bits. 1051bf215546Sopenharmony_ci * 1052bf215546Sopenharmony_ci * temp = ((temp >> 1) & 0x55555555u) | ((temp & 0x55555555u) << 1); 1053bf215546Sopenharmony_ci */ 1054bf215546Sopenharmony_ci i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c1), c55555555), 1055bf215546Sopenharmony_ci lshift(bit_and(temp, c55555555->clone(ir, NULL)), 1056bf215546Sopenharmony_ci c1->clone(ir, NULL))))); 1057bf215546Sopenharmony_ci /* Swap consecutive pairs. 1058bf215546Sopenharmony_ci * 1059bf215546Sopenharmony_ci * temp = ((temp >> 2) & 0x33333333u) | ((temp & 0x33333333u) << 2); 1060bf215546Sopenharmony_ci */ 1061bf215546Sopenharmony_ci i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c2), c33333333), 1062bf215546Sopenharmony_ci lshift(bit_and(temp, c33333333->clone(ir, NULL)), 1063bf215546Sopenharmony_ci c2->clone(ir, NULL))))); 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci /* Swap nibbles. 1066bf215546Sopenharmony_ci * 1067bf215546Sopenharmony_ci * temp = ((temp >> 4) & 0x0F0F0F0Fu) | ((temp & 0x0F0F0F0Fu) << 4); 1068bf215546Sopenharmony_ci */ 1069bf215546Sopenharmony_ci i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c4), c0F0F0F0F), 1070bf215546Sopenharmony_ci lshift(bit_and(temp, c0F0F0F0F->clone(ir, NULL)), 1071bf215546Sopenharmony_ci c4->clone(ir, NULL))))); 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci /* The last step is, basically, bswap. Swap the bytes, then swap the 1074bf215546Sopenharmony_ci * words. When this code is run through GCC on x86, it does generate a 1075bf215546Sopenharmony_ci * bswap instruction. 1076bf215546Sopenharmony_ci * 1077bf215546Sopenharmony_ci * temp = ((temp >> 8) & 0x00FF00FFu) | ((temp & 0x00FF00FFu) << 8); 1078bf215546Sopenharmony_ci * temp = ( temp >> 16 ) | ( temp << 16); 1079bf215546Sopenharmony_ci */ 1080bf215546Sopenharmony_ci i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c8), c00FF00FF), 1081bf215546Sopenharmony_ci lshift(bit_and(temp, c00FF00FF->clone(ir, NULL)), 1082bf215546Sopenharmony_ci c8->clone(ir, NULL))))); 1083bf215546Sopenharmony_ci 1084bf215546Sopenharmony_ci if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1085bf215546Sopenharmony_ci ir->operation = ir_binop_bit_or; 1086bf215546Sopenharmony_ci ir->init_num_operands(); 1087bf215546Sopenharmony_ci ir->operands[0] = rshift(temp, c16); 1088bf215546Sopenharmony_ci ir->operands[1] = lshift(temp, c16->clone(ir, NULL)); 1089bf215546Sopenharmony_ci } else { 1090bf215546Sopenharmony_ci ir->operation = ir_unop_u2i; 1091bf215546Sopenharmony_ci ir->init_num_operands(); 1092bf215546Sopenharmony_ci ir->operands[0] = bit_or(rshift(temp, c16), 1093bf215546Sopenharmony_ci lshift(temp, c16->clone(ir, NULL))); 1094bf215546Sopenharmony_ci } 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci this->progress = true; 1097bf215546Sopenharmony_ci} 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_civoid 1100bf215546Sopenharmony_cilower_instructions_visitor::find_lsb_to_float_cast(ir_expression *ir) 1101bf215546Sopenharmony_ci{ 1102bf215546Sopenharmony_ci /* For more details, see: 1103bf215546Sopenharmony_ci * 1104bf215546Sopenharmony_ci * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast 1105bf215546Sopenharmony_ci */ 1106bf215546Sopenharmony_ci const unsigned elements = ir->operands[0]->type->vector_elements; 1107bf215546Sopenharmony_ci ir_constant *c0 = new(ir) ir_constant(unsigned(0), elements); 1108bf215546Sopenharmony_ci ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements); 1109bf215546Sopenharmony_ci ir_constant *c23 = new(ir) ir_constant(int(23), elements); 1110bf215546Sopenharmony_ci ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements); 1111bf215546Sopenharmony_ci ir_variable *temp = 1112bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::ivec(elements), "temp", ir_var_temporary); 1113bf215546Sopenharmony_ci ir_variable *lsb_only = 1114bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "lsb_only", ir_var_temporary); 1115bf215546Sopenharmony_ci ir_variable *as_float = 1116bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary); 1117bf215546Sopenharmony_ci ir_variable *lsb = 1118bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::ivec(elements), "lsb", ir_var_temporary); 1119bf215546Sopenharmony_ci 1120bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 1121bf215546Sopenharmony_ci 1122bf215546Sopenharmony_ci i.insert_before(temp); 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) { 1125bf215546Sopenharmony_ci i.insert_before(assign(temp, ir->operands[0])); 1126bf215546Sopenharmony_ci } else { 1127bf215546Sopenharmony_ci assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); 1128bf215546Sopenharmony_ci i.insert_before(assign(temp, u2i(ir->operands[0]))); 1129bf215546Sopenharmony_ci } 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci /* The int-to-float conversion is lossless because (value & -value) is 1132bf215546Sopenharmony_ci * either a power of two or zero. We don't use the result in the zero 1133bf215546Sopenharmony_ci * case. The uint() cast is necessary so that 0x80000000 does not 1134bf215546Sopenharmony_ci * generate a negative value. 1135bf215546Sopenharmony_ci * 1136bf215546Sopenharmony_ci * uint lsb_only = uint(value & -value); 1137bf215546Sopenharmony_ci * float as_float = float(lsb_only); 1138bf215546Sopenharmony_ci */ 1139bf215546Sopenharmony_ci i.insert_before(lsb_only); 1140bf215546Sopenharmony_ci i.insert_before(assign(lsb_only, i2u(bit_and(temp, neg(temp))))); 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_ci i.insert_before(as_float); 1143bf215546Sopenharmony_ci i.insert_before(assign(as_float, u2f(lsb_only))); 1144bf215546Sopenharmony_ci 1145bf215546Sopenharmony_ci /* This is basically an open-coded frexp. Implementations that have a 1146bf215546Sopenharmony_ci * native frexp instruction would be better served by that. This is 1147bf215546Sopenharmony_ci * optimized versus a full-featured open-coded implementation in two ways: 1148bf215546Sopenharmony_ci * 1149bf215546Sopenharmony_ci * - We don't care about a correct result from subnormal numbers (including 1150bf215546Sopenharmony_ci * 0.0), so the raw exponent can always be safely unbiased. 1151bf215546Sopenharmony_ci * 1152bf215546Sopenharmony_ci * - The value cannot be negative, so it does not need to be masked off to 1153bf215546Sopenharmony_ci * extract the exponent. 1154bf215546Sopenharmony_ci * 1155bf215546Sopenharmony_ci * int lsb = (floatBitsToInt(as_float) >> 23) - 0x7f; 1156bf215546Sopenharmony_ci */ 1157bf215546Sopenharmony_ci i.insert_before(lsb); 1158bf215546Sopenharmony_ci i.insert_before(assign(lsb, sub(rshift(bitcast_f2i(as_float), c23), c7F))); 1159bf215546Sopenharmony_ci 1160bf215546Sopenharmony_ci /* Use lsb_only in the comparison instead of temp so that the & (far above) 1161bf215546Sopenharmony_ci * can possibly generate the result without an explicit comparison. 1162bf215546Sopenharmony_ci * 1163bf215546Sopenharmony_ci * (lsb_only == 0) ? -1 : lsb; 1164bf215546Sopenharmony_ci * 1165bf215546Sopenharmony_ci * Since our input values are all integers, the unbiased exponent must not 1166bf215546Sopenharmony_ci * be negative. It will only be negative (-0x7f, in fact) if lsb_only is 1167bf215546Sopenharmony_ci * 0. Instead of using (lsb_only == 0), we could use (lsb >= 0). Which is 1168bf215546Sopenharmony_ci * better is likely GPU dependent. Either way, the difference should be 1169bf215546Sopenharmony_ci * small. 1170bf215546Sopenharmony_ci */ 1171bf215546Sopenharmony_ci ir->operation = ir_triop_csel; 1172bf215546Sopenharmony_ci ir->init_num_operands(); 1173bf215546Sopenharmony_ci ir->operands[0] = equal(lsb_only, c0); 1174bf215546Sopenharmony_ci ir->operands[1] = cminus1; 1175bf215546Sopenharmony_ci ir->operands[2] = new(ir) ir_dereference_variable(lsb); 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_ci this->progress = true; 1178bf215546Sopenharmony_ci} 1179bf215546Sopenharmony_ci 1180bf215546Sopenharmony_civoid 1181bf215546Sopenharmony_cilower_instructions_visitor::find_msb_to_float_cast(ir_expression *ir) 1182bf215546Sopenharmony_ci{ 1183bf215546Sopenharmony_ci /* For more details, see: 1184bf215546Sopenharmony_ci * 1185bf215546Sopenharmony_ci * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast 1186bf215546Sopenharmony_ci */ 1187bf215546Sopenharmony_ci const unsigned elements = ir->operands[0]->type->vector_elements; 1188bf215546Sopenharmony_ci ir_constant *c0 = new(ir) ir_constant(int(0), elements); 1189bf215546Sopenharmony_ci ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements); 1190bf215546Sopenharmony_ci ir_constant *c23 = new(ir) ir_constant(int(23), elements); 1191bf215546Sopenharmony_ci ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements); 1192bf215546Sopenharmony_ci ir_constant *c000000FF = new(ir) ir_constant(0x000000FFu, elements); 1193bf215546Sopenharmony_ci ir_constant *cFFFFFF00 = new(ir) ir_constant(0xFFFFFF00u, elements); 1194bf215546Sopenharmony_ci ir_variable *temp = 1195bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "temp", ir_var_temporary); 1196bf215546Sopenharmony_ci ir_variable *as_float = 1197bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary); 1198bf215546Sopenharmony_ci ir_variable *msb = 1199bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::ivec(elements), "msb", ir_var_temporary); 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci i.insert_before(temp); 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1206bf215546Sopenharmony_ci i.insert_before(assign(temp, ir->operands[0])); 1207bf215546Sopenharmony_ci } else { 1208bf215546Sopenharmony_ci assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 1209bf215546Sopenharmony_ci 1210bf215546Sopenharmony_ci /* findMSB(uint(abs(some_int))) almost always does the right thing. 1211bf215546Sopenharmony_ci * There are two problem values: 1212bf215546Sopenharmony_ci * 1213bf215546Sopenharmony_ci * * 0x80000000. Since abs(0x80000000) == 0x80000000, findMSB returns 1214bf215546Sopenharmony_ci * 31. However, findMSB(int(0x80000000)) == 30. 1215bf215546Sopenharmony_ci * 1216bf215546Sopenharmony_ci * * 0xffffffff. Since abs(0xffffffff) == 1, findMSB returns 1217bf215546Sopenharmony_ci * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 1218bf215546Sopenharmony_ci * 1219bf215546Sopenharmony_ci * For a value of zero or negative one, -1 will be returned. 1220bf215546Sopenharmony_ci * 1221bf215546Sopenharmony_ci * For all negative number cases, including 0x80000000 and 0xffffffff, 1222bf215546Sopenharmony_ci * the correct value is obtained from findMSB if instead of negating the 1223bf215546Sopenharmony_ci * (already negative) value the logical-not is used. A conditonal 1224bf215546Sopenharmony_ci * logical-not can be achieved in two instructions. 1225bf215546Sopenharmony_ci */ 1226bf215546Sopenharmony_ci ir_variable *as_int = 1227bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::ivec(elements), "as_int", ir_var_temporary); 1228bf215546Sopenharmony_ci ir_constant *c31 = new(ir) ir_constant(int(31), elements); 1229bf215546Sopenharmony_ci 1230bf215546Sopenharmony_ci i.insert_before(as_int); 1231bf215546Sopenharmony_ci i.insert_before(assign(as_int, ir->operands[0])); 1232bf215546Sopenharmony_ci i.insert_before(assign(temp, i2u(expr(ir_binop_bit_xor, 1233bf215546Sopenharmony_ci as_int, 1234bf215546Sopenharmony_ci rshift(as_int, c31))))); 1235bf215546Sopenharmony_ci } 1236bf215546Sopenharmony_ci 1237bf215546Sopenharmony_ci /* The int-to-float conversion is lossless because bits are conditionally 1238bf215546Sopenharmony_ci * masked off the bottom of temp to ensure the value has at most 24 bits of 1239bf215546Sopenharmony_ci * data or is zero. We don't use the result in the zero case. The uint() 1240bf215546Sopenharmony_ci * cast is necessary so that 0x80000000 does not generate a negative value. 1241bf215546Sopenharmony_ci * 1242bf215546Sopenharmony_ci * float as_float = float(temp > 255 ? temp & ~255 : temp); 1243bf215546Sopenharmony_ci */ 1244bf215546Sopenharmony_ci i.insert_before(as_float); 1245bf215546Sopenharmony_ci i.insert_before(assign(as_float, u2f(csel(greater(temp, c000000FF), 1246bf215546Sopenharmony_ci bit_and(temp, cFFFFFF00), 1247bf215546Sopenharmony_ci temp)))); 1248bf215546Sopenharmony_ci 1249bf215546Sopenharmony_ci /* This is basically an open-coded frexp. Implementations that have a 1250bf215546Sopenharmony_ci * native frexp instruction would be better served by that. This is 1251bf215546Sopenharmony_ci * optimized versus a full-featured open-coded implementation in two ways: 1252bf215546Sopenharmony_ci * 1253bf215546Sopenharmony_ci * - We don't care about a correct result from subnormal numbers (including 1254bf215546Sopenharmony_ci * 0.0), so the raw exponent can always be safely unbiased. 1255bf215546Sopenharmony_ci * 1256bf215546Sopenharmony_ci * - The value cannot be negative, so it does not need to be masked off to 1257bf215546Sopenharmony_ci * extract the exponent. 1258bf215546Sopenharmony_ci * 1259bf215546Sopenharmony_ci * int msb = (floatBitsToInt(as_float) >> 23) - 0x7f; 1260bf215546Sopenharmony_ci */ 1261bf215546Sopenharmony_ci i.insert_before(msb); 1262bf215546Sopenharmony_ci i.insert_before(assign(msb, sub(rshift(bitcast_f2i(as_float), c23), c7F))); 1263bf215546Sopenharmony_ci 1264bf215546Sopenharmony_ci /* Use msb in the comparison instead of temp so that the subtract can 1265bf215546Sopenharmony_ci * possibly generate the result without an explicit comparison. 1266bf215546Sopenharmony_ci * 1267bf215546Sopenharmony_ci * (msb < 0) ? -1 : msb; 1268bf215546Sopenharmony_ci * 1269bf215546Sopenharmony_ci * Since our input values are all integers, the unbiased exponent must not 1270bf215546Sopenharmony_ci * be negative. It will only be negative (-0x7f, in fact) if temp is 0. 1271bf215546Sopenharmony_ci */ 1272bf215546Sopenharmony_ci ir->operation = ir_triop_csel; 1273bf215546Sopenharmony_ci ir->init_num_operands(); 1274bf215546Sopenharmony_ci ir->operands[0] = less(msb, c0); 1275bf215546Sopenharmony_ci ir->operands[1] = cminus1; 1276bf215546Sopenharmony_ci ir->operands[2] = new(ir) ir_dereference_variable(msb); 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci this->progress = true; 1279bf215546Sopenharmony_ci} 1280bf215546Sopenharmony_ci 1281bf215546Sopenharmony_ciir_expression * 1282bf215546Sopenharmony_cilower_instructions_visitor::_carry(operand a, operand b) 1283bf215546Sopenharmony_ci{ 1284bf215546Sopenharmony_ci if (lowering(CARRY_TO_ARITH)) 1285bf215546Sopenharmony_ci return i2u(b2i(less(add(a, b), 1286bf215546Sopenharmony_ci a.val->clone(ralloc_parent(a.val), NULL)))); 1287bf215546Sopenharmony_ci else 1288bf215546Sopenharmony_ci return carry(a, b); 1289bf215546Sopenharmony_ci} 1290bf215546Sopenharmony_ci 1291bf215546Sopenharmony_civoid 1292bf215546Sopenharmony_cilower_instructions_visitor::imul_high_to_mul(ir_expression *ir) 1293bf215546Sopenharmony_ci{ 1294bf215546Sopenharmony_ci /* ABCD 1295bf215546Sopenharmony_ci * * EFGH 1296bf215546Sopenharmony_ci * ====== 1297bf215546Sopenharmony_ci * (GH * CD) + (GH * AB) << 16 + (EF * CD) << 16 + (EF * AB) << 32 1298bf215546Sopenharmony_ci * 1299bf215546Sopenharmony_ci * In GLSL, (a * b) becomes 1300bf215546Sopenharmony_ci * 1301bf215546Sopenharmony_ci * uint m1 = (a & 0x0000ffffu) * (b & 0x0000ffffu); 1302bf215546Sopenharmony_ci * uint m2 = (a & 0x0000ffffu) * (b >> 16); 1303bf215546Sopenharmony_ci * uint m3 = (a >> 16) * (b & 0x0000ffffu); 1304bf215546Sopenharmony_ci * uint m4 = (a >> 16) * (b >> 16); 1305bf215546Sopenharmony_ci * 1306bf215546Sopenharmony_ci * uint c1; 1307bf215546Sopenharmony_ci * uint c2; 1308bf215546Sopenharmony_ci * uint lo_result; 1309bf215546Sopenharmony_ci * uint hi_result; 1310bf215546Sopenharmony_ci * 1311bf215546Sopenharmony_ci * lo_result = uaddCarry(m1, m2 << 16, c1); 1312bf215546Sopenharmony_ci * hi_result = m4 + c1; 1313bf215546Sopenharmony_ci * lo_result = uaddCarry(lo_result, m3 << 16, c2); 1314bf215546Sopenharmony_ci * hi_result = hi_result + c2; 1315bf215546Sopenharmony_ci * hi_result = hi_result + (m2 >> 16) + (m3 >> 16); 1316bf215546Sopenharmony_ci */ 1317bf215546Sopenharmony_ci const unsigned elements = ir->operands[0]->type->vector_elements; 1318bf215546Sopenharmony_ci ir_variable *src1 = 1319bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "src1", ir_var_temporary); 1320bf215546Sopenharmony_ci ir_variable *src1h = 1321bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "src1h", ir_var_temporary); 1322bf215546Sopenharmony_ci ir_variable *src1l = 1323bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "src1l", ir_var_temporary); 1324bf215546Sopenharmony_ci ir_variable *src2 = 1325bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "src2", ir_var_temporary); 1326bf215546Sopenharmony_ci ir_variable *src2h = 1327bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "src2h", ir_var_temporary); 1328bf215546Sopenharmony_ci ir_variable *src2l = 1329bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "src2l", ir_var_temporary); 1330bf215546Sopenharmony_ci ir_variable *t1 = 1331bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "t1", ir_var_temporary); 1332bf215546Sopenharmony_ci ir_variable *t2 = 1333bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "t2", ir_var_temporary); 1334bf215546Sopenharmony_ci ir_variable *lo = 1335bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "lo", ir_var_temporary); 1336bf215546Sopenharmony_ci ir_variable *hi = 1337bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::uvec(elements), "hi", ir_var_temporary); 1338bf215546Sopenharmony_ci ir_variable *different_signs = NULL; 1339bf215546Sopenharmony_ci ir_constant *c0000FFFF = new(ir) ir_constant(0x0000FFFFu, elements); 1340bf215546Sopenharmony_ci ir_constant *c16 = new(ir) ir_constant(16u, elements); 1341bf215546Sopenharmony_ci 1342bf215546Sopenharmony_ci ir_instruction &i = *base_ir; 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_ci i.insert_before(src1); 1345bf215546Sopenharmony_ci i.insert_before(src2); 1346bf215546Sopenharmony_ci i.insert_before(src1h); 1347bf215546Sopenharmony_ci i.insert_before(src2h); 1348bf215546Sopenharmony_ci i.insert_before(src1l); 1349bf215546Sopenharmony_ci i.insert_before(src2l); 1350bf215546Sopenharmony_ci 1351bf215546Sopenharmony_ci if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1352bf215546Sopenharmony_ci i.insert_before(assign(src1, ir->operands[0])); 1353bf215546Sopenharmony_ci i.insert_before(assign(src2, ir->operands[1])); 1354bf215546Sopenharmony_ci } else { 1355bf215546Sopenharmony_ci assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 1356bf215546Sopenharmony_ci 1357bf215546Sopenharmony_ci ir_variable *itmp1 = 1358bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::ivec(elements), "itmp1", ir_var_temporary); 1359bf215546Sopenharmony_ci ir_variable *itmp2 = 1360bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::ivec(elements), "itmp2", ir_var_temporary); 1361bf215546Sopenharmony_ci ir_constant *c0 = new(ir) ir_constant(int(0), elements); 1362bf215546Sopenharmony_ci 1363bf215546Sopenharmony_ci i.insert_before(itmp1); 1364bf215546Sopenharmony_ci i.insert_before(itmp2); 1365bf215546Sopenharmony_ci i.insert_before(assign(itmp1, ir->operands[0])); 1366bf215546Sopenharmony_ci i.insert_before(assign(itmp2, ir->operands[1])); 1367bf215546Sopenharmony_ci 1368bf215546Sopenharmony_ci different_signs = 1369bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::bvec(elements), "different_signs", 1370bf215546Sopenharmony_ci ir_var_temporary); 1371bf215546Sopenharmony_ci 1372bf215546Sopenharmony_ci i.insert_before(different_signs); 1373bf215546Sopenharmony_ci i.insert_before(assign(different_signs, expr(ir_binop_logic_xor, 1374bf215546Sopenharmony_ci less(itmp1, c0), 1375bf215546Sopenharmony_ci less(itmp2, c0->clone(ir, NULL))))); 1376bf215546Sopenharmony_ci 1377bf215546Sopenharmony_ci i.insert_before(assign(src1, i2u(abs(itmp1)))); 1378bf215546Sopenharmony_ci i.insert_before(assign(src2, i2u(abs(itmp2)))); 1379bf215546Sopenharmony_ci } 1380bf215546Sopenharmony_ci 1381bf215546Sopenharmony_ci i.insert_before(assign(src1l, bit_and(src1, c0000FFFF))); 1382bf215546Sopenharmony_ci i.insert_before(assign(src2l, bit_and(src2, c0000FFFF->clone(ir, NULL)))); 1383bf215546Sopenharmony_ci i.insert_before(assign(src1h, rshift(src1, c16))); 1384bf215546Sopenharmony_ci i.insert_before(assign(src2h, rshift(src2, c16->clone(ir, NULL)))); 1385bf215546Sopenharmony_ci 1386bf215546Sopenharmony_ci i.insert_before(lo); 1387bf215546Sopenharmony_ci i.insert_before(hi); 1388bf215546Sopenharmony_ci i.insert_before(t1); 1389bf215546Sopenharmony_ci i.insert_before(t2); 1390bf215546Sopenharmony_ci 1391bf215546Sopenharmony_ci i.insert_before(assign(lo, mul(src1l, src2l))); 1392bf215546Sopenharmony_ci i.insert_before(assign(t1, mul(src1l, src2h))); 1393bf215546Sopenharmony_ci i.insert_before(assign(t2, mul(src1h, src2l))); 1394bf215546Sopenharmony_ci i.insert_before(assign(hi, mul(src1h, src2h))); 1395bf215546Sopenharmony_ci 1396bf215546Sopenharmony_ci i.insert_before(assign(hi, add(hi, _carry(lo, lshift(t1, c16->clone(ir, NULL)))))); 1397bf215546Sopenharmony_ci i.insert_before(assign(lo, add(lo, lshift(t1, c16->clone(ir, NULL))))); 1398bf215546Sopenharmony_ci 1399bf215546Sopenharmony_ci i.insert_before(assign(hi, add(hi, _carry(lo, lshift(t2, c16->clone(ir, NULL)))))); 1400bf215546Sopenharmony_ci i.insert_before(assign(lo, add(lo, lshift(t2, c16->clone(ir, NULL))))); 1401bf215546Sopenharmony_ci 1402bf215546Sopenharmony_ci if (different_signs == NULL) { 1403bf215546Sopenharmony_ci assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); 1404bf215546Sopenharmony_ci 1405bf215546Sopenharmony_ci ir->operation = ir_binop_add; 1406bf215546Sopenharmony_ci ir->init_num_operands(); 1407bf215546Sopenharmony_ci ir->operands[0] = add(hi, rshift(t1, c16->clone(ir, NULL))); 1408bf215546Sopenharmony_ci ir->operands[1] = rshift(t2, c16->clone(ir, NULL)); 1409bf215546Sopenharmony_ci } else { 1410bf215546Sopenharmony_ci assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 1411bf215546Sopenharmony_ci 1412bf215546Sopenharmony_ci i.insert_before(assign(hi, add(add(hi, rshift(t1, c16->clone(ir, NULL))), 1413bf215546Sopenharmony_ci rshift(t2, c16->clone(ir, NULL))))); 1414bf215546Sopenharmony_ci 1415bf215546Sopenharmony_ci /* For channels where different_signs is set we have to perform a 64-bit 1416bf215546Sopenharmony_ci * negation. This is *not* the same as just negating the high 32-bits. 1417bf215546Sopenharmony_ci * Consider -3 * 2. The high 32-bits is 0, but the desired result is 1418bf215546Sopenharmony_ci * -1, not -0! Recall -x == ~x + 1. 1419bf215546Sopenharmony_ci */ 1420bf215546Sopenharmony_ci ir_variable *neg_hi = 1421bf215546Sopenharmony_ci new(ir) ir_variable(glsl_type::ivec(elements), "neg_hi", ir_var_temporary); 1422bf215546Sopenharmony_ci ir_constant *c1 = new(ir) ir_constant(1u, elements); 1423bf215546Sopenharmony_ci 1424bf215546Sopenharmony_ci i.insert_before(neg_hi); 1425bf215546Sopenharmony_ci i.insert_before(assign(neg_hi, add(bit_not(u2i(hi)), 1426bf215546Sopenharmony_ci u2i(_carry(bit_not(lo), c1))))); 1427bf215546Sopenharmony_ci 1428bf215546Sopenharmony_ci ir->operation = ir_triop_csel; 1429bf215546Sopenharmony_ci ir->init_num_operands(); 1430bf215546Sopenharmony_ci ir->operands[0] = new(ir) ir_dereference_variable(different_signs); 1431bf215546Sopenharmony_ci ir->operands[1] = new(ir) ir_dereference_variable(neg_hi); 1432bf215546Sopenharmony_ci ir->operands[2] = u2i(hi); 1433bf215546Sopenharmony_ci } 1434bf215546Sopenharmony_ci} 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_civoid 1437bf215546Sopenharmony_cilower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir) 1438bf215546Sopenharmony_ci{ 1439bf215546Sopenharmony_ci ir->operands[0] = new(ir) ir_expression(ir_unop_abs, ir->operands[0]); 1440bf215546Sopenharmony_ci this->progress = true; 1441bf215546Sopenharmony_ci} 1442bf215546Sopenharmony_ci 1443bf215546Sopenharmony_ciir_visitor_status 1444bf215546Sopenharmony_cilower_instructions_visitor::visit_leave(ir_expression *ir) 1445bf215546Sopenharmony_ci{ 1446bf215546Sopenharmony_ci switch (ir->operation) { 1447bf215546Sopenharmony_ci case ir_binop_dot: 1448bf215546Sopenharmony_ci if (ir->operands[0]->type->is_double()) 1449bf215546Sopenharmony_ci double_dot_to_fma(ir); 1450bf215546Sopenharmony_ci break; 1451bf215546Sopenharmony_ci case ir_triop_lrp: 1452bf215546Sopenharmony_ci if (ir->operands[0]->type->is_double()) 1453bf215546Sopenharmony_ci double_lrp(ir); 1454bf215546Sopenharmony_ci break; 1455bf215546Sopenharmony_ci case ir_binop_sub: 1456bf215546Sopenharmony_ci if (lowering(SUB_TO_ADD_NEG)) 1457bf215546Sopenharmony_ci sub_to_add_neg(ir); 1458bf215546Sopenharmony_ci break; 1459bf215546Sopenharmony_ci 1460bf215546Sopenharmony_ci case ir_binop_ldexp: 1461bf215546Sopenharmony_ci if (lowering(LDEXP_TO_ARITH) && ir->type->is_float()) 1462bf215546Sopenharmony_ci ldexp_to_arith(ir); 1463bf215546Sopenharmony_ci if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->type->is_double()) 1464bf215546Sopenharmony_ci dldexp_to_arith(ir); 1465bf215546Sopenharmony_ci break; 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_ci case ir_unop_frexp_exp: 1468bf215546Sopenharmony_ci if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double()) 1469bf215546Sopenharmony_ci dfrexp_exp_to_arith(ir); 1470bf215546Sopenharmony_ci break; 1471bf215546Sopenharmony_ci 1472bf215546Sopenharmony_ci case ir_unop_frexp_sig: 1473bf215546Sopenharmony_ci if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double()) 1474bf215546Sopenharmony_ci dfrexp_sig_to_arith(ir); 1475bf215546Sopenharmony_ci break; 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci case ir_binop_carry: 1478bf215546Sopenharmony_ci if (lowering(CARRY_TO_ARITH)) 1479bf215546Sopenharmony_ci carry_to_arith(ir); 1480bf215546Sopenharmony_ci break; 1481bf215546Sopenharmony_ci 1482bf215546Sopenharmony_ci case ir_binop_borrow: 1483bf215546Sopenharmony_ci if (lowering(BORROW_TO_ARITH)) 1484bf215546Sopenharmony_ci borrow_to_arith(ir); 1485bf215546Sopenharmony_ci break; 1486bf215546Sopenharmony_ci 1487bf215546Sopenharmony_ci case ir_unop_trunc: 1488bf215546Sopenharmony_ci if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1489bf215546Sopenharmony_ci dtrunc_to_dfrac(ir); 1490bf215546Sopenharmony_ci break; 1491bf215546Sopenharmony_ci 1492bf215546Sopenharmony_ci case ir_unop_ceil: 1493bf215546Sopenharmony_ci if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1494bf215546Sopenharmony_ci dceil_to_dfrac(ir); 1495bf215546Sopenharmony_ci break; 1496bf215546Sopenharmony_ci 1497bf215546Sopenharmony_ci case ir_unop_floor: 1498bf215546Sopenharmony_ci if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1499bf215546Sopenharmony_ci dfloor_to_dfrac(ir); 1500bf215546Sopenharmony_ci break; 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci case ir_unop_round_even: 1503bf215546Sopenharmony_ci if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1504bf215546Sopenharmony_ci dround_even_to_dfrac(ir); 1505bf215546Sopenharmony_ci break; 1506bf215546Sopenharmony_ci 1507bf215546Sopenharmony_ci case ir_unop_sign: 1508bf215546Sopenharmony_ci if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1509bf215546Sopenharmony_ci dsign_to_csel(ir); 1510bf215546Sopenharmony_ci break; 1511bf215546Sopenharmony_ci 1512bf215546Sopenharmony_ci case ir_unop_bit_count: 1513bf215546Sopenharmony_ci if (lowering(BIT_COUNT_TO_MATH)) 1514bf215546Sopenharmony_ci bit_count_to_math(ir); 1515bf215546Sopenharmony_ci break; 1516bf215546Sopenharmony_ci 1517bf215546Sopenharmony_ci case ir_triop_bitfield_extract: 1518bf215546Sopenharmony_ci if (lowering(EXTRACT_TO_SHIFTS)) 1519bf215546Sopenharmony_ci extract_to_shifts(ir); 1520bf215546Sopenharmony_ci break; 1521bf215546Sopenharmony_ci 1522bf215546Sopenharmony_ci case ir_quadop_bitfield_insert: 1523bf215546Sopenharmony_ci if (lowering(INSERT_TO_SHIFTS)) 1524bf215546Sopenharmony_ci insert_to_shifts(ir); 1525bf215546Sopenharmony_ci break; 1526bf215546Sopenharmony_ci 1527bf215546Sopenharmony_ci case ir_unop_bitfield_reverse: 1528bf215546Sopenharmony_ci if (lowering(REVERSE_TO_SHIFTS)) 1529bf215546Sopenharmony_ci reverse_to_shifts(ir); 1530bf215546Sopenharmony_ci break; 1531bf215546Sopenharmony_ci 1532bf215546Sopenharmony_ci case ir_unop_find_lsb: 1533bf215546Sopenharmony_ci if (lowering(FIND_LSB_TO_FLOAT_CAST)) 1534bf215546Sopenharmony_ci find_lsb_to_float_cast(ir); 1535bf215546Sopenharmony_ci break; 1536bf215546Sopenharmony_ci 1537bf215546Sopenharmony_ci case ir_unop_find_msb: 1538bf215546Sopenharmony_ci if (lowering(FIND_MSB_TO_FLOAT_CAST)) 1539bf215546Sopenharmony_ci find_msb_to_float_cast(ir); 1540bf215546Sopenharmony_ci break; 1541bf215546Sopenharmony_ci 1542bf215546Sopenharmony_ci case ir_binop_imul_high: 1543bf215546Sopenharmony_ci if (lowering(IMUL_HIGH_TO_MUL)) 1544bf215546Sopenharmony_ci imul_high_to_mul(ir); 1545bf215546Sopenharmony_ci break; 1546bf215546Sopenharmony_ci 1547bf215546Sopenharmony_ci case ir_unop_rsq: 1548bf215546Sopenharmony_ci case ir_unop_sqrt: 1549bf215546Sopenharmony_ci if (lowering(SQRT_TO_ABS_SQRT)) 1550bf215546Sopenharmony_ci sqrt_to_abs_sqrt(ir); 1551bf215546Sopenharmony_ci break; 1552bf215546Sopenharmony_ci 1553bf215546Sopenharmony_ci default: 1554bf215546Sopenharmony_ci return visit_continue; 1555bf215546Sopenharmony_ci } 1556bf215546Sopenharmony_ci 1557bf215546Sopenharmony_ci return visit_continue; 1558bf215546Sopenharmony_ci} 1559