1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2010 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "brw_cfg.h" 25bf215546Sopenharmony_ci#include "brw_eu.h" 26bf215546Sopenharmony_ci#include "brw_fs.h" 27bf215546Sopenharmony_ci#include "brw_nir.h" 28bf215546Sopenharmony_ci#include "brw_vec4_tes.h" 29bf215546Sopenharmony_ci#include "dev/intel_debug.h" 30bf215546Sopenharmony_ci#include "main/uniforms.h" 31bf215546Sopenharmony_ci#include "util/macros.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cienum brw_reg_type 34bf215546Sopenharmony_cibrw_type_for_base_type(const struct glsl_type *type) 35bf215546Sopenharmony_ci{ 36bf215546Sopenharmony_ci switch (type->base_type) { 37bf215546Sopenharmony_ci case GLSL_TYPE_FLOAT16: 38bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_HF; 39bf215546Sopenharmony_ci case GLSL_TYPE_FLOAT: 40bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_F; 41bf215546Sopenharmony_ci case GLSL_TYPE_INT: 42bf215546Sopenharmony_ci case GLSL_TYPE_BOOL: 43bf215546Sopenharmony_ci case GLSL_TYPE_SUBROUTINE: 44bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_D; 45bf215546Sopenharmony_ci case GLSL_TYPE_INT16: 46bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_W; 47bf215546Sopenharmony_ci case GLSL_TYPE_INT8: 48bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_B; 49bf215546Sopenharmony_ci case GLSL_TYPE_UINT: 50bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_UD; 51bf215546Sopenharmony_ci case GLSL_TYPE_UINT16: 52bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_UW; 53bf215546Sopenharmony_ci case GLSL_TYPE_UINT8: 54bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_UB; 55bf215546Sopenharmony_ci case GLSL_TYPE_ARRAY: 56bf215546Sopenharmony_ci return brw_type_for_base_type(type->fields.array); 57bf215546Sopenharmony_ci case GLSL_TYPE_STRUCT: 58bf215546Sopenharmony_ci case GLSL_TYPE_INTERFACE: 59bf215546Sopenharmony_ci case GLSL_TYPE_SAMPLER: 60bf215546Sopenharmony_ci case GLSL_TYPE_TEXTURE: 61bf215546Sopenharmony_ci case GLSL_TYPE_ATOMIC_UINT: 62bf215546Sopenharmony_ci /* These should be overridden with the type of the member when 63bf215546Sopenharmony_ci * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 64bf215546Sopenharmony_ci * way to trip up if we don't. 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_UD; 67bf215546Sopenharmony_ci case GLSL_TYPE_IMAGE: 68bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_UD; 69bf215546Sopenharmony_ci case GLSL_TYPE_DOUBLE: 70bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_DF; 71bf215546Sopenharmony_ci case GLSL_TYPE_UINT64: 72bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_UQ; 73bf215546Sopenharmony_ci case GLSL_TYPE_INT64: 74bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_Q; 75bf215546Sopenharmony_ci case GLSL_TYPE_VOID: 76bf215546Sopenharmony_ci case GLSL_TYPE_ERROR: 77bf215546Sopenharmony_ci case GLSL_TYPE_FUNCTION: 78bf215546Sopenharmony_ci unreachable("not reached"); 79bf215546Sopenharmony_ci } 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci return BRW_REGISTER_TYPE_F; 82bf215546Sopenharmony_ci} 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_cienum brw_conditional_mod 85bf215546Sopenharmony_cibrw_conditional_for_comparison(unsigned int op) 86bf215546Sopenharmony_ci{ 87bf215546Sopenharmony_ci switch (op) { 88bf215546Sopenharmony_ci case ir_binop_less: 89bf215546Sopenharmony_ci return BRW_CONDITIONAL_L; 90bf215546Sopenharmony_ci case ir_binop_gequal: 91bf215546Sopenharmony_ci return BRW_CONDITIONAL_GE; 92bf215546Sopenharmony_ci case ir_binop_equal: 93bf215546Sopenharmony_ci case ir_binop_all_equal: /* same as equal for scalars */ 94bf215546Sopenharmony_ci return BRW_CONDITIONAL_Z; 95bf215546Sopenharmony_ci case ir_binop_nequal: 96bf215546Sopenharmony_ci case ir_binop_any_nequal: /* same as nequal for scalars */ 97bf215546Sopenharmony_ci return BRW_CONDITIONAL_NZ; 98bf215546Sopenharmony_ci default: 99bf215546Sopenharmony_ci unreachable("not reached: bad operation for comparison"); 100bf215546Sopenharmony_ci } 101bf215546Sopenharmony_ci} 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ciuint32_t 104bf215546Sopenharmony_cibrw_math_function(enum opcode op) 105bf215546Sopenharmony_ci{ 106bf215546Sopenharmony_ci switch (op) { 107bf215546Sopenharmony_ci case SHADER_OPCODE_RCP: 108bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_INV; 109bf215546Sopenharmony_ci case SHADER_OPCODE_RSQ: 110bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_RSQ; 111bf215546Sopenharmony_ci case SHADER_OPCODE_SQRT: 112bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_SQRT; 113bf215546Sopenharmony_ci case SHADER_OPCODE_EXP2: 114bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_EXP; 115bf215546Sopenharmony_ci case SHADER_OPCODE_LOG2: 116bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_LOG; 117bf215546Sopenharmony_ci case SHADER_OPCODE_POW: 118bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_POW; 119bf215546Sopenharmony_ci case SHADER_OPCODE_SIN: 120bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_SIN; 121bf215546Sopenharmony_ci case SHADER_OPCODE_COS: 122bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_COS; 123bf215546Sopenharmony_ci case SHADER_OPCODE_INT_QUOTIENT: 124bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; 125bf215546Sopenharmony_ci case SHADER_OPCODE_INT_REMAINDER: 126bf215546Sopenharmony_ci return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; 127bf215546Sopenharmony_ci default: 128bf215546Sopenharmony_ci unreachable("not reached: unknown math function"); 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci} 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_cibool 133bf215546Sopenharmony_cibrw_texture_offset(const nir_tex_instr *tex, unsigned src, 134bf215546Sopenharmony_ci uint32_t *offset_bits_out) 135bf215546Sopenharmony_ci{ 136bf215546Sopenharmony_ci if (!nir_src_is_const(tex->src[src].src)) 137bf215546Sopenharmony_ci return false; 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci const unsigned num_components = nir_tex_instr_src_size(tex, src); 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci /* Combine all three offsets into a single unsigned dword: 142bf215546Sopenharmony_ci * 143bf215546Sopenharmony_ci * bits 11:8 - U Offset (X component) 144bf215546Sopenharmony_ci * bits 7:4 - V Offset (Y component) 145bf215546Sopenharmony_ci * bits 3:0 - R Offset (Z component) 146bf215546Sopenharmony_ci */ 147bf215546Sopenharmony_ci uint32_t offset_bits = 0; 148bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; i++) { 149bf215546Sopenharmony_ci int offset = nir_src_comp_as_int(tex->src[src].src, i); 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci /* offset out of bounds; caller will handle it. */ 152bf215546Sopenharmony_ci if (offset > 7 || offset < -8) 153bf215546Sopenharmony_ci return false; 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci const unsigned shift = 4 * (2 - i); 156bf215546Sopenharmony_ci offset_bits |= (offset << shift) & (0xF << shift); 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci *offset_bits_out = offset_bits; 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci return true; 162bf215546Sopenharmony_ci} 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ciconst char * 165bf215546Sopenharmony_cibrw_instruction_name(const struct brw_isa_info *isa, enum opcode op) 166bf215546Sopenharmony_ci{ 167bf215546Sopenharmony_ci const struct intel_device_info *devinfo = isa->devinfo; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci switch (op) { 170bf215546Sopenharmony_ci case 0 ... NUM_BRW_OPCODES - 1: 171bf215546Sopenharmony_ci /* The DO instruction doesn't exist on Gfx6+, but we use it to mark the 172bf215546Sopenharmony_ci * start of a loop in the IR. 173bf215546Sopenharmony_ci */ 174bf215546Sopenharmony_ci if (devinfo->ver >= 6 && op == BRW_OPCODE_DO) 175bf215546Sopenharmony_ci return "do"; 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci /* The following conversion opcodes doesn't exist on Gfx8+, but we use 178bf215546Sopenharmony_ci * then to mark that we want to do the conversion. 179bf215546Sopenharmony_ci */ 180bf215546Sopenharmony_ci if (devinfo->ver > 7 && op == BRW_OPCODE_F32TO16) 181bf215546Sopenharmony_ci return "f32to16"; 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci if (devinfo->ver > 7 && op == BRW_OPCODE_F16TO32) 184bf215546Sopenharmony_ci return "f16to32"; 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci assert(brw_opcode_desc(isa, op)->name); 187bf215546Sopenharmony_ci return brw_opcode_desc(isa, op)->name; 188bf215546Sopenharmony_ci case FS_OPCODE_FB_WRITE: 189bf215546Sopenharmony_ci return "fb_write"; 190bf215546Sopenharmony_ci case FS_OPCODE_FB_WRITE_LOGICAL: 191bf215546Sopenharmony_ci return "fb_write_logical"; 192bf215546Sopenharmony_ci case FS_OPCODE_REP_FB_WRITE: 193bf215546Sopenharmony_ci return "rep_fb_write"; 194bf215546Sopenharmony_ci case FS_OPCODE_FB_READ: 195bf215546Sopenharmony_ci return "fb_read"; 196bf215546Sopenharmony_ci case FS_OPCODE_FB_READ_LOGICAL: 197bf215546Sopenharmony_ci return "fb_read_logical"; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci case SHADER_OPCODE_RCP: 200bf215546Sopenharmony_ci return "rcp"; 201bf215546Sopenharmony_ci case SHADER_OPCODE_RSQ: 202bf215546Sopenharmony_ci return "rsq"; 203bf215546Sopenharmony_ci case SHADER_OPCODE_SQRT: 204bf215546Sopenharmony_ci return "sqrt"; 205bf215546Sopenharmony_ci case SHADER_OPCODE_EXP2: 206bf215546Sopenharmony_ci return "exp2"; 207bf215546Sopenharmony_ci case SHADER_OPCODE_LOG2: 208bf215546Sopenharmony_ci return "log2"; 209bf215546Sopenharmony_ci case SHADER_OPCODE_POW: 210bf215546Sopenharmony_ci return "pow"; 211bf215546Sopenharmony_ci case SHADER_OPCODE_INT_QUOTIENT: 212bf215546Sopenharmony_ci return "int_quot"; 213bf215546Sopenharmony_ci case SHADER_OPCODE_INT_REMAINDER: 214bf215546Sopenharmony_ci return "int_rem"; 215bf215546Sopenharmony_ci case SHADER_OPCODE_SIN: 216bf215546Sopenharmony_ci return "sin"; 217bf215546Sopenharmony_ci case SHADER_OPCODE_COS: 218bf215546Sopenharmony_ci return "cos"; 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci case SHADER_OPCODE_SEND: 221bf215546Sopenharmony_ci return "send"; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci case SHADER_OPCODE_UNDEF: 224bf215546Sopenharmony_ci return "undef"; 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci case SHADER_OPCODE_TEX: 227bf215546Sopenharmony_ci return "tex"; 228bf215546Sopenharmony_ci case SHADER_OPCODE_TEX_LOGICAL: 229bf215546Sopenharmony_ci return "tex_logical"; 230bf215546Sopenharmony_ci case SHADER_OPCODE_TXD: 231bf215546Sopenharmony_ci return "txd"; 232bf215546Sopenharmony_ci case SHADER_OPCODE_TXD_LOGICAL: 233bf215546Sopenharmony_ci return "txd_logical"; 234bf215546Sopenharmony_ci case SHADER_OPCODE_TXF: 235bf215546Sopenharmony_ci return "txf"; 236bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_LOGICAL: 237bf215546Sopenharmony_ci return "txf_logical"; 238bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_LZ: 239bf215546Sopenharmony_ci return "txf_lz"; 240bf215546Sopenharmony_ci case SHADER_OPCODE_TXL: 241bf215546Sopenharmony_ci return "txl"; 242bf215546Sopenharmony_ci case SHADER_OPCODE_TXL_LOGICAL: 243bf215546Sopenharmony_ci return "txl_logical"; 244bf215546Sopenharmony_ci case SHADER_OPCODE_TXL_LZ: 245bf215546Sopenharmony_ci return "txl_lz"; 246bf215546Sopenharmony_ci case SHADER_OPCODE_TXS: 247bf215546Sopenharmony_ci return "txs"; 248bf215546Sopenharmony_ci case SHADER_OPCODE_TXS_LOGICAL: 249bf215546Sopenharmony_ci return "txs_logical"; 250bf215546Sopenharmony_ci case FS_OPCODE_TXB: 251bf215546Sopenharmony_ci return "txb"; 252bf215546Sopenharmony_ci case FS_OPCODE_TXB_LOGICAL: 253bf215546Sopenharmony_ci return "txb_logical"; 254bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS: 255bf215546Sopenharmony_ci return "txf_cms"; 256bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS_LOGICAL: 257bf215546Sopenharmony_ci return "txf_cms_logical"; 258bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS_W: 259bf215546Sopenharmony_ci return "txf_cms_w"; 260bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS_W_LOGICAL: 261bf215546Sopenharmony_ci return "txf_cms_w_logical"; 262bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL: 263bf215546Sopenharmony_ci return "txf_cms_w_gfx12_logical"; 264bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_UMS: 265bf215546Sopenharmony_ci return "txf_ums"; 266bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_UMS_LOGICAL: 267bf215546Sopenharmony_ci return "txf_ums_logical"; 268bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_MCS: 269bf215546Sopenharmony_ci return "txf_mcs"; 270bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_MCS_LOGICAL: 271bf215546Sopenharmony_ci return "txf_mcs_logical"; 272bf215546Sopenharmony_ci case SHADER_OPCODE_LOD: 273bf215546Sopenharmony_ci return "lod"; 274bf215546Sopenharmony_ci case SHADER_OPCODE_LOD_LOGICAL: 275bf215546Sopenharmony_ci return "lod_logical"; 276bf215546Sopenharmony_ci case SHADER_OPCODE_TG4: 277bf215546Sopenharmony_ci return "tg4"; 278bf215546Sopenharmony_ci case SHADER_OPCODE_TG4_LOGICAL: 279bf215546Sopenharmony_ci return "tg4_logical"; 280bf215546Sopenharmony_ci case SHADER_OPCODE_TG4_OFFSET: 281bf215546Sopenharmony_ci return "tg4_offset"; 282bf215546Sopenharmony_ci case SHADER_OPCODE_TG4_OFFSET_LOGICAL: 283bf215546Sopenharmony_ci return "tg4_offset_logical"; 284bf215546Sopenharmony_ci case SHADER_OPCODE_SAMPLEINFO: 285bf215546Sopenharmony_ci return "sampleinfo"; 286bf215546Sopenharmony_ci case SHADER_OPCODE_SAMPLEINFO_LOGICAL: 287bf215546Sopenharmony_ci return "sampleinfo_logical"; 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: 290bf215546Sopenharmony_ci return "image_size_logical"; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_ATOMIC: 293bf215546Sopenharmony_ci return "untyped_atomic"; 294bf215546Sopenharmony_ci case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 295bf215546Sopenharmony_ci return "untyped_atomic_logical"; 296bf215546Sopenharmony_ci case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: 297bf215546Sopenharmony_ci return "untyped_atomic_float_logical"; 298bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_SURFACE_READ: 299bf215546Sopenharmony_ci return "untyped_surface_read"; 300bf215546Sopenharmony_ci case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 301bf215546Sopenharmony_ci return "untyped_surface_read_logical"; 302bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 303bf215546Sopenharmony_ci return "untyped_surface_write"; 304bf215546Sopenharmony_ci case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 305bf215546Sopenharmony_ci return "untyped_surface_write_logical"; 306bf215546Sopenharmony_ci case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL: 307bf215546Sopenharmony_ci return "oword_block_read_logical"; 308bf215546Sopenharmony_ci case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: 309bf215546Sopenharmony_ci return "unaligned_oword_block_read_logical"; 310bf215546Sopenharmony_ci case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: 311bf215546Sopenharmony_ci return "oword_block_write_logical"; 312bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: 313bf215546Sopenharmony_ci return "a64_untyped_read_logical"; 314bf215546Sopenharmony_ci case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: 315bf215546Sopenharmony_ci return "a64_oword_block_read_logical"; 316bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: 317bf215546Sopenharmony_ci return "a64_unaligned_oword_block_read_logical"; 318bf215546Sopenharmony_ci case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: 319bf215546Sopenharmony_ci return "a64_oword_block_write_logical"; 320bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: 321bf215546Sopenharmony_ci return "a64_untyped_write_logical"; 322bf215546Sopenharmony_ci case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: 323bf215546Sopenharmony_ci return "a64_byte_scattered_read_logical"; 324bf215546Sopenharmony_ci case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: 325bf215546Sopenharmony_ci return "a64_byte_scattered_write_logical"; 326bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: 327bf215546Sopenharmony_ci return "a64_untyped_atomic_logical"; 328bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: 329bf215546Sopenharmony_ci return "a64_untyped_atomic_int16_logical"; 330bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: 331bf215546Sopenharmony_ci return "a64_untyped_atomic_int64_logical"; 332bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: 333bf215546Sopenharmony_ci return "a64_untyped_atomic_float16_logical"; 334bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: 335bf215546Sopenharmony_ci return "a64_untyped_atomic_float32_logical"; 336bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: 337bf215546Sopenharmony_ci return "a64_untyped_atomic_float64_logical"; 338bf215546Sopenharmony_ci case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 339bf215546Sopenharmony_ci return "typed_atomic_logical"; 340bf215546Sopenharmony_ci case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 341bf215546Sopenharmony_ci return "typed_surface_read_logical"; 342bf215546Sopenharmony_ci case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 343bf215546Sopenharmony_ci return "typed_surface_write_logical"; 344bf215546Sopenharmony_ci case SHADER_OPCODE_MEMORY_FENCE: 345bf215546Sopenharmony_ci return "memory_fence"; 346bf215546Sopenharmony_ci case FS_OPCODE_SCHEDULING_FENCE: 347bf215546Sopenharmony_ci return "scheduling_fence"; 348bf215546Sopenharmony_ci case SHADER_OPCODE_INTERLOCK: 349bf215546Sopenharmony_ci /* For an interlock we actually issue a memory fence via sendc. */ 350bf215546Sopenharmony_ci return "interlock"; 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 353bf215546Sopenharmony_ci return "byte_scattered_read_logical"; 354bf215546Sopenharmony_ci case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 355bf215546Sopenharmony_ci return "byte_scattered_write_logical"; 356bf215546Sopenharmony_ci case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: 357bf215546Sopenharmony_ci return "dword_scattered_read_logical"; 358bf215546Sopenharmony_ci case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: 359bf215546Sopenharmony_ci return "dword_scattered_write_logical"; 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci case SHADER_OPCODE_LOAD_PAYLOAD: 362bf215546Sopenharmony_ci return "load_payload"; 363bf215546Sopenharmony_ci case FS_OPCODE_PACK: 364bf215546Sopenharmony_ci return "pack"; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci case SHADER_OPCODE_GFX4_SCRATCH_READ: 367bf215546Sopenharmony_ci return "gfx4_scratch_read"; 368bf215546Sopenharmony_ci case SHADER_OPCODE_GFX4_SCRATCH_WRITE: 369bf215546Sopenharmony_ci return "gfx4_scratch_write"; 370bf215546Sopenharmony_ci case SHADER_OPCODE_GFX7_SCRATCH_READ: 371bf215546Sopenharmony_ci return "gfx7_scratch_read"; 372bf215546Sopenharmony_ci case SHADER_OPCODE_SCRATCH_HEADER: 373bf215546Sopenharmony_ci return "scratch_header"; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci case SHADER_OPCODE_URB_WRITE_LOGICAL: 376bf215546Sopenharmony_ci return "urb_write_logical"; 377bf215546Sopenharmony_ci case SHADER_OPCODE_URB_READ_LOGICAL: 378bf215546Sopenharmony_ci return "urb_read_logical"; 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci case SHADER_OPCODE_FIND_LIVE_CHANNEL: 381bf215546Sopenharmony_ci return "find_live_channel"; 382bf215546Sopenharmony_ci case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: 383bf215546Sopenharmony_ci return "find_last_live_channel"; 384bf215546Sopenharmony_ci case FS_OPCODE_LOAD_LIVE_CHANNELS: 385bf215546Sopenharmony_ci return "load_live_channels"; 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci case SHADER_OPCODE_BROADCAST: 388bf215546Sopenharmony_ci return "broadcast"; 389bf215546Sopenharmony_ci case SHADER_OPCODE_SHUFFLE: 390bf215546Sopenharmony_ci return "shuffle"; 391bf215546Sopenharmony_ci case SHADER_OPCODE_SEL_EXEC: 392bf215546Sopenharmony_ci return "sel_exec"; 393bf215546Sopenharmony_ci case SHADER_OPCODE_QUAD_SWIZZLE: 394bf215546Sopenharmony_ci return "quad_swizzle"; 395bf215546Sopenharmony_ci case SHADER_OPCODE_CLUSTER_BROADCAST: 396bf215546Sopenharmony_ci return "cluster_broadcast"; 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci case SHADER_OPCODE_GET_BUFFER_SIZE: 399bf215546Sopenharmony_ci return "get_buffer_size"; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci case VEC4_OPCODE_MOV_BYTES: 402bf215546Sopenharmony_ci return "mov_bytes"; 403bf215546Sopenharmony_ci case VEC4_OPCODE_PACK_BYTES: 404bf215546Sopenharmony_ci return "pack_bytes"; 405bf215546Sopenharmony_ci case VEC4_OPCODE_UNPACK_UNIFORM: 406bf215546Sopenharmony_ci return "unpack_uniform"; 407bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_F32: 408bf215546Sopenharmony_ci return "double_to_f32"; 409bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_D32: 410bf215546Sopenharmony_ci return "double_to_d32"; 411bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_U32: 412bf215546Sopenharmony_ci return "double_to_u32"; 413bf215546Sopenharmony_ci case VEC4_OPCODE_TO_DOUBLE: 414bf215546Sopenharmony_ci return "single_to_double"; 415bf215546Sopenharmony_ci case VEC4_OPCODE_PICK_LOW_32BIT: 416bf215546Sopenharmony_ci return "pick_low_32bit"; 417bf215546Sopenharmony_ci case VEC4_OPCODE_PICK_HIGH_32BIT: 418bf215546Sopenharmony_ci return "pick_high_32bit"; 419bf215546Sopenharmony_ci case VEC4_OPCODE_SET_LOW_32BIT: 420bf215546Sopenharmony_ci return "set_low_32bit"; 421bf215546Sopenharmony_ci case VEC4_OPCODE_SET_HIGH_32BIT: 422bf215546Sopenharmony_ci return "set_high_32bit"; 423bf215546Sopenharmony_ci case VEC4_OPCODE_MOV_FOR_SCRATCH: 424bf215546Sopenharmony_ci return "mov_for_scratch"; 425bf215546Sopenharmony_ci case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: 426bf215546Sopenharmony_ci return "zero_oob_push_regs"; 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci case FS_OPCODE_DDX_COARSE: 429bf215546Sopenharmony_ci return "ddx_coarse"; 430bf215546Sopenharmony_ci case FS_OPCODE_DDX_FINE: 431bf215546Sopenharmony_ci return "ddx_fine"; 432bf215546Sopenharmony_ci case FS_OPCODE_DDY_COARSE: 433bf215546Sopenharmony_ci return "ddy_coarse"; 434bf215546Sopenharmony_ci case FS_OPCODE_DDY_FINE: 435bf215546Sopenharmony_ci return "ddy_fine"; 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci case FS_OPCODE_LINTERP: 438bf215546Sopenharmony_ci return "linterp"; 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci case FS_OPCODE_PIXEL_X: 441bf215546Sopenharmony_ci return "pixel_x"; 442bf215546Sopenharmony_ci case FS_OPCODE_PIXEL_Y: 443bf215546Sopenharmony_ci return "pixel_y"; 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_ci case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: 446bf215546Sopenharmony_ci return "uniform_pull_const"; 447bf215546Sopenharmony_ci case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7: 448bf215546Sopenharmony_ci return "uniform_pull_const_gfx7"; 449bf215546Sopenharmony_ci case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: 450bf215546Sopenharmony_ci return "varying_pull_const_gfx4"; 451bf215546Sopenharmony_ci case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: 452bf215546Sopenharmony_ci return "varying_pull_const_logical"; 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci case FS_OPCODE_SET_SAMPLE_ID: 455bf215546Sopenharmony_ci return "set_sample_id"; 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci case FS_OPCODE_PACK_HALF_2x16_SPLIT: 458bf215546Sopenharmony_ci return "pack_half_2x16_split"; 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci case SHADER_OPCODE_HALT_TARGET: 461bf215546Sopenharmony_ci return "halt_target"; 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci case FS_OPCODE_INTERPOLATE_AT_SAMPLE: 464bf215546Sopenharmony_ci return "interp_sample"; 465bf215546Sopenharmony_ci case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: 466bf215546Sopenharmony_ci return "interp_shared_offset"; 467bf215546Sopenharmony_ci case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: 468bf215546Sopenharmony_ci return "interp_per_slot_offset"; 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci case VEC4_VS_OPCODE_URB_WRITE: 471bf215546Sopenharmony_ci return "vs_urb_write"; 472bf215546Sopenharmony_ci case VS_OPCODE_PULL_CONSTANT_LOAD: 473bf215546Sopenharmony_ci return "pull_constant_load"; 474bf215546Sopenharmony_ci case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7: 475bf215546Sopenharmony_ci return "pull_constant_load_gfx7"; 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: 478bf215546Sopenharmony_ci return "unpack_flags_simd4x2"; 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci case VEC4_GS_OPCODE_URB_WRITE: 481bf215546Sopenharmony_ci return "gs_urb_write"; 482bf215546Sopenharmony_ci case VEC4_GS_OPCODE_URB_WRITE_ALLOCATE: 483bf215546Sopenharmony_ci return "gs_urb_write_allocate"; 484bf215546Sopenharmony_ci case GS_OPCODE_THREAD_END: 485bf215546Sopenharmony_ci return "gs_thread_end"; 486bf215546Sopenharmony_ci case GS_OPCODE_SET_WRITE_OFFSET: 487bf215546Sopenharmony_ci return "set_write_offset"; 488bf215546Sopenharmony_ci case GS_OPCODE_SET_VERTEX_COUNT: 489bf215546Sopenharmony_ci return "set_vertex_count"; 490bf215546Sopenharmony_ci case GS_OPCODE_SET_DWORD_2: 491bf215546Sopenharmony_ci return "set_dword_2"; 492bf215546Sopenharmony_ci case GS_OPCODE_PREPARE_CHANNEL_MASKS: 493bf215546Sopenharmony_ci return "prepare_channel_masks"; 494bf215546Sopenharmony_ci case GS_OPCODE_SET_CHANNEL_MASKS: 495bf215546Sopenharmony_ci return "set_channel_masks"; 496bf215546Sopenharmony_ci case GS_OPCODE_GET_INSTANCE_ID: 497bf215546Sopenharmony_ci return "get_instance_id"; 498bf215546Sopenharmony_ci case GS_OPCODE_FF_SYNC: 499bf215546Sopenharmony_ci return "ff_sync"; 500bf215546Sopenharmony_ci case GS_OPCODE_SET_PRIMITIVE_ID: 501bf215546Sopenharmony_ci return "set_primitive_id"; 502bf215546Sopenharmony_ci case GS_OPCODE_SVB_WRITE: 503bf215546Sopenharmony_ci return "gs_svb_write"; 504bf215546Sopenharmony_ci case GS_OPCODE_SVB_SET_DST_INDEX: 505bf215546Sopenharmony_ci return "gs_svb_set_dst_index"; 506bf215546Sopenharmony_ci case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: 507bf215546Sopenharmony_ci return "gs_ff_sync_set_primitives"; 508bf215546Sopenharmony_ci case CS_OPCODE_CS_TERMINATE: 509bf215546Sopenharmony_ci return "cs_terminate"; 510bf215546Sopenharmony_ci case SHADER_OPCODE_BARRIER: 511bf215546Sopenharmony_ci return "barrier"; 512bf215546Sopenharmony_ci case SHADER_OPCODE_MULH: 513bf215546Sopenharmony_ci return "mulh"; 514bf215546Sopenharmony_ci case SHADER_OPCODE_ISUB_SAT: 515bf215546Sopenharmony_ci return "isub_sat"; 516bf215546Sopenharmony_ci case SHADER_OPCODE_USUB_SAT: 517bf215546Sopenharmony_ci return "usub_sat"; 518bf215546Sopenharmony_ci case SHADER_OPCODE_MOV_INDIRECT: 519bf215546Sopenharmony_ci return "mov_indirect"; 520bf215546Sopenharmony_ci case SHADER_OPCODE_MOV_RELOC_IMM: 521bf215546Sopenharmony_ci return "mov_reloc_imm"; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci case VEC4_OPCODE_URB_READ: 524bf215546Sopenharmony_ci return "urb_read"; 525bf215546Sopenharmony_ci case TCS_OPCODE_GET_INSTANCE_ID: 526bf215546Sopenharmony_ci return "tcs_get_instance_id"; 527bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_URB_WRITE: 528bf215546Sopenharmony_ci return "tcs_urb_write"; 529bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS: 530bf215546Sopenharmony_ci return "tcs_set_input_urb_offsets"; 531bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: 532bf215546Sopenharmony_ci return "tcs_set_output_urb_offsets"; 533bf215546Sopenharmony_ci case TCS_OPCODE_GET_PRIMITIVE_ID: 534bf215546Sopenharmony_ci return "tcs_get_primitive_id"; 535bf215546Sopenharmony_ci case TCS_OPCODE_CREATE_BARRIER_HEADER: 536bf215546Sopenharmony_ci return "tcs_create_barrier_header"; 537bf215546Sopenharmony_ci case TCS_OPCODE_SRC0_010_IS_ZERO: 538bf215546Sopenharmony_ci return "tcs_src0<0,1,0>_is_zero"; 539bf215546Sopenharmony_ci case TCS_OPCODE_RELEASE_INPUT: 540bf215546Sopenharmony_ci return "tcs_release_input"; 541bf215546Sopenharmony_ci case TCS_OPCODE_THREAD_END: 542bf215546Sopenharmony_ci return "tcs_thread_end"; 543bf215546Sopenharmony_ci case TES_OPCODE_CREATE_INPUT_READ_HEADER: 544bf215546Sopenharmony_ci return "tes_create_input_read_header"; 545bf215546Sopenharmony_ci case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: 546bf215546Sopenharmony_ci return "tes_add_indirect_urb_offset"; 547bf215546Sopenharmony_ci case TES_OPCODE_GET_PRIMITIVE_ID: 548bf215546Sopenharmony_ci return "tes_get_primitive_id"; 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci case RT_OPCODE_TRACE_RAY_LOGICAL: 551bf215546Sopenharmony_ci return "rt_trace_ray_logical"; 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci case SHADER_OPCODE_RND_MODE: 554bf215546Sopenharmony_ci return "rnd_mode"; 555bf215546Sopenharmony_ci case SHADER_OPCODE_FLOAT_CONTROL_MODE: 556bf215546Sopenharmony_ci return "float_control_mode"; 557bf215546Sopenharmony_ci case SHADER_OPCODE_BTD_SPAWN_LOGICAL: 558bf215546Sopenharmony_ci return "btd_spawn_logical"; 559bf215546Sopenharmony_ci case SHADER_OPCODE_BTD_RETIRE_LOGICAL: 560bf215546Sopenharmony_ci return "btd_retire_logical"; 561bf215546Sopenharmony_ci case SHADER_OPCODE_READ_SR_REG: 562bf215546Sopenharmony_ci return "read_sr_reg"; 563bf215546Sopenharmony_ci } 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci unreachable("not reached"); 566bf215546Sopenharmony_ci} 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_cibool 569bf215546Sopenharmony_cibrw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) 570bf215546Sopenharmony_ci{ 571bf215546Sopenharmony_ci union { 572bf215546Sopenharmony_ci unsigned ud; 573bf215546Sopenharmony_ci int d; 574bf215546Sopenharmony_ci float f; 575bf215546Sopenharmony_ci double df; 576bf215546Sopenharmony_ci } imm, sat_imm = { 0 }; 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci const unsigned size = type_sz(type); 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise 581bf215546Sopenharmony_ci * irrelevant, so just check the size of the type and copy from/to an 582bf215546Sopenharmony_ci * appropriately sized field. 583bf215546Sopenharmony_ci */ 584bf215546Sopenharmony_ci if (size < 8) 585bf215546Sopenharmony_ci imm.ud = reg->ud; 586bf215546Sopenharmony_ci else 587bf215546Sopenharmony_ci imm.df = reg->df; 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci switch (type) { 590bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UD: 591bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_D: 592bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UW: 593bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_W: 594bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UQ: 595bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_Q: 596bf215546Sopenharmony_ci /* Nothing to do. */ 597bf215546Sopenharmony_ci return false; 598bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_F: 599bf215546Sopenharmony_ci sat_imm.f = SATURATE(imm.f); 600bf215546Sopenharmony_ci break; 601bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_DF: 602bf215546Sopenharmony_ci sat_imm.df = SATURATE(imm.df); 603bf215546Sopenharmony_ci break; 604bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UB: 605bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_B: 606bf215546Sopenharmony_ci unreachable("no UB/B immediates"); 607bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_V: 608bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UV: 609bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_VF: 610bf215546Sopenharmony_ci unreachable("unimplemented: saturate vector immediate"); 611bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_HF: 612bf215546Sopenharmony_ci unreachable("unimplemented: saturate HF immediate"); 613bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_NF: 614bf215546Sopenharmony_ci unreachable("no NF immediates"); 615bf215546Sopenharmony_ci } 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci if (size < 8) { 618bf215546Sopenharmony_ci if (imm.ud != sat_imm.ud) { 619bf215546Sopenharmony_ci reg->ud = sat_imm.ud; 620bf215546Sopenharmony_ci return true; 621bf215546Sopenharmony_ci } 622bf215546Sopenharmony_ci } else { 623bf215546Sopenharmony_ci if (imm.df != sat_imm.df) { 624bf215546Sopenharmony_ci reg->df = sat_imm.df; 625bf215546Sopenharmony_ci return true; 626bf215546Sopenharmony_ci } 627bf215546Sopenharmony_ci } 628bf215546Sopenharmony_ci return false; 629bf215546Sopenharmony_ci} 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_cibool 632bf215546Sopenharmony_cibrw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) 633bf215546Sopenharmony_ci{ 634bf215546Sopenharmony_ci switch (type) { 635bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_D: 636bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UD: 637bf215546Sopenharmony_ci reg->d = -reg->d; 638bf215546Sopenharmony_ci return true; 639bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_W: 640bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UW: { 641bf215546Sopenharmony_ci uint16_t value = -(int16_t)reg->ud; 642bf215546Sopenharmony_ci reg->ud = value | (uint32_t)value << 16; 643bf215546Sopenharmony_ci return true; 644bf215546Sopenharmony_ci } 645bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_F: 646bf215546Sopenharmony_ci reg->f = -reg->f; 647bf215546Sopenharmony_ci return true; 648bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_VF: 649bf215546Sopenharmony_ci reg->ud ^= 0x80808080; 650bf215546Sopenharmony_ci return true; 651bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_DF: 652bf215546Sopenharmony_ci reg->df = -reg->df; 653bf215546Sopenharmony_ci return true; 654bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UQ: 655bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_Q: 656bf215546Sopenharmony_ci reg->d64 = -reg->d64; 657bf215546Sopenharmony_ci return true; 658bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UB: 659bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_B: 660bf215546Sopenharmony_ci unreachable("no UB/B immediates"); 661bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UV: 662bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_V: 663bf215546Sopenharmony_ci assert(!"unimplemented: negate UV/V immediate"); 664bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_HF: 665bf215546Sopenharmony_ci reg->ud ^= 0x80008000; 666bf215546Sopenharmony_ci return true; 667bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_NF: 668bf215546Sopenharmony_ci unreachable("no NF immediates"); 669bf215546Sopenharmony_ci } 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_ci return false; 672bf215546Sopenharmony_ci} 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_cibool 675bf215546Sopenharmony_cibrw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) 676bf215546Sopenharmony_ci{ 677bf215546Sopenharmony_ci switch (type) { 678bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_D: 679bf215546Sopenharmony_ci reg->d = abs(reg->d); 680bf215546Sopenharmony_ci return true; 681bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_W: { 682bf215546Sopenharmony_ci uint16_t value = abs((int16_t)reg->ud); 683bf215546Sopenharmony_ci reg->ud = value | (uint32_t)value << 16; 684bf215546Sopenharmony_ci return true; 685bf215546Sopenharmony_ci } 686bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_F: 687bf215546Sopenharmony_ci reg->f = fabsf(reg->f); 688bf215546Sopenharmony_ci return true; 689bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_DF: 690bf215546Sopenharmony_ci reg->df = fabs(reg->df); 691bf215546Sopenharmony_ci return true; 692bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_VF: 693bf215546Sopenharmony_ci reg->ud &= ~0x80808080; 694bf215546Sopenharmony_ci return true; 695bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_Q: 696bf215546Sopenharmony_ci reg->d64 = imaxabs(reg->d64); 697bf215546Sopenharmony_ci return true; 698bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UB: 699bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_B: 700bf215546Sopenharmony_ci unreachable("no UB/B immediates"); 701bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UQ: 702bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UD: 703bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UW: 704bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UV: 705bf215546Sopenharmony_ci /* Presumably the absolute value modifier on an unsigned source is a 706bf215546Sopenharmony_ci * nop, but it would be nice to confirm. 707bf215546Sopenharmony_ci */ 708bf215546Sopenharmony_ci assert(!"unimplemented: abs unsigned immediate"); 709bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_V: 710bf215546Sopenharmony_ci assert(!"unimplemented: abs V immediate"); 711bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_HF: 712bf215546Sopenharmony_ci reg->ud &= ~0x80008000; 713bf215546Sopenharmony_ci return true; 714bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_NF: 715bf215546Sopenharmony_ci unreachable("no NF immediates"); 716bf215546Sopenharmony_ci } 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci return false; 719bf215546Sopenharmony_ci} 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_cibackend_shader::backend_shader(const struct brw_compiler *compiler, 722bf215546Sopenharmony_ci void *log_data, 723bf215546Sopenharmony_ci void *mem_ctx, 724bf215546Sopenharmony_ci const nir_shader *shader, 725bf215546Sopenharmony_ci struct brw_stage_prog_data *stage_prog_data, 726bf215546Sopenharmony_ci bool debug_enabled) 727bf215546Sopenharmony_ci : compiler(compiler), 728bf215546Sopenharmony_ci log_data(log_data), 729bf215546Sopenharmony_ci devinfo(compiler->devinfo), 730bf215546Sopenharmony_ci nir(shader), 731bf215546Sopenharmony_ci stage_prog_data(stage_prog_data), 732bf215546Sopenharmony_ci mem_ctx(mem_ctx), 733bf215546Sopenharmony_ci cfg(NULL), idom_analysis(this), 734bf215546Sopenharmony_ci stage(shader->info.stage), 735bf215546Sopenharmony_ci debug_enabled(debug_enabled) 736bf215546Sopenharmony_ci{ 737bf215546Sopenharmony_ci stage_name = _mesa_shader_stage_to_string(stage); 738bf215546Sopenharmony_ci stage_abbrev = _mesa_shader_stage_to_abbrev(stage); 739bf215546Sopenharmony_ci} 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_cibackend_shader::~backend_shader() 742bf215546Sopenharmony_ci{ 743bf215546Sopenharmony_ci} 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_cibool 746bf215546Sopenharmony_cibackend_reg::equals(const backend_reg &r) const 747bf215546Sopenharmony_ci{ 748bf215546Sopenharmony_ci return brw_regs_equal(this, &r) && offset == r.offset; 749bf215546Sopenharmony_ci} 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_cibool 752bf215546Sopenharmony_cibackend_reg::negative_equals(const backend_reg &r) const 753bf215546Sopenharmony_ci{ 754bf215546Sopenharmony_ci return brw_regs_negative_equal(this, &r) && offset == r.offset; 755bf215546Sopenharmony_ci} 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_cibool 758bf215546Sopenharmony_cibackend_reg::is_zero() const 759bf215546Sopenharmony_ci{ 760bf215546Sopenharmony_ci if (file != IMM) 761bf215546Sopenharmony_ci return false; 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci assert(type_sz(type) > 1); 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci switch (type) { 766bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_HF: 767bf215546Sopenharmony_ci assert((d & 0xffff) == ((d >> 16) & 0xffff)); 768bf215546Sopenharmony_ci return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000; 769bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_F: 770bf215546Sopenharmony_ci return f == 0; 771bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_DF: 772bf215546Sopenharmony_ci return df == 0; 773bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_W: 774bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UW: 775bf215546Sopenharmony_ci assert((d & 0xffff) == ((d >> 16) & 0xffff)); 776bf215546Sopenharmony_ci return (d & 0xffff) == 0; 777bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_D: 778bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UD: 779bf215546Sopenharmony_ci return d == 0; 780bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UQ: 781bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_Q: 782bf215546Sopenharmony_ci return u64 == 0; 783bf215546Sopenharmony_ci default: 784bf215546Sopenharmony_ci return false; 785bf215546Sopenharmony_ci } 786bf215546Sopenharmony_ci} 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_cibool 789bf215546Sopenharmony_cibackend_reg::is_one() const 790bf215546Sopenharmony_ci{ 791bf215546Sopenharmony_ci if (file != IMM) 792bf215546Sopenharmony_ci return false; 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci assert(type_sz(type) > 1); 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_ci switch (type) { 797bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_HF: 798bf215546Sopenharmony_ci assert((d & 0xffff) == ((d >> 16) & 0xffff)); 799bf215546Sopenharmony_ci return (d & 0xffff) == 0x3c00; 800bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_F: 801bf215546Sopenharmony_ci return f == 1.0f; 802bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_DF: 803bf215546Sopenharmony_ci return df == 1.0; 804bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_W: 805bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UW: 806bf215546Sopenharmony_ci assert((d & 0xffff) == ((d >> 16) & 0xffff)); 807bf215546Sopenharmony_ci return (d & 0xffff) == 1; 808bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_D: 809bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UD: 810bf215546Sopenharmony_ci return d == 1; 811bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UQ: 812bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_Q: 813bf215546Sopenharmony_ci return u64 == 1; 814bf215546Sopenharmony_ci default: 815bf215546Sopenharmony_ci return false; 816bf215546Sopenharmony_ci } 817bf215546Sopenharmony_ci} 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_cibool 820bf215546Sopenharmony_cibackend_reg::is_negative_one() const 821bf215546Sopenharmony_ci{ 822bf215546Sopenharmony_ci if (file != IMM) 823bf215546Sopenharmony_ci return false; 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci assert(type_sz(type) > 1); 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci switch (type) { 828bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_HF: 829bf215546Sopenharmony_ci assert((d & 0xffff) == ((d >> 16) & 0xffff)); 830bf215546Sopenharmony_ci return (d & 0xffff) == 0xbc00; 831bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_F: 832bf215546Sopenharmony_ci return f == -1.0; 833bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_DF: 834bf215546Sopenharmony_ci return df == -1.0; 835bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_W: 836bf215546Sopenharmony_ci assert((d & 0xffff) == ((d >> 16) & 0xffff)); 837bf215546Sopenharmony_ci return (d & 0xffff) == 0xffff; 838bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_D: 839bf215546Sopenharmony_ci return d == -1; 840bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_Q: 841bf215546Sopenharmony_ci return d64 == -1; 842bf215546Sopenharmony_ci default: 843bf215546Sopenharmony_ci return false; 844bf215546Sopenharmony_ci } 845bf215546Sopenharmony_ci} 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_cibool 848bf215546Sopenharmony_cibackend_reg::is_null() const 849bf215546Sopenharmony_ci{ 850bf215546Sopenharmony_ci return file == ARF && nr == BRW_ARF_NULL; 851bf215546Sopenharmony_ci} 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_cibool 855bf215546Sopenharmony_cibackend_reg::is_accumulator() const 856bf215546Sopenharmony_ci{ 857bf215546Sopenharmony_ci return file == ARF && nr == BRW_ARF_ACCUMULATOR; 858bf215546Sopenharmony_ci} 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_cibool 861bf215546Sopenharmony_cibackend_instruction::is_commutative() const 862bf215546Sopenharmony_ci{ 863bf215546Sopenharmony_ci switch (opcode) { 864bf215546Sopenharmony_ci case BRW_OPCODE_AND: 865bf215546Sopenharmony_ci case BRW_OPCODE_OR: 866bf215546Sopenharmony_ci case BRW_OPCODE_XOR: 867bf215546Sopenharmony_ci case BRW_OPCODE_ADD: 868bf215546Sopenharmony_ci case BRW_OPCODE_ADD3: 869bf215546Sopenharmony_ci case BRW_OPCODE_MUL: 870bf215546Sopenharmony_ci case SHADER_OPCODE_MULH: 871bf215546Sopenharmony_ci return true; 872bf215546Sopenharmony_ci case BRW_OPCODE_SEL: 873bf215546Sopenharmony_ci /* MIN and MAX are commutative. */ 874bf215546Sopenharmony_ci if (conditional_mod == BRW_CONDITIONAL_GE || 875bf215546Sopenharmony_ci conditional_mod == BRW_CONDITIONAL_L) { 876bf215546Sopenharmony_ci return true; 877bf215546Sopenharmony_ci } 878bf215546Sopenharmony_ci FALLTHROUGH; 879bf215546Sopenharmony_ci default: 880bf215546Sopenharmony_ci return false; 881bf215546Sopenharmony_ci } 882bf215546Sopenharmony_ci} 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_cibool 885bf215546Sopenharmony_cibackend_instruction::is_3src(const struct brw_compiler *compiler) const 886bf215546Sopenharmony_ci{ 887bf215546Sopenharmony_ci return ::is_3src(&compiler->isa, opcode); 888bf215546Sopenharmony_ci} 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_cibool 891bf215546Sopenharmony_cibackend_instruction::is_tex() const 892bf215546Sopenharmony_ci{ 893bf215546Sopenharmony_ci return (opcode == SHADER_OPCODE_TEX || 894bf215546Sopenharmony_ci opcode == FS_OPCODE_TXB || 895bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXD || 896bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXF || 897bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXF_LZ || 898bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXF_CMS || 899bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXF_CMS_W || 900bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXF_UMS || 901bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXF_MCS || 902bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXL || 903bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXL_LZ || 904bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TXS || 905bf215546Sopenharmony_ci opcode == SHADER_OPCODE_LOD || 906bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TG4 || 907bf215546Sopenharmony_ci opcode == SHADER_OPCODE_TG4_OFFSET || 908bf215546Sopenharmony_ci opcode == SHADER_OPCODE_SAMPLEINFO); 909bf215546Sopenharmony_ci} 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_cibool 912bf215546Sopenharmony_cibackend_instruction::is_math() const 913bf215546Sopenharmony_ci{ 914bf215546Sopenharmony_ci return (opcode == SHADER_OPCODE_RCP || 915bf215546Sopenharmony_ci opcode == SHADER_OPCODE_RSQ || 916bf215546Sopenharmony_ci opcode == SHADER_OPCODE_SQRT || 917bf215546Sopenharmony_ci opcode == SHADER_OPCODE_EXP2 || 918bf215546Sopenharmony_ci opcode == SHADER_OPCODE_LOG2 || 919bf215546Sopenharmony_ci opcode == SHADER_OPCODE_SIN || 920bf215546Sopenharmony_ci opcode == SHADER_OPCODE_COS || 921bf215546Sopenharmony_ci opcode == SHADER_OPCODE_INT_QUOTIENT || 922bf215546Sopenharmony_ci opcode == SHADER_OPCODE_INT_REMAINDER || 923bf215546Sopenharmony_ci opcode == SHADER_OPCODE_POW); 924bf215546Sopenharmony_ci} 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_cibool 927bf215546Sopenharmony_cibackend_instruction::is_control_flow() const 928bf215546Sopenharmony_ci{ 929bf215546Sopenharmony_ci switch (opcode) { 930bf215546Sopenharmony_ci case BRW_OPCODE_DO: 931bf215546Sopenharmony_ci case BRW_OPCODE_WHILE: 932bf215546Sopenharmony_ci case BRW_OPCODE_IF: 933bf215546Sopenharmony_ci case BRW_OPCODE_ELSE: 934bf215546Sopenharmony_ci case BRW_OPCODE_ENDIF: 935bf215546Sopenharmony_ci case BRW_OPCODE_BREAK: 936bf215546Sopenharmony_ci case BRW_OPCODE_CONTINUE: 937bf215546Sopenharmony_ci return true; 938bf215546Sopenharmony_ci default: 939bf215546Sopenharmony_ci return false; 940bf215546Sopenharmony_ci } 941bf215546Sopenharmony_ci} 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_cibool 944bf215546Sopenharmony_cibackend_instruction::uses_indirect_addressing() const 945bf215546Sopenharmony_ci{ 946bf215546Sopenharmony_ci switch (opcode) { 947bf215546Sopenharmony_ci case SHADER_OPCODE_BROADCAST: 948bf215546Sopenharmony_ci case SHADER_OPCODE_CLUSTER_BROADCAST: 949bf215546Sopenharmony_ci case SHADER_OPCODE_MOV_INDIRECT: 950bf215546Sopenharmony_ci return true; 951bf215546Sopenharmony_ci default: 952bf215546Sopenharmony_ci return false; 953bf215546Sopenharmony_ci } 954bf215546Sopenharmony_ci} 955bf215546Sopenharmony_ci 956bf215546Sopenharmony_cibool 957bf215546Sopenharmony_cibackend_instruction::can_do_source_mods() const 958bf215546Sopenharmony_ci{ 959bf215546Sopenharmony_ci switch (opcode) { 960bf215546Sopenharmony_ci case BRW_OPCODE_ADDC: 961bf215546Sopenharmony_ci case BRW_OPCODE_BFE: 962bf215546Sopenharmony_ci case BRW_OPCODE_BFI1: 963bf215546Sopenharmony_ci case BRW_OPCODE_BFI2: 964bf215546Sopenharmony_ci case BRW_OPCODE_BFREV: 965bf215546Sopenharmony_ci case BRW_OPCODE_CBIT: 966bf215546Sopenharmony_ci case BRW_OPCODE_FBH: 967bf215546Sopenharmony_ci case BRW_OPCODE_FBL: 968bf215546Sopenharmony_ci case BRW_OPCODE_ROL: 969bf215546Sopenharmony_ci case BRW_OPCODE_ROR: 970bf215546Sopenharmony_ci case BRW_OPCODE_SUBB: 971bf215546Sopenharmony_ci case BRW_OPCODE_DP4A: 972bf215546Sopenharmony_ci case SHADER_OPCODE_BROADCAST: 973bf215546Sopenharmony_ci case SHADER_OPCODE_CLUSTER_BROADCAST: 974bf215546Sopenharmony_ci case SHADER_OPCODE_MOV_INDIRECT: 975bf215546Sopenharmony_ci case SHADER_OPCODE_SHUFFLE: 976bf215546Sopenharmony_ci case SHADER_OPCODE_INT_QUOTIENT: 977bf215546Sopenharmony_ci case SHADER_OPCODE_INT_REMAINDER: 978bf215546Sopenharmony_ci return false; 979bf215546Sopenharmony_ci default: 980bf215546Sopenharmony_ci return true; 981bf215546Sopenharmony_ci } 982bf215546Sopenharmony_ci} 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_cibool 985bf215546Sopenharmony_cibackend_instruction::can_do_saturate() const 986bf215546Sopenharmony_ci{ 987bf215546Sopenharmony_ci switch (opcode) { 988bf215546Sopenharmony_ci case BRW_OPCODE_ADD: 989bf215546Sopenharmony_ci case BRW_OPCODE_ADD3: 990bf215546Sopenharmony_ci case BRW_OPCODE_ASR: 991bf215546Sopenharmony_ci case BRW_OPCODE_AVG: 992bf215546Sopenharmony_ci case BRW_OPCODE_CSEL: 993bf215546Sopenharmony_ci case BRW_OPCODE_DP2: 994bf215546Sopenharmony_ci case BRW_OPCODE_DP3: 995bf215546Sopenharmony_ci case BRW_OPCODE_DP4: 996bf215546Sopenharmony_ci case BRW_OPCODE_DPH: 997bf215546Sopenharmony_ci case BRW_OPCODE_DP4A: 998bf215546Sopenharmony_ci case BRW_OPCODE_F16TO32: 999bf215546Sopenharmony_ci case BRW_OPCODE_F32TO16: 1000bf215546Sopenharmony_ci case BRW_OPCODE_LINE: 1001bf215546Sopenharmony_ci case BRW_OPCODE_LRP: 1002bf215546Sopenharmony_ci case BRW_OPCODE_MAC: 1003bf215546Sopenharmony_ci case BRW_OPCODE_MAD: 1004bf215546Sopenharmony_ci case BRW_OPCODE_MATH: 1005bf215546Sopenharmony_ci case BRW_OPCODE_MOV: 1006bf215546Sopenharmony_ci case BRW_OPCODE_MUL: 1007bf215546Sopenharmony_ci case SHADER_OPCODE_MULH: 1008bf215546Sopenharmony_ci case BRW_OPCODE_PLN: 1009bf215546Sopenharmony_ci case BRW_OPCODE_RNDD: 1010bf215546Sopenharmony_ci case BRW_OPCODE_RNDE: 1011bf215546Sopenharmony_ci case BRW_OPCODE_RNDU: 1012bf215546Sopenharmony_ci case BRW_OPCODE_RNDZ: 1013bf215546Sopenharmony_ci case BRW_OPCODE_SEL: 1014bf215546Sopenharmony_ci case BRW_OPCODE_SHL: 1015bf215546Sopenharmony_ci case BRW_OPCODE_SHR: 1016bf215546Sopenharmony_ci case FS_OPCODE_LINTERP: 1017bf215546Sopenharmony_ci case SHADER_OPCODE_COS: 1018bf215546Sopenharmony_ci case SHADER_OPCODE_EXP2: 1019bf215546Sopenharmony_ci case SHADER_OPCODE_LOG2: 1020bf215546Sopenharmony_ci case SHADER_OPCODE_POW: 1021bf215546Sopenharmony_ci case SHADER_OPCODE_RCP: 1022bf215546Sopenharmony_ci case SHADER_OPCODE_RSQ: 1023bf215546Sopenharmony_ci case SHADER_OPCODE_SIN: 1024bf215546Sopenharmony_ci case SHADER_OPCODE_SQRT: 1025bf215546Sopenharmony_ci return true; 1026bf215546Sopenharmony_ci default: 1027bf215546Sopenharmony_ci return false; 1028bf215546Sopenharmony_ci } 1029bf215546Sopenharmony_ci} 1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_cibool 1032bf215546Sopenharmony_cibackend_instruction::can_do_cmod() const 1033bf215546Sopenharmony_ci{ 1034bf215546Sopenharmony_ci switch (opcode) { 1035bf215546Sopenharmony_ci case BRW_OPCODE_ADD: 1036bf215546Sopenharmony_ci case BRW_OPCODE_ADD3: 1037bf215546Sopenharmony_ci case BRW_OPCODE_ADDC: 1038bf215546Sopenharmony_ci case BRW_OPCODE_AND: 1039bf215546Sopenharmony_ci case BRW_OPCODE_ASR: 1040bf215546Sopenharmony_ci case BRW_OPCODE_AVG: 1041bf215546Sopenharmony_ci case BRW_OPCODE_CMP: 1042bf215546Sopenharmony_ci case BRW_OPCODE_CMPN: 1043bf215546Sopenharmony_ci case BRW_OPCODE_DP2: 1044bf215546Sopenharmony_ci case BRW_OPCODE_DP3: 1045bf215546Sopenharmony_ci case BRW_OPCODE_DP4: 1046bf215546Sopenharmony_ci case BRW_OPCODE_DPH: 1047bf215546Sopenharmony_ci case BRW_OPCODE_F16TO32: 1048bf215546Sopenharmony_ci case BRW_OPCODE_F32TO16: 1049bf215546Sopenharmony_ci case BRW_OPCODE_FRC: 1050bf215546Sopenharmony_ci case BRW_OPCODE_LINE: 1051bf215546Sopenharmony_ci case BRW_OPCODE_LRP: 1052bf215546Sopenharmony_ci case BRW_OPCODE_LZD: 1053bf215546Sopenharmony_ci case BRW_OPCODE_MAC: 1054bf215546Sopenharmony_ci case BRW_OPCODE_MACH: 1055bf215546Sopenharmony_ci case BRW_OPCODE_MAD: 1056bf215546Sopenharmony_ci case BRW_OPCODE_MOV: 1057bf215546Sopenharmony_ci case BRW_OPCODE_MUL: 1058bf215546Sopenharmony_ci case BRW_OPCODE_NOT: 1059bf215546Sopenharmony_ci case BRW_OPCODE_OR: 1060bf215546Sopenharmony_ci case BRW_OPCODE_PLN: 1061bf215546Sopenharmony_ci case BRW_OPCODE_RNDD: 1062bf215546Sopenharmony_ci case BRW_OPCODE_RNDE: 1063bf215546Sopenharmony_ci case BRW_OPCODE_RNDU: 1064bf215546Sopenharmony_ci case BRW_OPCODE_RNDZ: 1065bf215546Sopenharmony_ci case BRW_OPCODE_SAD2: 1066bf215546Sopenharmony_ci case BRW_OPCODE_SADA2: 1067bf215546Sopenharmony_ci case BRW_OPCODE_SHL: 1068bf215546Sopenharmony_ci case BRW_OPCODE_SHR: 1069bf215546Sopenharmony_ci case BRW_OPCODE_SUBB: 1070bf215546Sopenharmony_ci case BRW_OPCODE_XOR: 1071bf215546Sopenharmony_ci case FS_OPCODE_LINTERP: 1072bf215546Sopenharmony_ci return true; 1073bf215546Sopenharmony_ci default: 1074bf215546Sopenharmony_ci return false; 1075bf215546Sopenharmony_ci } 1076bf215546Sopenharmony_ci} 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_cibool 1079bf215546Sopenharmony_cibackend_instruction::reads_accumulator_implicitly() const 1080bf215546Sopenharmony_ci{ 1081bf215546Sopenharmony_ci switch (opcode) { 1082bf215546Sopenharmony_ci case BRW_OPCODE_MAC: 1083bf215546Sopenharmony_ci case BRW_OPCODE_MACH: 1084bf215546Sopenharmony_ci case BRW_OPCODE_SADA2: 1085bf215546Sopenharmony_ci return true; 1086bf215546Sopenharmony_ci default: 1087bf215546Sopenharmony_ci return false; 1088bf215546Sopenharmony_ci } 1089bf215546Sopenharmony_ci} 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_cibool 1092bf215546Sopenharmony_cibackend_instruction::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const 1093bf215546Sopenharmony_ci{ 1094bf215546Sopenharmony_ci return writes_accumulator || 1095bf215546Sopenharmony_ci (devinfo->ver < 6 && 1096bf215546Sopenharmony_ci ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || 1097bf215546Sopenharmony_ci (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) || 1098bf215546Sopenharmony_ci (opcode == FS_OPCODE_LINTERP && 1099bf215546Sopenharmony_ci (!devinfo->has_pln || devinfo->ver <= 6)) || 1100bf215546Sopenharmony_ci (eot && devinfo->ver >= 12); /* See Wa_14010017096. */ 1101bf215546Sopenharmony_ci} 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_cibool 1104bf215546Sopenharmony_cibackend_instruction::has_side_effects() const 1105bf215546Sopenharmony_ci{ 1106bf215546Sopenharmony_ci switch (opcode) { 1107bf215546Sopenharmony_ci case SHADER_OPCODE_SEND: 1108bf215546Sopenharmony_ci return send_has_side_effects; 1109bf215546Sopenharmony_ci 1110bf215546Sopenharmony_ci case BRW_OPCODE_SYNC: 1111bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_ATOMIC: 1112bf215546Sopenharmony_ci case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 1113bf215546Sopenharmony_ci case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: 1114bf215546Sopenharmony_ci case SHADER_OPCODE_GFX4_SCRATCH_WRITE: 1115bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 1116bf215546Sopenharmony_ci case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 1117bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: 1118bf215546Sopenharmony_ci case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: 1119bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: 1120bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: 1121bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: 1122bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: 1123bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: 1124bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: 1125bf215546Sopenharmony_ci case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 1126bf215546Sopenharmony_ci case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: 1127bf215546Sopenharmony_ci case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 1128bf215546Sopenharmony_ci case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 1129bf215546Sopenharmony_ci case SHADER_OPCODE_MEMORY_FENCE: 1130bf215546Sopenharmony_ci case SHADER_OPCODE_INTERLOCK: 1131bf215546Sopenharmony_ci case SHADER_OPCODE_URB_WRITE_LOGICAL: 1132bf215546Sopenharmony_ci case FS_OPCODE_FB_WRITE: 1133bf215546Sopenharmony_ci case FS_OPCODE_FB_WRITE_LOGICAL: 1134bf215546Sopenharmony_ci case FS_OPCODE_REP_FB_WRITE: 1135bf215546Sopenharmony_ci case SHADER_OPCODE_BARRIER: 1136bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_URB_WRITE: 1137bf215546Sopenharmony_ci case TCS_OPCODE_RELEASE_INPUT: 1138bf215546Sopenharmony_ci case SHADER_OPCODE_RND_MODE: 1139bf215546Sopenharmony_ci case SHADER_OPCODE_FLOAT_CONTROL_MODE: 1140bf215546Sopenharmony_ci case FS_OPCODE_SCHEDULING_FENCE: 1141bf215546Sopenharmony_ci case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: 1142bf215546Sopenharmony_ci case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: 1143bf215546Sopenharmony_ci case SHADER_OPCODE_BTD_SPAWN_LOGICAL: 1144bf215546Sopenharmony_ci case SHADER_OPCODE_BTD_RETIRE_LOGICAL: 1145bf215546Sopenharmony_ci case RT_OPCODE_TRACE_RAY_LOGICAL: 1146bf215546Sopenharmony_ci case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: 1147bf215546Sopenharmony_ci return true; 1148bf215546Sopenharmony_ci default: 1149bf215546Sopenharmony_ci return eot; 1150bf215546Sopenharmony_ci } 1151bf215546Sopenharmony_ci} 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_cibool 1154bf215546Sopenharmony_cibackend_instruction::is_volatile() const 1155bf215546Sopenharmony_ci{ 1156bf215546Sopenharmony_ci switch (opcode) { 1157bf215546Sopenharmony_ci case SHADER_OPCODE_SEND: 1158bf215546Sopenharmony_ci return send_is_volatile; 1159bf215546Sopenharmony_ci 1160bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_SURFACE_READ: 1161bf215546Sopenharmony_ci case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 1162bf215546Sopenharmony_ci case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 1163bf215546Sopenharmony_ci case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 1164bf215546Sopenharmony_ci case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: 1165bf215546Sopenharmony_ci case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: 1166bf215546Sopenharmony_ci case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: 1167bf215546Sopenharmony_ci case VEC4_OPCODE_URB_READ: 1168bf215546Sopenharmony_ci return true; 1169bf215546Sopenharmony_ci default: 1170bf215546Sopenharmony_ci return false; 1171bf215546Sopenharmony_ci } 1172bf215546Sopenharmony_ci} 1173bf215546Sopenharmony_ci 1174bf215546Sopenharmony_ci#ifndef NDEBUG 1175bf215546Sopenharmony_cistatic bool 1176bf215546Sopenharmony_ciinst_is_in_block(const bblock_t *block, const backend_instruction *inst) 1177bf215546Sopenharmony_ci{ 1178bf215546Sopenharmony_ci foreach_inst_in_block (backend_instruction, i, block) { 1179bf215546Sopenharmony_ci if (inst == i) 1180bf215546Sopenharmony_ci return true; 1181bf215546Sopenharmony_ci } 1182bf215546Sopenharmony_ci return false; 1183bf215546Sopenharmony_ci} 1184bf215546Sopenharmony_ci#endif 1185bf215546Sopenharmony_ci 1186bf215546Sopenharmony_cistatic void 1187bf215546Sopenharmony_ciadjust_later_block_ips(bblock_t *start_block, int ip_adjustment) 1188bf215546Sopenharmony_ci{ 1189bf215546Sopenharmony_ci for (bblock_t *block_iter = start_block->next(); 1190bf215546Sopenharmony_ci block_iter; 1191bf215546Sopenharmony_ci block_iter = block_iter->next()) { 1192bf215546Sopenharmony_ci block_iter->start_ip += ip_adjustment; 1193bf215546Sopenharmony_ci block_iter->end_ip += ip_adjustment; 1194bf215546Sopenharmony_ci } 1195bf215546Sopenharmony_ci} 1196bf215546Sopenharmony_ci 1197bf215546Sopenharmony_civoid 1198bf215546Sopenharmony_cibackend_instruction::insert_after(bblock_t *block, backend_instruction *inst) 1199bf215546Sopenharmony_ci{ 1200bf215546Sopenharmony_ci assert(this != inst); 1201bf215546Sopenharmony_ci assert(block->end_ip_delta == 0); 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci if (!this->is_head_sentinel()) 1204bf215546Sopenharmony_ci assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci block->end_ip++; 1207bf215546Sopenharmony_ci 1208bf215546Sopenharmony_ci adjust_later_block_ips(block, 1); 1209bf215546Sopenharmony_ci 1210bf215546Sopenharmony_ci exec_node::insert_after(inst); 1211bf215546Sopenharmony_ci} 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_civoid 1214bf215546Sopenharmony_cibackend_instruction::insert_before(bblock_t *block, backend_instruction *inst) 1215bf215546Sopenharmony_ci{ 1216bf215546Sopenharmony_ci assert(this != inst); 1217bf215546Sopenharmony_ci assert(block->end_ip_delta == 0); 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci if (!this->is_tail_sentinel()) 1220bf215546Sopenharmony_ci assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1221bf215546Sopenharmony_ci 1222bf215546Sopenharmony_ci block->end_ip++; 1223bf215546Sopenharmony_ci 1224bf215546Sopenharmony_ci adjust_later_block_ips(block, 1); 1225bf215546Sopenharmony_ci 1226bf215546Sopenharmony_ci exec_node::insert_before(inst); 1227bf215546Sopenharmony_ci} 1228bf215546Sopenharmony_ci 1229bf215546Sopenharmony_civoid 1230bf215546Sopenharmony_cibackend_instruction::insert_before(bblock_t *block, exec_list *list) 1231bf215546Sopenharmony_ci{ 1232bf215546Sopenharmony_ci assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1233bf215546Sopenharmony_ci assert(block->end_ip_delta == 0); 1234bf215546Sopenharmony_ci 1235bf215546Sopenharmony_ci unsigned num_inst = list->length(); 1236bf215546Sopenharmony_ci 1237bf215546Sopenharmony_ci block->end_ip += num_inst; 1238bf215546Sopenharmony_ci 1239bf215546Sopenharmony_ci adjust_later_block_ips(block, num_inst); 1240bf215546Sopenharmony_ci 1241bf215546Sopenharmony_ci exec_node::insert_before(list); 1242bf215546Sopenharmony_ci} 1243bf215546Sopenharmony_ci 1244bf215546Sopenharmony_civoid 1245bf215546Sopenharmony_cibackend_instruction::remove(bblock_t *block, bool defer_later_block_ip_updates) 1246bf215546Sopenharmony_ci{ 1247bf215546Sopenharmony_ci assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1248bf215546Sopenharmony_ci 1249bf215546Sopenharmony_ci if (defer_later_block_ip_updates) { 1250bf215546Sopenharmony_ci block->end_ip_delta--; 1251bf215546Sopenharmony_ci } else { 1252bf215546Sopenharmony_ci assert(block->end_ip_delta == 0); 1253bf215546Sopenharmony_ci adjust_later_block_ips(block, -1); 1254bf215546Sopenharmony_ci } 1255bf215546Sopenharmony_ci 1256bf215546Sopenharmony_ci if (block->start_ip == block->end_ip) { 1257bf215546Sopenharmony_ci if (block->end_ip_delta != 0) { 1258bf215546Sopenharmony_ci adjust_later_block_ips(block, block->end_ip_delta); 1259bf215546Sopenharmony_ci block->end_ip_delta = 0; 1260bf215546Sopenharmony_ci } 1261bf215546Sopenharmony_ci 1262bf215546Sopenharmony_ci block->cfg->remove_block(block); 1263bf215546Sopenharmony_ci } else { 1264bf215546Sopenharmony_ci block->end_ip--; 1265bf215546Sopenharmony_ci } 1266bf215546Sopenharmony_ci 1267bf215546Sopenharmony_ci exec_node::remove(); 1268bf215546Sopenharmony_ci} 1269bf215546Sopenharmony_ci 1270bf215546Sopenharmony_civoid 1271bf215546Sopenharmony_cibackend_shader::dump_instructions() const 1272bf215546Sopenharmony_ci{ 1273bf215546Sopenharmony_ci dump_instructions(NULL); 1274bf215546Sopenharmony_ci} 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_civoid 1277bf215546Sopenharmony_cibackend_shader::dump_instructions(const char *name) const 1278bf215546Sopenharmony_ci{ 1279bf215546Sopenharmony_ci FILE *file = stderr; 1280bf215546Sopenharmony_ci if (name && geteuid() != 0) { 1281bf215546Sopenharmony_ci file = fopen(name, "w"); 1282bf215546Sopenharmony_ci if (!file) 1283bf215546Sopenharmony_ci file = stderr; 1284bf215546Sopenharmony_ci } 1285bf215546Sopenharmony_ci 1286bf215546Sopenharmony_ci if (cfg) { 1287bf215546Sopenharmony_ci int ip = 0; 1288bf215546Sopenharmony_ci foreach_block_and_inst(block, backend_instruction, inst, cfg) { 1289bf215546Sopenharmony_ci if (!INTEL_DEBUG(DEBUG_OPTIMIZER)) 1290bf215546Sopenharmony_ci fprintf(file, "%4d: ", ip++); 1291bf215546Sopenharmony_ci dump_instruction(inst, file); 1292bf215546Sopenharmony_ci } 1293bf215546Sopenharmony_ci } else { 1294bf215546Sopenharmony_ci int ip = 0; 1295bf215546Sopenharmony_ci foreach_in_list(backend_instruction, inst, &instructions) { 1296bf215546Sopenharmony_ci if (!INTEL_DEBUG(DEBUG_OPTIMIZER)) 1297bf215546Sopenharmony_ci fprintf(file, "%4d: ", ip++); 1298bf215546Sopenharmony_ci dump_instruction(inst, file); 1299bf215546Sopenharmony_ci } 1300bf215546Sopenharmony_ci } 1301bf215546Sopenharmony_ci 1302bf215546Sopenharmony_ci if (file != stderr) { 1303bf215546Sopenharmony_ci fclose(file); 1304bf215546Sopenharmony_ci } 1305bf215546Sopenharmony_ci} 1306bf215546Sopenharmony_ci 1307bf215546Sopenharmony_civoid 1308bf215546Sopenharmony_cibackend_shader::calculate_cfg() 1309bf215546Sopenharmony_ci{ 1310bf215546Sopenharmony_ci if (this->cfg) 1311bf215546Sopenharmony_ci return; 1312bf215546Sopenharmony_ci cfg = new(mem_ctx) cfg_t(this, &this->instructions); 1313bf215546Sopenharmony_ci} 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_civoid 1316bf215546Sopenharmony_cibackend_shader::invalidate_analysis(brw::analysis_dependency_class c) 1317bf215546Sopenharmony_ci{ 1318bf215546Sopenharmony_ci idom_analysis.invalidate(c); 1319bf215546Sopenharmony_ci} 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_ciextern "C" const unsigned * 1322bf215546Sopenharmony_cibrw_compile_tes(const struct brw_compiler *compiler, 1323bf215546Sopenharmony_ci void *mem_ctx, 1324bf215546Sopenharmony_ci brw_compile_tes_params *params) 1325bf215546Sopenharmony_ci{ 1326bf215546Sopenharmony_ci const struct intel_device_info *devinfo = compiler->devinfo; 1327bf215546Sopenharmony_ci nir_shader *nir = params->nir; 1328bf215546Sopenharmony_ci const struct brw_tes_prog_key *key = params->key; 1329bf215546Sopenharmony_ci const struct brw_vue_map *input_vue_map = params->input_vue_map; 1330bf215546Sopenharmony_ci struct brw_tes_prog_data *prog_data = params->prog_data; 1331bf215546Sopenharmony_ci 1332bf215546Sopenharmony_ci const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; 1333bf215546Sopenharmony_ci const bool debug_enabled = INTEL_DEBUG(DEBUG_TES); 1334bf215546Sopenharmony_ci const unsigned *assembly; 1335bf215546Sopenharmony_ci 1336bf215546Sopenharmony_ci prog_data->base.base.stage = MESA_SHADER_TESS_EVAL; 1337bf215546Sopenharmony_ci prog_data->base.base.ray_queries = nir->info.ray_queries; 1338bf215546Sopenharmony_ci 1339bf215546Sopenharmony_ci nir->info.inputs_read = key->inputs_read; 1340bf215546Sopenharmony_ci nir->info.patch_inputs_read = key->patch_inputs_read; 1341bf215546Sopenharmony_ci 1342bf215546Sopenharmony_ci brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); 1343bf215546Sopenharmony_ci brw_nir_lower_tes_inputs(nir, input_vue_map); 1344bf215546Sopenharmony_ci brw_nir_lower_vue_outputs(nir); 1345bf215546Sopenharmony_ci brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled, 1346bf215546Sopenharmony_ci key->base.robust_buffer_access); 1347bf215546Sopenharmony_ci 1348bf215546Sopenharmony_ci brw_compute_vue_map(devinfo, &prog_data->base.vue_map, 1349bf215546Sopenharmony_ci nir->info.outputs_written, 1350bf215546Sopenharmony_ci nir->info.separate_shader, 1); 1351bf215546Sopenharmony_ci 1352bf215546Sopenharmony_ci unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; 1353bf215546Sopenharmony_ci 1354bf215546Sopenharmony_ci assert(output_size_bytes >= 1); 1355bf215546Sopenharmony_ci if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) { 1356bf215546Sopenharmony_ci params->error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size"); 1357bf215546Sopenharmony_ci return NULL; 1358bf215546Sopenharmony_ci } 1359bf215546Sopenharmony_ci 1360bf215546Sopenharmony_ci prog_data->base.clip_distance_mask = 1361bf215546Sopenharmony_ci ((1 << nir->info.clip_distance_array_size) - 1); 1362bf215546Sopenharmony_ci prog_data->base.cull_distance_mask = 1363bf215546Sopenharmony_ci ((1 << nir->info.cull_distance_array_size) - 1) << 1364bf215546Sopenharmony_ci nir->info.clip_distance_array_size; 1365bf215546Sopenharmony_ci 1366bf215546Sopenharmony_ci prog_data->include_primitive_id = 1367bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_ci /* URB entry sizes are stored as a multiple of 64 bytes. */ 1370bf215546Sopenharmony_ci prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; 1371bf215546Sopenharmony_ci 1372bf215546Sopenharmony_ci prog_data->base.urb_read_length = 0; 1373bf215546Sopenharmony_ci 1374bf215546Sopenharmony_ci STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); 1375bf215546Sopenharmony_ci STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL == 1376bf215546Sopenharmony_ci TESS_SPACING_FRACTIONAL_ODD - 1); 1377bf215546Sopenharmony_ci STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL == 1378bf215546Sopenharmony_ci TESS_SPACING_FRACTIONAL_EVEN - 1); 1379bf215546Sopenharmony_ci 1380bf215546Sopenharmony_ci prog_data->partitioning = 1381bf215546Sopenharmony_ci (enum brw_tess_partitioning) (nir->info.tess.spacing - 1); 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci switch (nir->info.tess._primitive_mode) { 1384bf215546Sopenharmony_ci case TESS_PRIMITIVE_QUADS: 1385bf215546Sopenharmony_ci prog_data->domain = BRW_TESS_DOMAIN_QUAD; 1386bf215546Sopenharmony_ci break; 1387bf215546Sopenharmony_ci case TESS_PRIMITIVE_TRIANGLES: 1388bf215546Sopenharmony_ci prog_data->domain = BRW_TESS_DOMAIN_TRI; 1389bf215546Sopenharmony_ci break; 1390bf215546Sopenharmony_ci case TESS_PRIMITIVE_ISOLINES: 1391bf215546Sopenharmony_ci prog_data->domain = BRW_TESS_DOMAIN_ISOLINE; 1392bf215546Sopenharmony_ci break; 1393bf215546Sopenharmony_ci default: 1394bf215546Sopenharmony_ci unreachable("invalid domain shader primitive mode"); 1395bf215546Sopenharmony_ci } 1396bf215546Sopenharmony_ci 1397bf215546Sopenharmony_ci if (nir->info.tess.point_mode) { 1398bf215546Sopenharmony_ci prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; 1399bf215546Sopenharmony_ci } else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) { 1400bf215546Sopenharmony_ci prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; 1401bf215546Sopenharmony_ci } else { 1402bf215546Sopenharmony_ci /* Hardware winding order is backwards from OpenGL */ 1403bf215546Sopenharmony_ci prog_data->output_topology = 1404bf215546Sopenharmony_ci nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW 1405bf215546Sopenharmony_ci : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; 1406bf215546Sopenharmony_ci } 1407bf215546Sopenharmony_ci 1408bf215546Sopenharmony_ci if (unlikely(debug_enabled)) { 1409bf215546Sopenharmony_ci fprintf(stderr, "TES Input "); 1410bf215546Sopenharmony_ci brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL); 1411bf215546Sopenharmony_ci fprintf(stderr, "TES Output "); 1412bf215546Sopenharmony_ci brw_print_vue_map(stderr, &prog_data->base.vue_map, 1413bf215546Sopenharmony_ci MESA_SHADER_TESS_EVAL); 1414bf215546Sopenharmony_ci } 1415bf215546Sopenharmony_ci 1416bf215546Sopenharmony_ci if (is_scalar) { 1417bf215546Sopenharmony_ci fs_visitor v(compiler, params->log_data, mem_ctx, &key->base, 1418bf215546Sopenharmony_ci &prog_data->base.base, nir, 8, 1419bf215546Sopenharmony_ci debug_enabled); 1420bf215546Sopenharmony_ci if (!v.run_tes()) { 1421bf215546Sopenharmony_ci params->error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1422bf215546Sopenharmony_ci return NULL; 1423bf215546Sopenharmony_ci } 1424bf215546Sopenharmony_ci 1425bf215546Sopenharmony_ci prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; 1426bf215546Sopenharmony_ci prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; 1427bf215546Sopenharmony_ci 1428bf215546Sopenharmony_ci fs_generator g(compiler, params->log_data, mem_ctx, 1429bf215546Sopenharmony_ci &prog_data->base.base, false, MESA_SHADER_TESS_EVAL); 1430bf215546Sopenharmony_ci if (unlikely(debug_enabled)) { 1431bf215546Sopenharmony_ci g.enable_debug(ralloc_asprintf(mem_ctx, 1432bf215546Sopenharmony_ci "%s tessellation evaluation shader %s", 1433bf215546Sopenharmony_ci nir->info.label ? nir->info.label 1434bf215546Sopenharmony_ci : "unnamed", 1435bf215546Sopenharmony_ci nir->info.name)); 1436bf215546Sopenharmony_ci } 1437bf215546Sopenharmony_ci 1438bf215546Sopenharmony_ci g.generate_code(v.cfg, 8, v.shader_stats, 1439bf215546Sopenharmony_ci v.performance_analysis.require(), params->stats); 1440bf215546Sopenharmony_ci 1441bf215546Sopenharmony_ci g.add_const_data(nir->constant_data, nir->constant_data_size); 1442bf215546Sopenharmony_ci 1443bf215546Sopenharmony_ci assembly = g.get_assembly(); 1444bf215546Sopenharmony_ci } else { 1445bf215546Sopenharmony_ci brw::vec4_tes_visitor v(compiler, params->log_data, key, prog_data, 1446bf215546Sopenharmony_ci nir, mem_ctx, debug_enabled); 1447bf215546Sopenharmony_ci if (!v.run()) { 1448bf215546Sopenharmony_ci params->error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1449bf215546Sopenharmony_ci return NULL; 1450bf215546Sopenharmony_ci } 1451bf215546Sopenharmony_ci 1452bf215546Sopenharmony_ci if (unlikely(debug_enabled)) 1453bf215546Sopenharmony_ci v.dump_instructions(); 1454bf215546Sopenharmony_ci 1455bf215546Sopenharmony_ci assembly = brw_vec4_generate_assembly(compiler, params->log_data, mem_ctx, nir, 1456bf215546Sopenharmony_ci &prog_data->base, v.cfg, 1457bf215546Sopenharmony_ci v.performance_analysis.require(), 1458bf215546Sopenharmony_ci params->stats, debug_enabled); 1459bf215546Sopenharmony_ci } 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_ci return assembly; 1462bf215546Sopenharmony_ci} 1463