1bf215546Sopenharmony_ci/* Copyright © 2011 Intel Corporation 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 4bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 5bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 6bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 7bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 8bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 9bf215546Sopenharmony_ci * 10bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 11bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 12bf215546Sopenharmony_ci * Software. 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20bf215546Sopenharmony_ci * IN THE SOFTWARE. 
21bf215546Sopenharmony_ci */ 22bf215546Sopenharmony_ci 23bf215546Sopenharmony_ci#include "brw_vec4.h" 24bf215546Sopenharmony_ci#include "brw_cfg.h" 25bf215546Sopenharmony_ci#include "brw_eu.h" 26bf215546Sopenharmony_ci#include "dev/intel_debug.h" 27bf215546Sopenharmony_ci#include "util/mesa-sha1.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ciusing namespace brw; 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cistatic void 32bf215546Sopenharmony_cigenerate_math1_gfx4(struct brw_codegen *p, 33bf215546Sopenharmony_ci vec4_instruction *inst, 34bf215546Sopenharmony_ci struct brw_reg dst, 35bf215546Sopenharmony_ci struct brw_reg src) 36bf215546Sopenharmony_ci{ 37bf215546Sopenharmony_ci gfx4_math(p, 38bf215546Sopenharmony_ci dst, 39bf215546Sopenharmony_ci brw_math_function(inst->opcode), 40bf215546Sopenharmony_ci inst->base_mrf, 41bf215546Sopenharmony_ci src, 42bf215546Sopenharmony_ci BRW_MATH_PRECISION_FULL); 43bf215546Sopenharmony_ci} 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_cistatic void 46bf215546Sopenharmony_cicheck_gfx6_math_src_arg(struct brw_reg src) 47bf215546Sopenharmony_ci{ 48bf215546Sopenharmony_ci /* Source swizzles are ignored. */ 49bf215546Sopenharmony_ci assert(!src.abs); 50bf215546Sopenharmony_ci assert(!src.negate); 51bf215546Sopenharmony_ci assert(src.swizzle == BRW_SWIZZLE_XYZW); 52bf215546Sopenharmony_ci} 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_cistatic void 55bf215546Sopenharmony_cigenerate_math_gfx6(struct brw_codegen *p, 56bf215546Sopenharmony_ci vec4_instruction *inst, 57bf215546Sopenharmony_ci struct brw_reg dst, 58bf215546Sopenharmony_ci struct brw_reg src0, 59bf215546Sopenharmony_ci struct brw_reg src1) 60bf215546Sopenharmony_ci{ 61bf215546Sopenharmony_ci /* Can't do writemask because math can't be align16. */ 62bf215546Sopenharmony_ci assert(dst.writemask == WRITEMASK_XYZW); 63bf215546Sopenharmony_ci /* Source swizzles are ignored. 
*/ 64bf215546Sopenharmony_ci check_gfx6_math_src_arg(src0); 65bf215546Sopenharmony_ci if (src1.file == BRW_GENERAL_REGISTER_FILE) 66bf215546Sopenharmony_ci check_gfx6_math_src_arg(src1); 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 69bf215546Sopenharmony_ci gfx6_math(p, dst, brw_math_function(inst->opcode), src0, src1); 70bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_16); 71bf215546Sopenharmony_ci} 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_cistatic void 74bf215546Sopenharmony_cigenerate_math2_gfx4(struct brw_codegen *p, 75bf215546Sopenharmony_ci vec4_instruction *inst, 76bf215546Sopenharmony_ci struct brw_reg dst, 77bf215546Sopenharmony_ci struct brw_reg src0, 78bf215546Sopenharmony_ci struct brw_reg src1) 79bf215546Sopenharmony_ci{ 80bf215546Sopenharmony_ci /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 81bf215546Sopenharmony_ci * "Message Payload": 82bf215546Sopenharmony_ci * 83bf215546Sopenharmony_ci * "Operand0[7]. For the INT DIV functions, this operand is the 84bf215546Sopenharmony_ci * denominator." 85bf215546Sopenharmony_ci * ... 86bf215546Sopenharmony_ci * "Operand1[7]. For the INT DIV functions, this operand is the 87bf215546Sopenharmony_ci * numerator." 88bf215546Sopenharmony_ci */ 89bf215546Sopenharmony_ci bool is_int_div = inst->opcode != SHADER_OPCODE_POW; 90bf215546Sopenharmony_ci struct brw_reg &op0 = is_int_div ? src1 : src0; 91bf215546Sopenharmony_ci struct brw_reg &op1 = is_int_div ? 
src0 : src1; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci brw_push_insn_state(p); 94bf215546Sopenharmony_ci brw_set_default_saturate(p, false); 95bf215546Sopenharmony_ci brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 96bf215546Sopenharmony_ci brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1); 97bf215546Sopenharmony_ci brw_pop_insn_state(p); 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci gfx4_math(p, 100bf215546Sopenharmony_ci dst, 101bf215546Sopenharmony_ci brw_math_function(inst->opcode), 102bf215546Sopenharmony_ci inst->base_mrf, 103bf215546Sopenharmony_ci op0, 104bf215546Sopenharmony_ci BRW_MATH_PRECISION_FULL); 105bf215546Sopenharmony_ci} 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cistatic void 108bf215546Sopenharmony_cigenerate_tex(struct brw_codegen *p, 109bf215546Sopenharmony_ci struct brw_vue_prog_data *prog_data, 110bf215546Sopenharmony_ci gl_shader_stage stage, 111bf215546Sopenharmony_ci vec4_instruction *inst, 112bf215546Sopenharmony_ci struct brw_reg dst, 113bf215546Sopenharmony_ci struct brw_reg src, 114bf215546Sopenharmony_ci struct brw_reg surface_index, 115bf215546Sopenharmony_ci struct brw_reg sampler_index) 116bf215546Sopenharmony_ci{ 117bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 118bf215546Sopenharmony_ci int msg_type = -1; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci if (devinfo->ver >= 5) { 121bf215546Sopenharmony_ci switch (inst->opcode) { 122bf215546Sopenharmony_ci case SHADER_OPCODE_TEX: 123bf215546Sopenharmony_ci case SHADER_OPCODE_TXL: 124bf215546Sopenharmony_ci if (inst->shadow_compare) { 125bf215546Sopenharmony_ci msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; 126bf215546Sopenharmony_ci } else { 127bf215546Sopenharmony_ci msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD; 128bf215546Sopenharmony_ci } 129bf215546Sopenharmony_ci break; 130bf215546Sopenharmony_ci case SHADER_OPCODE_TXD: 131bf215546Sopenharmony_ci if (inst->shadow_compare) { 
132bf215546Sopenharmony_ci /* Gfx7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */ 133bf215546Sopenharmony_ci assert(devinfo->verx10 == 75); 134bf215546Sopenharmony_ci msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; 135bf215546Sopenharmony_ci } else { 136bf215546Sopenharmony_ci msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS; 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci break; 139bf215546Sopenharmony_ci case SHADER_OPCODE_TXF: 140bf215546Sopenharmony_ci msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LD; 141bf215546Sopenharmony_ci break; 142bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS: 143bf215546Sopenharmony_ci if (devinfo->ver >= 7) 144bf215546Sopenharmony_ci msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; 145bf215546Sopenharmony_ci else 146bf215546Sopenharmony_ci msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LD; 147bf215546Sopenharmony_ci break; 148bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_MCS: 149bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 150bf215546Sopenharmony_ci msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; 151bf215546Sopenharmony_ci break; 152bf215546Sopenharmony_ci case SHADER_OPCODE_TXS: 153bf215546Sopenharmony_ci msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO; 154bf215546Sopenharmony_ci break; 155bf215546Sopenharmony_ci case SHADER_OPCODE_TG4: 156bf215546Sopenharmony_ci if (inst->shadow_compare) { 157bf215546Sopenharmony_ci msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; 158bf215546Sopenharmony_ci } else { 159bf215546Sopenharmony_ci msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4; 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci break; 162bf215546Sopenharmony_ci case SHADER_OPCODE_TG4_OFFSET: 163bf215546Sopenharmony_ci if (inst->shadow_compare) { 164bf215546Sopenharmony_ci msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; 165bf215546Sopenharmony_ci } else { 166bf215546Sopenharmony_ci msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; 167bf215546Sopenharmony_ci } 168bf215546Sopenharmony_ci break; 
169bf215546Sopenharmony_ci case SHADER_OPCODE_SAMPLEINFO: 170bf215546Sopenharmony_ci msg_type = GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; 171bf215546Sopenharmony_ci break; 172bf215546Sopenharmony_ci default: 173bf215546Sopenharmony_ci unreachable("should not get here: invalid vec4 texture opcode"); 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci } else { 176bf215546Sopenharmony_ci switch (inst->opcode) { 177bf215546Sopenharmony_ci case SHADER_OPCODE_TEX: 178bf215546Sopenharmony_ci case SHADER_OPCODE_TXL: 179bf215546Sopenharmony_ci if (inst->shadow_compare) { 180bf215546Sopenharmony_ci msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE; 181bf215546Sopenharmony_ci assert(inst->mlen == 3); 182bf215546Sopenharmony_ci } else { 183bf215546Sopenharmony_ci msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD; 184bf215546Sopenharmony_ci assert(inst->mlen == 2); 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci break; 187bf215546Sopenharmony_ci case SHADER_OPCODE_TXD: 188bf215546Sopenharmony_ci /* There is no sample_d_c message; comparisons are done manually. 
*/ 189bf215546Sopenharmony_ci msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS; 190bf215546Sopenharmony_ci assert(inst->mlen == 4); 191bf215546Sopenharmony_ci break; 192bf215546Sopenharmony_ci case SHADER_OPCODE_TXF: 193bf215546Sopenharmony_ci msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD; 194bf215546Sopenharmony_ci assert(inst->mlen == 2); 195bf215546Sopenharmony_ci break; 196bf215546Sopenharmony_ci case SHADER_OPCODE_TXS: 197bf215546Sopenharmony_ci msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO; 198bf215546Sopenharmony_ci assert(inst->mlen == 2); 199bf215546Sopenharmony_ci break; 200bf215546Sopenharmony_ci default: 201bf215546Sopenharmony_ci unreachable("should not get here: invalid vec4 texture opcode"); 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci assert(msg_type != -1); 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci assert(sampler_index.type == BRW_REGISTER_TYPE_UD); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci /* Load the message header if present. If there's a texture offset, we need 210bf215546Sopenharmony_ci * to set it up explicitly and load the offset bitfield. Otherwise, we can 211bf215546Sopenharmony_ci * use an implied move from g0 to the first message register. 212bf215546Sopenharmony_ci */ 213bf215546Sopenharmony_ci if (inst->header_size != 0) { 214bf215546Sopenharmony_ci if (devinfo->ver < 6 && !inst->offset) { 215bf215546Sopenharmony_ci /* Set up an implied move from g0 to the MRF. */ 216bf215546Sopenharmony_ci src = brw_vec8_grf(0, 0); 217bf215546Sopenharmony_ci } else { 218bf215546Sopenharmony_ci struct brw_reg header = 219bf215546Sopenharmony_ci retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD); 220bf215546Sopenharmony_ci uint32_t dw2 = 0; 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci /* Explicitly set up the message header by copying g0 to the MRF. 
*/ 223bf215546Sopenharmony_ci brw_push_insn_state(p); 224bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 225bf215546Sopenharmony_ci brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci if (inst->offset) 230bf215546Sopenharmony_ci /* Set the texel offset bits in DWord 2. */ 231bf215546Sopenharmony_ci dw2 = inst->offset; 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci /* The VS, DS, and FS stages have the g0.2 payload delivered as 0, 234bf215546Sopenharmony_ci * so header0.2 is 0 when g0 is copied. The HS and GS stages do 235bf215546Sopenharmony_ci * not, so we must set to to 0 to avoid setting undesirable bits 236bf215546Sopenharmony_ci * in the message header. 237bf215546Sopenharmony_ci */ 238bf215546Sopenharmony_ci if (dw2 || 239bf215546Sopenharmony_ci stage == MESA_SHADER_TESS_CTRL || 240bf215546Sopenharmony_ci stage == MESA_SHADER_GEOMETRY) { 241bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(dw2)); 242bf215546Sopenharmony_ci } 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci brw_adjust_sampler_state_pointer(p, header, sampler_index); 245bf215546Sopenharmony_ci brw_pop_insn_state(p); 246bf215546Sopenharmony_ci } 247bf215546Sopenharmony_ci } 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci uint32_t return_format; 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci switch (dst.type) { 252bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_D: 253bf215546Sopenharmony_ci return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32; 254bf215546Sopenharmony_ci break; 255bf215546Sopenharmony_ci case BRW_REGISTER_TYPE_UD: 256bf215546Sopenharmony_ci return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; 257bf215546Sopenharmony_ci break; 258bf215546Sopenharmony_ci default: 259bf215546Sopenharmony_ci return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 
260bf215546Sopenharmony_ci break; 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci /* Stomp the resinfo output type to UINT32. On gens 4-5, the output type 264bf215546Sopenharmony_ci * is set as part of the message descriptor. On gfx4, the PRM seems to 265bf215546Sopenharmony_ci * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on 266bf215546Sopenharmony_ci * later gens UINT32 is required. Once you hit Sandy Bridge, the bit is 267bf215546Sopenharmony_ci * gone from the message descriptor entirely and you just get UINT32 all 268bf215546Sopenharmony_ci * the time regasrdless. Since we can really only do non-UINT32 on gfx4, 269bf215546Sopenharmony_ci * just stomp it to UINT32 all the time. 270bf215546Sopenharmony_ci */ 271bf215546Sopenharmony_ci if (inst->opcode == SHADER_OPCODE_TXS) 272bf215546Sopenharmony_ci return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci if (surface_index.file == BRW_IMMEDIATE_VALUE && 275bf215546Sopenharmony_ci sampler_index.file == BRW_IMMEDIATE_VALUE) { 276bf215546Sopenharmony_ci uint32_t surface = surface_index.ud; 277bf215546Sopenharmony_ci uint32_t sampler = sampler_index.ud; 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci brw_SAMPLE(p, 280bf215546Sopenharmony_ci dst, 281bf215546Sopenharmony_ci inst->base_mrf, 282bf215546Sopenharmony_ci src, 283bf215546Sopenharmony_ci surface, 284bf215546Sopenharmony_ci sampler % 16, 285bf215546Sopenharmony_ci msg_type, 286bf215546Sopenharmony_ci 1, /* response length */ 287bf215546Sopenharmony_ci inst->mlen, 288bf215546Sopenharmony_ci inst->header_size != 0, 289bf215546Sopenharmony_ci BRW_SAMPLER_SIMD_MODE_SIMD4X2, 290bf215546Sopenharmony_ci return_format); 291bf215546Sopenharmony_ci } else { 292bf215546Sopenharmony_ci /* Non-constant sampler index. 
*/ 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); 295bf215546Sopenharmony_ci struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); 296bf215546Sopenharmony_ci struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci brw_push_insn_state(p); 299bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 300bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci if (brw_regs_equal(&surface_reg, &sampler_reg)) { 303bf215546Sopenharmony_ci brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); 304bf215546Sopenharmony_ci } else { 305bf215546Sopenharmony_ci if (sampler_reg.file == BRW_IMMEDIATE_VALUE) { 306bf215546Sopenharmony_ci brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8)); 307bf215546Sopenharmony_ci } else { 308bf215546Sopenharmony_ci brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); 309bf215546Sopenharmony_ci brw_OR(p, addr, addr, surface_reg); 310bf215546Sopenharmony_ci } 311bf215546Sopenharmony_ci } 312bf215546Sopenharmony_ci brw_AND(p, addr, addr, brw_imm_ud(0xfff)); 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci brw_pop_insn_state(p); 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci if (inst->base_mrf != -1) 317bf215546Sopenharmony_ci gfx6_resolve_implied_move(p, &src, inst->base_mrf); 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci /* dst = send(offset, a0.0 | <descriptor>) */ 320bf215546Sopenharmony_ci brw_send_indirect_message( 321bf215546Sopenharmony_ci p, BRW_SFID_SAMPLER, dst, src, addr, 322bf215546Sopenharmony_ci brw_message_desc(devinfo, inst->mlen, 1, inst->header_size) | 323bf215546Sopenharmony_ci brw_sampler_desc(devinfo, 324bf215546Sopenharmony_ci 0 /* surface */, 325bf215546Sopenharmony_ci 0 /* sampler */, 326bf215546Sopenharmony_ci msg_type, 
327bf215546Sopenharmony_ci BRW_SAMPLER_SIMD_MODE_SIMD4X2, 328bf215546Sopenharmony_ci return_format), 329bf215546Sopenharmony_ci false /* EOT */); 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_ci /* visitor knows more than we do about the surface limit required, 332bf215546Sopenharmony_ci * so has already done marking. 333bf215546Sopenharmony_ci */ 334bf215546Sopenharmony_ci } 335bf215546Sopenharmony_ci} 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_cistatic void 338bf215546Sopenharmony_cigenerate_vs_urb_write(struct brw_codegen *p, vec4_instruction *inst) 339bf215546Sopenharmony_ci{ 340bf215546Sopenharmony_ci brw_urb_WRITE(p, 341bf215546Sopenharmony_ci brw_null_reg(), /* dest */ 342bf215546Sopenharmony_ci inst->base_mrf, /* starting mrf reg nr */ 343bf215546Sopenharmony_ci brw_vec8_grf(0, 0), /* src */ 344bf215546Sopenharmony_ci inst->urb_write_flags, 345bf215546Sopenharmony_ci inst->mlen, 346bf215546Sopenharmony_ci 0, /* response len */ 347bf215546Sopenharmony_ci inst->offset, /* urb destination offset */ 348bf215546Sopenharmony_ci BRW_URB_SWIZZLE_INTERLEAVE); 349bf215546Sopenharmony_ci} 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_cistatic void 352bf215546Sopenharmony_cigenerate_gs_urb_write(struct brw_codegen *p, vec4_instruction *inst) 353bf215546Sopenharmony_ci{ 354bf215546Sopenharmony_ci struct brw_reg src = brw_message_reg(inst->base_mrf); 355bf215546Sopenharmony_ci brw_urb_WRITE(p, 356bf215546Sopenharmony_ci brw_null_reg(), /* dest */ 357bf215546Sopenharmony_ci inst->base_mrf, /* starting mrf reg nr */ 358bf215546Sopenharmony_ci src, 359bf215546Sopenharmony_ci inst->urb_write_flags, 360bf215546Sopenharmony_ci inst->mlen, 361bf215546Sopenharmony_ci 0, /* response len */ 362bf215546Sopenharmony_ci inst->offset, /* urb destination offset */ 363bf215546Sopenharmony_ci BRW_URB_SWIZZLE_INTERLEAVE); 364bf215546Sopenharmony_ci} 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_cistatic void 
367bf215546Sopenharmony_cigenerate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst) 368bf215546Sopenharmony_ci{ 369bf215546Sopenharmony_ci struct brw_reg src = brw_message_reg(inst->base_mrf); 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci /* We pass the temporary passed in src0 as the writeback register */ 372bf215546Sopenharmony_ci brw_urb_WRITE(p, 373bf215546Sopenharmony_ci inst->src[0].as_brw_reg(), /* dest */ 374bf215546Sopenharmony_ci inst->base_mrf, /* starting mrf reg nr */ 375bf215546Sopenharmony_ci src, 376bf215546Sopenharmony_ci BRW_URB_WRITE_ALLOCATE_COMPLETE, 377bf215546Sopenharmony_ci inst->mlen, 378bf215546Sopenharmony_ci 1, /* response len */ 379bf215546Sopenharmony_ci inst->offset, /* urb destination offset */ 380bf215546Sopenharmony_ci BRW_URB_SWIZZLE_INTERLEAVE); 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci /* Now put allocated urb handle in dst.0 */ 383bf215546Sopenharmony_ci brw_push_insn_state(p); 384bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 385bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 386bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(inst->dst.as_brw_reg(), 0), 387bf215546Sopenharmony_ci get_element_ud(inst->src[0].as_brw_reg(), 0)); 388bf215546Sopenharmony_ci brw_pop_insn_state(p); 389bf215546Sopenharmony_ci} 390bf215546Sopenharmony_ci 391bf215546Sopenharmony_cistatic void 392bf215546Sopenharmony_cigenerate_gs_thread_end(struct brw_codegen *p, vec4_instruction *inst) 393bf215546Sopenharmony_ci{ 394bf215546Sopenharmony_ci struct brw_reg src = brw_message_reg(inst->base_mrf); 395bf215546Sopenharmony_ci brw_urb_WRITE(p, 396bf215546Sopenharmony_ci brw_null_reg(), /* dest */ 397bf215546Sopenharmony_ci inst->base_mrf, /* starting mrf reg nr */ 398bf215546Sopenharmony_ci src, 399bf215546Sopenharmony_ci BRW_URB_WRITE_EOT | inst->urb_write_flags, 400bf215546Sopenharmony_ci inst->mlen, 401bf215546Sopenharmony_ci 0, /* response len */ 
402bf215546Sopenharmony_ci 0, /* urb destination offset */ 403bf215546Sopenharmony_ci BRW_URB_SWIZZLE_INTERLEAVE); 404bf215546Sopenharmony_ci} 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_cistatic void 407bf215546Sopenharmony_cigenerate_gs_set_write_offset(struct brw_codegen *p, 408bf215546Sopenharmony_ci struct brw_reg dst, 409bf215546Sopenharmony_ci struct brw_reg src0, 410bf215546Sopenharmony_ci struct brw_reg src1) 411bf215546Sopenharmony_ci{ 412bf215546Sopenharmony_ci /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message 413bf215546Sopenharmony_ci * Header: M0.3): 414bf215546Sopenharmony_ci * 415bf215546Sopenharmony_ci * Slot 0 Offset. This field, after adding to the Global Offset field 416bf215546Sopenharmony_ci * in the message descriptor, specifies the offset (in 256-bit units) 417bf215546Sopenharmony_ci * from the start of the URB entry, as referenced by URB Handle 0, at 418bf215546Sopenharmony_ci * which the data will be accessed. 419bf215546Sopenharmony_ci * 420bf215546Sopenharmony_ci * Similar text describes DWORD M0.4, which is slot 1 offset. 421bf215546Sopenharmony_ci * 422bf215546Sopenharmony_ci * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components 423bf215546Sopenharmony_ci * of the register for geometry shader invocations 0 and 1) by the 424bf215546Sopenharmony_ci * immediate value in src1, and store the result in DWORDs 3 and 4 of dst. 
425bf215546Sopenharmony_ci * 426bf215546Sopenharmony_ci * We can do this with the following EU instruction: 427bf215546Sopenharmony_ci * 428bf215546Sopenharmony_ci * mul(2) dst.3<1>UD src0<8;2,4>UD src1<...>UW { Align1 WE_all } 429bf215546Sopenharmony_ci */ 430bf215546Sopenharmony_ci brw_push_insn_state(p); 431bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 432bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 433bf215546Sopenharmony_ci assert(p->devinfo->ver >= 7 && 434bf215546Sopenharmony_ci src1.file == BRW_IMMEDIATE_VALUE && 435bf215546Sopenharmony_ci src1.type == BRW_REGISTER_TYPE_UD && 436bf215546Sopenharmony_ci src1.ud <= USHRT_MAX); 437bf215546Sopenharmony_ci if (src0.file == BRW_IMMEDIATE_VALUE) { 438bf215546Sopenharmony_ci brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3), 439bf215546Sopenharmony_ci brw_imm_ud(src0.ud * src1.ud)); 440bf215546Sopenharmony_ci } else { 441bf215546Sopenharmony_ci if (src1.file == BRW_IMMEDIATE_VALUE) { 442bf215546Sopenharmony_ci src1 = brw_imm_uw(src1.ud); 443bf215546Sopenharmony_ci } 444bf215546Sopenharmony_ci brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), 445bf215546Sopenharmony_ci retype(src1, BRW_REGISTER_TYPE_UW)); 446bf215546Sopenharmony_ci } 447bf215546Sopenharmony_ci brw_pop_insn_state(p); 448bf215546Sopenharmony_ci} 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_cistatic void 451bf215546Sopenharmony_cigenerate_gs_set_vertex_count(struct brw_codegen *p, 452bf215546Sopenharmony_ci struct brw_reg dst, 453bf215546Sopenharmony_ci struct brw_reg src) 454bf215546Sopenharmony_ci{ 455bf215546Sopenharmony_ci brw_push_insn_state(p); 456bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_ci /* If we think of the src and dst registers as composed of 8 DWORDs each, 459bf215546Sopenharmony_ci * we want to pick up the contents of DWORDs 0 and 4 from src, truncate 460bf215546Sopenharmony_ci * 
them to WORDs, and then pack them into DWORD 2 of dst. 461bf215546Sopenharmony_ci * 462bf215546Sopenharmony_ci * It's easier to get the EU to do this if we think of the src and dst 463bf215546Sopenharmony_ci * registers as composed of 16 WORDS each; then, we want to pick up the 464bf215546Sopenharmony_ci * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5 465bf215546Sopenharmony_ci * of dst. 466bf215546Sopenharmony_ci * 467bf215546Sopenharmony_ci * We can do that by the following EU instruction: 468bf215546Sopenharmony_ci * 469bf215546Sopenharmony_ci * mov (2) dst.4<1>:uw src<8;1,0>:uw { Align1, Q1, NoMask } 470bf215546Sopenharmony_ci */ 471bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 472bf215546Sopenharmony_ci brw_MOV(p, 473bf215546Sopenharmony_ci suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4), 474bf215546Sopenharmony_ci stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0)); 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci brw_pop_insn_state(p); 477bf215546Sopenharmony_ci} 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_cistatic void 480bf215546Sopenharmony_cigenerate_gs_svb_write(struct brw_codegen *p, 481bf215546Sopenharmony_ci vec4_instruction *inst, 482bf215546Sopenharmony_ci struct brw_reg dst, 483bf215546Sopenharmony_ci struct brw_reg src0, 484bf215546Sopenharmony_ci struct brw_reg src1) 485bf215546Sopenharmony_ci{ 486bf215546Sopenharmony_ci int binding = inst->sol_binding; 487bf215546Sopenharmony_ci bool final_write = inst->sol_final_write; 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci brw_push_insn_state(p); 490bf215546Sopenharmony_ci brw_set_default_exec_size(p, BRW_EXECUTE_4); 491bf215546Sopenharmony_ci /* Copy Vertex data into M0.x */ 492bf215546Sopenharmony_ci brw_MOV(p, stride(dst, 4, 4, 1), 493bf215546Sopenharmony_ci stride(retype(src0, BRW_REGISTER_TYPE_UD), 4, 4, 1)); 494bf215546Sopenharmony_ci brw_pop_insn_state(p); 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci 
brw_push_insn_state(p); 497bf215546Sopenharmony_ci /* Send SVB Write */ 498bf215546Sopenharmony_ci brw_svb_write(p, 499bf215546Sopenharmony_ci final_write ? src1 : brw_null_reg(), /* dest == src1 */ 500bf215546Sopenharmony_ci 1, /* msg_reg_nr */ 501bf215546Sopenharmony_ci dst, /* src0 == previous dst */ 502bf215546Sopenharmony_ci BRW_GFX6_SOL_BINDING_START + binding, /* binding_table_index */ 503bf215546Sopenharmony_ci final_write); /* send_commit_msg */ 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci /* Finally, wait for the write commit to occur so that we can proceed to 506bf215546Sopenharmony_ci * other things safely. 507bf215546Sopenharmony_ci * 508bf215546Sopenharmony_ci * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: 509bf215546Sopenharmony_ci * 510bf215546Sopenharmony_ci * The write commit does not modify the destination register, but 511bf215546Sopenharmony_ci * merely clears the dependency associated with the destination 512bf215546Sopenharmony_ci * register. Thus, a simple “mov” instruction using the register as a 513bf215546Sopenharmony_ci * source is sufficient to wait for the write commit to occur. 
514bf215546Sopenharmony_ci */ 515bf215546Sopenharmony_ci if (final_write) { 516bf215546Sopenharmony_ci brw_MOV(p, src1, src1); 517bf215546Sopenharmony_ci } 518bf215546Sopenharmony_ci brw_pop_insn_state(p); 519bf215546Sopenharmony_ci} 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_cistatic void 522bf215546Sopenharmony_cigenerate_gs_svb_set_destination_index(struct brw_codegen *p, 523bf215546Sopenharmony_ci vec4_instruction *inst, 524bf215546Sopenharmony_ci struct brw_reg dst, 525bf215546Sopenharmony_ci struct brw_reg src) 526bf215546Sopenharmony_ci{ 527bf215546Sopenharmony_ci int vertex = inst->sol_vertex; 528bf215546Sopenharmony_ci brw_push_insn_state(p); 529bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 530bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 531bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(dst, 5), get_element_ud(src, vertex)); 532bf215546Sopenharmony_ci brw_pop_insn_state(p); 533bf215546Sopenharmony_ci} 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_cistatic void 536bf215546Sopenharmony_cigenerate_gs_set_dword_2(struct brw_codegen *p, 537bf215546Sopenharmony_ci struct brw_reg dst, 538bf215546Sopenharmony_ci struct brw_reg src) 539bf215546Sopenharmony_ci{ 540bf215546Sopenharmony_ci brw_push_insn_state(p); 541bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 542bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 543bf215546Sopenharmony_ci brw_MOV(p, suboffset(vec1(dst), 2), suboffset(vec1(src), 0)); 544bf215546Sopenharmony_ci brw_pop_insn_state(p); 545bf215546Sopenharmony_ci} 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_cistatic void 548bf215546Sopenharmony_cigenerate_gs_prepare_channel_masks(struct brw_codegen *p, 549bf215546Sopenharmony_ci struct brw_reg dst) 550bf215546Sopenharmony_ci{ 551bf215546Sopenharmony_ci /* We want to left shift just DWORD 4 (the x component belonging to the 552bf215546Sopenharmony_ci * second geometry shader 
invocation) by 4 bits. So generate the 553bf215546Sopenharmony_ci * instruction: 554bf215546Sopenharmony_ci * 555bf215546Sopenharmony_ci * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all } 556bf215546Sopenharmony_ci */ 557bf215546Sopenharmony_ci dst = suboffset(vec1(dst), 4); 558bf215546Sopenharmony_ci brw_push_insn_state(p); 559bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 560bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 561bf215546Sopenharmony_ci brw_SHL(p, dst, dst, brw_imm_ud(4)); 562bf215546Sopenharmony_ci brw_pop_insn_state(p); 563bf215546Sopenharmony_ci} 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_cistatic void 566bf215546Sopenharmony_cigenerate_gs_set_channel_masks(struct brw_codegen *p, 567bf215546Sopenharmony_ci struct brw_reg dst, 568bf215546Sopenharmony_ci struct brw_reg src) 569bf215546Sopenharmony_ci{ 570bf215546Sopenharmony_ci /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message 571bf215546Sopenharmony_ci * Header: M0.5): 572bf215546Sopenharmony_ci * 573bf215546Sopenharmony_ci * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask 574bf215546Sopenharmony_ci * 575bf215546Sopenharmony_ci * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1 576bf215546Sopenharmony_ci * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls 577bf215546Sopenharmony_ci * Vertex 0 DATA[7]. This bit is ANDed with the corresponding 578bf215546Sopenharmony_ci * channel enable to determine the final channel enable. For the 579bf215546Sopenharmony_ci * URB_READ_OWORD & URB_READ_HWORD messages, when final channel 580bf215546Sopenharmony_ci * enable is 1 it indicates that Vertex 1 DATA [3] will be included 581bf215546Sopenharmony_ci * in the writeback message. For the URB_WRITE_OWORD & 582bf215546Sopenharmony_ci * URB_WRITE_HWORD messages, when final channel enable is 1 it 583bf215546Sopenharmony_ci * indicates that Vertex 1 DATA [3] will be written to the surface. 
584bf215546Sopenharmony_ci * 585bf215546Sopenharmony_ci * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included 586bf215546Sopenharmony_ci * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included 587bf215546Sopenharmony_ci * 588bf215546Sopenharmony_ci * 14 Vertex 1 DATA [2] Channel Mask 589bf215546Sopenharmony_ci * 13 Vertex 1 DATA [1] Channel Mask 590bf215546Sopenharmony_ci * 12 Vertex 1 DATA [0] Channel Mask 591bf215546Sopenharmony_ci * 11 Vertex 0 DATA [3] Channel Mask 592bf215546Sopenharmony_ci * 10 Vertex 0 DATA [2] Channel Mask 593bf215546Sopenharmony_ci * 9 Vertex 0 DATA [1] Channel Mask 594bf215546Sopenharmony_ci * 8 Vertex 0 DATA [0] Channel Mask 595bf215546Sopenharmony_ci * 596bf215546Sopenharmony_ci * (This is from a section of the PRM that is agnostic to the particular 597bf215546Sopenharmony_ci * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to 598bf215546Sopenharmony_ci * geometry shader invocations 0 and 1, respectively). Since we have the 599bf215546Sopenharmony_ci * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0, 600bf215546Sopenharmony_ci * and the enable flags for geometry shader invocation 1 in bits 7:0 of 601bf215546Sopenharmony_ci * DWORD 4, we just need to OR them together and store the result in bits 602bf215546Sopenharmony_ci * 15:8 of DWORD 5. 603bf215546Sopenharmony_ci * 604bf215546Sopenharmony_ci * It's easier to get the EU to do this if we think of the src and dst 605bf215546Sopenharmony_ci * registers as composed of 32 bytes each; then, we want to pick up the 606bf215546Sopenharmony_ci * contents of bytes 0 and 16 from src, OR them together, and store them in 607bf215546Sopenharmony_ci * byte 21. 
608bf215546Sopenharmony_ci * 609bf215546Sopenharmony_ci * We can do that by the following EU instruction: 610bf215546Sopenharmony_ci * 611bf215546Sopenharmony_ci * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all } 612bf215546Sopenharmony_ci * 613bf215546Sopenharmony_ci * Note: this relies on the source register having zeros in (a) bits 7:4 of 614bf215546Sopenharmony_ci * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the 615bf215546Sopenharmony_ci * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which 616bf215546Sopenharmony_ci * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to 617bf215546Sopenharmony_ci * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to 618bf215546Sopenharmony_ci * contain valid channel mask values (which are in the range 0x0-0xf). 619bf215546Sopenharmony_ci */ 620bf215546Sopenharmony_ci dst = retype(dst, BRW_REGISTER_TYPE_UB); 621bf215546Sopenharmony_ci src = retype(src, BRW_REGISTER_TYPE_UB); 622bf215546Sopenharmony_ci brw_push_insn_state(p); 623bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 624bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 625bf215546Sopenharmony_ci brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16)); 626bf215546Sopenharmony_ci brw_pop_insn_state(p); 627bf215546Sopenharmony_ci} 628bf215546Sopenharmony_ci 629bf215546Sopenharmony_cistatic void 630bf215546Sopenharmony_cigenerate_gs_get_instance_id(struct brw_codegen *p, 631bf215546Sopenharmony_ci struct brw_reg dst) 632bf215546Sopenharmony_ci{ 633bf215546Sopenharmony_ci /* We want to right shift R0.0 & R0.1 by GFX7_GS_PAYLOAD_INSTANCE_ID_SHIFT 634bf215546Sopenharmony_ci * and store into dst.0 & dst.4. 
So generate the instruction: 635bf215546Sopenharmony_ci * 636bf215546Sopenharmony_ci * shr(8) dst<1> R0<1,4,0> GFX7_GS_PAYLOAD_INSTANCE_ID_SHIFT { align1 WE_normal 1Q } 637bf215546Sopenharmony_ci */ 638bf215546Sopenharmony_ci brw_push_insn_state(p); 639bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 640bf215546Sopenharmony_ci dst = retype(dst, BRW_REGISTER_TYPE_UD); 641bf215546Sopenharmony_ci struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 642bf215546Sopenharmony_ci brw_SHR(p, dst, stride(r0, 1, 4, 0), 643bf215546Sopenharmony_ci brw_imm_ud(GFX7_GS_PAYLOAD_INSTANCE_ID_SHIFT)); 644bf215546Sopenharmony_ci brw_pop_insn_state(p); 645bf215546Sopenharmony_ci} 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_cistatic void 648bf215546Sopenharmony_cigenerate_gs_ff_sync_set_primitives(struct brw_codegen *p, 649bf215546Sopenharmony_ci struct brw_reg dst, 650bf215546Sopenharmony_ci struct brw_reg src0, 651bf215546Sopenharmony_ci struct brw_reg src1, 652bf215546Sopenharmony_ci struct brw_reg src2) 653bf215546Sopenharmony_ci{ 654bf215546Sopenharmony_ci brw_push_insn_state(p); 655bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 656bf215546Sopenharmony_ci /* Save src0 data in 16:31 bits of dst.0 */ 657bf215546Sopenharmony_ci brw_AND(p, suboffset(vec1(dst), 0), suboffset(vec1(src0), 0), 658bf215546Sopenharmony_ci brw_imm_ud(0xffffu)); 659bf215546Sopenharmony_ci brw_SHL(p, suboffset(vec1(dst), 0), suboffset(vec1(dst), 0), brw_imm_ud(16)); 660bf215546Sopenharmony_ci /* Save src1 data in 0:15 bits of dst.0 */ 661bf215546Sopenharmony_ci brw_AND(p, suboffset(vec1(src2), 0), suboffset(vec1(src1), 0), 662bf215546Sopenharmony_ci brw_imm_ud(0xffffu)); 663bf215546Sopenharmony_ci brw_OR(p, suboffset(vec1(dst), 0), 664bf215546Sopenharmony_ci suboffset(vec1(dst), 0), 665bf215546Sopenharmony_ci suboffset(vec1(src2), 0)); 666bf215546Sopenharmony_ci brw_pop_insn_state(p); 667bf215546Sopenharmony_ci} 668bf215546Sopenharmony_ci 
669bf215546Sopenharmony_cistatic void 670bf215546Sopenharmony_cigenerate_gs_ff_sync(struct brw_codegen *p, 671bf215546Sopenharmony_ci vec4_instruction *inst, 672bf215546Sopenharmony_ci struct brw_reg dst, 673bf215546Sopenharmony_ci struct brw_reg src0, 674bf215546Sopenharmony_ci struct brw_reg src1) 675bf215546Sopenharmony_ci{ 676bf215546Sopenharmony_ci /* This opcode uses an implied MRF register for: 677bf215546Sopenharmony_ci * - the header of the ff_sync message. And as such it is expected to be 678bf215546Sopenharmony_ci * initialized to r0 before calling here. 679bf215546Sopenharmony_ci * - the destination where we will write the allocated URB handle. 680bf215546Sopenharmony_ci */ 681bf215546Sopenharmony_ci struct brw_reg header = 682bf215546Sopenharmony_ci retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD); 683bf215546Sopenharmony_ci 684bf215546Sopenharmony_ci /* Overwrite dword 0 of the header (SO vertices to write) and 685bf215546Sopenharmony_ci * dword 1 (number of primitives written). 
686bf215546Sopenharmony_ci */ 687bf215546Sopenharmony_ci brw_push_insn_state(p); 688bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 689bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 690bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(header, 0), get_element_ud(src1, 0)); 691bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(header, 1), get_element_ud(src0, 0)); 692bf215546Sopenharmony_ci brw_pop_insn_state(p); 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci /* Allocate URB handle in dst */ 695bf215546Sopenharmony_ci brw_ff_sync(p, 696bf215546Sopenharmony_ci dst, 697bf215546Sopenharmony_ci 0, 698bf215546Sopenharmony_ci header, 699bf215546Sopenharmony_ci 1, /* allocate */ 700bf215546Sopenharmony_ci 1, /* response length */ 701bf215546Sopenharmony_ci 0 /* eot */); 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci /* Now put allocated urb handle in header.0 */ 704bf215546Sopenharmony_ci brw_push_insn_state(p); 705bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 706bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 707bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(header, 0), get_element_ud(dst, 0)); 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_ci /* src1 is not an immediate when we use transform feedback */ 710bf215546Sopenharmony_ci if (src1.file != BRW_IMMEDIATE_VALUE) { 711bf215546Sopenharmony_ci brw_set_default_exec_size(p, BRW_EXECUTE_4); 712bf215546Sopenharmony_ci brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1)); 713bf215546Sopenharmony_ci } 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci brw_pop_insn_state(p); 716bf215546Sopenharmony_ci} 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_cistatic void 719bf215546Sopenharmony_cigenerate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst) 720bf215546Sopenharmony_ci{ 721bf215546Sopenharmony_ci /* In gfx6, PrimitiveID is delivered in R0.1 of the payload */ 
722bf215546Sopenharmony_ci struct brw_reg src = brw_vec8_grf(0, 0); 723bf215546Sopenharmony_ci brw_push_insn_state(p); 724bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 725bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 726bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src, 1)); 727bf215546Sopenharmony_ci brw_pop_insn_state(p); 728bf215546Sopenharmony_ci} 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_cistatic void 731bf215546Sopenharmony_cigenerate_tcs_get_instance_id(struct brw_codegen *p, struct brw_reg dst) 732bf215546Sopenharmony_ci{ 733bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 734bf215546Sopenharmony_ci const bool ivb = devinfo->platform == INTEL_PLATFORM_IVB || 735bf215546Sopenharmony_ci devinfo->platform == INTEL_PLATFORM_BYT; 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci /* "Instance Count" comes as part of the payload in r0.2 bits 23:17. 738bf215546Sopenharmony_ci * 739bf215546Sopenharmony_ci * Since we operate in SIMD4x2 mode, we need run half as many threads 740bf215546Sopenharmony_ci * as necessary. So we assign (2i + 1, 2i) as the thread counts. We 741bf215546Sopenharmony_ci * shift right by one less to accomplish the multiplication by two. 742bf215546Sopenharmony_ci */ 743bf215546Sopenharmony_ci dst = retype(dst, BRW_REGISTER_TYPE_UD); 744bf215546Sopenharmony_ci struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci brw_push_insn_state(p); 747bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci const int mask = ivb ? INTEL_MASK(22, 16) : INTEL_MASK(23, 17); 750bf215546Sopenharmony_ci const int shift = ivb ? 
16 : 17; 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci brw_AND(p, get_element_ud(dst, 0), get_element_ud(r0, 2), brw_imm_ud(mask)); 753bf215546Sopenharmony_ci brw_SHR(p, get_element_ud(dst, 0), get_element_ud(dst, 0), 754bf215546Sopenharmony_ci brw_imm_ud(shift - 1)); 755bf215546Sopenharmony_ci brw_ADD(p, get_element_ud(dst, 4), get_element_ud(dst, 0), brw_imm_ud(1)); 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci brw_pop_insn_state(p); 758bf215546Sopenharmony_ci} 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_cistatic void 761bf215546Sopenharmony_cigenerate_tcs_urb_write(struct brw_codegen *p, 762bf215546Sopenharmony_ci vec4_instruction *inst, 763bf215546Sopenharmony_ci struct brw_reg urb_header) 764bf215546Sopenharmony_ci{ 765bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); 768bf215546Sopenharmony_ci brw_set_dest(p, send, brw_null_reg()); 769bf215546Sopenharmony_ci brw_set_src0(p, send, urb_header); 770bf215546Sopenharmony_ci brw_set_desc(p, send, brw_message_desc(devinfo, inst->mlen, 0, true)); 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci brw_inst_set_sfid(devinfo, send, BRW_SFID_URB); 773bf215546Sopenharmony_ci brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_WRITE_OWORD); 774bf215546Sopenharmony_ci brw_inst_set_urb_global_offset(devinfo, send, inst->offset); 775bf215546Sopenharmony_ci if (inst->urb_write_flags & BRW_URB_WRITE_EOT) { 776bf215546Sopenharmony_ci brw_inst_set_eot(devinfo, send, 1); 777bf215546Sopenharmony_ci } else { 778bf215546Sopenharmony_ci brw_inst_set_urb_per_slot_offset(devinfo, send, 1); 779bf215546Sopenharmony_ci brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE); 780bf215546Sopenharmony_ci } 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci /* what happens to swizzles? 
*/ 783bf215546Sopenharmony_ci} 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_cistatic void 787bf215546Sopenharmony_cigenerate_tcs_input_urb_offsets(struct brw_codegen *p, 788bf215546Sopenharmony_ci struct brw_reg dst, 789bf215546Sopenharmony_ci struct brw_reg vertex, 790bf215546Sopenharmony_ci struct brw_reg offset) 791bf215546Sopenharmony_ci{ 792bf215546Sopenharmony_ci /* Generates an URB read/write message header for HS/DS operation. 793bf215546Sopenharmony_ci * Inputs are a vertex index, and a byte offset from the beginning of 794bf215546Sopenharmony_ci * the vertex. */ 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_ci /* If `vertex` is not an immediate, we clobber a0.0 */ 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci assert(vertex.file == BRW_IMMEDIATE_VALUE || vertex.file == BRW_GENERAL_REGISTER_FILE); 799bf215546Sopenharmony_ci assert(vertex.type == BRW_REGISTER_TYPE_UD || vertex.type == BRW_REGISTER_TYPE_D); 800bf215546Sopenharmony_ci 801bf215546Sopenharmony_ci assert(dst.file == BRW_GENERAL_REGISTER_FILE); 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci brw_push_insn_state(p); 804bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 805bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 806bf215546Sopenharmony_ci brw_MOV(p, dst, brw_imm_ud(0)); 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci /* m0.5 bits 8-15 are channel enables */ 809bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00)); 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci /* m0.0-0.1: URB handles */ 812bf215546Sopenharmony_ci if (vertex.file == BRW_IMMEDIATE_VALUE) { 813bf215546Sopenharmony_ci uint32_t vertex_index = vertex.ud; 814bf215546Sopenharmony_ci struct brw_reg index_reg = brw_vec1_grf( 815bf215546Sopenharmony_ci 1 + (vertex_index >> 3), vertex_index & 7); 816bf215546Sopenharmony_ci 817bf215546Sopenharmony_ci brw_MOV(p, vec2(get_element_ud(dst, 0)), 
818bf215546Sopenharmony_ci retype(index_reg, BRW_REGISTER_TYPE_UD)); 819bf215546Sopenharmony_ci } else { 820bf215546Sopenharmony_ci /* Use indirect addressing. ICP Handles are DWords (single channels 821bf215546Sopenharmony_ci * of a register) and start at g1.0. 822bf215546Sopenharmony_ci * 823bf215546Sopenharmony_ci * In order to start our region at g1.0, we add 8 to the vertex index, 824bf215546Sopenharmony_ci * effectively skipping over the 8 channels in g0.0. This gives us a 825bf215546Sopenharmony_ci * DWord offset to the ICP Handle. 826bf215546Sopenharmony_ci * 827bf215546Sopenharmony_ci * Indirect addressing works in terms of bytes, so we then multiply 828bf215546Sopenharmony_ci * the DWord offset by 4 (by shifting left by 2). 829bf215546Sopenharmony_ci */ 830bf215546Sopenharmony_ci struct brw_reg addr = brw_address_reg(0); 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci /* bottom half: m0.0 = g[1.0 + vertex.0]UD */ 833bf215546Sopenharmony_ci brw_ADD(p, addr, retype(get_element_ud(vertex, 0), BRW_REGISTER_TYPE_UW), 834bf215546Sopenharmony_ci brw_imm_uw(0x8)); 835bf215546Sopenharmony_ci brw_SHL(p, addr, addr, brw_imm_uw(2)); 836bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(dst, 0), deref_1ud(brw_indirect(0, 0), 0)); 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci /* top half: m0.1 = g[1.0 + vertex.4]UD */ 839bf215546Sopenharmony_ci brw_ADD(p, addr, retype(get_element_ud(vertex, 4), BRW_REGISTER_TYPE_UW), 840bf215546Sopenharmony_ci brw_imm_uw(0x8)); 841bf215546Sopenharmony_ci brw_SHL(p, addr, addr, brw_imm_uw(2)); 842bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(dst, 1), deref_1ud(brw_indirect(0, 0), 0)); 843bf215546Sopenharmony_ci } 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_ci /* m0.3-0.4: 128bit-granular offsets into the URB from the handles */ 846bf215546Sopenharmony_ci if (offset.file != ARF) 847bf215546Sopenharmony_ci brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0)); 848bf215546Sopenharmony_ci 
849bf215546Sopenharmony_ci brw_pop_insn_state(p); 850bf215546Sopenharmony_ci} 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_cistatic void 854bf215546Sopenharmony_cigenerate_tcs_output_urb_offsets(struct brw_codegen *p, 855bf215546Sopenharmony_ci struct brw_reg dst, 856bf215546Sopenharmony_ci struct brw_reg write_mask, 857bf215546Sopenharmony_ci struct brw_reg offset) 858bf215546Sopenharmony_ci{ 859bf215546Sopenharmony_ci /* Generates an URB read/write message header for HS/DS operation, for the patch URB entry. */ 860bf215546Sopenharmony_ci assert(dst.file == BRW_GENERAL_REGISTER_FILE || dst.file == BRW_MESSAGE_REGISTER_FILE); 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci assert(write_mask.file == BRW_IMMEDIATE_VALUE); 863bf215546Sopenharmony_ci assert(write_mask.type == BRW_REGISTER_TYPE_UD); 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci brw_push_insn_state(p); 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 868bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 869bf215546Sopenharmony_ci brw_MOV(p, dst, brw_imm_ud(0)); 870bf215546Sopenharmony_ci 871bf215546Sopenharmony_ci unsigned mask = write_mask.ud; 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_ci /* m0.5 bits 15:12 and 11:8 are channel enables */ 874bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud((mask << 8) | (mask << 12))); 875bf215546Sopenharmony_ci 876bf215546Sopenharmony_ci /* HS patch URB handle is delivered in r0.0 */ 877bf215546Sopenharmony_ci struct brw_reg urb_handle = brw_vec1_grf(0, 0); 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci /* m0.0-0.1: URB handles */ 880bf215546Sopenharmony_ci brw_MOV(p, vec2(get_element_ud(dst, 0)), 881bf215546Sopenharmony_ci retype(urb_handle, BRW_REGISTER_TYPE_UD)); 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci /* m0.3-0.4: 128bit-granular offsets into the URB from the handles */ 884bf215546Sopenharmony_ci 
if (offset.file != ARF) 885bf215546Sopenharmony_ci brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0)); 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci brw_pop_insn_state(p); 888bf215546Sopenharmony_ci} 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_cistatic void 891bf215546Sopenharmony_cigenerate_tes_create_input_read_header(struct brw_codegen *p, 892bf215546Sopenharmony_ci struct brw_reg dst) 893bf215546Sopenharmony_ci{ 894bf215546Sopenharmony_ci brw_push_insn_state(p); 895bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 896bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_ci /* Initialize the register to 0 */ 899bf215546Sopenharmony_ci brw_MOV(p, dst, brw_imm_ud(0)); 900bf215546Sopenharmony_ci 901bf215546Sopenharmony_ci /* Enable all the channels in m0.5 bits 15:8 */ 902bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00)); 903bf215546Sopenharmony_ci 904bf215546Sopenharmony_ci /* Copy g1.3 (the patch URB handle) to m0.0 and m0.1. For safety, 905bf215546Sopenharmony_ci * mask out irrelevant "Reserved" bits, as they're not marked MBZ. 
906bf215546Sopenharmony_ci */ 907bf215546Sopenharmony_ci brw_AND(p, vec2(get_element_ud(dst, 0)), 908bf215546Sopenharmony_ci retype(brw_vec1_grf(1, 3), BRW_REGISTER_TYPE_UD), 909bf215546Sopenharmony_ci brw_imm_ud(0x1fff)); 910bf215546Sopenharmony_ci brw_pop_insn_state(p); 911bf215546Sopenharmony_ci} 912bf215546Sopenharmony_ci 913bf215546Sopenharmony_cistatic void 914bf215546Sopenharmony_cigenerate_tes_add_indirect_urb_offset(struct brw_codegen *p, 915bf215546Sopenharmony_ci struct brw_reg dst, 916bf215546Sopenharmony_ci struct brw_reg header, 917bf215546Sopenharmony_ci struct brw_reg offset) 918bf215546Sopenharmony_ci{ 919bf215546Sopenharmony_ci brw_push_insn_state(p); 920bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 921bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci brw_MOV(p, dst, header); 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci /* Uniforms will have a stride <0;4,1>, and we need to convert to <0;1,0>. 926bf215546Sopenharmony_ci * Other values get <4;1,0>. 
927bf215546Sopenharmony_ci */ 928bf215546Sopenharmony_ci struct brw_reg restrided_offset; 929bf215546Sopenharmony_ci if (offset.vstride == BRW_VERTICAL_STRIDE_0 && 930bf215546Sopenharmony_ci offset.width == BRW_WIDTH_4 && 931bf215546Sopenharmony_ci offset.hstride == BRW_HORIZONTAL_STRIDE_1) { 932bf215546Sopenharmony_ci restrided_offset = stride(offset, 0, 1, 0); 933bf215546Sopenharmony_ci } else { 934bf215546Sopenharmony_ci restrided_offset = stride(offset, 4, 1, 0); 935bf215546Sopenharmony_ci } 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci /* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */ 938bf215546Sopenharmony_ci brw_MOV(p, vec2(get_element_ud(dst, 3)), restrided_offset); 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_ci brw_pop_insn_state(p); 941bf215546Sopenharmony_ci} 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_cistatic void 944bf215546Sopenharmony_cigenerate_vec4_urb_read(struct brw_codegen *p, 945bf215546Sopenharmony_ci vec4_instruction *inst, 946bf215546Sopenharmony_ci struct brw_reg dst, 947bf215546Sopenharmony_ci struct brw_reg header) 948bf215546Sopenharmony_ci{ 949bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_ci assert(header.file == BRW_GENERAL_REGISTER_FILE); 952bf215546Sopenharmony_ci assert(header.type == BRW_REGISTER_TYPE_UD); 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_ci brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); 955bf215546Sopenharmony_ci brw_set_dest(p, send, dst); 956bf215546Sopenharmony_ci brw_set_src0(p, send, header); 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci brw_set_desc(p, send, brw_message_desc(devinfo, 1, 1, true)); 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_ci brw_inst_set_sfid(devinfo, send, BRW_SFID_URB); 961bf215546Sopenharmony_ci brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD); 962bf215546Sopenharmony_ci brw_inst_set_urb_swizzle_control(devinfo, send, 
BRW_URB_SWIZZLE_INTERLEAVE); 963bf215546Sopenharmony_ci brw_inst_set_urb_per_slot_offset(devinfo, send, 1); 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci brw_inst_set_urb_global_offset(devinfo, send, inst->offset); 966bf215546Sopenharmony_ci} 967bf215546Sopenharmony_ci 968bf215546Sopenharmony_cistatic void 969bf215546Sopenharmony_cigenerate_tcs_release_input(struct brw_codegen *p, 970bf215546Sopenharmony_ci struct brw_reg header, 971bf215546Sopenharmony_ci struct brw_reg vertex, 972bf215546Sopenharmony_ci struct brw_reg is_unpaired) 973bf215546Sopenharmony_ci{ 974bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci assert(vertex.file == BRW_IMMEDIATE_VALUE); 977bf215546Sopenharmony_ci assert(vertex.type == BRW_REGISTER_TYPE_UD); 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci /* m0.0-0.1: URB handles */ 980bf215546Sopenharmony_ci struct brw_reg urb_handles = 981bf215546Sopenharmony_ci retype(brw_vec2_grf(1 + (vertex.ud >> 3), vertex.ud & 7), 982bf215546Sopenharmony_ci BRW_REGISTER_TYPE_UD); 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci brw_push_insn_state(p); 985bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 986bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 987bf215546Sopenharmony_ci brw_MOV(p, header, brw_imm_ud(0)); 988bf215546Sopenharmony_ci brw_MOV(p, vec2(get_element_ud(header, 0)), urb_handles); 989bf215546Sopenharmony_ci brw_pop_insn_state(p); 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); 992bf215546Sopenharmony_ci brw_set_dest(p, send, brw_null_reg()); 993bf215546Sopenharmony_ci brw_set_src0(p, send, header); 994bf215546Sopenharmony_ci brw_set_desc(p, send, brw_message_desc(devinfo, 1, 0, true)); 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci brw_inst_set_sfid(devinfo, send, BRW_SFID_URB); 997bf215546Sopenharmony_ci 
brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD); 998bf215546Sopenharmony_ci brw_inst_set_urb_complete(devinfo, send, 1); 999bf215546Sopenharmony_ci brw_inst_set_urb_swizzle_control(devinfo, send, is_unpaired.ud ? 1000bf215546Sopenharmony_ci BRW_URB_SWIZZLE_NONE : 1001bf215546Sopenharmony_ci BRW_URB_SWIZZLE_INTERLEAVE); 1002bf215546Sopenharmony_ci} 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_cistatic void 1005bf215546Sopenharmony_cigenerate_tcs_thread_end(struct brw_codegen *p, vec4_instruction *inst) 1006bf215546Sopenharmony_ci{ 1007bf215546Sopenharmony_ci struct brw_reg header = brw_message_reg(inst->base_mrf); 1008bf215546Sopenharmony_ci 1009bf215546Sopenharmony_ci brw_push_insn_state(p); 1010bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1011bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 1012bf215546Sopenharmony_ci brw_MOV(p, header, brw_imm_ud(0)); 1013bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(header, 5), brw_imm_ud(WRITEMASK_X << 8)); 1014bf215546Sopenharmony_ci brw_MOV(p, get_element_ud(header, 0), 1015bf215546Sopenharmony_ci retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1016bf215546Sopenharmony_ci brw_MOV(p, brw_message_reg(inst->base_mrf + 1), brw_imm_ud(0u)); 1017bf215546Sopenharmony_ci brw_pop_insn_state(p); 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci brw_urb_WRITE(p, 1020bf215546Sopenharmony_ci brw_null_reg(), /* dest */ 1021bf215546Sopenharmony_ci inst->base_mrf, /* starting mrf reg nr */ 1022bf215546Sopenharmony_ci header, 1023bf215546Sopenharmony_ci BRW_URB_WRITE_EOT | BRW_URB_WRITE_OWORD | 1024bf215546Sopenharmony_ci BRW_URB_WRITE_USE_CHANNEL_MASKS, 1025bf215546Sopenharmony_ci inst->mlen, 1026bf215546Sopenharmony_ci 0, /* response len */ 1027bf215546Sopenharmony_ci 0, /* urb destination offset */ 1028bf215546Sopenharmony_ci 0); 1029bf215546Sopenharmony_ci} 1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_cistatic void 
1032bf215546Sopenharmony_cigenerate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst) 1033bf215546Sopenharmony_ci{ 1034bf215546Sopenharmony_ci brw_push_insn_state(p); 1035bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1036bf215546Sopenharmony_ci brw_MOV(p, dst, retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_D)); 1037bf215546Sopenharmony_ci brw_pop_insn_state(p); 1038bf215546Sopenharmony_ci} 1039bf215546Sopenharmony_ci 1040bf215546Sopenharmony_cistatic void 1041bf215546Sopenharmony_cigenerate_tcs_get_primitive_id(struct brw_codegen *p, struct brw_reg dst) 1042bf215546Sopenharmony_ci{ 1043bf215546Sopenharmony_ci brw_push_insn_state(p); 1044bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1045bf215546Sopenharmony_ci brw_MOV(p, dst, retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD)); 1046bf215546Sopenharmony_ci brw_pop_insn_state(p); 1047bf215546Sopenharmony_ci} 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_cistatic void 1050bf215546Sopenharmony_cigenerate_tcs_create_barrier_header(struct brw_codegen *p, 1051bf215546Sopenharmony_ci struct brw_vue_prog_data *prog_data, 1052bf215546Sopenharmony_ci struct brw_reg dst) 1053bf215546Sopenharmony_ci{ 1054bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 1055bf215546Sopenharmony_ci const bool ivb = devinfo->platform == INTEL_PLATFORM_IVB || 1056bf215546Sopenharmony_ci devinfo->platform == INTEL_PLATFORM_BYT; 1057bf215546Sopenharmony_ci struct brw_reg m0_2 = get_element_ud(dst, 2); 1058bf215546Sopenharmony_ci unsigned instances = ((struct brw_tcs_prog_data *) prog_data)->instances; 1059bf215546Sopenharmony_ci 1060bf215546Sopenharmony_ci brw_push_insn_state(p); 1061bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1062bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_ci /* Zero the message header */ 1065bf215546Sopenharmony_ci brw_MOV(p, 
retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)); 1066bf215546Sopenharmony_ci 1067bf215546Sopenharmony_ci /* Copy "Barrier ID" from r0.2, bits 16:13 (Gfx7.5+) or 15:12 (Gfx7) */ 1068bf215546Sopenharmony_ci brw_AND(p, m0_2, 1069bf215546Sopenharmony_ci retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), 1070bf215546Sopenharmony_ci brw_imm_ud(ivb ? INTEL_MASK(15, 12) : INTEL_MASK(16, 13))); 1071bf215546Sopenharmony_ci 1072bf215546Sopenharmony_ci /* Shift it up to bits 27:24. */ 1073bf215546Sopenharmony_ci brw_SHL(p, m0_2, get_element_ud(dst, 2), brw_imm_ud(ivb ? 12 : 11)); 1074bf215546Sopenharmony_ci 1075bf215546Sopenharmony_ci /* Set the Barrier Count and the enable bit */ 1076bf215546Sopenharmony_ci brw_OR(p, m0_2, m0_2, brw_imm_ud(instances << 9 | (1 << 15))); 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci brw_pop_insn_state(p); 1079bf215546Sopenharmony_ci} 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_cistatic void 1082bf215546Sopenharmony_cigenerate_oword_dual_block_offsets(struct brw_codegen *p, 1083bf215546Sopenharmony_ci struct brw_reg m1, 1084bf215546Sopenharmony_ci struct brw_reg index) 1085bf215546Sopenharmony_ci{ 1086bf215546Sopenharmony_ci int second_vertex_offset; 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci if (p->devinfo->ver >= 6) 1089bf215546Sopenharmony_ci second_vertex_offset = 1; 1090bf215546Sopenharmony_ci else 1091bf215546Sopenharmony_ci second_vertex_offset = 16; 1092bf215546Sopenharmony_ci 1093bf215546Sopenharmony_ci m1 = retype(m1, BRW_REGISTER_TYPE_D); 1094bf215546Sopenharmony_ci 1095bf215546Sopenharmony_ci /* Set up M1 (message payload). Only the block offsets in M1.0 and 1096bf215546Sopenharmony_ci * M1.4 are used, and the rest are ignored. 
1097bf215546Sopenharmony_ci */ 1098bf215546Sopenharmony_ci struct brw_reg m1_0 = suboffset(vec1(m1), 0); 1099bf215546Sopenharmony_ci struct brw_reg m1_4 = suboffset(vec1(m1), 4); 1100bf215546Sopenharmony_ci struct brw_reg index_0 = suboffset(vec1(index), 0); 1101bf215546Sopenharmony_ci struct brw_reg index_4 = suboffset(vec1(index), 4); 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci brw_push_insn_state(p); 1104bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 1105bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci brw_MOV(p, m1_0, index_0); 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci if (index.file == BRW_IMMEDIATE_VALUE) { 1110bf215546Sopenharmony_ci index_4.ud += second_vertex_offset; 1111bf215546Sopenharmony_ci brw_MOV(p, m1_4, index_4); 1112bf215546Sopenharmony_ci } else { 1113bf215546Sopenharmony_ci brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset)); 1114bf215546Sopenharmony_ci } 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci brw_pop_insn_state(p); 1117bf215546Sopenharmony_ci} 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_cistatic void 1120bf215546Sopenharmony_cigenerate_unpack_flags(struct brw_codegen *p, 1121bf215546Sopenharmony_ci struct brw_reg dst) 1122bf215546Sopenharmony_ci{ 1123bf215546Sopenharmony_ci brw_push_insn_state(p); 1124bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 1125bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1126bf215546Sopenharmony_ci 1127bf215546Sopenharmony_ci struct brw_reg flags = brw_flag_reg(0, 0); 1128bf215546Sopenharmony_ci struct brw_reg dst_0 = suboffset(vec1(dst), 0); 1129bf215546Sopenharmony_ci struct brw_reg dst_4 = suboffset(vec1(dst), 4); 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci brw_AND(p, dst_0, flags, brw_imm_ud(0x0f)); 1132bf215546Sopenharmony_ci brw_AND(p, dst_4, flags, brw_imm_ud(0xf0)); 
1133bf215546Sopenharmony_ci brw_SHR(p, dst_4, dst_4, brw_imm_ud(4)); 1134bf215546Sopenharmony_ci 1135bf215546Sopenharmony_ci brw_pop_insn_state(p); 1136bf215546Sopenharmony_ci} 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_cistatic void 1139bf215546Sopenharmony_cigenerate_scratch_read(struct brw_codegen *p, 1140bf215546Sopenharmony_ci vec4_instruction *inst, 1141bf215546Sopenharmony_ci struct brw_reg dst, 1142bf215546Sopenharmony_ci struct brw_reg index) 1143bf215546Sopenharmony_ci{ 1144bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 1145bf215546Sopenharmony_ci struct brw_reg header = brw_vec8_grf(0, 0); 1146bf215546Sopenharmony_ci 1147bf215546Sopenharmony_ci gfx6_resolve_implied_move(p, &header, inst->base_mrf); 1148bf215546Sopenharmony_ci 1149bf215546Sopenharmony_ci generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1), 1150bf215546Sopenharmony_ci index); 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_ci uint32_t msg_type; 1153bf215546Sopenharmony_ci 1154bf215546Sopenharmony_ci if (devinfo->ver >= 6) 1155bf215546Sopenharmony_ci msg_type = GFX6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1156bf215546Sopenharmony_ci else if (devinfo->verx10 >= 45) 1157bf215546Sopenharmony_ci msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1158bf215546Sopenharmony_ci else 1159bf215546Sopenharmony_ci msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_ci const unsigned target_cache = 1162bf215546Sopenharmony_ci devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE : 1163bf215546Sopenharmony_ci devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE : 1164bf215546Sopenharmony_ci BRW_SFID_DATAPORT_READ; 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_ci /* Each of the 8 channel enables is considered for whether each 1167bf215546Sopenharmony_ci * dword is written. 
1168bf215546Sopenharmony_ci */ 1169bf215546Sopenharmony_ci brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); 1170bf215546Sopenharmony_ci brw_inst_set_sfid(devinfo, send, target_cache); 1171bf215546Sopenharmony_ci brw_set_dest(p, send, dst); 1172bf215546Sopenharmony_ci brw_set_src0(p, send, header); 1173bf215546Sopenharmony_ci if (devinfo->ver < 6) 1174bf215546Sopenharmony_ci brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf); 1175bf215546Sopenharmony_ci brw_set_desc(p, send, 1176bf215546Sopenharmony_ci brw_message_desc(devinfo, 2, 1, true) | 1177bf215546Sopenharmony_ci brw_dp_read_desc(devinfo, 1178bf215546Sopenharmony_ci brw_scratch_surface_idx(p), 1179bf215546Sopenharmony_ci BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 1180bf215546Sopenharmony_ci msg_type, BRW_DATAPORT_READ_TARGET_RENDER_CACHE)); 1181bf215546Sopenharmony_ci} 1182bf215546Sopenharmony_ci 1183bf215546Sopenharmony_cistatic void 1184bf215546Sopenharmony_cigenerate_scratch_write(struct brw_codegen *p, 1185bf215546Sopenharmony_ci vec4_instruction *inst, 1186bf215546Sopenharmony_ci struct brw_reg dst, 1187bf215546Sopenharmony_ci struct brw_reg src, 1188bf215546Sopenharmony_ci struct brw_reg index) 1189bf215546Sopenharmony_ci{ 1190bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 1191bf215546Sopenharmony_ci const unsigned target_cache = 1192bf215546Sopenharmony_ci (devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE : 1193bf215546Sopenharmony_ci devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE : 1194bf215546Sopenharmony_ci BRW_SFID_DATAPORT_WRITE); 1195bf215546Sopenharmony_ci struct brw_reg header = brw_vec8_grf(0, 0); 1196bf215546Sopenharmony_ci bool write_commit; 1197bf215546Sopenharmony_ci 1198bf215546Sopenharmony_ci /* If the instruction is predicated, we'll predicate the send, not 1199bf215546Sopenharmony_ci * the header setup. 
1200bf215546Sopenharmony_ci */ 1201bf215546Sopenharmony_ci brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci gfx6_resolve_implied_move(p, &header, inst->base_mrf); 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1), 1206bf215546Sopenharmony_ci index); 1207bf215546Sopenharmony_ci 1208bf215546Sopenharmony_ci brw_MOV(p, 1209bf215546Sopenharmony_ci retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), 1210bf215546Sopenharmony_ci retype(src, BRW_REGISTER_TYPE_D)); 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_ci uint32_t msg_type; 1213bf215546Sopenharmony_ci 1214bf215546Sopenharmony_ci if (devinfo->ver >= 7) 1215bf215546Sopenharmony_ci msg_type = GFX7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE; 1216bf215546Sopenharmony_ci else if (devinfo->ver == 6) 1217bf215546Sopenharmony_ci msg_type = GFX6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; 1218bf215546Sopenharmony_ci else 1219bf215546Sopenharmony_ci msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_ci brw_set_default_predicate_control(p, inst->predicate); 1222bf215546Sopenharmony_ci 1223bf215546Sopenharmony_ci /* Pre-gfx6, we have to specify write commits to ensure ordering 1224bf215546Sopenharmony_ci * between reads and writes within a thread. Afterwards, that's 1225bf215546Sopenharmony_ci * guaranteed and write commits only matter for inter-thread 1226bf215546Sopenharmony_ci * synchronization. 1227bf215546Sopenharmony_ci */ 1228bf215546Sopenharmony_ci if (devinfo->ver >= 6) { 1229bf215546Sopenharmony_ci write_commit = false; 1230bf215546Sopenharmony_ci } else { 1231bf215546Sopenharmony_ci /* The visitor set up our destination register to be g0. 
This 1232bf215546Sopenharmony_ci * means that when the next read comes along, we will end up 1233bf215546Sopenharmony_ci * reading from g0 and causing a block on the write commit. For 1234bf215546Sopenharmony_ci * write-after-read, we are relying on the value of the previous 1235bf215546Sopenharmony_ci * read being used (and thus blocking on completion) before our 1236bf215546Sopenharmony_ci * write is executed. This means we have to be careful in 1237bf215546Sopenharmony_ci * instruction scheduling to not violate this assumption. 1238bf215546Sopenharmony_ci */ 1239bf215546Sopenharmony_ci write_commit = true; 1240bf215546Sopenharmony_ci } 1241bf215546Sopenharmony_ci 1242bf215546Sopenharmony_ci /* Each of the 8 channel enables is considered for whether each 1243bf215546Sopenharmony_ci * dword is written. 1244bf215546Sopenharmony_ci */ 1245bf215546Sopenharmony_ci brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); 1246bf215546Sopenharmony_ci brw_inst_set_sfid(p->devinfo, send, target_cache); 1247bf215546Sopenharmony_ci brw_set_dest(p, send, dst); 1248bf215546Sopenharmony_ci brw_set_src0(p, send, header); 1249bf215546Sopenharmony_ci if (devinfo->ver < 6) 1250bf215546Sopenharmony_ci brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf); 1251bf215546Sopenharmony_ci brw_set_desc(p, send, 1252bf215546Sopenharmony_ci brw_message_desc(devinfo, 3, write_commit, true) | 1253bf215546Sopenharmony_ci brw_dp_write_desc(devinfo, 1254bf215546Sopenharmony_ci brw_scratch_surface_idx(p), 1255bf215546Sopenharmony_ci BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 1256bf215546Sopenharmony_ci msg_type, 1257bf215546Sopenharmony_ci write_commit)); 1258bf215546Sopenharmony_ci} 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_cistatic void 1261bf215546Sopenharmony_cigenerate_pull_constant_load(struct brw_codegen *p, 1262bf215546Sopenharmony_ci vec4_instruction *inst, 1263bf215546Sopenharmony_ci struct brw_reg dst, 1264bf215546Sopenharmony_ci struct brw_reg index, 1265bf215546Sopenharmony_ci 
struct brw_reg offset) 1266bf215546Sopenharmony_ci{ 1267bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 1268bf215546Sopenharmony_ci const unsigned target_cache = 1269bf215546Sopenharmony_ci (devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_SAMPLER_CACHE : 1270bf215546Sopenharmony_ci BRW_SFID_DATAPORT_READ); 1271bf215546Sopenharmony_ci assert(index.file == BRW_IMMEDIATE_VALUE && 1272bf215546Sopenharmony_ci index.type == BRW_REGISTER_TYPE_UD); 1273bf215546Sopenharmony_ci uint32_t surf_index = index.ud; 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci struct brw_reg header = brw_vec8_grf(0, 0); 1276bf215546Sopenharmony_ci 1277bf215546Sopenharmony_ci gfx6_resolve_implied_move(p, &header, inst->base_mrf); 1278bf215546Sopenharmony_ci 1279bf215546Sopenharmony_ci if (devinfo->ver >= 6) { 1280bf215546Sopenharmony_ci if (offset.file == BRW_IMMEDIATE_VALUE) { 1281bf215546Sopenharmony_ci brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), 1282bf215546Sopenharmony_ci BRW_REGISTER_TYPE_D), 1283bf215546Sopenharmony_ci brw_imm_d(offset.ud >> 4)); 1284bf215546Sopenharmony_ci } else { 1285bf215546Sopenharmony_ci brw_SHR(p, retype(brw_message_reg(inst->base_mrf + 1), 1286bf215546Sopenharmony_ci BRW_REGISTER_TYPE_D), 1287bf215546Sopenharmony_ci offset, brw_imm_d(4)); 1288bf215546Sopenharmony_ci } 1289bf215546Sopenharmony_ci } else { 1290bf215546Sopenharmony_ci brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), 1291bf215546Sopenharmony_ci BRW_REGISTER_TYPE_D), 1292bf215546Sopenharmony_ci offset); 1293bf215546Sopenharmony_ci } 1294bf215546Sopenharmony_ci 1295bf215546Sopenharmony_ci uint32_t msg_type; 1296bf215546Sopenharmony_ci 1297bf215546Sopenharmony_ci if (devinfo->ver >= 6) 1298bf215546Sopenharmony_ci msg_type = GFX6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1299bf215546Sopenharmony_ci else if (devinfo->verx10 >= 45) 1300bf215546Sopenharmony_ci msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1301bf215546Sopenharmony_ci else 
1302bf215546Sopenharmony_ci msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci /* Each of the 8 channel enables is considered for whether each 1305bf215546Sopenharmony_ci * dword is written. 1306bf215546Sopenharmony_ci */ 1307bf215546Sopenharmony_ci brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); 1308bf215546Sopenharmony_ci brw_inst_set_sfid(devinfo, send, target_cache); 1309bf215546Sopenharmony_ci brw_set_dest(p, send, dst); 1310bf215546Sopenharmony_ci brw_set_src0(p, send, header); 1311bf215546Sopenharmony_ci if (devinfo->ver < 6) 1312bf215546Sopenharmony_ci brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf); 1313bf215546Sopenharmony_ci brw_set_desc(p, send, 1314bf215546Sopenharmony_ci brw_message_desc(devinfo, 2, 1, true) | 1315bf215546Sopenharmony_ci brw_dp_read_desc(devinfo, surf_index, 1316bf215546Sopenharmony_ci BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 1317bf215546Sopenharmony_ci msg_type, 1318bf215546Sopenharmony_ci BRW_DATAPORT_READ_TARGET_DATA_CACHE)); 1319bf215546Sopenharmony_ci} 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_cistatic void 1322bf215546Sopenharmony_cigenerate_get_buffer_size(struct brw_codegen *p, 1323bf215546Sopenharmony_ci vec4_instruction *inst, 1324bf215546Sopenharmony_ci struct brw_reg dst, 1325bf215546Sopenharmony_ci struct brw_reg src, 1326bf215546Sopenharmony_ci struct brw_reg surf_index) 1327bf215546Sopenharmony_ci{ 1328bf215546Sopenharmony_ci assert(p->devinfo->ver >= 7); 1329bf215546Sopenharmony_ci assert(surf_index.type == BRW_REGISTER_TYPE_UD && 1330bf215546Sopenharmony_ci surf_index.file == BRW_IMMEDIATE_VALUE); 1331bf215546Sopenharmony_ci 1332bf215546Sopenharmony_ci brw_SAMPLE(p, 1333bf215546Sopenharmony_ci dst, 1334bf215546Sopenharmony_ci inst->base_mrf, 1335bf215546Sopenharmony_ci src, 1336bf215546Sopenharmony_ci surf_index.ud, 1337bf215546Sopenharmony_ci 0, 1338bf215546Sopenharmony_ci GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO, 
1339bf215546Sopenharmony_ci 1, /* response length */ 1340bf215546Sopenharmony_ci inst->mlen, 1341bf215546Sopenharmony_ci inst->header_size > 0, 1342bf215546Sopenharmony_ci BRW_SAMPLER_SIMD_MODE_SIMD4X2, 1343bf215546Sopenharmony_ci BRW_SAMPLER_RETURN_FORMAT_SINT32); 1344bf215546Sopenharmony_ci} 1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_cistatic void 1347bf215546Sopenharmony_cigenerate_pull_constant_load_gfx7(struct brw_codegen *p, 1348bf215546Sopenharmony_ci vec4_instruction *inst, 1349bf215546Sopenharmony_ci struct brw_reg dst, 1350bf215546Sopenharmony_ci struct brw_reg surf_index, 1351bf215546Sopenharmony_ci struct brw_reg offset) 1352bf215546Sopenharmony_ci{ 1353bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 1354bf215546Sopenharmony_ci assert(surf_index.type == BRW_REGISTER_TYPE_UD); 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_ci if (surf_index.file == BRW_IMMEDIATE_VALUE) { 1357bf215546Sopenharmony_ci 1358bf215546Sopenharmony_ci brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); 1359bf215546Sopenharmony_ci brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER); 1360bf215546Sopenharmony_ci brw_set_dest(p, insn, dst); 1361bf215546Sopenharmony_ci brw_set_src0(p, insn, offset); 1362bf215546Sopenharmony_ci brw_set_desc(p, insn, 1363bf215546Sopenharmony_ci brw_message_desc(devinfo, inst->mlen, 1, inst->header_size) | 1364bf215546Sopenharmony_ci brw_sampler_desc(devinfo, surf_index.ud, 1365bf215546Sopenharmony_ci 0, /* LD message ignores sampler unit */ 1366bf215546Sopenharmony_ci GFX5_SAMPLER_MESSAGE_SAMPLE_LD, 1367bf215546Sopenharmony_ci BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0)); 1368bf215546Sopenharmony_ci } else { 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); 1371bf215546Sopenharmony_ci 1372bf215546Sopenharmony_ci brw_push_insn_state(p); 1373bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 
1374bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci /* a0.0 = surf_index & 0xff */ 1377bf215546Sopenharmony_ci brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); 1378bf215546Sopenharmony_ci brw_inst_set_exec_size(devinfo, insn_and, BRW_EXECUTE_1); 1379bf215546Sopenharmony_ci brw_set_dest(p, insn_and, addr); 1380bf215546Sopenharmony_ci brw_set_src0(p, insn_and, vec1(retype(surf_index, BRW_REGISTER_TYPE_UD))); 1381bf215546Sopenharmony_ci brw_set_src1(p, insn_and, brw_imm_ud(0x0ff)); 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci brw_pop_insn_state(p); 1384bf215546Sopenharmony_ci 1385bf215546Sopenharmony_ci /* dst = send(offset, a0.0 | <descriptor>) */ 1386bf215546Sopenharmony_ci brw_send_indirect_message( 1387bf215546Sopenharmony_ci p, BRW_SFID_SAMPLER, dst, offset, addr, 1388bf215546Sopenharmony_ci brw_message_desc(devinfo, inst->mlen, 1, inst->header_size) | 1389bf215546Sopenharmony_ci brw_sampler_desc(devinfo, 1390bf215546Sopenharmony_ci 0 /* surface */, 1391bf215546Sopenharmony_ci 0 /* sampler */, 1392bf215546Sopenharmony_ci GFX5_SAMPLER_MESSAGE_SAMPLE_LD, 1393bf215546Sopenharmony_ci BRW_SAMPLER_SIMD_MODE_SIMD4X2, 1394bf215546Sopenharmony_ci 0), 1395bf215546Sopenharmony_ci false /* EOT */); 1396bf215546Sopenharmony_ci } 1397bf215546Sopenharmony_ci} 1398bf215546Sopenharmony_ci 1399bf215546Sopenharmony_cistatic void 1400bf215546Sopenharmony_cigenerate_mov_indirect(struct brw_codegen *p, 1401bf215546Sopenharmony_ci vec4_instruction *, 1402bf215546Sopenharmony_ci struct brw_reg dst, struct brw_reg reg, 1403bf215546Sopenharmony_ci struct brw_reg indirect) 1404bf215546Sopenharmony_ci{ 1405bf215546Sopenharmony_ci assert(indirect.type == BRW_REGISTER_TYPE_UD); 1406bf215546Sopenharmony_ci assert(p->devinfo->ver >= 6); 1407bf215546Sopenharmony_ci 1408bf215546Sopenharmony_ci unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr * (REG_SIZE / 2); 1409bf215546Sopenharmony_ci 
1410bf215546Sopenharmony_ci /* This instruction acts in align1 mode */ 1411bf215546Sopenharmony_ci assert(dst.writemask == WRITEMASK_XYZW); 1412bf215546Sopenharmony_ci 1413bf215546Sopenharmony_ci if (indirect.file == BRW_IMMEDIATE_VALUE) { 1414bf215546Sopenharmony_ci imm_byte_offset += indirect.ud; 1415bf215546Sopenharmony_ci 1416bf215546Sopenharmony_ci reg.nr = imm_byte_offset / REG_SIZE; 1417bf215546Sopenharmony_ci reg.subnr = (imm_byte_offset / (REG_SIZE / 2)) % 2; 1418bf215546Sopenharmony_ci unsigned shift = (imm_byte_offset / 4) % 4; 1419bf215546Sopenharmony_ci reg.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift); 1420bf215546Sopenharmony_ci 1421bf215546Sopenharmony_ci brw_MOV(p, dst, reg); 1422bf215546Sopenharmony_ci } else { 1423bf215546Sopenharmony_ci brw_push_insn_state(p); 1424bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1425bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 1426bf215546Sopenharmony_ci 1427bf215546Sopenharmony_ci struct brw_reg addr = vec8(brw_address_reg(0)); 1428bf215546Sopenharmony_ci 1429bf215546Sopenharmony_ci /* We need to move the indirect value into the address register. In 1430bf215546Sopenharmony_ci * order to make things make some sense, we want to respect at least the 1431bf215546Sopenharmony_ci * X component of the swizzle. In order to do that, we need to convert 1432bf215546Sopenharmony_ci * the subnr (probably 0) to an align1 subnr and add in the swizzle. 1433bf215546Sopenharmony_ci */ 1434bf215546Sopenharmony_ci assert(brw_is_single_value_swizzle(indirect.swizzle)); 1435bf215546Sopenharmony_ci indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0)); 1436bf215546Sopenharmony_ci 1437bf215546Sopenharmony_ci /* We then use a region of <8,4,0>:uw to pick off the first 2 bytes of 1438bf215546Sopenharmony_ci * the indirect and splat it out to all four channels of the given half 1439bf215546Sopenharmony_ci * of a0. 
1440bf215546Sopenharmony_ci */ 1441bf215546Sopenharmony_ci indirect.subnr *= 2; 1442bf215546Sopenharmony_ci indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0); 1443bf215546Sopenharmony_ci brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset)); 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_ci /* Now we need to incorporate the swizzle from the source register */ 1446bf215546Sopenharmony_ci if (reg.swizzle != BRW_SWIZZLE_XXXX) { 1447bf215546Sopenharmony_ci uint32_t uv_swiz = BRW_GET_SWZ(reg.swizzle, 0) << 2 | 1448bf215546Sopenharmony_ci BRW_GET_SWZ(reg.swizzle, 1) << 6 | 1449bf215546Sopenharmony_ci BRW_GET_SWZ(reg.swizzle, 2) << 10 | 1450bf215546Sopenharmony_ci BRW_GET_SWZ(reg.swizzle, 3) << 14; 1451bf215546Sopenharmony_ci uv_swiz |= uv_swiz << 16; 1452bf215546Sopenharmony_ci 1453bf215546Sopenharmony_ci brw_ADD(p, addr, addr, brw_imm_uv(uv_swiz)); 1454bf215546Sopenharmony_ci } 1455bf215546Sopenharmony_ci 1456bf215546Sopenharmony_ci brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), reg.type)); 1457bf215546Sopenharmony_ci 1458bf215546Sopenharmony_ci brw_pop_insn_state(p); 1459bf215546Sopenharmony_ci } 1460bf215546Sopenharmony_ci} 1461bf215546Sopenharmony_ci 1462bf215546Sopenharmony_cistatic void 1463bf215546Sopenharmony_cigenerate_zero_oob_push_regs(struct brw_codegen *p, 1464bf215546Sopenharmony_ci struct brw_stage_prog_data *prog_data, 1465bf215546Sopenharmony_ci struct brw_reg scratch, 1466bf215546Sopenharmony_ci struct brw_reg bit_mask_in) 1467bf215546Sopenharmony_ci{ 1468bf215546Sopenharmony_ci const uint64_t want_zero = prog_data->zero_push_reg; 1469bf215546Sopenharmony_ci assert(want_zero); 1470bf215546Sopenharmony_ci 1471bf215546Sopenharmony_ci assert(bit_mask_in.file == BRW_GENERAL_REGISTER_FILE); 1472bf215546Sopenharmony_ci assert(BRW_GET_SWZ(bit_mask_in.swizzle, 1) == 1473bf215546Sopenharmony_ci BRW_GET_SWZ(bit_mask_in.swizzle, 0) + 1); 1474bf215546Sopenharmony_ci bit_mask_in.subnr += BRW_GET_SWZ(bit_mask_in.swizzle, 0) * 4; 
1475bf215546Sopenharmony_ci bit_mask_in.type = BRW_REGISTER_TYPE_W; 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci /* Scratch should be 3 registers in the GRF */ 1478bf215546Sopenharmony_ci assert(scratch.file == BRW_GENERAL_REGISTER_FILE); 1479bf215546Sopenharmony_ci scratch = vec8(scratch); 1480bf215546Sopenharmony_ci struct brw_reg mask_w16 = retype(scratch, BRW_REGISTER_TYPE_W); 1481bf215546Sopenharmony_ci struct brw_reg mask_d16 = retype(byte_offset(scratch, REG_SIZE), 1482bf215546Sopenharmony_ci BRW_REGISTER_TYPE_D); 1483bf215546Sopenharmony_ci 1484bf215546Sopenharmony_ci brw_push_insn_state(p); 1485bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1486bf215546Sopenharmony_ci brw_set_default_mask_control(p, BRW_MASK_DISABLE); 1487bf215546Sopenharmony_ci 1488bf215546Sopenharmony_ci for (unsigned i = 0; i < 64; i++) { 1489bf215546Sopenharmony_ci if (i % 16 == 0 && (want_zero & BITFIELD64_RANGE(i, 16))) { 1490bf215546Sopenharmony_ci brw_set_default_exec_size(p, BRW_EXECUTE_8); 1491bf215546Sopenharmony_ci brw_SHL(p, suboffset(mask_w16, 8), 1492bf215546Sopenharmony_ci vec1(byte_offset(bit_mask_in, i / 8)), 1493bf215546Sopenharmony_ci brw_imm_v(0x01234567)); 1494bf215546Sopenharmony_ci brw_SHL(p, mask_w16, suboffset(mask_w16, 8), brw_imm_w(8)); 1495bf215546Sopenharmony_ci 1496bf215546Sopenharmony_ci brw_set_default_exec_size(p, BRW_EXECUTE_16); 1497bf215546Sopenharmony_ci brw_ASR(p, mask_d16, mask_w16, brw_imm_w(15)); 1498bf215546Sopenharmony_ci } 1499bf215546Sopenharmony_ci 1500bf215546Sopenharmony_ci if (want_zero & BITFIELD64_BIT(i)) { 1501bf215546Sopenharmony_ci unsigned push_start = prog_data->dispatch_grf_start_reg; 1502bf215546Sopenharmony_ci struct brw_reg push_reg = 1503bf215546Sopenharmony_ci retype(brw_vec8_grf(push_start + i, 0), BRW_REGISTER_TYPE_D); 1504bf215546Sopenharmony_ci 1505bf215546Sopenharmony_ci brw_set_default_exec_size(p, BRW_EXECUTE_8); 1506bf215546Sopenharmony_ci brw_AND(p, push_reg, push_reg, 
vec1(suboffset(mask_d16, i))); 1507bf215546Sopenharmony_ci } 1508bf215546Sopenharmony_ci } 1509bf215546Sopenharmony_ci 1510bf215546Sopenharmony_ci brw_pop_insn_state(p); 1511bf215546Sopenharmony_ci} 1512bf215546Sopenharmony_ci 1513bf215546Sopenharmony_cistatic void 1514bf215546Sopenharmony_cigenerate_code(struct brw_codegen *p, 1515bf215546Sopenharmony_ci const struct brw_compiler *compiler, 1516bf215546Sopenharmony_ci void *log_data, 1517bf215546Sopenharmony_ci const nir_shader *nir, 1518bf215546Sopenharmony_ci struct brw_vue_prog_data *prog_data, 1519bf215546Sopenharmony_ci const struct cfg_t *cfg, 1520bf215546Sopenharmony_ci const performance &perf, 1521bf215546Sopenharmony_ci struct brw_compile_stats *stats, 1522bf215546Sopenharmony_ci bool debug_enabled) 1523bf215546Sopenharmony_ci{ 1524bf215546Sopenharmony_ci const struct intel_device_info *devinfo = p->devinfo; 1525bf215546Sopenharmony_ci const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->info.stage); 1526bf215546Sopenharmony_ci struct disasm_info *disasm_info = disasm_initialize(p->isa, cfg); 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_ci /* `send_count` explicitly does not include spills or fills, as we'd 1529bf215546Sopenharmony_ci * like to use it as a metric for intentional memory access or other 1530bf215546Sopenharmony_ci * shared function use. Otherwise, subtle changes to scheduling or 1531bf215546Sopenharmony_ci * register allocation could cause it to fluctuate wildly - and that 1532bf215546Sopenharmony_ci * effect is already counted in spill/fill counts. 
1533bf215546Sopenharmony_ci */ 1534bf215546Sopenharmony_ci int spill_count = 0, fill_count = 0; 1535bf215546Sopenharmony_ci int loop_count = 0, send_count = 0; 1536bf215546Sopenharmony_ci 1537bf215546Sopenharmony_ci foreach_block_and_inst (block, vec4_instruction, inst, cfg) { 1538bf215546Sopenharmony_ci struct brw_reg src[3], dst; 1539bf215546Sopenharmony_ci 1540bf215546Sopenharmony_ci if (unlikely(debug_enabled)) 1541bf215546Sopenharmony_ci disasm_annotate(disasm_info, inst, p->next_insn_offset); 1542bf215546Sopenharmony_ci 1543bf215546Sopenharmony_ci for (unsigned int i = 0; i < 3; i++) { 1544bf215546Sopenharmony_ci src[i] = inst->src[i].as_brw_reg(); 1545bf215546Sopenharmony_ci } 1546bf215546Sopenharmony_ci dst = inst->dst.as_brw_reg(); 1547bf215546Sopenharmony_ci 1548bf215546Sopenharmony_ci brw_set_default_predicate_control(p, inst->predicate); 1549bf215546Sopenharmony_ci brw_set_default_predicate_inverse(p, inst->predicate_inverse); 1550bf215546Sopenharmony_ci brw_set_default_flag_reg(p, inst->flag_subreg / 2, inst->flag_subreg % 2); 1551bf215546Sopenharmony_ci brw_set_default_saturate(p, inst->saturate); 1552bf215546Sopenharmony_ci brw_set_default_mask_control(p, inst->force_writemask_all); 1553bf215546Sopenharmony_ci brw_set_default_acc_write_control(p, inst->writes_accumulator); 1554bf215546Sopenharmony_ci 1555bf215546Sopenharmony_ci assert(inst->group % inst->exec_size == 0); 1556bf215546Sopenharmony_ci assert(inst->group % 4 == 0); 1557bf215546Sopenharmony_ci 1558bf215546Sopenharmony_ci /* There are some instructions where the destination is 64-bit 1559bf215546Sopenharmony_ci * but we retype it to a smaller type. In that case, we cannot 1560bf215546Sopenharmony_ci * double the exec_size. 
1561bf215546Sopenharmony_ci */ 1562bf215546Sopenharmony_ci const bool is_df = (get_exec_type_size(inst) == 8 || 1563bf215546Sopenharmony_ci inst->dst.type == BRW_REGISTER_TYPE_DF) && 1564bf215546Sopenharmony_ci inst->opcode != VEC4_OPCODE_PICK_LOW_32BIT && 1565bf215546Sopenharmony_ci inst->opcode != VEC4_OPCODE_PICK_HIGH_32BIT && 1566bf215546Sopenharmony_ci inst->opcode != VEC4_OPCODE_SET_LOW_32BIT && 1567bf215546Sopenharmony_ci inst->opcode != VEC4_OPCODE_SET_HIGH_32BIT; 1568bf215546Sopenharmony_ci 1569bf215546Sopenharmony_ci unsigned exec_size = inst->exec_size; 1570bf215546Sopenharmony_ci if (devinfo->verx10 == 70 && is_df) 1571bf215546Sopenharmony_ci exec_size *= 2; 1572bf215546Sopenharmony_ci 1573bf215546Sopenharmony_ci brw_set_default_exec_size(p, cvt(exec_size) - 1); 1574bf215546Sopenharmony_ci 1575bf215546Sopenharmony_ci if (!inst->force_writemask_all) 1576bf215546Sopenharmony_ci brw_set_default_group(p, inst->group); 1577bf215546Sopenharmony_ci 1578bf215546Sopenharmony_ci assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->ver)); 1579bf215546Sopenharmony_ci assert(inst->mlen <= BRW_MAX_MSG_LENGTH); 1580bf215546Sopenharmony_ci 1581bf215546Sopenharmony_ci unsigned pre_emit_nr_insn = p->nr_insn; 1582bf215546Sopenharmony_ci 1583bf215546Sopenharmony_ci switch (inst->opcode) { 1584bf215546Sopenharmony_ci case VEC4_OPCODE_UNPACK_UNIFORM: 1585bf215546Sopenharmony_ci case BRW_OPCODE_MOV: 1586bf215546Sopenharmony_ci case VEC4_OPCODE_MOV_FOR_SCRATCH: 1587bf215546Sopenharmony_ci brw_MOV(p, dst, src[0]); 1588bf215546Sopenharmony_ci break; 1589bf215546Sopenharmony_ci case BRW_OPCODE_ADD: 1590bf215546Sopenharmony_ci brw_ADD(p, dst, src[0], src[1]); 1591bf215546Sopenharmony_ci break; 1592bf215546Sopenharmony_ci case BRW_OPCODE_MUL: 1593bf215546Sopenharmony_ci brw_MUL(p, dst, src[0], src[1]); 1594bf215546Sopenharmony_ci break; 1595bf215546Sopenharmony_ci case BRW_OPCODE_MACH: 1596bf215546Sopenharmony_ci brw_MACH(p, dst, src[0], src[1]); 1597bf215546Sopenharmony_ci 
break; 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_ci case BRW_OPCODE_MAD: 1600bf215546Sopenharmony_ci assert(devinfo->ver >= 6); 1601bf215546Sopenharmony_ci brw_MAD(p, dst, src[0], src[1], src[2]); 1602bf215546Sopenharmony_ci break; 1603bf215546Sopenharmony_ci 1604bf215546Sopenharmony_ci case BRW_OPCODE_FRC: 1605bf215546Sopenharmony_ci brw_FRC(p, dst, src[0]); 1606bf215546Sopenharmony_ci break; 1607bf215546Sopenharmony_ci case BRW_OPCODE_RNDD: 1608bf215546Sopenharmony_ci brw_RNDD(p, dst, src[0]); 1609bf215546Sopenharmony_ci break; 1610bf215546Sopenharmony_ci case BRW_OPCODE_RNDE: 1611bf215546Sopenharmony_ci brw_RNDE(p, dst, src[0]); 1612bf215546Sopenharmony_ci break; 1613bf215546Sopenharmony_ci case BRW_OPCODE_RNDZ: 1614bf215546Sopenharmony_ci brw_RNDZ(p, dst, src[0]); 1615bf215546Sopenharmony_ci break; 1616bf215546Sopenharmony_ci 1617bf215546Sopenharmony_ci case BRW_OPCODE_AND: 1618bf215546Sopenharmony_ci brw_AND(p, dst, src[0], src[1]); 1619bf215546Sopenharmony_ci break; 1620bf215546Sopenharmony_ci case BRW_OPCODE_OR: 1621bf215546Sopenharmony_ci brw_OR(p, dst, src[0], src[1]); 1622bf215546Sopenharmony_ci break; 1623bf215546Sopenharmony_ci case BRW_OPCODE_XOR: 1624bf215546Sopenharmony_ci brw_XOR(p, dst, src[0], src[1]); 1625bf215546Sopenharmony_ci break; 1626bf215546Sopenharmony_ci case BRW_OPCODE_NOT: 1627bf215546Sopenharmony_ci brw_NOT(p, dst, src[0]); 1628bf215546Sopenharmony_ci break; 1629bf215546Sopenharmony_ci case BRW_OPCODE_ASR: 1630bf215546Sopenharmony_ci brw_ASR(p, dst, src[0], src[1]); 1631bf215546Sopenharmony_ci break; 1632bf215546Sopenharmony_ci case BRW_OPCODE_SHR: 1633bf215546Sopenharmony_ci brw_SHR(p, dst, src[0], src[1]); 1634bf215546Sopenharmony_ci break; 1635bf215546Sopenharmony_ci case BRW_OPCODE_SHL: 1636bf215546Sopenharmony_ci brw_SHL(p, dst, src[0], src[1]); 1637bf215546Sopenharmony_ci break; 1638bf215546Sopenharmony_ci 1639bf215546Sopenharmony_ci case BRW_OPCODE_CMP: 1640bf215546Sopenharmony_ci brw_CMP(p, dst, 
inst->conditional_mod, src[0], src[1]); 1641bf215546Sopenharmony_ci break; 1642bf215546Sopenharmony_ci case BRW_OPCODE_CMPN: 1643bf215546Sopenharmony_ci brw_CMPN(p, dst, inst->conditional_mod, src[0], src[1]); 1644bf215546Sopenharmony_ci break; 1645bf215546Sopenharmony_ci case BRW_OPCODE_SEL: 1646bf215546Sopenharmony_ci brw_SEL(p, dst, src[0], src[1]); 1647bf215546Sopenharmony_ci break; 1648bf215546Sopenharmony_ci 1649bf215546Sopenharmony_ci case BRW_OPCODE_DPH: 1650bf215546Sopenharmony_ci brw_DPH(p, dst, src[0], src[1]); 1651bf215546Sopenharmony_ci break; 1652bf215546Sopenharmony_ci 1653bf215546Sopenharmony_ci case BRW_OPCODE_DP4: 1654bf215546Sopenharmony_ci brw_DP4(p, dst, src[0], src[1]); 1655bf215546Sopenharmony_ci break; 1656bf215546Sopenharmony_ci 1657bf215546Sopenharmony_ci case BRW_OPCODE_DP3: 1658bf215546Sopenharmony_ci brw_DP3(p, dst, src[0], src[1]); 1659bf215546Sopenharmony_ci break; 1660bf215546Sopenharmony_ci 1661bf215546Sopenharmony_ci case BRW_OPCODE_DP2: 1662bf215546Sopenharmony_ci brw_DP2(p, dst, src[0], src[1]); 1663bf215546Sopenharmony_ci break; 1664bf215546Sopenharmony_ci 1665bf215546Sopenharmony_ci case BRW_OPCODE_F32TO16: 1666bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1667bf215546Sopenharmony_ci brw_F32TO16(p, dst, src[0]); 1668bf215546Sopenharmony_ci break; 1669bf215546Sopenharmony_ci 1670bf215546Sopenharmony_ci case BRW_OPCODE_F16TO32: 1671bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1672bf215546Sopenharmony_ci brw_F16TO32(p, dst, src[0]); 1673bf215546Sopenharmony_ci break; 1674bf215546Sopenharmony_ci 1675bf215546Sopenharmony_ci case BRW_OPCODE_LRP: 1676bf215546Sopenharmony_ci assert(devinfo->ver >= 6); 1677bf215546Sopenharmony_ci brw_LRP(p, dst, src[0], src[1], src[2]); 1678bf215546Sopenharmony_ci break; 1679bf215546Sopenharmony_ci 1680bf215546Sopenharmony_ci case BRW_OPCODE_BFREV: 1681bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1682bf215546Sopenharmony_ci brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), 
1683bf215546Sopenharmony_ci retype(src[0], BRW_REGISTER_TYPE_UD)); 1684bf215546Sopenharmony_ci break; 1685bf215546Sopenharmony_ci case BRW_OPCODE_FBH: 1686bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1687bf215546Sopenharmony_ci brw_FBH(p, retype(dst, src[0].type), src[0]); 1688bf215546Sopenharmony_ci break; 1689bf215546Sopenharmony_ci case BRW_OPCODE_FBL: 1690bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1691bf215546Sopenharmony_ci brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), 1692bf215546Sopenharmony_ci retype(src[0], BRW_REGISTER_TYPE_UD)); 1693bf215546Sopenharmony_ci break; 1694bf215546Sopenharmony_ci case BRW_OPCODE_LZD: 1695bf215546Sopenharmony_ci brw_LZD(p, dst, src[0]); 1696bf215546Sopenharmony_ci break; 1697bf215546Sopenharmony_ci case BRW_OPCODE_CBIT: 1698bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1699bf215546Sopenharmony_ci brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), 1700bf215546Sopenharmony_ci retype(src[0], BRW_REGISTER_TYPE_UD)); 1701bf215546Sopenharmony_ci break; 1702bf215546Sopenharmony_ci case BRW_OPCODE_ADDC: 1703bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1704bf215546Sopenharmony_ci brw_ADDC(p, dst, src[0], src[1]); 1705bf215546Sopenharmony_ci break; 1706bf215546Sopenharmony_ci case BRW_OPCODE_SUBB: 1707bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1708bf215546Sopenharmony_ci brw_SUBB(p, dst, src[0], src[1]); 1709bf215546Sopenharmony_ci break; 1710bf215546Sopenharmony_ci case BRW_OPCODE_MAC: 1711bf215546Sopenharmony_ci brw_MAC(p, dst, src[0], src[1]); 1712bf215546Sopenharmony_ci break; 1713bf215546Sopenharmony_ci 1714bf215546Sopenharmony_ci case BRW_OPCODE_BFE: 1715bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1716bf215546Sopenharmony_ci brw_BFE(p, dst, src[0], src[1], src[2]); 1717bf215546Sopenharmony_ci break; 1718bf215546Sopenharmony_ci 1719bf215546Sopenharmony_ci case BRW_OPCODE_BFI1: 1720bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1721bf215546Sopenharmony_ci brw_BFI1(p, dst, src[0], src[1]); 
1722bf215546Sopenharmony_ci break; 1723bf215546Sopenharmony_ci case BRW_OPCODE_BFI2: 1724bf215546Sopenharmony_ci assert(devinfo->ver >= 7); 1725bf215546Sopenharmony_ci brw_BFI2(p, dst, src[0], src[1], src[2]); 1726bf215546Sopenharmony_ci break; 1727bf215546Sopenharmony_ci 1728bf215546Sopenharmony_ci case BRW_OPCODE_IF: 1729bf215546Sopenharmony_ci if (!inst->src[0].is_null()) { 1730bf215546Sopenharmony_ci /* The instruction has an embedded compare (only allowed on gfx6) */ 1731bf215546Sopenharmony_ci assert(devinfo->ver == 6); 1732bf215546Sopenharmony_ci gfx6_IF(p, inst->conditional_mod, src[0], src[1]); 1733bf215546Sopenharmony_ci } else { 1734bf215546Sopenharmony_ci brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8); 1735bf215546Sopenharmony_ci brw_inst_set_pred_control(p->devinfo, if_inst, inst->predicate); 1736bf215546Sopenharmony_ci } 1737bf215546Sopenharmony_ci break; 1738bf215546Sopenharmony_ci 1739bf215546Sopenharmony_ci case BRW_OPCODE_ELSE: 1740bf215546Sopenharmony_ci brw_ELSE(p); 1741bf215546Sopenharmony_ci break; 1742bf215546Sopenharmony_ci case BRW_OPCODE_ENDIF: 1743bf215546Sopenharmony_ci brw_ENDIF(p); 1744bf215546Sopenharmony_ci break; 1745bf215546Sopenharmony_ci 1746bf215546Sopenharmony_ci case BRW_OPCODE_DO: 1747bf215546Sopenharmony_ci brw_DO(p, BRW_EXECUTE_8); 1748bf215546Sopenharmony_ci break; 1749bf215546Sopenharmony_ci 1750bf215546Sopenharmony_ci case BRW_OPCODE_BREAK: 1751bf215546Sopenharmony_ci brw_BREAK(p); 1752bf215546Sopenharmony_ci brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 1753bf215546Sopenharmony_ci break; 1754bf215546Sopenharmony_ci case BRW_OPCODE_CONTINUE: 1755bf215546Sopenharmony_ci brw_CONT(p); 1756bf215546Sopenharmony_ci brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 1757bf215546Sopenharmony_ci break; 1758bf215546Sopenharmony_ci 1759bf215546Sopenharmony_ci case BRW_OPCODE_WHILE: 1760bf215546Sopenharmony_ci brw_WHILE(p); 1761bf215546Sopenharmony_ci loop_count++; 1762bf215546Sopenharmony_ci break; 
1763bf215546Sopenharmony_ci 1764bf215546Sopenharmony_ci case SHADER_OPCODE_RCP: 1765bf215546Sopenharmony_ci case SHADER_OPCODE_RSQ: 1766bf215546Sopenharmony_ci case SHADER_OPCODE_SQRT: 1767bf215546Sopenharmony_ci case SHADER_OPCODE_EXP2: 1768bf215546Sopenharmony_ci case SHADER_OPCODE_LOG2: 1769bf215546Sopenharmony_ci case SHADER_OPCODE_SIN: 1770bf215546Sopenharmony_ci case SHADER_OPCODE_COS: 1771bf215546Sopenharmony_ci assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); 1772bf215546Sopenharmony_ci if (devinfo->ver >= 7) { 1773bf215546Sopenharmony_ci gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], 1774bf215546Sopenharmony_ci brw_null_reg()); 1775bf215546Sopenharmony_ci } else if (devinfo->ver == 6) { 1776bf215546Sopenharmony_ci generate_math_gfx6(p, inst, dst, src[0], brw_null_reg()); 1777bf215546Sopenharmony_ci } else { 1778bf215546Sopenharmony_ci generate_math1_gfx4(p, inst, dst, src[0]); 1779bf215546Sopenharmony_ci send_count++; 1780bf215546Sopenharmony_ci } 1781bf215546Sopenharmony_ci break; 1782bf215546Sopenharmony_ci 1783bf215546Sopenharmony_ci case SHADER_OPCODE_POW: 1784bf215546Sopenharmony_ci case SHADER_OPCODE_INT_QUOTIENT: 1785bf215546Sopenharmony_ci case SHADER_OPCODE_INT_REMAINDER: 1786bf215546Sopenharmony_ci assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); 1787bf215546Sopenharmony_ci if (devinfo->ver >= 7) { 1788bf215546Sopenharmony_ci gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); 1789bf215546Sopenharmony_ci } else if (devinfo->ver == 6) { 1790bf215546Sopenharmony_ci generate_math_gfx6(p, inst, dst, src[0], src[1]); 1791bf215546Sopenharmony_ci } else { 1792bf215546Sopenharmony_ci generate_math2_gfx4(p, inst, dst, src[0], src[1]); 1793bf215546Sopenharmony_ci send_count++; 1794bf215546Sopenharmony_ci } 1795bf215546Sopenharmony_ci break; 1796bf215546Sopenharmony_ci 1797bf215546Sopenharmony_ci case SHADER_OPCODE_TEX: 1798bf215546Sopenharmony_ci case SHADER_OPCODE_TXD: 1799bf215546Sopenharmony_ci case 
SHADER_OPCODE_TXF: 1800bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS: 1801bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS_W: 1802bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_MCS: 1803bf215546Sopenharmony_ci case SHADER_OPCODE_TXL: 1804bf215546Sopenharmony_ci case SHADER_OPCODE_TXS: 1805bf215546Sopenharmony_ci case SHADER_OPCODE_TG4: 1806bf215546Sopenharmony_ci case SHADER_OPCODE_TG4_OFFSET: 1807bf215546Sopenharmony_ci case SHADER_OPCODE_SAMPLEINFO: 1808bf215546Sopenharmony_ci generate_tex(p, prog_data, nir->info.stage, 1809bf215546Sopenharmony_ci inst, dst, src[0], src[1], src[2]); 1810bf215546Sopenharmony_ci send_count++; 1811bf215546Sopenharmony_ci break; 1812bf215546Sopenharmony_ci 1813bf215546Sopenharmony_ci case SHADER_OPCODE_GET_BUFFER_SIZE: 1814bf215546Sopenharmony_ci generate_get_buffer_size(p, inst, dst, src[0], src[1]); 1815bf215546Sopenharmony_ci send_count++; 1816bf215546Sopenharmony_ci break; 1817bf215546Sopenharmony_ci 1818bf215546Sopenharmony_ci case VEC4_VS_OPCODE_URB_WRITE: 1819bf215546Sopenharmony_ci generate_vs_urb_write(p, inst); 1820bf215546Sopenharmony_ci send_count++; 1821bf215546Sopenharmony_ci break; 1822bf215546Sopenharmony_ci 1823bf215546Sopenharmony_ci case SHADER_OPCODE_GFX4_SCRATCH_READ: 1824bf215546Sopenharmony_ci generate_scratch_read(p, inst, dst, src[0]); 1825bf215546Sopenharmony_ci fill_count++; 1826bf215546Sopenharmony_ci break; 1827bf215546Sopenharmony_ci 1828bf215546Sopenharmony_ci case SHADER_OPCODE_GFX4_SCRATCH_WRITE: 1829bf215546Sopenharmony_ci generate_scratch_write(p, inst, dst, src[0], src[1]); 1830bf215546Sopenharmony_ci spill_count++; 1831bf215546Sopenharmony_ci break; 1832bf215546Sopenharmony_ci 1833bf215546Sopenharmony_ci case VS_OPCODE_PULL_CONSTANT_LOAD: 1834bf215546Sopenharmony_ci generate_pull_constant_load(p, inst, dst, src[0], src[1]); 1835bf215546Sopenharmony_ci send_count++; 1836bf215546Sopenharmony_ci break; 1837bf215546Sopenharmony_ci 1838bf215546Sopenharmony_ci case 
VS_OPCODE_PULL_CONSTANT_LOAD_GFX7: 1839bf215546Sopenharmony_ci generate_pull_constant_load_gfx7(p, inst, dst, src[0], src[1]); 1840bf215546Sopenharmony_ci send_count++; 1841bf215546Sopenharmony_ci break; 1842bf215546Sopenharmony_ci 1843bf215546Sopenharmony_ci case VEC4_GS_OPCODE_URB_WRITE: 1844bf215546Sopenharmony_ci generate_gs_urb_write(p, inst); 1845bf215546Sopenharmony_ci send_count++; 1846bf215546Sopenharmony_ci break; 1847bf215546Sopenharmony_ci 1848bf215546Sopenharmony_ci case VEC4_GS_OPCODE_URB_WRITE_ALLOCATE: 1849bf215546Sopenharmony_ci generate_gs_urb_write_allocate(p, inst); 1850bf215546Sopenharmony_ci send_count++; 1851bf215546Sopenharmony_ci break; 1852bf215546Sopenharmony_ci 1853bf215546Sopenharmony_ci case GS_OPCODE_SVB_WRITE: 1854bf215546Sopenharmony_ci generate_gs_svb_write(p, inst, dst, src[0], src[1]); 1855bf215546Sopenharmony_ci send_count++; 1856bf215546Sopenharmony_ci break; 1857bf215546Sopenharmony_ci 1858bf215546Sopenharmony_ci case GS_OPCODE_SVB_SET_DST_INDEX: 1859bf215546Sopenharmony_ci generate_gs_svb_set_destination_index(p, inst, dst, src[0]); 1860bf215546Sopenharmony_ci break; 1861bf215546Sopenharmony_ci 1862bf215546Sopenharmony_ci case GS_OPCODE_THREAD_END: 1863bf215546Sopenharmony_ci generate_gs_thread_end(p, inst); 1864bf215546Sopenharmony_ci send_count++; 1865bf215546Sopenharmony_ci break; 1866bf215546Sopenharmony_ci 1867bf215546Sopenharmony_ci case GS_OPCODE_SET_WRITE_OFFSET: 1868bf215546Sopenharmony_ci generate_gs_set_write_offset(p, dst, src[0], src[1]); 1869bf215546Sopenharmony_ci break; 1870bf215546Sopenharmony_ci 1871bf215546Sopenharmony_ci case GS_OPCODE_SET_VERTEX_COUNT: 1872bf215546Sopenharmony_ci generate_gs_set_vertex_count(p, dst, src[0]); 1873bf215546Sopenharmony_ci break; 1874bf215546Sopenharmony_ci 1875bf215546Sopenharmony_ci case GS_OPCODE_FF_SYNC: 1876bf215546Sopenharmony_ci generate_gs_ff_sync(p, inst, dst, src[0], src[1]); 1877bf215546Sopenharmony_ci send_count++; 1878bf215546Sopenharmony_ci break; 
1879bf215546Sopenharmony_ci 1880bf215546Sopenharmony_ci case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: 1881bf215546Sopenharmony_ci generate_gs_ff_sync_set_primitives(p, dst, src[0], src[1], src[2]); 1882bf215546Sopenharmony_ci break; 1883bf215546Sopenharmony_ci 1884bf215546Sopenharmony_ci case GS_OPCODE_SET_PRIMITIVE_ID: 1885bf215546Sopenharmony_ci generate_gs_set_primitive_id(p, dst); 1886bf215546Sopenharmony_ci break; 1887bf215546Sopenharmony_ci 1888bf215546Sopenharmony_ci case GS_OPCODE_SET_DWORD_2: 1889bf215546Sopenharmony_ci generate_gs_set_dword_2(p, dst, src[0]); 1890bf215546Sopenharmony_ci break; 1891bf215546Sopenharmony_ci 1892bf215546Sopenharmony_ci case GS_OPCODE_PREPARE_CHANNEL_MASKS: 1893bf215546Sopenharmony_ci generate_gs_prepare_channel_masks(p, dst); 1894bf215546Sopenharmony_ci break; 1895bf215546Sopenharmony_ci 1896bf215546Sopenharmony_ci case GS_OPCODE_SET_CHANNEL_MASKS: 1897bf215546Sopenharmony_ci generate_gs_set_channel_masks(p, dst, src[0]); 1898bf215546Sopenharmony_ci break; 1899bf215546Sopenharmony_ci 1900bf215546Sopenharmony_ci case GS_OPCODE_GET_INSTANCE_ID: 1901bf215546Sopenharmony_ci generate_gs_get_instance_id(p, dst); 1902bf215546Sopenharmony_ci break; 1903bf215546Sopenharmony_ci 1904bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_ATOMIC: 1905bf215546Sopenharmony_ci assert(src[2].file == BRW_IMMEDIATE_VALUE); 1906bf215546Sopenharmony_ci brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen, 1907bf215546Sopenharmony_ci !inst->dst.is_null(), inst->header_size); 1908bf215546Sopenharmony_ci send_count++; 1909bf215546Sopenharmony_ci break; 1910bf215546Sopenharmony_ci 1911bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_SURFACE_READ: 1912bf215546Sopenharmony_ci assert(!inst->header_size); 1913bf215546Sopenharmony_ci assert(src[2].file == BRW_IMMEDIATE_VALUE); 1914bf215546Sopenharmony_ci brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen, 1915bf215546Sopenharmony_ci src[2].ud); 1916bf215546Sopenharmony_ci send_count++; 
1917bf215546Sopenharmony_ci break; 1918bf215546Sopenharmony_ci 1919bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 1920bf215546Sopenharmony_ci assert(src[2].file == BRW_IMMEDIATE_VALUE); 1921bf215546Sopenharmony_ci brw_untyped_surface_write(p, src[0], src[1], inst->mlen, 1922bf215546Sopenharmony_ci src[2].ud, inst->header_size); 1923bf215546Sopenharmony_ci send_count++; 1924bf215546Sopenharmony_ci break; 1925bf215546Sopenharmony_ci 1926bf215546Sopenharmony_ci case SHADER_OPCODE_MEMORY_FENCE: 1927bf215546Sopenharmony_ci brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, 1928bf215546Sopenharmony_ci brw_message_target(inst->sfid), 1929bf215546Sopenharmony_ci inst->desc, 1930bf215546Sopenharmony_ci /* commit_enable */ false, 1931bf215546Sopenharmony_ci /* bti */ 0); 1932bf215546Sopenharmony_ci send_count++; 1933bf215546Sopenharmony_ci break; 1934bf215546Sopenharmony_ci 1935bf215546Sopenharmony_ci case SHADER_OPCODE_FIND_LIVE_CHANNEL: 1936bf215546Sopenharmony_ci brw_find_live_channel(p, dst, false); 1937bf215546Sopenharmony_ci break; 1938bf215546Sopenharmony_ci 1939bf215546Sopenharmony_ci case SHADER_OPCODE_BROADCAST: 1940bf215546Sopenharmony_ci assert(inst->force_writemask_all); 1941bf215546Sopenharmony_ci brw_broadcast(p, dst, src[0], src[1]); 1942bf215546Sopenharmony_ci break; 1943bf215546Sopenharmony_ci 1944bf215546Sopenharmony_ci case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: 1945bf215546Sopenharmony_ci generate_unpack_flags(p, dst); 1946bf215546Sopenharmony_ci break; 1947bf215546Sopenharmony_ci 1948bf215546Sopenharmony_ci case VEC4_OPCODE_MOV_BYTES: { 1949bf215546Sopenharmony_ci /* Moves the low byte from each channel, using an Align1 access mode 1950bf215546Sopenharmony_ci * and a <4,1,0> source region. 
1951bf215546Sopenharmony_ci */ 1952bf215546Sopenharmony_ci assert(src[0].type == BRW_REGISTER_TYPE_UB || 1953bf215546Sopenharmony_ci src[0].type == BRW_REGISTER_TYPE_B); 1954bf215546Sopenharmony_ci 1955bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1956bf215546Sopenharmony_ci src[0].vstride = BRW_VERTICAL_STRIDE_4; 1957bf215546Sopenharmony_ci src[0].width = BRW_WIDTH_1; 1958bf215546Sopenharmony_ci src[0].hstride = BRW_HORIZONTAL_STRIDE_0; 1959bf215546Sopenharmony_ci brw_MOV(p, dst, src[0]); 1960bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_16); 1961bf215546Sopenharmony_ci break; 1962bf215546Sopenharmony_ci } 1963bf215546Sopenharmony_ci 1964bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_F32: 1965bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_D32: 1966bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_U32: { 1967bf215546Sopenharmony_ci assert(type_sz(src[0].type) == 8); 1968bf215546Sopenharmony_ci assert(type_sz(dst.type) == 8); 1969bf215546Sopenharmony_ci 1970bf215546Sopenharmony_ci brw_reg_type dst_type; 1971bf215546Sopenharmony_ci 1972bf215546Sopenharmony_ci switch (inst->opcode) { 1973bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_F32: 1974bf215546Sopenharmony_ci dst_type = BRW_REGISTER_TYPE_F; 1975bf215546Sopenharmony_ci break; 1976bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_D32: 1977bf215546Sopenharmony_ci dst_type = BRW_REGISTER_TYPE_D; 1978bf215546Sopenharmony_ci break; 1979bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_U32: 1980bf215546Sopenharmony_ci dst_type = BRW_REGISTER_TYPE_UD; 1981bf215546Sopenharmony_ci break; 1982bf215546Sopenharmony_ci default: 1983bf215546Sopenharmony_ci unreachable("Not supported conversion"); 1984bf215546Sopenharmony_ci } 1985bf215546Sopenharmony_ci dst = retype(dst, dst_type); 1986bf215546Sopenharmony_ci 1987bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 1988bf215546Sopenharmony_ci 1989bf215546Sopenharmony_ci /* When converting from DF->F, we 
set destination's stride as 2 as an 1990bf215546Sopenharmony_ci * alignment requirement. But in IVB/BYT, each DF implicitly writes 1991bf215546Sopenharmony_ci * two floats, being the first one the converted value. So we don't 1992bf215546Sopenharmony_ci * need to explicitly set stride 2, but 1. 1993bf215546Sopenharmony_ci */ 1994bf215546Sopenharmony_ci struct brw_reg spread_dst; 1995bf215546Sopenharmony_ci if (devinfo->verx10 == 70) 1996bf215546Sopenharmony_ci spread_dst = stride(dst, 8, 4, 1); 1997bf215546Sopenharmony_ci else 1998bf215546Sopenharmony_ci spread_dst = stride(dst, 8, 4, 2); 1999bf215546Sopenharmony_ci 2000bf215546Sopenharmony_ci brw_MOV(p, spread_dst, src[0]); 2001bf215546Sopenharmony_ci 2002bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_16); 2003bf215546Sopenharmony_ci break; 2004bf215546Sopenharmony_ci } 2005bf215546Sopenharmony_ci 2006bf215546Sopenharmony_ci case VEC4_OPCODE_TO_DOUBLE: { 2007bf215546Sopenharmony_ci assert(type_sz(src[0].type) == 4); 2008bf215546Sopenharmony_ci assert(type_sz(dst.type) == 8); 2009bf215546Sopenharmony_ci 2010bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 2011bf215546Sopenharmony_ci 2012bf215546Sopenharmony_ci brw_MOV(p, dst, src[0]); 2013bf215546Sopenharmony_ci 2014bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_16); 2015bf215546Sopenharmony_ci break; 2016bf215546Sopenharmony_ci } 2017bf215546Sopenharmony_ci 2018bf215546Sopenharmony_ci case VEC4_OPCODE_PICK_LOW_32BIT: 2019bf215546Sopenharmony_ci case VEC4_OPCODE_PICK_HIGH_32BIT: { 2020bf215546Sopenharmony_ci /* Stores the low/high 32-bit of each 64-bit element in src[0] into 2021bf215546Sopenharmony_ci * dst using ALIGN1 mode and a <8,4,2>:UD region on the source. 
2022bf215546Sopenharmony_ci */ 2023bf215546Sopenharmony_ci assert(type_sz(src[0].type) == 8); 2024bf215546Sopenharmony_ci assert(type_sz(dst.type) == 4); 2025bf215546Sopenharmony_ci 2026bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 2027bf215546Sopenharmony_ci 2028bf215546Sopenharmony_ci dst = retype(dst, BRW_REGISTER_TYPE_UD); 2029bf215546Sopenharmony_ci dst.hstride = BRW_HORIZONTAL_STRIDE_1; 2030bf215546Sopenharmony_ci 2031bf215546Sopenharmony_ci src[0] = retype(src[0], BRW_REGISTER_TYPE_UD); 2032bf215546Sopenharmony_ci if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT) 2033bf215546Sopenharmony_ci src[0] = suboffset(src[0], 1); 2034bf215546Sopenharmony_ci src[0] = spread(src[0], 2); 2035bf215546Sopenharmony_ci brw_MOV(p, dst, src[0]); 2036bf215546Sopenharmony_ci 2037bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_16); 2038bf215546Sopenharmony_ci break; 2039bf215546Sopenharmony_ci } 2040bf215546Sopenharmony_ci 2041bf215546Sopenharmony_ci case VEC4_OPCODE_SET_LOW_32BIT: 2042bf215546Sopenharmony_ci case VEC4_OPCODE_SET_HIGH_32BIT: { 2043bf215546Sopenharmony_ci /* Reads consecutive 32-bit elements from src[0] and writes 2044bf215546Sopenharmony_ci * them to the low/high 32-bit of each 64-bit element in dst. 
2045bf215546Sopenharmony_ci */ 2046bf215546Sopenharmony_ci assert(type_sz(src[0].type) == 4); 2047bf215546Sopenharmony_ci assert(type_sz(dst.type) == 8); 2048bf215546Sopenharmony_ci 2049bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 2050bf215546Sopenharmony_ci 2051bf215546Sopenharmony_ci dst = retype(dst, BRW_REGISTER_TYPE_UD); 2052bf215546Sopenharmony_ci if (inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT) 2053bf215546Sopenharmony_ci dst = suboffset(dst, 1); 2054bf215546Sopenharmony_ci dst.hstride = BRW_HORIZONTAL_STRIDE_2; 2055bf215546Sopenharmony_ci 2056bf215546Sopenharmony_ci src[0] = retype(src[0], BRW_REGISTER_TYPE_UD); 2057bf215546Sopenharmony_ci brw_MOV(p, dst, src[0]); 2058bf215546Sopenharmony_ci 2059bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_16); 2060bf215546Sopenharmony_ci break; 2061bf215546Sopenharmony_ci } 2062bf215546Sopenharmony_ci 2063bf215546Sopenharmony_ci case VEC4_OPCODE_PACK_BYTES: { 2064bf215546Sopenharmony_ci /* Is effectively: 2065bf215546Sopenharmony_ci * 2066bf215546Sopenharmony_ci * mov(8) dst<16,4,1>:UB src<4,1,0>:UB 2067bf215546Sopenharmony_ci * 2068bf215546Sopenharmony_ci * but destinations' only regioning is horizontal stride, so instead we 2069bf215546Sopenharmony_ci * have to use two instructions: 2070bf215546Sopenharmony_ci * 2071bf215546Sopenharmony_ci * mov(4) dst<1>:UB src<4,1,0>:UB 2072bf215546Sopenharmony_ci * mov(4) dst.16<1>:UB src.16<4,1,0>:UB 2073bf215546Sopenharmony_ci * 2074bf215546Sopenharmony_ci * where they pack the four bytes from the low and high four DW. 
2075bf215546Sopenharmony_ci */ 2076bf215546Sopenharmony_ci assert(util_is_power_of_two_nonzero(dst.writemask)); 2077bf215546Sopenharmony_ci unsigned offset = __builtin_ctz(dst.writemask); 2078bf215546Sopenharmony_ci 2079bf215546Sopenharmony_ci dst.type = BRW_REGISTER_TYPE_UB; 2080bf215546Sopenharmony_ci 2081bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_1); 2082bf215546Sopenharmony_ci 2083bf215546Sopenharmony_ci src[0].type = BRW_REGISTER_TYPE_UB; 2084bf215546Sopenharmony_ci src[0].vstride = BRW_VERTICAL_STRIDE_4; 2085bf215546Sopenharmony_ci src[0].width = BRW_WIDTH_1; 2086bf215546Sopenharmony_ci src[0].hstride = BRW_HORIZONTAL_STRIDE_0; 2087bf215546Sopenharmony_ci dst.subnr = offset * 4; 2088bf215546Sopenharmony_ci struct brw_inst *insn = brw_MOV(p, dst, src[0]); 2089bf215546Sopenharmony_ci brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4); 2090bf215546Sopenharmony_ci brw_inst_set_no_dd_clear(p->devinfo, insn, true); 2091bf215546Sopenharmony_ci brw_inst_set_no_dd_check(p->devinfo, insn, inst->no_dd_check); 2092bf215546Sopenharmony_ci 2093bf215546Sopenharmony_ci src[0].subnr = 16; 2094bf215546Sopenharmony_ci dst.subnr = 16 + offset * 4; 2095bf215546Sopenharmony_ci insn = brw_MOV(p, dst, src[0]); 2096bf215546Sopenharmony_ci brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4); 2097bf215546Sopenharmony_ci brw_inst_set_no_dd_clear(p->devinfo, insn, inst->no_dd_clear); 2098bf215546Sopenharmony_ci brw_inst_set_no_dd_check(p->devinfo, insn, true); 2099bf215546Sopenharmony_ci 2100bf215546Sopenharmony_ci brw_set_default_access_mode(p, BRW_ALIGN_16); 2101bf215546Sopenharmony_ci break; 2102bf215546Sopenharmony_ci } 2103bf215546Sopenharmony_ci 2104bf215546Sopenharmony_ci case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: 2105bf215546Sopenharmony_ci generate_zero_oob_push_regs(p, &prog_data->base, dst, src[0]); 2106bf215546Sopenharmony_ci break; 2107bf215546Sopenharmony_ci 2108bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_URB_WRITE: 2109bf215546Sopenharmony_ci 
generate_tcs_urb_write(p, inst, src[0]); 2110bf215546Sopenharmony_ci send_count++; 2111bf215546Sopenharmony_ci break; 2112bf215546Sopenharmony_ci 2113bf215546Sopenharmony_ci case VEC4_OPCODE_URB_READ: 2114bf215546Sopenharmony_ci generate_vec4_urb_read(p, inst, dst, src[0]); 2115bf215546Sopenharmony_ci send_count++; 2116bf215546Sopenharmony_ci break; 2117bf215546Sopenharmony_ci 2118bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS: 2119bf215546Sopenharmony_ci generate_tcs_input_urb_offsets(p, dst, src[0], src[1]); 2120bf215546Sopenharmony_ci break; 2121bf215546Sopenharmony_ci 2122bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: 2123bf215546Sopenharmony_ci generate_tcs_output_urb_offsets(p, dst, src[0], src[1]); 2124bf215546Sopenharmony_ci break; 2125bf215546Sopenharmony_ci 2126bf215546Sopenharmony_ci case TCS_OPCODE_GET_INSTANCE_ID: 2127bf215546Sopenharmony_ci generate_tcs_get_instance_id(p, dst); 2128bf215546Sopenharmony_ci break; 2129bf215546Sopenharmony_ci 2130bf215546Sopenharmony_ci case TCS_OPCODE_GET_PRIMITIVE_ID: 2131bf215546Sopenharmony_ci generate_tcs_get_primitive_id(p, dst); 2132bf215546Sopenharmony_ci break; 2133bf215546Sopenharmony_ci 2134bf215546Sopenharmony_ci case TCS_OPCODE_CREATE_BARRIER_HEADER: 2135bf215546Sopenharmony_ci generate_tcs_create_barrier_header(p, prog_data, dst); 2136bf215546Sopenharmony_ci break; 2137bf215546Sopenharmony_ci 2138bf215546Sopenharmony_ci case TES_OPCODE_CREATE_INPUT_READ_HEADER: 2139bf215546Sopenharmony_ci generate_tes_create_input_read_header(p, dst); 2140bf215546Sopenharmony_ci break; 2141bf215546Sopenharmony_ci 2142bf215546Sopenharmony_ci case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: 2143bf215546Sopenharmony_ci generate_tes_add_indirect_urb_offset(p, dst, src[0], src[1]); 2144bf215546Sopenharmony_ci break; 2145bf215546Sopenharmony_ci 2146bf215546Sopenharmony_ci case TES_OPCODE_GET_PRIMITIVE_ID: 2147bf215546Sopenharmony_ci generate_tes_get_primitive_id(p, dst); 
2148bf215546Sopenharmony_ci break; 2149bf215546Sopenharmony_ci 2150bf215546Sopenharmony_ci case TCS_OPCODE_SRC0_010_IS_ZERO: 2151bf215546Sopenharmony_ci /* If src_reg had stride like fs_reg, we wouldn't need this. */ 2152bf215546Sopenharmony_ci brw_MOV(p, brw_null_reg(), stride(src[0], 0, 1, 0)); 2153bf215546Sopenharmony_ci break; 2154bf215546Sopenharmony_ci 2155bf215546Sopenharmony_ci case TCS_OPCODE_RELEASE_INPUT: 2156bf215546Sopenharmony_ci generate_tcs_release_input(p, dst, src[0], src[1]); 2157bf215546Sopenharmony_ci send_count++; 2158bf215546Sopenharmony_ci break; 2159bf215546Sopenharmony_ci 2160bf215546Sopenharmony_ci case TCS_OPCODE_THREAD_END: 2161bf215546Sopenharmony_ci generate_tcs_thread_end(p, inst); 2162bf215546Sopenharmony_ci send_count++; 2163bf215546Sopenharmony_ci break; 2164bf215546Sopenharmony_ci 2165bf215546Sopenharmony_ci case SHADER_OPCODE_BARRIER: 2166bf215546Sopenharmony_ci brw_barrier(p, src[0]); 2167bf215546Sopenharmony_ci brw_WAIT(p); 2168bf215546Sopenharmony_ci send_count++; 2169bf215546Sopenharmony_ci break; 2170bf215546Sopenharmony_ci 2171bf215546Sopenharmony_ci case SHADER_OPCODE_MOV_INDIRECT: 2172bf215546Sopenharmony_ci generate_mov_indirect(p, inst, dst, src[0], src[1]); 2173bf215546Sopenharmony_ci break; 2174bf215546Sopenharmony_ci 2175bf215546Sopenharmony_ci case BRW_OPCODE_DIM: 2176bf215546Sopenharmony_ci assert(devinfo->verx10 == 75); 2177bf215546Sopenharmony_ci assert(src[0].type == BRW_REGISTER_TYPE_DF); 2178bf215546Sopenharmony_ci assert(dst.type == BRW_REGISTER_TYPE_DF); 2179bf215546Sopenharmony_ci brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F)); 2180bf215546Sopenharmony_ci break; 2181bf215546Sopenharmony_ci 2182bf215546Sopenharmony_ci default: 2183bf215546Sopenharmony_ci unreachable("Unsupported opcode"); 2184bf215546Sopenharmony_ci } 2185bf215546Sopenharmony_ci 2186bf215546Sopenharmony_ci if (inst->opcode == VEC4_OPCODE_PACK_BYTES) { 2187bf215546Sopenharmony_ci /* Handled dependency hints in the generator. 
*/ 2188bf215546Sopenharmony_ci 2189bf215546Sopenharmony_ci assert(!inst->conditional_mod); 2190bf215546Sopenharmony_ci } else if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) { 2191bf215546Sopenharmony_ci assert(p->nr_insn == pre_emit_nr_insn + 1 || 2192bf215546Sopenharmony_ci !"conditional_mod, no_dd_check, or no_dd_clear set for IR " 2193bf215546Sopenharmony_ci "emitting more than 1 instruction"); 2194bf215546Sopenharmony_ci 2195bf215546Sopenharmony_ci brw_inst *last = &p->store[pre_emit_nr_insn]; 2196bf215546Sopenharmony_ci 2197bf215546Sopenharmony_ci if (inst->conditional_mod) 2198bf215546Sopenharmony_ci brw_inst_set_cond_modifier(p->devinfo, last, inst->conditional_mod); 2199bf215546Sopenharmony_ci brw_inst_set_no_dd_clear(p->devinfo, last, inst->no_dd_clear); 2200bf215546Sopenharmony_ci brw_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check); 2201bf215546Sopenharmony_ci } 2202bf215546Sopenharmony_ci } 2203bf215546Sopenharmony_ci 2204bf215546Sopenharmony_ci brw_set_uip_jip(p, 0); 2205bf215546Sopenharmony_ci 2206bf215546Sopenharmony_ci /* end of program sentinel */ 2207bf215546Sopenharmony_ci disasm_new_inst_group(disasm_info, p->next_insn_offset); 2208bf215546Sopenharmony_ci 2209bf215546Sopenharmony_ci#ifndef NDEBUG 2210bf215546Sopenharmony_ci bool validated = 2211bf215546Sopenharmony_ci#else 2212bf215546Sopenharmony_ci if (unlikely(debug_enabled)) 2213bf215546Sopenharmony_ci#endif 2214bf215546Sopenharmony_ci brw_validate_instructions(&compiler->isa, p->store, 2215bf215546Sopenharmony_ci 0, p->next_insn_offset, 2216bf215546Sopenharmony_ci disasm_info); 2217bf215546Sopenharmony_ci 2218bf215546Sopenharmony_ci int before_size = p->next_insn_offset; 2219bf215546Sopenharmony_ci brw_compact_instructions(p, 0, disasm_info); 2220bf215546Sopenharmony_ci int after_size = p->next_insn_offset; 2221bf215546Sopenharmony_ci 2222bf215546Sopenharmony_ci if (unlikely(debug_enabled)) { 2223bf215546Sopenharmony_ci unsigned char sha1[21]; 
2224bf215546Sopenharmony_ci char sha1buf[41]; 2225bf215546Sopenharmony_ci 2226bf215546Sopenharmony_ci _mesa_sha1_compute(p->store, p->next_insn_offset, sha1); 2227bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, sha1); 2228bf215546Sopenharmony_ci 2229bf215546Sopenharmony_ci fprintf(stderr, "Native code for %s %s shader %s (sha1 %s):\n", 2230bf215546Sopenharmony_ci nir->info.label ? nir->info.label : "unnamed", 2231bf215546Sopenharmony_ci _mesa_shader_stage_to_string(nir->info.stage), nir->info.name, 2232bf215546Sopenharmony_ci sha1buf); 2233bf215546Sopenharmony_ci 2234bf215546Sopenharmony_ci fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d " 2235bf215546Sopenharmony_ci "spills:fills, %u sends. Compacted %d to %d bytes (%.0f%%)\n", 2236bf215546Sopenharmony_ci stage_abbrev, before_size / 16, loop_count, perf.latency, 2237bf215546Sopenharmony_ci spill_count, fill_count, send_count, before_size, after_size, 2238bf215546Sopenharmony_ci 100.0f * (before_size - after_size) / before_size); 2239bf215546Sopenharmony_ci 2240bf215546Sopenharmony_ci /* overriding the shader makes disasm_info invalid */ 2241bf215546Sopenharmony_ci if (!brw_try_override_assembly(p, 0, sha1buf)) { 2242bf215546Sopenharmony_ci dump_assembly(p->store, 0, p->next_insn_offset, 2243bf215546Sopenharmony_ci disasm_info, perf.block_latency); 2244bf215546Sopenharmony_ci } else { 2245bf215546Sopenharmony_ci fprintf(stderr, "Successfully overrode shader with sha1 %s\n\n", sha1buf); 2246bf215546Sopenharmony_ci } 2247bf215546Sopenharmony_ci } 2248bf215546Sopenharmony_ci ralloc_free(disasm_info); 2249bf215546Sopenharmony_ci assert(validated); 2250bf215546Sopenharmony_ci 2251bf215546Sopenharmony_ci brw_shader_debug_log(compiler, log_data, 2252bf215546Sopenharmony_ci "%s vec4 shader: %d inst, %d loops, %u cycles, " 2253bf215546Sopenharmony_ci "%d:%d spills:fills, %u sends, " 2254bf215546Sopenharmony_ci "compacted %d to %d bytes.\n", 2255bf215546Sopenharmony_ci stage_abbrev, 
before_size / 16, 2256bf215546Sopenharmony_ci loop_count, perf.latency, spill_count, 2257bf215546Sopenharmony_ci fill_count, send_count, before_size, after_size); 2258bf215546Sopenharmony_ci if (stats) { 2259bf215546Sopenharmony_ci stats->dispatch_width = 0; 2260bf215546Sopenharmony_ci stats->instructions = before_size / 16; 2261bf215546Sopenharmony_ci stats->sends = send_count; 2262bf215546Sopenharmony_ci stats->loops = loop_count; 2263bf215546Sopenharmony_ci stats->cycles = perf.latency; 2264bf215546Sopenharmony_ci stats->spills = spill_count; 2265bf215546Sopenharmony_ci stats->fills = fill_count; 2266bf215546Sopenharmony_ci } 2267bf215546Sopenharmony_ci} 2268bf215546Sopenharmony_ci

/**
 * C-linkage entry point that turns a vec4 CFG into a native program.
 *
 * A brw_codegen is allocated with rzalloc() on \p mem_ctx, initialised for
 * the compiler's ISA and put into Align16 access mode before generate_code()
 * is run over \p cfg.  If the NIR shader carries constant data, it is
 * appended to the program through brw_append_data() and its size and the
 * offset returned by that call are recorded in \p prog_data->base.
 *
 * \param compiler       compiler whose \c isa initialises the codegen
 * \param log_data       forwarded unchanged to generate_code()
 * \param mem_ctx        ralloc context that owns the codegen
 * \param nir            shader providing \c constant_data and its size
 * \param prog_data      receives const_data_size, const_data_offset and
 *                       (via brw_get_program()) program_size
 * \param cfg            control-flow graph handed to generate_code()
 * \param perf           performance estimate handed to generate_code()
 * \param stats          optional statistics sink handed to generate_code()
 * \param debug_enabled  forwarded unchanged to generate_code()
 *
 * \return whatever brw_get_program() returns for the finished codegen
 *         (presumably the assembled program -- confirm against brw_eu.h).
 */
extern "C" const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
                           void *log_data,
                           void *mem_ctx,
                           const nir_shader *nir,
                           struct brw_vue_prog_data *prog_data,
                           const struct cfg_t *cfg,
                           const performance &perf,
                           struct brw_compile_stats *stats,
                           bool debug_enabled)
{
   struct brw_codegen *codegen = rzalloc(mem_ctx, struct brw_codegen);

   brw_init_codegen(&compiler->isa, codegen, mem_ctx);
   /* vec4 code is emitted in Align16 mode unless a generator switches it. */
   brw_set_default_access_mode(codegen, BRW_ALIGN_16);

   generate_code(codegen, compiler, log_data, nir, prog_data, cfg, perf,
                 stats, debug_enabled);

   /* Nothing may have claimed a constant-data section before this point. */
   assert(prog_data->base.const_data_size == 0);

   if (nir->constant_data_size > 0) {
      /* NOTE(review): the trailing 32 is presumably the required alignment
       * of the appended data -- confirm against brw_append_data().
       */
      prog_data->base.const_data_offset =
         brw_append_data(codegen, nir->constant_data,
                         nir->constant_data_size, 32);
      prog_data->base.const_data_size = nir->constant_data_size;
   }

   return brw_get_program(codegen, &prog_data->base.program_size);
}