1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2013 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci/** 25bf215546Sopenharmony_ci * \file brw_vec4_tes.cpp 26bf215546Sopenharmony_ci * 27bf215546Sopenharmony_ci * Tessellaton evaluation shader specific code derived from the vec4_visitor class. 28bf215546Sopenharmony_ci */ 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "brw_vec4_tes.h" 31bf215546Sopenharmony_ci#include "brw_cfg.h" 32bf215546Sopenharmony_ci#include "dev/intel_debug.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_cinamespace brw { 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_civec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler, 37bf215546Sopenharmony_ci void *log_data, 38bf215546Sopenharmony_ci const struct brw_tes_prog_key *key, 39bf215546Sopenharmony_ci struct brw_tes_prog_data *prog_data, 40bf215546Sopenharmony_ci const nir_shader *shader, 41bf215546Sopenharmony_ci void *mem_ctx, 42bf215546Sopenharmony_ci bool debug_enabled) 43bf215546Sopenharmony_ci : vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base, 44bf215546Sopenharmony_ci shader, mem_ctx, false, debug_enabled) 45bf215546Sopenharmony_ci{ 46bf215546Sopenharmony_ci} 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_civoid 49bf215546Sopenharmony_civec4_tes_visitor::setup_payload() 50bf215546Sopenharmony_ci{ 51bf215546Sopenharmony_ci int reg = 0; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci /* The payload always contains important data in r0 and r1, which contains 54bf215546Sopenharmony_ci * the URB handles that are passed on to the URB write at the end 55bf215546Sopenharmony_ci * of the thread. 56bf215546Sopenharmony_ci */ 57bf215546Sopenharmony_ci reg += 2; 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci reg = setup_uniforms(reg); 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci foreach_block_and_inst(block, vec4_instruction, inst, cfg) { 62bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) { 63bf215546Sopenharmony_ci if (inst->src[i].file != ATTR) 64bf215546Sopenharmony_ci continue; 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci unsigned slot = inst->src[i].nr + inst->src[i].offset / 16; 67bf215546Sopenharmony_ci struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2)); 68bf215546Sopenharmony_ci grf = stride(grf, 0, 4, 1); 69bf215546Sopenharmony_ci grf.swizzle = inst->src[i].swizzle; 70bf215546Sopenharmony_ci grf.type = inst->src[i].type; 71bf215546Sopenharmony_ci grf.abs = inst->src[i].abs; 72bf215546Sopenharmony_ci grf.negate = inst->src[i].negate; 73bf215546Sopenharmony_ci inst->src[i] = grf; 74bf215546Sopenharmony_ci } 75bf215546Sopenharmony_ci } 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci reg += 8 * prog_data->urb_read_length; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci this->first_non_payload_grf = reg; 80bf215546Sopenharmony_ci} 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_civoid 84bf215546Sopenharmony_civec4_tes_visitor::emit_prolog() 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci input_read_header = src_reg(this, glsl_type::uvec4_type); 87bf215546Sopenharmony_ci emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header)); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci this->current_annotation = NULL; 90bf215546Sopenharmony_ci} 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_civoid 94bf215546Sopenharmony_civec4_tes_visitor::emit_urb_write_header(int mrf) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci /* No need to do anything for DS; an implied write to this MRF will be 97bf215546Sopenharmony_ci * performed by VEC4_VS_OPCODE_URB_WRITE. 98bf215546Sopenharmony_ci */ 99bf215546Sopenharmony_ci (void) mrf; 100bf215546Sopenharmony_ci} 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_civec4_instruction * 104bf215546Sopenharmony_civec4_tes_visitor::emit_urb_write_opcode(bool complete) 105bf215546Sopenharmony_ci{ 106bf215546Sopenharmony_ci vec4_instruction *inst = emit(VEC4_VS_OPCODE_URB_WRITE); 107bf215546Sopenharmony_ci inst->urb_write_flags = complete ? 108bf215546Sopenharmony_ci BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS; 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci return inst; 111bf215546Sopenharmony_ci} 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_civoid 114bf215546Sopenharmony_civec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) 115bf215546Sopenharmony_ci{ 116bf215546Sopenharmony_ci const struct brw_tes_prog_data *tes_prog_data = 117bf215546Sopenharmony_ci (const struct brw_tes_prog_data *) prog_data; 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci switch (instr->intrinsic) { 120bf215546Sopenharmony_ci case nir_intrinsic_load_tess_coord: 121bf215546Sopenharmony_ci /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */ 122bf215546Sopenharmony_ci emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 123bf215546Sopenharmony_ci src_reg(brw_vec8_grf(1, 0)))); 124bf215546Sopenharmony_ci break; 125bf215546Sopenharmony_ci case nir_intrinsic_load_tess_level_outer: 126bf215546Sopenharmony_ci if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) { 127bf215546Sopenharmony_ci emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 128bf215546Sopenharmony_ci swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), 129bf215546Sopenharmony_ci BRW_SWIZZLE_ZWZW))); 130bf215546Sopenharmony_ci } else { 131bf215546Sopenharmony_ci emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 132bf215546Sopenharmony_ci swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), 133bf215546Sopenharmony_ci BRW_SWIZZLE_WZYX))); 134bf215546Sopenharmony_ci } 135bf215546Sopenharmony_ci break; 136bf215546Sopenharmony_ci case nir_intrinsic_load_tess_level_inner: 137bf215546Sopenharmony_ci if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { 138bf215546Sopenharmony_ci emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 139bf215546Sopenharmony_ci swizzle(src_reg(ATTR, 0, glsl_type::vec4_type), 140bf215546Sopenharmony_ci BRW_SWIZZLE_WZYX))); 141bf215546Sopenharmony_ci } else { 142bf215546Sopenharmony_ci emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 143bf215546Sopenharmony_ci src_reg(ATTR, 1, glsl_type::float_type))); 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci break; 146bf215546Sopenharmony_ci case nir_intrinsic_load_primitive_id: 147bf215546Sopenharmony_ci emit(TES_OPCODE_GET_PRIMITIVE_ID, 148bf215546Sopenharmony_ci get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); 149bf215546Sopenharmony_ci break; 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci case nir_intrinsic_load_input: 152bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: { 153bf215546Sopenharmony_ci assert(nir_dest_bit_size(instr->dest) == 32); 154bf215546Sopenharmony_ci src_reg indirect_offset = get_indirect_offset(instr); 155bf215546Sopenharmony_ci unsigned imm_offset = instr->const_index[0]; 156bf215546Sopenharmony_ci src_reg header = input_read_header; 157bf215546Sopenharmony_ci unsigned first_component = nir_intrinsic_component(instr); 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci if (indirect_offset.file != BAD_FILE) { 160bf215546Sopenharmony_ci src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type); 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the 163bf215546Sopenharmony_ci * valid range of the offset is [0, 0FFFFFFFh]. 164bf215546Sopenharmony_ci */ 165bf215546Sopenharmony_ci emit_minmax(BRW_CONDITIONAL_L, 166bf215546Sopenharmony_ci dst_reg(clamped_indirect_offset), 167bf215546Sopenharmony_ci retype(indirect_offset, BRW_REGISTER_TYPE_UD), 168bf215546Sopenharmony_ci brw_imm_ud(0x0fffffffu)); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci header = src_reg(this, glsl_type::uvec4_type); 171bf215546Sopenharmony_ci emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), 172bf215546Sopenharmony_ci input_read_header, clamped_indirect_offset); 173bf215546Sopenharmony_ci } else { 174bf215546Sopenharmony_ci /* Arbitrarily only push up to 24 vec4 slots worth of data, 175bf215546Sopenharmony_ci * which is 12 registers (since each holds 2 vec4 slots). 176bf215546Sopenharmony_ci */ 177bf215546Sopenharmony_ci const unsigned max_push_slots = 24; 178bf215546Sopenharmony_ci if (imm_offset < max_push_slots) { 179bf215546Sopenharmony_ci src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type); 180bf215546Sopenharmony_ci src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), src)); 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci prog_data->urb_read_length = 185bf215546Sopenharmony_ci MAX2(prog_data->urb_read_length, 186bf215546Sopenharmony_ci DIV_ROUND_UP(imm_offset + 1, 2)); 187bf215546Sopenharmony_ci break; 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci dst_reg temp(this, glsl_type::ivec4_type); 192bf215546Sopenharmony_ci vec4_instruction *read = 193bf215546Sopenharmony_ci emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); 194bf215546Sopenharmony_ci read->offset = imm_offset; 195bf215546Sopenharmony_ci read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci src_reg src = src_reg(temp); 198bf215546Sopenharmony_ci src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci /* Copy to target. We might end up with some funky writemasks landing 201bf215546Sopenharmony_ci * in here, but we really don't want them in the above pseudo-ops. 202bf215546Sopenharmony_ci */ 203bf215546Sopenharmony_ci dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); 204bf215546Sopenharmony_ci dst.writemask = brw_writemask_for_size(instr->num_components); 205bf215546Sopenharmony_ci emit(MOV(dst, src)); 206bf215546Sopenharmony_ci break; 207bf215546Sopenharmony_ci } 208bf215546Sopenharmony_ci default: 209bf215546Sopenharmony_ci vec4_visitor::nir_emit_intrinsic(instr); 210bf215546Sopenharmony_ci } 211bf215546Sopenharmony_ci} 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_civoid 215bf215546Sopenharmony_civec4_tes_visitor::emit_thread_end() 216bf215546Sopenharmony_ci{ 217bf215546Sopenharmony_ci /* For DS, we always end the thread by emitting a single vertex. 218bf215546Sopenharmony_ci * emit_urb_write_opcode() will take care of setting the eot flag on the 219bf215546Sopenharmony_ci * SEND instruction. 220bf215546Sopenharmony_ci */ 221bf215546Sopenharmony_ci emit_vertex(); 222bf215546Sopenharmony_ci} 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci} /* namespace brw */ 225