1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2013 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci/** 25bf215546Sopenharmony_ci * \file brw_vec4_tcs.cpp 26bf215546Sopenharmony_ci * 27bf215546Sopenharmony_ci * Tessellaton control shader specific code derived from the vec4_visitor class. 28bf215546Sopenharmony_ci */ 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "brw_nir.h" 31bf215546Sopenharmony_ci#include "brw_vec4_tcs.h" 32bf215546Sopenharmony_ci#include "brw_fs.h" 33bf215546Sopenharmony_ci#include "dev/intel_debug.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cinamespace brw { 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_civec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler, 38bf215546Sopenharmony_ci void *log_data, 39bf215546Sopenharmony_ci const struct brw_tcs_prog_key *key, 40bf215546Sopenharmony_ci struct brw_tcs_prog_data *prog_data, 41bf215546Sopenharmony_ci const nir_shader *nir, 42bf215546Sopenharmony_ci void *mem_ctx, 43bf215546Sopenharmony_ci bool debug_enabled) 44bf215546Sopenharmony_ci : vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base, 45bf215546Sopenharmony_ci nir, mem_ctx, false, debug_enabled), 46bf215546Sopenharmony_ci key(key) 47bf215546Sopenharmony_ci{ 48bf215546Sopenharmony_ci} 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_civoid 52bf215546Sopenharmony_civec4_tcs_visitor::setup_payload() 53bf215546Sopenharmony_ci{ 54bf215546Sopenharmony_ci int reg = 0; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci /* The payload always contains important data in r0, which contains 57bf215546Sopenharmony_ci * the URB handles that are passed on to the URB write at the end 58bf215546Sopenharmony_ci * of the thread. 59bf215546Sopenharmony_ci */ 60bf215546Sopenharmony_ci reg++; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci /* r1.0 - r4.7 may contain the input control point URB handles, 63bf215546Sopenharmony_ci * which we use to pull vertex data. 64bf215546Sopenharmony_ci */ 65bf215546Sopenharmony_ci reg += 4; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci /* Push constants may start at r5.0 */ 68bf215546Sopenharmony_ci reg = setup_uniforms(reg); 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci this->first_non_payload_grf = reg; 71bf215546Sopenharmony_ci} 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_civoid 75bf215546Sopenharmony_civec4_tcs_visitor::emit_prolog() 76bf215546Sopenharmony_ci{ 77bf215546Sopenharmony_ci invocation_id = src_reg(this, glsl_type::uint_type); 78bf215546Sopenharmony_ci emit(TCS_OPCODE_GET_INSTANCE_ID, dst_reg(invocation_id)); 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci /* HS threads are dispatched with the dispatch mask set to 0xFF. 81bf215546Sopenharmony_ci * If there are an odd number of output vertices, then the final 82bf215546Sopenharmony_ci * HS instance dispatched will only have its bottom half doing real 83bf215546Sopenharmony_ci * work, and so we need to disable the upper half: 84bf215546Sopenharmony_ci */ 85bf215546Sopenharmony_ci if (nir->info.tess.tcs_vertices_out % 2) { 86bf215546Sopenharmony_ci emit(CMP(dst_null_d(), invocation_id, 87bf215546Sopenharmony_ci brw_imm_ud(nir->info.tess.tcs_vertices_out), 88bf215546Sopenharmony_ci BRW_CONDITIONAL_L)); 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci /* Matching ENDIF is in emit_thread_end() */ 91bf215546Sopenharmony_ci emit(IF(BRW_PREDICATE_NORMAL)); 92bf215546Sopenharmony_ci } 93bf215546Sopenharmony_ci} 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_civoid 97bf215546Sopenharmony_civec4_tcs_visitor::emit_thread_end() 98bf215546Sopenharmony_ci{ 99bf215546Sopenharmony_ci vec4_instruction *inst; 100bf215546Sopenharmony_ci current_annotation = "thread end"; 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci if (nir->info.tess.tcs_vertices_out % 2) { 103bf215546Sopenharmony_ci emit(BRW_OPCODE_ENDIF); 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci if (devinfo->ver == 7) { 107bf215546Sopenharmony_ci struct brw_tcs_prog_data *tcs_prog_data = 108bf215546Sopenharmony_ci (struct brw_tcs_prog_data *) prog_data; 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci current_annotation = "release input vertices"; 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci /* Synchronize all threads, so we know that no one is still 113bf215546Sopenharmony_ci * using the input URB handles. 114bf215546Sopenharmony_ci */ 115bf215546Sopenharmony_ci if (tcs_prog_data->instances > 1) { 116bf215546Sopenharmony_ci dst_reg header = dst_reg(this, glsl_type::uvec4_type); 117bf215546Sopenharmony_ci emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header); 118bf215546Sopenharmony_ci emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header)); 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci /* Make thread 0 (invocations <1, 0>) release pairs of ICP handles. 122bf215546Sopenharmony_ci * We want to compare the bottom half of invocation_id with 0, but 123bf215546Sopenharmony_ci * use that truth value for the top half as well. Unfortunately, 124bf215546Sopenharmony_ci * we don't have stride in the vec4 world, nor UV immediates in 125bf215546Sopenharmony_ci * align16, so we need an opcode to get invocation_id<0,4,0>. 126bf215546Sopenharmony_ci */ 127bf215546Sopenharmony_ci set_condmod(BRW_CONDITIONAL_Z, 128bf215546Sopenharmony_ci emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), 129bf215546Sopenharmony_ci invocation_id)); 130bf215546Sopenharmony_ci emit(IF(BRW_PREDICATE_NORMAL)); 131bf215546Sopenharmony_ci for (unsigned i = 0; i < key->input_vertices; i += 2) { 132bf215546Sopenharmony_ci /* If we have an odd number of input vertices, the last will be 133bf215546Sopenharmony_ci * unpaired. We don't want to use an interleaved URB write in 134bf215546Sopenharmony_ci * that case. 135bf215546Sopenharmony_ci */ 136bf215546Sopenharmony_ci const bool is_unpaired = i == key->input_vertices - 1; 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci dst_reg header(this, glsl_type::uvec4_type); 139bf215546Sopenharmony_ci emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i), 140bf215546Sopenharmony_ci brw_imm_ud(is_unpaired)); 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci emit(BRW_OPCODE_ENDIF); 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci inst = emit(TCS_OPCODE_THREAD_END); 146bf215546Sopenharmony_ci inst->base_mrf = 14; 147bf215546Sopenharmony_ci inst->mlen = 2; 148bf215546Sopenharmony_ci} 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_civoid 152bf215546Sopenharmony_civec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst, 153bf215546Sopenharmony_ci const src_reg &vertex_index, 154bf215546Sopenharmony_ci unsigned base_offset, 155bf215546Sopenharmony_ci unsigned first_component, 156bf215546Sopenharmony_ci const src_reg &indirect_offset) 157bf215546Sopenharmony_ci{ 158bf215546Sopenharmony_ci vec4_instruction *inst; 159bf215546Sopenharmony_ci dst_reg temp(this, glsl_type::ivec4_type); 160bf215546Sopenharmony_ci temp.type = dst.type; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci /* Set up the message header to reference the proper parts of the URB */ 163bf215546Sopenharmony_ci dst_reg header = dst_reg(this, glsl_type::uvec4_type); 164bf215546Sopenharmony_ci inst = emit(VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index, 165bf215546Sopenharmony_ci indirect_offset); 166bf215546Sopenharmony_ci inst->force_writemask_all = true; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci /* Read into a temporary, ignoring writemasking. */ 169bf215546Sopenharmony_ci inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); 170bf215546Sopenharmony_ci inst->offset = base_offset; 171bf215546Sopenharmony_ci inst->mlen = 1; 172bf215546Sopenharmony_ci inst->base_mrf = -1; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* Copy the temporary to the destination to deal with writemasking. 175bf215546Sopenharmony_ci * 176bf215546Sopenharmony_ci * Also attempt to deal with gl_PointSize being in the .w component. 177bf215546Sopenharmony_ci */ 178bf215546Sopenharmony_ci if (inst->offset == 0 && indirect_offset.file == BAD_FILE) { 179bf215546Sopenharmony_ci emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW))); 180bf215546Sopenharmony_ci } else { 181bf215546Sopenharmony_ci src_reg src = src_reg(temp); 182bf215546Sopenharmony_ci src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 183bf215546Sopenharmony_ci emit(MOV(dst, src)); 184bf215546Sopenharmony_ci } 185bf215546Sopenharmony_ci} 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_civoid 188bf215546Sopenharmony_civec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst, 189bf215546Sopenharmony_ci unsigned base_offset, 190bf215546Sopenharmony_ci unsigned first_component, 191bf215546Sopenharmony_ci const src_reg &indirect_offset) 192bf215546Sopenharmony_ci{ 193bf215546Sopenharmony_ci vec4_instruction *inst; 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci /* Set up the message header to reference the proper parts of the URB */ 196bf215546Sopenharmony_ci dst_reg header = dst_reg(this, glsl_type::uvec4_type); 197bf215546Sopenharmony_ci inst = emit(VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header, 198bf215546Sopenharmony_ci brw_imm_ud(dst.writemask << first_component), indirect_offset); 199bf215546Sopenharmony_ci inst->force_writemask_all = true; 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header)); 202bf215546Sopenharmony_ci read->offset = base_offset; 203bf215546Sopenharmony_ci read->mlen = 1; 204bf215546Sopenharmony_ci read->base_mrf = -1; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci if (first_component) { 207bf215546Sopenharmony_ci /* Read into a temporary and copy with a swizzle and writemask. */ 208bf215546Sopenharmony_ci read->dst = retype(dst_reg(this, glsl_type::ivec4_type), dst.type); 209bf215546Sopenharmony_ci emit(MOV(dst, swizzle(src_reg(read->dst), 210bf215546Sopenharmony_ci BRW_SWZ_COMP_INPUT(first_component)))); 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci} 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_civoid 215bf215546Sopenharmony_civec4_tcs_visitor::emit_urb_write(const src_reg &value, 216bf215546Sopenharmony_ci unsigned writemask, 217bf215546Sopenharmony_ci unsigned base_offset, 218bf215546Sopenharmony_ci const src_reg &indirect_offset) 219bf215546Sopenharmony_ci{ 220bf215546Sopenharmony_ci if (writemask == 0) 221bf215546Sopenharmony_ci return; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci src_reg message(this, glsl_type::uvec4_type, 2); 224bf215546Sopenharmony_ci vec4_instruction *inst; 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci inst = emit(VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message), 227bf215546Sopenharmony_ci brw_imm_ud(writemask), indirect_offset); 228bf215546Sopenharmony_ci inst->force_writemask_all = true; 229bf215546Sopenharmony_ci inst = emit(MOV(byte_offset(dst_reg(retype(message, value.type)), REG_SIZE), 230bf215546Sopenharmony_ci value)); 231bf215546Sopenharmony_ci inst->force_writemask_all = true; 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci inst = emit(VEC4_TCS_OPCODE_URB_WRITE, dst_null_f(), message); 234bf215546Sopenharmony_ci inst->offset = base_offset; 235bf215546Sopenharmony_ci inst->mlen = 2; 236bf215546Sopenharmony_ci inst->base_mrf = -1; 237bf215546Sopenharmony_ci} 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_civoid 240bf215546Sopenharmony_civec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) 241bf215546Sopenharmony_ci{ 242bf215546Sopenharmony_ci switch (instr->intrinsic) { 243bf215546Sopenharmony_ci case nir_intrinsic_load_invocation_id: 244bf215546Sopenharmony_ci emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD), 245bf215546Sopenharmony_ci invocation_id)); 246bf215546Sopenharmony_ci break; 247bf215546Sopenharmony_ci case nir_intrinsic_load_primitive_id: 248bf215546Sopenharmony_ci emit(TCS_OPCODE_GET_PRIMITIVE_ID, 249bf215546Sopenharmony_ci get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); 250bf215546Sopenharmony_ci break; 251bf215546Sopenharmony_ci case nir_intrinsic_load_patch_vertices_in: 252bf215546Sopenharmony_ci emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), 253bf215546Sopenharmony_ci brw_imm_d(key->input_vertices))); 254bf215546Sopenharmony_ci break; 255bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: { 256bf215546Sopenharmony_ci assert(nir_dest_bit_size(instr->dest) == 32); 257bf215546Sopenharmony_ci src_reg indirect_offset = get_indirect_offset(instr); 258bf215546Sopenharmony_ci unsigned imm_offset = instr->const_index[0]; 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci src_reg vertex_index = retype(get_nir_src_imm(instr->src[0]), 261bf215546Sopenharmony_ci BRW_REGISTER_TYPE_UD); 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci unsigned first_component = nir_intrinsic_component(instr); 264bf215546Sopenharmony_ci dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); 265bf215546Sopenharmony_ci dst.writemask = brw_writemask_for_size(instr->num_components); 266bf215546Sopenharmony_ci emit_input_urb_read(dst, vertex_index, imm_offset, 267bf215546Sopenharmony_ci first_component, indirect_offset); 268bf215546Sopenharmony_ci break; 269bf215546Sopenharmony_ci } 270bf215546Sopenharmony_ci case nir_intrinsic_load_input: 271bf215546Sopenharmony_ci unreachable("nir_lower_io should use load_per_vertex_input intrinsics"); 272bf215546Sopenharmony_ci break; 273bf215546Sopenharmony_ci case nir_intrinsic_load_output: 274bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_output: { 275bf215546Sopenharmony_ci src_reg indirect_offset = get_indirect_offset(instr); 276bf215546Sopenharmony_ci unsigned imm_offset = instr->const_index[0]; 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); 279bf215546Sopenharmony_ci dst.writemask = brw_writemask_for_size(instr->num_components); 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr), 282bf215546Sopenharmony_ci indirect_offset); 283bf215546Sopenharmony_ci break; 284bf215546Sopenharmony_ci } 285bf215546Sopenharmony_ci case nir_intrinsic_store_output: 286bf215546Sopenharmony_ci case nir_intrinsic_store_per_vertex_output: { 287bf215546Sopenharmony_ci assert(nir_src_bit_size(instr->src[0]) == 32); 288bf215546Sopenharmony_ci src_reg value = get_nir_src(instr->src[0]); 289bf215546Sopenharmony_ci unsigned mask = instr->const_index[1]; 290bf215546Sopenharmony_ci unsigned swiz = BRW_SWIZZLE_XYZW; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci src_reg indirect_offset = get_indirect_offset(instr); 293bf215546Sopenharmony_ci unsigned imm_offset = instr->const_index[0]; 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci unsigned first_component = nir_intrinsic_component(instr); 296bf215546Sopenharmony_ci if (first_component) { 297bf215546Sopenharmony_ci assert(swiz == BRW_SWIZZLE_XYZW); 298bf215546Sopenharmony_ci swiz = BRW_SWZ_COMP_OUTPUT(first_component); 299bf215546Sopenharmony_ci mask = mask << first_component; 300bf215546Sopenharmony_ci } 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci emit_urb_write(swizzle(value, swiz), mask, 303bf215546Sopenharmony_ci imm_offset, indirect_offset); 304bf215546Sopenharmony_ci break; 305bf215546Sopenharmony_ci } 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci case nir_intrinsic_control_barrier: { 308bf215546Sopenharmony_ci dst_reg header = dst_reg(this, glsl_type::uvec4_type); 309bf215546Sopenharmony_ci emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header); 310bf215546Sopenharmony_ci emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header)); 311bf215546Sopenharmony_ci break; 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci case nir_intrinsic_memory_barrier_tcs_patch: 315bf215546Sopenharmony_ci break; 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci default: 318bf215546Sopenharmony_ci vec4_visitor::nir_emit_intrinsic(instr); 319bf215546Sopenharmony_ci } 320bf215546Sopenharmony_ci} 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci/** 323bf215546Sopenharmony_ci * Return the number of patches to accumulate before an 8_PATCH mode thread is 324bf215546Sopenharmony_ci * launched. In cases with a large number of input control points and a large 325bf215546Sopenharmony_ci * amount of VS outputs, the VS URB space needed to store an entire 8 patches 326bf215546Sopenharmony_ci * worth of data can be prohibitive, so it can be beneficial to launch threads 327bf215546Sopenharmony_ci * early. 328bf215546Sopenharmony_ci * 329bf215546Sopenharmony_ci * See the 3DSTATE_HS::Patch Count Threshold documentation for the recommended 330bf215546Sopenharmony_ci * values. Note that 0 means to "disable" early dispatch, meaning to wait for 331bf215546Sopenharmony_ci * a full 8 patches as normal. 332bf215546Sopenharmony_ci */ 333bf215546Sopenharmony_cistatic int 334bf215546Sopenharmony_ciget_patch_count_threshold(int input_control_points) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci if (input_control_points <= 4) 337bf215546Sopenharmony_ci return 0; 338bf215546Sopenharmony_ci else if (input_control_points <= 6) 339bf215546Sopenharmony_ci return 5; 340bf215546Sopenharmony_ci else if (input_control_points <= 8) 341bf215546Sopenharmony_ci return 4; 342bf215546Sopenharmony_ci else if (input_control_points <= 10) 343bf215546Sopenharmony_ci return 3; 344bf215546Sopenharmony_ci else if (input_control_points <= 14) 345bf215546Sopenharmony_ci return 2; 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci /* Return patch count 1 for PATCHLIST_15 - PATCHLIST_32 */ 348bf215546Sopenharmony_ci return 1; 349bf215546Sopenharmony_ci} 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci} /* namespace brw */ 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ciextern "C" const unsigned * 354bf215546Sopenharmony_cibrw_compile_tcs(const struct brw_compiler *compiler, 355bf215546Sopenharmony_ci void *mem_ctx, 356bf215546Sopenharmony_ci struct brw_compile_tcs_params *params) 357bf215546Sopenharmony_ci{ 358bf215546Sopenharmony_ci const struct intel_device_info *devinfo = compiler->devinfo; 359bf215546Sopenharmony_ci nir_shader *nir = params->nir; 360bf215546Sopenharmony_ci const struct brw_tcs_prog_key *key = params->key; 361bf215546Sopenharmony_ci struct brw_tcs_prog_data *prog_data = params->prog_data; 362bf215546Sopenharmony_ci struct brw_vue_prog_data *vue_prog_data = &prog_data->base; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL]; 365bf215546Sopenharmony_ci const bool debug_enabled = INTEL_DEBUG(DEBUG_TCS); 366bf215546Sopenharmony_ci const unsigned *assembly; 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci vue_prog_data->base.stage = MESA_SHADER_TESS_CTRL; 369bf215546Sopenharmony_ci prog_data->base.base.ray_queries = nir->info.ray_queries; 370bf215546Sopenharmony_ci prog_data->base.base.total_scratch = 0; 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci nir->info.outputs_written = key->outputs_written; 373bf215546Sopenharmony_ci nir->info.patch_outputs_written = key->patch_outputs_written; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci struct brw_vue_map input_vue_map; 376bf215546Sopenharmony_ci brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read, 377bf215546Sopenharmony_ci nir->info.separate_shader, 1); 378bf215546Sopenharmony_ci brw_compute_tess_vue_map(&vue_prog_data->vue_map, 379bf215546Sopenharmony_ci nir->info.outputs_written, 380bf215546Sopenharmony_ci nir->info.patch_outputs_written); 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); 383bf215546Sopenharmony_ci brw_nir_lower_vue_inputs(nir, &input_vue_map); 384bf215546Sopenharmony_ci brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map, 385bf215546Sopenharmony_ci key->_tes_primitive_mode); 386bf215546Sopenharmony_ci if (key->quads_workaround) 387bf215546Sopenharmony_ci brw_nir_apply_tcs_quads_workaround(nir); 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled, 390bf215546Sopenharmony_ci key->base.robust_buffer_access); 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci bool has_primitive_id = 393bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci prog_data->patch_count_threshold = brw::get_patch_count_threshold(key->input_vertices); 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci if (compiler->use_tcs_8_patch && 398bf215546Sopenharmony_ci nir->info.tess.tcs_vertices_out <= (devinfo->ver >= 12 ? 32 : 16) && 399bf215546Sopenharmony_ci 2 + has_primitive_id + key->input_vertices <= (devinfo->ver >= 12 ? 63 : 31)) { 400bf215546Sopenharmony_ci /* 3DSTATE_HS imposes two constraints on using 8_PATCH mode. First, the 401bf215546Sopenharmony_ci * "Instance" field limits the number of output vertices to [1, 16] on 402bf215546Sopenharmony_ci * gfx11 and below, or [1, 32] on gfx12 and above. Secondly, the 403bf215546Sopenharmony_ci * "Dispatch GRF Start Register for URB Data" field is limited to [0, 404bf215546Sopenharmony_ci * 31] - which imposes a limit on the input vertices. 405bf215546Sopenharmony_ci */ 406bf215546Sopenharmony_ci vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_8_PATCH; 407bf215546Sopenharmony_ci prog_data->instances = nir->info.tess.tcs_vertices_out; 408bf215546Sopenharmony_ci prog_data->include_primitive_id = has_primitive_id; 409bf215546Sopenharmony_ci } else { 410bf215546Sopenharmony_ci unsigned verts_per_thread = is_scalar ? 8 : 2; 411bf215546Sopenharmony_ci vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_SINGLE_PATCH; 412bf215546Sopenharmony_ci prog_data->instances = 413bf215546Sopenharmony_ci DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, verts_per_thread); 414bf215546Sopenharmony_ci } 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci /* Compute URB entry size. The maximum allowed URB entry size is 32k. 417bf215546Sopenharmony_ci * That divides up as follows: 418bf215546Sopenharmony_ci * 419bf215546Sopenharmony_ci * 32 bytes for the patch header (tessellation factors) 420bf215546Sopenharmony_ci * 480 bytes for per-patch varyings (a varying component is 4 bytes and 421bf215546Sopenharmony_ci * gl_MaxTessPatchComponents = 120) 422bf215546Sopenharmony_ci * 16384 bytes for per-vertex varyings (a varying component is 4 bytes, 423bf215546Sopenharmony_ci * gl_MaxPatchVertices = 32 and 424bf215546Sopenharmony_ci * gl_MaxTessControlOutputComponents = 128) 425bf215546Sopenharmony_ci * 426bf215546Sopenharmony_ci * 15808 bytes left for varying packing overhead 427bf215546Sopenharmony_ci */ 428bf215546Sopenharmony_ci const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots; 429bf215546Sopenharmony_ci const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots; 430bf215546Sopenharmony_ci unsigned output_size_bytes = 0; 431bf215546Sopenharmony_ci /* Note that the patch header is counted in num_per_patch_slots. */ 432bf215546Sopenharmony_ci output_size_bytes += num_per_patch_slots * 16; 433bf215546Sopenharmony_ci output_size_bytes += nir->info.tess.tcs_vertices_out * 434bf215546Sopenharmony_ci num_per_vertex_slots * 16; 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci assert(output_size_bytes >= 1); 437bf215546Sopenharmony_ci if (output_size_bytes > GFX7_MAX_HS_URB_ENTRY_SIZE_BYTES) 438bf215546Sopenharmony_ci return NULL; 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci /* URB entry sizes are stored as a multiple of 64 bytes. */ 441bf215546Sopenharmony_ci vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64; 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci /* HS does not use the usual payload pushing from URB to GRFs, 444bf215546Sopenharmony_ci * because we don't have enough registers for a full-size payload, and 445bf215546Sopenharmony_ci * the hardware is broken on Haswell anyway. 446bf215546Sopenharmony_ci */ 447bf215546Sopenharmony_ci vue_prog_data->urb_read_length = 0; 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci if (unlikely(debug_enabled)) { 450bf215546Sopenharmony_ci fprintf(stderr, "TCS Input "); 451bf215546Sopenharmony_ci brw_print_vue_map(stderr, &input_vue_map, MESA_SHADER_TESS_CTRL); 452bf215546Sopenharmony_ci fprintf(stderr, "TCS Output "); 453bf215546Sopenharmony_ci brw_print_vue_map(stderr, &vue_prog_data->vue_map, MESA_SHADER_TESS_CTRL); 454bf215546Sopenharmony_ci } 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci if (is_scalar) { 457bf215546Sopenharmony_ci fs_visitor v(compiler, params->log_data, mem_ctx, &key->base, 458bf215546Sopenharmony_ci &prog_data->base.base, nir, 8, debug_enabled); 459bf215546Sopenharmony_ci if (!v.run_tcs()) { 460bf215546Sopenharmony_ci params->error_str = ralloc_strdup(mem_ctx, v.fail_msg); 461bf215546Sopenharmony_ci return NULL; 462bf215546Sopenharmony_ci } 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci fs_generator g(compiler, params->log_data, mem_ctx, 467bf215546Sopenharmony_ci &prog_data->base.base, false, MESA_SHADER_TESS_CTRL); 468bf215546Sopenharmony_ci if (unlikely(debug_enabled)) { 469bf215546Sopenharmony_ci g.enable_debug(ralloc_asprintf(mem_ctx, 470bf215546Sopenharmony_ci "%s tessellation control shader %s", 471bf215546Sopenharmony_ci nir->info.label ? nir->info.label 472bf215546Sopenharmony_ci : "unnamed", 473bf215546Sopenharmony_ci nir->info.name)); 474bf215546Sopenharmony_ci } 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci g.generate_code(v.cfg, 8, v.shader_stats, 477bf215546Sopenharmony_ci v.performance_analysis.require(), params->stats); 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci g.add_const_data(nir->constant_data, nir->constant_data_size); 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci assembly = g.get_assembly(); 482bf215546Sopenharmony_ci } else { 483bf215546Sopenharmony_ci brw::vec4_tcs_visitor v(compiler, params->log_data, key, prog_data, 484bf215546Sopenharmony_ci nir, mem_ctx, debug_enabled); 485bf215546Sopenharmony_ci if (!v.run()) { 486bf215546Sopenharmony_ci params->error_str = ralloc_strdup(mem_ctx, v.fail_msg); 487bf215546Sopenharmony_ci return NULL; 488bf215546Sopenharmony_ci } 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci if (INTEL_DEBUG(DEBUG_TCS)) 491bf215546Sopenharmony_ci v.dump_instructions(); 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci assembly = brw_vec4_generate_assembly(compiler, params->log_data, mem_ctx, nir, 495bf215546Sopenharmony_ci &prog_data->base, v.cfg, 496bf215546Sopenharmony_ci v.performance_analysis.require(), 497bf215546Sopenharmony_ci params->stats, debug_enabled); 498bf215546Sopenharmony_ci } 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci return assembly; 501bf215546Sopenharmony_ci} 502