1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2013 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
/**
 * \file brw_vec4_tcs.cpp
 *
 * Tessellation control shader specific code derived from the vec4_visitor
 * class.
 */
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "brw_nir.h"
31bf215546Sopenharmony_ci#include "brw_vec4_tcs.h"
32bf215546Sopenharmony_ci#include "brw_fs.h"
33bf215546Sopenharmony_ci#include "dev/intel_debug.h"
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_cinamespace brw {
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_civec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler,
38bf215546Sopenharmony_ci                                   void *log_data,
39bf215546Sopenharmony_ci                                   const struct brw_tcs_prog_key *key,
40bf215546Sopenharmony_ci                                   struct brw_tcs_prog_data *prog_data,
41bf215546Sopenharmony_ci                                   const nir_shader *nir,
42bf215546Sopenharmony_ci                                   void *mem_ctx,
43bf215546Sopenharmony_ci                                   bool debug_enabled)
44bf215546Sopenharmony_ci   : vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base,
45bf215546Sopenharmony_ci                  nir, mem_ctx, false, debug_enabled),
46bf215546Sopenharmony_ci     key(key)
47bf215546Sopenharmony_ci{
48bf215546Sopenharmony_ci}
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_civoid
52bf215546Sopenharmony_civec4_tcs_visitor::setup_payload()
53bf215546Sopenharmony_ci{
54bf215546Sopenharmony_ci   int reg = 0;
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci   /* The payload always contains important data in r0, which contains
57bf215546Sopenharmony_ci    * the URB handles that are passed on to the URB write at the end
58bf215546Sopenharmony_ci    * of the thread.
59bf215546Sopenharmony_ci    */
60bf215546Sopenharmony_ci   reg++;
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci   /* r1.0 - r4.7 may contain the input control point URB handles,
63bf215546Sopenharmony_ci    * which we use to pull vertex data.
64bf215546Sopenharmony_ci    */
65bf215546Sopenharmony_ci   reg += 4;
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   /* Push constants may start at r5.0 */
68bf215546Sopenharmony_ci   reg = setup_uniforms(reg);
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci   this->first_non_payload_grf = reg;
71bf215546Sopenharmony_ci}
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_civoid
75bf215546Sopenharmony_civec4_tcs_visitor::emit_prolog()
76bf215546Sopenharmony_ci{
77bf215546Sopenharmony_ci   invocation_id = src_reg(this, glsl_type::uint_type);
78bf215546Sopenharmony_ci   emit(TCS_OPCODE_GET_INSTANCE_ID, dst_reg(invocation_id));
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   /* HS threads are dispatched with the dispatch mask set to 0xFF.
81bf215546Sopenharmony_ci    * If there are an odd number of output vertices, then the final
82bf215546Sopenharmony_ci    * HS instance dispatched will only have its bottom half doing real
83bf215546Sopenharmony_ci    * work, and so we need to disable the upper half:
84bf215546Sopenharmony_ci    */
85bf215546Sopenharmony_ci   if (nir->info.tess.tcs_vertices_out % 2) {
86bf215546Sopenharmony_ci      emit(CMP(dst_null_d(), invocation_id,
87bf215546Sopenharmony_ci               brw_imm_ud(nir->info.tess.tcs_vertices_out),
88bf215546Sopenharmony_ci               BRW_CONDITIONAL_L));
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci      /* Matching ENDIF is in emit_thread_end() */
91bf215546Sopenharmony_ci      emit(IF(BRW_PREDICATE_NORMAL));
92bf215546Sopenharmony_ci   }
93bf215546Sopenharmony_ci}
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_civoid
97bf215546Sopenharmony_civec4_tcs_visitor::emit_thread_end()
98bf215546Sopenharmony_ci{
99bf215546Sopenharmony_ci   vec4_instruction *inst;
100bf215546Sopenharmony_ci   current_annotation = "thread end";
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci   if (nir->info.tess.tcs_vertices_out % 2) {
103bf215546Sopenharmony_ci      emit(BRW_OPCODE_ENDIF);
104bf215546Sopenharmony_ci   }
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   if (devinfo->ver == 7) {
107bf215546Sopenharmony_ci      struct brw_tcs_prog_data *tcs_prog_data =
108bf215546Sopenharmony_ci         (struct brw_tcs_prog_data *) prog_data;
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci      current_annotation = "release input vertices";
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci      /* Synchronize all threads, so we know that no one is still
113bf215546Sopenharmony_ci       * using the input URB handles.
114bf215546Sopenharmony_ci       */
115bf215546Sopenharmony_ci      if (tcs_prog_data->instances > 1) {
116bf215546Sopenharmony_ci         dst_reg header = dst_reg(this, glsl_type::uvec4_type);
117bf215546Sopenharmony_ci         emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
118bf215546Sopenharmony_ci         emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
119bf215546Sopenharmony_ci      }
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci      /* Make thread 0 (invocations <1, 0>) release pairs of ICP handles.
122bf215546Sopenharmony_ci       * We want to compare the bottom half of invocation_id with 0, but
123bf215546Sopenharmony_ci       * use that truth value for the top half as well.  Unfortunately,
124bf215546Sopenharmony_ci       * we don't have stride in the vec4 world, nor UV immediates in
125bf215546Sopenharmony_ci       * align16, so we need an opcode to get invocation_id<0,4,0>.
126bf215546Sopenharmony_ci       */
127bf215546Sopenharmony_ci      set_condmod(BRW_CONDITIONAL_Z,
128bf215546Sopenharmony_ci                  emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(),
129bf215546Sopenharmony_ci                       invocation_id));
130bf215546Sopenharmony_ci      emit(IF(BRW_PREDICATE_NORMAL));
131bf215546Sopenharmony_ci      for (unsigned i = 0; i < key->input_vertices; i += 2) {
132bf215546Sopenharmony_ci         /* If we have an odd number of input vertices, the last will be
133bf215546Sopenharmony_ci          * unpaired.  We don't want to use an interleaved URB write in
134bf215546Sopenharmony_ci          * that case.
135bf215546Sopenharmony_ci          */
136bf215546Sopenharmony_ci         const bool is_unpaired = i == key->input_vertices - 1;
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci         dst_reg header(this, glsl_type::uvec4_type);
139bf215546Sopenharmony_ci         emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i),
140bf215546Sopenharmony_ci              brw_imm_ud(is_unpaired));
141bf215546Sopenharmony_ci      }
142bf215546Sopenharmony_ci      emit(BRW_OPCODE_ENDIF);
143bf215546Sopenharmony_ci   }
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci   inst = emit(TCS_OPCODE_THREAD_END);
146bf215546Sopenharmony_ci   inst->base_mrf = 14;
147bf215546Sopenharmony_ci   inst->mlen = 2;
148bf215546Sopenharmony_ci}
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_civoid
152bf215546Sopenharmony_civec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
153bf215546Sopenharmony_ci                                      const src_reg &vertex_index,
154bf215546Sopenharmony_ci                                      unsigned base_offset,
155bf215546Sopenharmony_ci                                      unsigned first_component,
156bf215546Sopenharmony_ci                                      const src_reg &indirect_offset)
157bf215546Sopenharmony_ci{
158bf215546Sopenharmony_ci   vec4_instruction *inst;
159bf215546Sopenharmony_ci   dst_reg temp(this, glsl_type::ivec4_type);
160bf215546Sopenharmony_ci   temp.type = dst.type;
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   /* Set up the message header to reference the proper parts of the URB */
163bf215546Sopenharmony_ci   dst_reg header = dst_reg(this, glsl_type::uvec4_type);
164bf215546Sopenharmony_ci   inst = emit(VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index,
165bf215546Sopenharmony_ci               indirect_offset);
166bf215546Sopenharmony_ci   inst->force_writemask_all = true;
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   /* Read into a temporary, ignoring writemasking. */
169bf215546Sopenharmony_ci   inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
170bf215546Sopenharmony_ci   inst->offset = base_offset;
171bf215546Sopenharmony_ci   inst->mlen = 1;
172bf215546Sopenharmony_ci   inst->base_mrf = -1;
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci   /* Copy the temporary to the destination to deal with writemasking.
175bf215546Sopenharmony_ci    *
176bf215546Sopenharmony_ci    * Also attempt to deal with gl_PointSize being in the .w component.
177bf215546Sopenharmony_ci    */
178bf215546Sopenharmony_ci   if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
179bf215546Sopenharmony_ci      emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
180bf215546Sopenharmony_ci   } else {
181bf215546Sopenharmony_ci      src_reg src = src_reg(temp);
182bf215546Sopenharmony_ci      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
183bf215546Sopenharmony_ci      emit(MOV(dst, src));
184bf215546Sopenharmony_ci   }
185bf215546Sopenharmony_ci}
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_civoid
188bf215546Sopenharmony_civec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
189bf215546Sopenharmony_ci                                       unsigned base_offset,
190bf215546Sopenharmony_ci                                       unsigned first_component,
191bf215546Sopenharmony_ci                                       const src_reg &indirect_offset)
192bf215546Sopenharmony_ci{
193bf215546Sopenharmony_ci   vec4_instruction *inst;
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   /* Set up the message header to reference the proper parts of the URB */
196bf215546Sopenharmony_ci   dst_reg header = dst_reg(this, glsl_type::uvec4_type);
197bf215546Sopenharmony_ci   inst = emit(VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
198bf215546Sopenharmony_ci               brw_imm_ud(dst.writemask << first_component), indirect_offset);
199bf215546Sopenharmony_ci   inst->force_writemask_all = true;
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
202bf215546Sopenharmony_ci   read->offset = base_offset;
203bf215546Sopenharmony_ci   read->mlen = 1;
204bf215546Sopenharmony_ci   read->base_mrf = -1;
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   if (first_component) {
207bf215546Sopenharmony_ci      /* Read into a temporary and copy with a swizzle and writemask. */
208bf215546Sopenharmony_ci      read->dst = retype(dst_reg(this, glsl_type::ivec4_type), dst.type);
209bf215546Sopenharmony_ci      emit(MOV(dst, swizzle(src_reg(read->dst),
210bf215546Sopenharmony_ci                            BRW_SWZ_COMP_INPUT(first_component))));
211bf215546Sopenharmony_ci   }
212bf215546Sopenharmony_ci}
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_civoid
215bf215546Sopenharmony_civec4_tcs_visitor::emit_urb_write(const src_reg &value,
216bf215546Sopenharmony_ci                                 unsigned writemask,
217bf215546Sopenharmony_ci                                 unsigned base_offset,
218bf215546Sopenharmony_ci                                 const src_reg &indirect_offset)
219bf215546Sopenharmony_ci{
220bf215546Sopenharmony_ci   if (writemask == 0)
221bf215546Sopenharmony_ci      return;
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci   src_reg message(this, glsl_type::uvec4_type, 2);
224bf215546Sopenharmony_ci   vec4_instruction *inst;
225bf215546Sopenharmony_ci
226bf215546Sopenharmony_ci   inst = emit(VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message),
227bf215546Sopenharmony_ci               brw_imm_ud(writemask), indirect_offset);
228bf215546Sopenharmony_ci   inst->force_writemask_all = true;
229bf215546Sopenharmony_ci   inst = emit(MOV(byte_offset(dst_reg(retype(message, value.type)), REG_SIZE),
230bf215546Sopenharmony_ci                   value));
231bf215546Sopenharmony_ci   inst->force_writemask_all = true;
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci   inst = emit(VEC4_TCS_OPCODE_URB_WRITE, dst_null_f(), message);
234bf215546Sopenharmony_ci   inst->offset = base_offset;
235bf215546Sopenharmony_ci   inst->mlen = 2;
236bf215546Sopenharmony_ci   inst->base_mrf = -1;
237bf215546Sopenharmony_ci}
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_civoid
240bf215546Sopenharmony_civec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
241bf215546Sopenharmony_ci{
242bf215546Sopenharmony_ci   switch (instr->intrinsic) {
243bf215546Sopenharmony_ci   case nir_intrinsic_load_invocation_id:
244bf215546Sopenharmony_ci      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD),
245bf215546Sopenharmony_ci               invocation_id));
246bf215546Sopenharmony_ci      break;
247bf215546Sopenharmony_ci   case nir_intrinsic_load_primitive_id:
248bf215546Sopenharmony_ci      emit(TCS_OPCODE_GET_PRIMITIVE_ID,
249bf215546Sopenharmony_ci           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
250bf215546Sopenharmony_ci      break;
251bf215546Sopenharmony_ci   case nir_intrinsic_load_patch_vertices_in:
252bf215546Sopenharmony_ci      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
253bf215546Sopenharmony_ci               brw_imm_d(key->input_vertices)));
254bf215546Sopenharmony_ci      break;
255bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_input: {
256bf215546Sopenharmony_ci      assert(nir_dest_bit_size(instr->dest) == 32);
257bf215546Sopenharmony_ci      src_reg indirect_offset = get_indirect_offset(instr);
258bf215546Sopenharmony_ci      unsigned imm_offset = instr->const_index[0];
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci      src_reg vertex_index = retype(get_nir_src_imm(instr->src[0]),
261bf215546Sopenharmony_ci                                    BRW_REGISTER_TYPE_UD);
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci      unsigned first_component = nir_intrinsic_component(instr);
264bf215546Sopenharmony_ci      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
265bf215546Sopenharmony_ci      dst.writemask = brw_writemask_for_size(instr->num_components);
266bf215546Sopenharmony_ci      emit_input_urb_read(dst, vertex_index, imm_offset,
267bf215546Sopenharmony_ci                          first_component, indirect_offset);
268bf215546Sopenharmony_ci      break;
269bf215546Sopenharmony_ci   }
270bf215546Sopenharmony_ci   case nir_intrinsic_load_input:
271bf215546Sopenharmony_ci      unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
272bf215546Sopenharmony_ci      break;
273bf215546Sopenharmony_ci   case nir_intrinsic_load_output:
274bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_output: {
275bf215546Sopenharmony_ci      src_reg indirect_offset = get_indirect_offset(instr);
276bf215546Sopenharmony_ci      unsigned imm_offset = instr->const_index[0];
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
279bf215546Sopenharmony_ci      dst.writemask = brw_writemask_for_size(instr->num_components);
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci      emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
282bf215546Sopenharmony_ci                           indirect_offset);
283bf215546Sopenharmony_ci      break;
284bf215546Sopenharmony_ci   }
285bf215546Sopenharmony_ci   case nir_intrinsic_store_output:
286bf215546Sopenharmony_ci   case nir_intrinsic_store_per_vertex_output: {
287bf215546Sopenharmony_ci      assert(nir_src_bit_size(instr->src[0]) == 32);
288bf215546Sopenharmony_ci      src_reg value = get_nir_src(instr->src[0]);
289bf215546Sopenharmony_ci      unsigned mask = instr->const_index[1];
290bf215546Sopenharmony_ci      unsigned swiz = BRW_SWIZZLE_XYZW;
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci      src_reg indirect_offset = get_indirect_offset(instr);
293bf215546Sopenharmony_ci      unsigned imm_offset = instr->const_index[0];
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci      unsigned first_component = nir_intrinsic_component(instr);
296bf215546Sopenharmony_ci      if (first_component) {
297bf215546Sopenharmony_ci         assert(swiz == BRW_SWIZZLE_XYZW);
298bf215546Sopenharmony_ci         swiz = BRW_SWZ_COMP_OUTPUT(first_component);
299bf215546Sopenharmony_ci         mask = mask << first_component;
300bf215546Sopenharmony_ci      }
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci      emit_urb_write(swizzle(value, swiz), mask,
303bf215546Sopenharmony_ci                     imm_offset, indirect_offset);
304bf215546Sopenharmony_ci      break;
305bf215546Sopenharmony_ci   }
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci   case nir_intrinsic_control_barrier: {
308bf215546Sopenharmony_ci      dst_reg header = dst_reg(this, glsl_type::uvec4_type);
309bf215546Sopenharmony_ci      emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
310bf215546Sopenharmony_ci      emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
311bf215546Sopenharmony_ci      break;
312bf215546Sopenharmony_ci   }
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   case nir_intrinsic_memory_barrier_tcs_patch:
315bf215546Sopenharmony_ci      break;
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci   default:
318bf215546Sopenharmony_ci      vec4_visitor::nir_emit_intrinsic(instr);
319bf215546Sopenharmony_ci   }
320bf215546Sopenharmony_ci}
321bf215546Sopenharmony_ci
/**
 * Return the number of patches to accumulate before an 8_PATCH mode thread is
 * launched.  With many input control points and a large amount of VS outputs,
 * the VS URB space needed to hold a full 8 patches worth of data can be
 * prohibitive, so launching threads early can be beneficial.
 *
 * See the 3DSTATE_HS::Patch Count Threshold documentation for the recommended
 * values.  A return value of 0 "disables" early dispatch, i.e. the hardware
 * waits for a full 8 patches as normal.
 *
 * \param input_control_points  number of input control points per patch
 */
static int
get_patch_count_threshold(int input_control_points)
{
   struct threshold_entry {
      int max_control_points;
      int patch_count;
   };

   /* Ordered by ascending control point limit; the first row whose limit
    * covers the input wins.
    */
   static const threshold_entry thresholds[] = {
      {  4, 0 },
      {  6, 5 },
      {  8, 4 },
      { 10, 3 },
      { 14, 2 },
   };

   for (const threshold_entry &entry : thresholds) {
      if (input_control_points <= entry.max_control_points)
         return entry.patch_count;
   }

   /* Return patch count 1 for PATCHLIST_15 - PATCHLIST_32 */
   return 1;
}
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci} /* namespace brw */
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ciextern "C" const unsigned *
354bf215546Sopenharmony_cibrw_compile_tcs(const struct brw_compiler *compiler,
355bf215546Sopenharmony_ci                void *mem_ctx,
356bf215546Sopenharmony_ci                struct brw_compile_tcs_params *params)
357bf215546Sopenharmony_ci{
358bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = compiler->devinfo;
359bf215546Sopenharmony_ci   nir_shader *nir = params->nir;
360bf215546Sopenharmony_ci   const struct brw_tcs_prog_key *key = params->key;
361bf215546Sopenharmony_ci   struct brw_tcs_prog_data *prog_data = params->prog_data;
362bf215546Sopenharmony_ci   struct brw_vue_prog_data *vue_prog_data = &prog_data->base;
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci   const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL];
365bf215546Sopenharmony_ci   const bool debug_enabled = INTEL_DEBUG(DEBUG_TCS);
366bf215546Sopenharmony_ci   const unsigned *assembly;
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci   vue_prog_data->base.stage = MESA_SHADER_TESS_CTRL;
369bf215546Sopenharmony_ci   prog_data->base.base.ray_queries = nir->info.ray_queries;
370bf215546Sopenharmony_ci   prog_data->base.base.total_scratch = 0;
371bf215546Sopenharmony_ci
372bf215546Sopenharmony_ci   nir->info.outputs_written = key->outputs_written;
373bf215546Sopenharmony_ci   nir->info.patch_outputs_written = key->patch_outputs_written;
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci   struct brw_vue_map input_vue_map;
376bf215546Sopenharmony_ci   brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
377bf215546Sopenharmony_ci                       nir->info.separate_shader, 1);
378bf215546Sopenharmony_ci   brw_compute_tess_vue_map(&vue_prog_data->vue_map,
379bf215546Sopenharmony_ci                            nir->info.outputs_written,
380bf215546Sopenharmony_ci                            nir->info.patch_outputs_written);
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci   brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
383bf215546Sopenharmony_ci   brw_nir_lower_vue_inputs(nir, &input_vue_map);
384bf215546Sopenharmony_ci   brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
385bf215546Sopenharmony_ci                             key->_tes_primitive_mode);
386bf215546Sopenharmony_ci   if (key->quads_workaround)
387bf215546Sopenharmony_ci      brw_nir_apply_tcs_quads_workaround(nir);
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci   brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled,
390bf215546Sopenharmony_ci                       key->base.robust_buffer_access);
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci   bool has_primitive_id =
393bf215546Sopenharmony_ci      BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_ci   prog_data->patch_count_threshold = brw::get_patch_count_threshold(key->input_vertices);
396bf215546Sopenharmony_ci
397bf215546Sopenharmony_ci   if (compiler->use_tcs_8_patch &&
398bf215546Sopenharmony_ci       nir->info.tess.tcs_vertices_out <= (devinfo->ver >= 12 ? 32 : 16) &&
399bf215546Sopenharmony_ci       2 + has_primitive_id + key->input_vertices <= (devinfo->ver >= 12 ? 63 : 31)) {
400bf215546Sopenharmony_ci      /* 3DSTATE_HS imposes two constraints on using 8_PATCH mode. First, the
401bf215546Sopenharmony_ci       * "Instance" field limits the number of output vertices to [1, 16] on
402bf215546Sopenharmony_ci       * gfx11 and below, or [1, 32] on gfx12 and above. Secondly, the
403bf215546Sopenharmony_ci       * "Dispatch GRF Start Register for URB Data" field is limited to [0,
404bf215546Sopenharmony_ci       * 31] - which imposes a limit on the input vertices.
405bf215546Sopenharmony_ci       */
406bf215546Sopenharmony_ci      vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_8_PATCH;
407bf215546Sopenharmony_ci      prog_data->instances = nir->info.tess.tcs_vertices_out;
408bf215546Sopenharmony_ci      prog_data->include_primitive_id = has_primitive_id;
409bf215546Sopenharmony_ci   } else {
410bf215546Sopenharmony_ci      unsigned verts_per_thread = is_scalar ? 8 : 2;
411bf215546Sopenharmony_ci      vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_SINGLE_PATCH;
412bf215546Sopenharmony_ci      prog_data->instances =
413bf215546Sopenharmony_ci         DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, verts_per_thread);
414bf215546Sopenharmony_ci   }
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci   /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
417bf215546Sopenharmony_ci    * That divides up as follows:
418bf215546Sopenharmony_ci    *
419bf215546Sopenharmony_ci    *     32 bytes for the patch header (tessellation factors)
420bf215546Sopenharmony_ci    *    480 bytes for per-patch varyings (a varying component is 4 bytes and
421bf215546Sopenharmony_ci    *              gl_MaxTessPatchComponents = 120)
422bf215546Sopenharmony_ci    *  16384 bytes for per-vertex varyings (a varying component is 4 bytes,
423bf215546Sopenharmony_ci    *              gl_MaxPatchVertices = 32 and
424bf215546Sopenharmony_ci    *              gl_MaxTessControlOutputComponents = 128)
425bf215546Sopenharmony_ci    *
426bf215546Sopenharmony_ci    *  15808 bytes left for varying packing overhead
427bf215546Sopenharmony_ci    */
428bf215546Sopenharmony_ci   const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots;
429bf215546Sopenharmony_ci   const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots;
430bf215546Sopenharmony_ci   unsigned output_size_bytes = 0;
431bf215546Sopenharmony_ci   /* Note that the patch header is counted in num_per_patch_slots. */
432bf215546Sopenharmony_ci   output_size_bytes += num_per_patch_slots * 16;
433bf215546Sopenharmony_ci   output_size_bytes += nir->info.tess.tcs_vertices_out *
434bf215546Sopenharmony_ci                        num_per_vertex_slots * 16;
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci   assert(output_size_bytes >= 1);
437bf215546Sopenharmony_ci   if (output_size_bytes > GFX7_MAX_HS_URB_ENTRY_SIZE_BYTES)
438bf215546Sopenharmony_ci      return NULL;
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci   /* URB entry sizes are stored as a multiple of 64 bytes. */
441bf215546Sopenharmony_ci   vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_ci   /* HS does not use the usual payload pushing from URB to GRFs,
444bf215546Sopenharmony_ci    * because we don't have enough registers for a full-size payload, and
445bf215546Sopenharmony_ci    * the hardware is broken on Haswell anyway.
446bf215546Sopenharmony_ci    */
447bf215546Sopenharmony_ci   vue_prog_data->urb_read_length = 0;
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci   if (unlikely(debug_enabled)) {
450bf215546Sopenharmony_ci      fprintf(stderr, "TCS Input ");
451bf215546Sopenharmony_ci      brw_print_vue_map(stderr, &input_vue_map, MESA_SHADER_TESS_CTRL);
452bf215546Sopenharmony_ci      fprintf(stderr, "TCS Output ");
453bf215546Sopenharmony_ci      brw_print_vue_map(stderr, &vue_prog_data->vue_map, MESA_SHADER_TESS_CTRL);
454bf215546Sopenharmony_ci   }
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci   if (is_scalar) {
457bf215546Sopenharmony_ci      fs_visitor v(compiler, params->log_data, mem_ctx, &key->base,
458bf215546Sopenharmony_ci                   &prog_data->base.base, nir, 8, debug_enabled);
459bf215546Sopenharmony_ci      if (!v.run_tcs()) {
460bf215546Sopenharmony_ci         params->error_str = ralloc_strdup(mem_ctx, v.fail_msg);
461bf215546Sopenharmony_ci         return NULL;
462bf215546Sopenharmony_ci      }
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ci      prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci      fs_generator g(compiler, params->log_data, mem_ctx,
467bf215546Sopenharmony_ci                     &prog_data->base.base, false, MESA_SHADER_TESS_CTRL);
468bf215546Sopenharmony_ci      if (unlikely(debug_enabled)) {
469bf215546Sopenharmony_ci         g.enable_debug(ralloc_asprintf(mem_ctx,
470bf215546Sopenharmony_ci                                        "%s tessellation control shader %s",
471bf215546Sopenharmony_ci                                        nir->info.label ? nir->info.label
472bf215546Sopenharmony_ci                                                        : "unnamed",
473bf215546Sopenharmony_ci                                        nir->info.name));
474bf215546Sopenharmony_ci      }
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_ci      g.generate_code(v.cfg, 8, v.shader_stats,
477bf215546Sopenharmony_ci                      v.performance_analysis.require(), params->stats);
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_ci      g.add_const_data(nir->constant_data, nir->constant_data_size);
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci      assembly = g.get_assembly();
482bf215546Sopenharmony_ci   } else {
483bf215546Sopenharmony_ci      brw::vec4_tcs_visitor v(compiler, params->log_data, key, prog_data,
484bf215546Sopenharmony_ci                              nir, mem_ctx, debug_enabled);
485bf215546Sopenharmony_ci      if (!v.run()) {
486bf215546Sopenharmony_ci         params->error_str = ralloc_strdup(mem_ctx, v.fail_msg);
487bf215546Sopenharmony_ci         return NULL;
488bf215546Sopenharmony_ci      }
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci      if (INTEL_DEBUG(DEBUG_TCS))
491bf215546Sopenharmony_ci         v.dump_instructions();
492bf215546Sopenharmony_ci
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci      assembly = brw_vec4_generate_assembly(compiler, params->log_data, mem_ctx, nir,
495bf215546Sopenharmony_ci                                            &prog_data->base, v.cfg,
496bf215546Sopenharmony_ci                                            v.performance_analysis.require(),
497bf215546Sopenharmony_ci                                            params->stats, debug_enabled);
498bf215546Sopenharmony_ci   }
499bf215546Sopenharmony_ci
500bf215546Sopenharmony_ci   return assembly;
501bf215546Sopenharmony_ci}
502