1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2013 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci/**
25bf215546Sopenharmony_ci * \file brw_vec4_tes.cpp
26bf215546Sopenharmony_ci *
 * Tessellation evaluation shader specific code derived from the vec4_visitor class.
28bf215546Sopenharmony_ci */
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "brw_vec4_tes.h"
31bf215546Sopenharmony_ci#include "brw_cfg.h"
32bf215546Sopenharmony_ci#include "dev/intel_debug.h"
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_cinamespace brw {
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_civec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
37bf215546Sopenharmony_ci                                  void *log_data,
38bf215546Sopenharmony_ci                                  const struct brw_tes_prog_key *key,
39bf215546Sopenharmony_ci                                  struct brw_tes_prog_data *prog_data,
40bf215546Sopenharmony_ci                                  const nir_shader *shader,
41bf215546Sopenharmony_ci                                  void *mem_ctx,
42bf215546Sopenharmony_ci                                  bool debug_enabled)
43bf215546Sopenharmony_ci   : vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base,
44bf215546Sopenharmony_ci                  shader, mem_ctx, false, debug_enabled)
45bf215546Sopenharmony_ci{
46bf215546Sopenharmony_ci}
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_civoid
49bf215546Sopenharmony_civec4_tes_visitor::setup_payload()
50bf215546Sopenharmony_ci{
51bf215546Sopenharmony_ci   int reg = 0;
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   /* The payload always contains important data in r0 and r1, which contains
54bf215546Sopenharmony_ci    * the URB handles that are passed on to the URB write at the end
55bf215546Sopenharmony_ci    * of the thread.
56bf215546Sopenharmony_ci    */
57bf215546Sopenharmony_ci   reg += 2;
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ci   reg = setup_uniforms(reg);
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
62bf215546Sopenharmony_ci      for (int i = 0; i < 3; i++) {
63bf215546Sopenharmony_ci         if (inst->src[i].file != ATTR)
64bf215546Sopenharmony_ci            continue;
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci         unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
67bf215546Sopenharmony_ci         struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
68bf215546Sopenharmony_ci         grf = stride(grf, 0, 4, 1);
69bf215546Sopenharmony_ci         grf.swizzle = inst->src[i].swizzle;
70bf215546Sopenharmony_ci         grf.type = inst->src[i].type;
71bf215546Sopenharmony_ci         grf.abs = inst->src[i].abs;
72bf215546Sopenharmony_ci         grf.negate = inst->src[i].negate;
73bf215546Sopenharmony_ci         inst->src[i] = grf;
74bf215546Sopenharmony_ci      }
75bf215546Sopenharmony_ci   }
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   reg += 8 * prog_data->urb_read_length;
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci   this->first_non_payload_grf = reg;
80bf215546Sopenharmony_ci}
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_civoid
84bf215546Sopenharmony_civec4_tes_visitor::emit_prolog()
85bf215546Sopenharmony_ci{
86bf215546Sopenharmony_ci   input_read_header = src_reg(this, glsl_type::uvec4_type);
87bf215546Sopenharmony_ci   emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   this->current_annotation = NULL;
90bf215546Sopenharmony_ci}
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_civoid
94bf215546Sopenharmony_civec4_tes_visitor::emit_urb_write_header(int mrf)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci   /* No need to do anything for DS; an implied write to this MRF will be
97bf215546Sopenharmony_ci    * performed by VEC4_VS_OPCODE_URB_WRITE.
98bf215546Sopenharmony_ci    */
99bf215546Sopenharmony_ci   (void) mrf;
100bf215546Sopenharmony_ci}
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_civec4_instruction *
104bf215546Sopenharmony_civec4_tes_visitor::emit_urb_write_opcode(bool complete)
105bf215546Sopenharmony_ci{
106bf215546Sopenharmony_ci   vec4_instruction *inst = emit(VEC4_VS_OPCODE_URB_WRITE);
107bf215546Sopenharmony_ci   inst->urb_write_flags = complete ?
108bf215546Sopenharmony_ci      BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci   return inst;
111bf215546Sopenharmony_ci}
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_civoid
114bf215546Sopenharmony_civec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
115bf215546Sopenharmony_ci{
116bf215546Sopenharmony_ci   const struct brw_tes_prog_data *tes_prog_data =
117bf215546Sopenharmony_ci      (const struct brw_tes_prog_data *) prog_data;
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_ci   switch (instr->intrinsic) {
120bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_coord:
121bf215546Sopenharmony_ci      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
122bf215546Sopenharmony_ci      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
123bf215546Sopenharmony_ci               src_reg(brw_vec8_grf(1, 0))));
124bf215546Sopenharmony_ci      break;
125bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_level_outer:
126bf215546Sopenharmony_ci      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
127bf215546Sopenharmony_ci         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
128bf215546Sopenharmony_ci                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
129bf215546Sopenharmony_ci                          BRW_SWIZZLE_ZWZW)));
130bf215546Sopenharmony_ci      } else {
131bf215546Sopenharmony_ci         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
132bf215546Sopenharmony_ci                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
133bf215546Sopenharmony_ci                          BRW_SWIZZLE_WZYX)));
134bf215546Sopenharmony_ci      }
135bf215546Sopenharmony_ci      break;
136bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_level_inner:
137bf215546Sopenharmony_ci      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
138bf215546Sopenharmony_ci         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
139bf215546Sopenharmony_ci                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
140bf215546Sopenharmony_ci                          BRW_SWIZZLE_WZYX)));
141bf215546Sopenharmony_ci      } else {
142bf215546Sopenharmony_ci         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
143bf215546Sopenharmony_ci                  src_reg(ATTR, 1, glsl_type::float_type)));
144bf215546Sopenharmony_ci      }
145bf215546Sopenharmony_ci      break;
146bf215546Sopenharmony_ci   case nir_intrinsic_load_primitive_id:
147bf215546Sopenharmony_ci      emit(TES_OPCODE_GET_PRIMITIVE_ID,
148bf215546Sopenharmony_ci           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
149bf215546Sopenharmony_ci      break;
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci   case nir_intrinsic_load_input:
152bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_input: {
153bf215546Sopenharmony_ci      assert(nir_dest_bit_size(instr->dest) == 32);
154bf215546Sopenharmony_ci      src_reg indirect_offset = get_indirect_offset(instr);
155bf215546Sopenharmony_ci      unsigned imm_offset = instr->const_index[0];
156bf215546Sopenharmony_ci      src_reg header = input_read_header;
157bf215546Sopenharmony_ci      unsigned first_component = nir_intrinsic_component(instr);
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci      if (indirect_offset.file != BAD_FILE) {
160bf215546Sopenharmony_ci         src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type);
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci         /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
163bf215546Sopenharmony_ci          * valid range of the offset is [0, 0FFFFFFFh].
164bf215546Sopenharmony_ci          */
165bf215546Sopenharmony_ci         emit_minmax(BRW_CONDITIONAL_L,
166bf215546Sopenharmony_ci                     dst_reg(clamped_indirect_offset),
167bf215546Sopenharmony_ci                     retype(indirect_offset, BRW_REGISTER_TYPE_UD),
168bf215546Sopenharmony_ci                     brw_imm_ud(0x0fffffffu));
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci         header = src_reg(this, glsl_type::uvec4_type);
171bf215546Sopenharmony_ci         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
172bf215546Sopenharmony_ci              input_read_header, clamped_indirect_offset);
173bf215546Sopenharmony_ci      } else {
174bf215546Sopenharmony_ci         /* Arbitrarily only push up to 24 vec4 slots worth of data,
175bf215546Sopenharmony_ci          * which is 12 registers (since each holds 2 vec4 slots).
176bf215546Sopenharmony_ci          */
177bf215546Sopenharmony_ci         const unsigned max_push_slots = 24;
178bf215546Sopenharmony_ci         if (imm_offset < max_push_slots) {
179bf215546Sopenharmony_ci            src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
180bf215546Sopenharmony_ci            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci            emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), src));
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci            prog_data->urb_read_length =
185bf215546Sopenharmony_ci               MAX2(prog_data->urb_read_length,
186bf215546Sopenharmony_ci                    DIV_ROUND_UP(imm_offset + 1, 2));
187bf215546Sopenharmony_ci            break;
188bf215546Sopenharmony_ci         }
189bf215546Sopenharmony_ci      }
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci      dst_reg temp(this, glsl_type::ivec4_type);
192bf215546Sopenharmony_ci      vec4_instruction *read =
193bf215546Sopenharmony_ci         emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
194bf215546Sopenharmony_ci      read->offset = imm_offset;
195bf215546Sopenharmony_ci      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci      src_reg src = src_reg(temp);
198bf215546Sopenharmony_ci      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci      /* Copy to target.  We might end up with some funky writemasks landing
201bf215546Sopenharmony_ci       * in here, but we really don't want them in the above pseudo-ops.
202bf215546Sopenharmony_ci       */
203bf215546Sopenharmony_ci      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
204bf215546Sopenharmony_ci      dst.writemask = brw_writemask_for_size(instr->num_components);
205bf215546Sopenharmony_ci      emit(MOV(dst, src));
206bf215546Sopenharmony_ci      break;
207bf215546Sopenharmony_ci   }
208bf215546Sopenharmony_ci   default:
209bf215546Sopenharmony_ci      vec4_visitor::nir_emit_intrinsic(instr);
210bf215546Sopenharmony_ci   }
211bf215546Sopenharmony_ci}
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_civoid
215bf215546Sopenharmony_civec4_tes_visitor::emit_thread_end()
216bf215546Sopenharmony_ci{
217bf215546Sopenharmony_ci   /* For DS, we always end the thread by emitting a single vertex.
218bf215546Sopenharmony_ci    * emit_urb_write_opcode() will take care of setting the eot flag on the
219bf215546Sopenharmony_ci    * SEND instruction.
220bf215546Sopenharmony_ci    */
221bf215546Sopenharmony_ci   emit_vertex();
222bf215546Sopenharmony_ci}
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci} /* namespace brw */
225