/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_H
#define BRW_VEC4_H

#include "brw_shader.h"

#ifdef __cplusplus
#include "brw_ir_vec4.h"
#include "brw_ir_performance.h"
#include "brw_vec4_builder.h"
#include "brw_vec4_live_variables.h"
#endif

#include "compiler/glsl/ir.h"
#include "compiler/nir/nir.h"


#ifdef __cplusplus
extern "C" {
#endif

const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
                           void *log_data,
                           void *mem_ctx,
                           const nir_shader *nir,
                           struct brw_vue_prog_data *prog_data,
                           const struct cfg_t *cfg,
                           const brw::performance &perf,
                           struct brw_compile_stats *stats,
                           bool debug_enabled);

#ifdef __cplusplus
} /* extern "C" */

namespace brw {
/**
 * The vec4 shader front-end.
 *
 * Translates NIR (whether generated from GLSL, ARB_vertex_program, or
 * fixed-function state) into the vec4 backend IR.
 */
class vec4_visitor : public backend_shader
{
public:
   vec4_visitor(const struct brw_compiler *compiler,
                void *log_data,
                const struct brw_sampler_prog_key_data *key,
                struct brw_vue_prog_data *prog_data,
                const nir_shader *shader,
                void *mem_ctx,
                bool no_spills,
                bool debug_enabled);

   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_df()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   const struct brw_sampler_prog_key_data * const key_tex;
   struct brw_vue_prog_data * const prog_data;
   char *fail_msg;
   bool failed;

   /**
    * Source-level IR node currently being processed, associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   int first_non_payload_grf;
   unsigned ubo_push_start[4];
   unsigned push_length;
   unsigned int max_grf;
   brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis;
   brw_analysis<brw::performance, vec4_visitor> performance_analysis;

   bool need_all_constants_in_pull_buffer;

   /* Regs for vertex results, indexed by varying slot and component.
    * Set up for the output locations the shader actually uses.
    */
   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
   int uniforms;

   src_reg shader_start_time;

   bool run();
   void fail(const char *msg, ...);

   int setup_uniforms(int payload_reg);

   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(unsigned spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void split_uniform_registers();
   void setup_push_ranges();
   virtual void invalidate_analysis(brw::analysis_dependency_class c);
   void split_virtual_grfs();
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   bool opt_cmod_propagation();
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();
   void convert_to_hw_regs();
   void fixup_3src_null_dest();

   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
   bool lower_simd_width();
   bool scalarize_df();
   bool lower_64bit_mad_to_mul_add();
   void apply_logical_swizzle(struct brw_reg *hw_reg,
                              vec4_instruction *inst, int arg);

   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
   EMIT1(DIM)

#undef EMIT1
#undef EMIT2
#undef EMIT3
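
   /*
    * For reference, EMIT2(ADD) above expands to
    *
    *    vec4_instruction *ADD(const dst_reg &, const src_reg &, const src_reg &);
    *
    * Each of these helpers builds a vec4_instruction for the matching opcode.
    * A minimal usage sketch (with hypothetical dst/src0/src1 registers),
    * assuming the usual pattern of passing the result to emit():
    *
    *    emit(ADD(dst, src0, src1));
    *    emit(MOV(dst, src0))->saturate = true;
    *
    * The returned pointer lets callers set fields such as saturate or the
    * conditional mod on the newly created instruction.
    */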

   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);

   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);
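
   /* A usage sketch (hypothetical, for illustration only): forcing a
    * possibly non-uniform surface index to a single value before use,
    * with the get_nir_src() helper declared further below:
    *
    *    src_reg surface = emit_uniformize(get_nir_src(instr->src[0]));
    */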

   /** Fix all float operands of a 3-source instruction. */
   void fix_float_operands(src_reg op[3], nir_alu_instr *instr);

   src_reg fix_3src_operand(const src_reg &src);

   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg surface);

   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
   virtual void emit_urb_slot(dst_reg reg, int varying);

   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   void resolve_ud_negate(src_reg *reg);

   bool lower_minmax();

   src_reg get_timestamp();

   void dump_instruction(const backend_instruction *inst) const;
   void dump_instruction(const backend_instruction *inst, FILE *file) const;

   bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);

   void emit_conversion_from_double(dst_reg dst, src_reg src);
   void emit_conversion_to_double(dst_reg dst, src_reg src);

   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
                                        bool for_write,
                                        bool for_scratch = false,
                                        bblock_t *block = NULL,
                                        vec4_instruction *ref = NULL);

   virtual void emit_nir_code();
   virtual void nir_setup_uniforms();
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);
   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);

   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
   dst_reg get_nir_dest(const nir_dest &dest);
   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src,
                       unsigned num_components = 4);
   src_reg get_nir_src_imm(const nir_src &src);
   src_reg get_indirect_offset(nir_intrinsic_instr *instr);

   dst_reg *nir_locals;
   dst_reg *nir_ssa_values;

protected:
   void emit_vertex();
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();
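
   /* vec4_visitor is abstract: each per-stage visitor derives from it and
    * implements the pure virtual hooks above.  A purely illustrative,
    * hypothetical sketch (not part of this header):
    *
    *    class vec4_example_visitor : public vec4_visitor
    *    {
    *    protected:
    *       virtual void setup_payload() { ... }
    *       virtual void emit_prolog() { ... }
    *       virtual void emit_thread_end() { ... }
    *       virtual void emit_urb_write_header(int mrf) { ... }
    *       virtual vec4_instruction *emit_urb_write_opcode(bool complete)
    *       { ... }
    *    };
    */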

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};

} /* namespace brw */
#endif /* __cplusplus */

#endif /* BRW_VEC4_H */