/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef ACO_SHADER_INFO_H
#define ACO_SHADER_INFO_H

/* bool and the fixed-width integer types are used directly by the
 * declarations in this header, so include their standard headers here.
 */
#include <stdbool.h>
#include <stdint.h>

#include "shader_enums.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Array-size limits used by the structures declared in this header. */
#define ACO_MAX_SO_OUTPUTS 64
#define ACO_MAX_SO_BUFFERS 4
#define ACO_MAX_VERTEX_ATTRIBS 32
#define ACO_MAX_VBS 32

41struct aco_vs_input_state {
42   uint32_t instance_rate_inputs;
43   uint32_t nontrivial_divisors;
44   uint32_t post_shuffle;
45   /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
46    * using bitwise arithmetic.
47    */
48   uint32_t alpha_adjust_lo;
49   uint32_t alpha_adjust_hi;
50
51   uint32_t divisors[ACO_MAX_VERTEX_ATTRIBS];
52   uint8_t formats[ACO_MAX_VERTEX_ATTRIBS];
53};
54
55struct aco_vs_prolog_key {
56   struct aco_vs_input_state state;
57   unsigned num_attributes;
58   uint32_t misaligned_mask;
59   bool is_ngg;
60   gl_shader_stage next_stage;
61};
62
/* Key for a pixel-shader epilog: one 32-bit colour-format word plus three
 * 8-bit per-MRT bitmasks.
 */
struct aco_ps_epilog_key {
   uint32_t spi_shader_col_format;

   /* Bitmasks, each bit represents one of the 8 MRTs. */
   uint8_t color_is_int8;
   uint8_t color_is_int10;
   uint8_t enable_mrt_output_nan_fixup;
};

72struct aco_vp_output_info {
73   uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
74   uint8_t clip_dist_mask;
75   uint8_t cull_dist_mask;
76   uint8_t param_exports;
77   uint8_t prim_param_exports;
78   bool writes_pointsize;
79   bool writes_layer;
80   bool writes_layer_per_primitive;
81   bool writes_viewport_index;
82   bool writes_viewport_index_per_primitive;
83   bool writes_primitive_shading_rate;
84   bool writes_primitive_shading_rate_per_primitive;
85   bool export_prim_id;
86   bool export_clip_dists;
87};
88
/* One stream-output entry; struct aco_streamout_info stores an array of
 * these.
 */
struct aco_stream_output {
   uint8_t location;
   uint8_t buffer;
   uint16_t offset;
   uint8_t component_mask;
   uint8_t stream;
};

97struct aco_streamout_info {
98   uint16_t num_outputs;
99   struct aco_stream_output outputs[ACO_MAX_SO_OUTPUTS];
100   uint16_t strides[ACO_MAX_SO_BUFFERS];
101};
102
103struct aco_shader_info {
104   uint8_t wave_size;
105   bool is_ngg;
106   bool has_ngg_culling;
107   bool has_ngg_early_prim_export;
108   uint32_t num_tess_patches;
109   unsigned workgroup_size;
110   struct {
111      struct aco_vp_output_info outinfo;
112      bool as_es;
113      bool as_ls;
114      bool tcs_in_out_eq;
115      uint64_t tcs_temp_only_input_mask;
116      bool use_per_attribute_vb_descs;
117      uint32_t vb_desc_usage_mask;
118      bool has_prolog;
119      bool dynamic_inputs;
120   } vs;
121   struct {
122      uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
123      uint8_t num_stream_output_components[4];
124      uint8_t output_streams[VARYING_SLOT_VAR31 + 1];
125      unsigned vertices_out;
126   } gs;
127   struct {
128      uint32_t num_lds_blocks;
129   } tcs;
130   struct {
131      struct aco_vp_output_info outinfo;
132      bool as_es;
133   } tes;
134   struct {
135      bool writes_z;
136      bool writes_stencil;
137      bool writes_sample_mask;
138      bool has_epilog;
139      uint32_t num_interp;
140      unsigned spi_ps_input;
141   } ps;
142   struct {
143      uint8_t subgroup_size;
144   } cs;
145   struct {
146      struct aco_vp_output_info outinfo;
147   } ms;
148   struct aco_streamout_info so;
149
150   uint32_t gfx9_gs_ring_lds_size;
151};
152
/* Severity passed as the "level" argument of the debug callback declared in
 * struct aco_compiler_options.
 */
enum aco_compiler_debug_level {
   ACO_COMPILER_DEBUG_LEVEL_PERFWARN,
   ACO_COMPILER_DEBUG_LEVEL_ERROR,
};

158struct aco_stage_input {
159   uint32_t optimisations_disabled : 1;
160   uint32_t image_2d_view_of_3d : 1;
161   struct {
162      uint32_t instance_rate_inputs;
163      uint32_t instance_rate_divisors[ACO_MAX_VERTEX_ATTRIBS];
164      uint8_t vertex_attribute_formats[ACO_MAX_VERTEX_ATTRIBS];
165      uint32_t vertex_attribute_bindings[ACO_MAX_VERTEX_ATTRIBS];
166      uint32_t vertex_attribute_offsets[ACO_MAX_VERTEX_ATTRIBS];
167      uint32_t vertex_attribute_strides[ACO_MAX_VERTEX_ATTRIBS];
168      uint8_t vertex_binding_align[ACO_MAX_VBS];
169   } vs;
170
171   struct {
172      unsigned tess_input_vertices;
173   } tcs;
174
175   struct {
176      uint32_t col_format;
177      uint8_t num_samples;
178
179      /* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */
180      bool alpha_to_coverage_via_mrtz;
181   } ps;
182};
183
184struct aco_compiler_options {
185   struct aco_stage_input key;
186   bool robust_buffer_access;
187   bool dump_shader;
188   bool dump_preoptir;
189   bool record_ir;
190   bool record_stats;
191   bool has_ls_vgpr_init_bug;
192   bool wgp_mode;
193   enum radeon_family family;
194   enum amd_gfx_level gfx_level;
195   uint32_t address32_hi;
196   struct {
197      void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message);
198      void *private_data;
199   } debug;
200};
201
202#ifdef __cplusplus
203}
204#endif
205#endif
206