/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

25#ifndef SHADER_INFO_H
26#define SHADER_INFO_H
27
28#include "util/bitset.h"
29#include "util/sha1/sha1.h"
30#include "shader_enums.h"
31#include <stdint.h>
32
33#ifdef __cplusplus
34extern "C" {
35#endif
36
37#define MAX_XFB_BUFFERS        4
38#define MAX_INLINABLE_UNIFORMS 4
39
/**
 * Set of boolean switches, one per capability.  Members are named after
 * SPIR-V capabilities/extensions (judging by the struct and member names;
 * the code that reads this struct is not visible in this header).  A member
 * left false presumably means "not supported" -- TODO confirm against the
 * consumer.
 *
 * Every member is a plain bool, so the struct can be zero-initialized and
 * then filled in with designated initializers.
 */
struct spirv_supported_capabilities {
   bool address;
   bool atomic_storage;
   bool demote_to_helper_invocation;
   bool derivative_group;
   bool descriptor_array_dynamic_indexing;
   bool descriptor_array_non_uniform_indexing;
   bool descriptor_indexing;
   bool device_group;
   bool draw_parameters;
   bool float16_atomic_add;
   bool float16_atomic_min_max;
   bool float32_atomic_add;
   bool float32_atomic_min_max;
   bool float64;
   bool float64_atomic_add;
   bool float64_atomic_min_max;
   bool fragment_shader_sample_interlock;
   bool fragment_shader_pixel_interlock;
   bool fragment_shading_rate;
   bool generic_pointers;
   bool geometry_streams;
   bool groups;
   bool image_ms_array;
   bool image_read_without_format;
   bool image_write_without_format;
   bool image_atomic_int64;
   bool int8;
   bool int16;
   bool int64;
   bool int64_atomics;
   bool integer_functions2;
   bool kernel;
   bool kernel_image;
   bool kernel_image_read_write;
   bool linkage;
   bool literal_sampler;
   bool mesh_shading_nv;
   bool min_lod;
   bool multiview;
   bool per_view_attributes_nv;
   bool physical_storage_buffer_address;
   bool post_depth_coverage;
   bool printf;
   bool ray_cull_mask;
   bool ray_tracing;
   bool ray_query;
   bool ray_traversal_primitive_culling;
   bool runtime_descriptor_array;
   bool float_controls;
   bool shader_clock;
   bool shader_viewport_index_layer;
   bool shader_viewport_mask_nv;
   bool sparse_residency;
   bool stencil_export;
   bool storage_8bit;
   bool storage_16bit;
   bool storage_image_ms;
   bool subgroup_arithmetic;
   bool subgroup_ballot;
   bool subgroup_basic;
   bool subgroup_dispatch;
   bool subgroup_quad;
   bool subgroup_shuffle;
   bool subgroup_uniform_control_flow;
   bool subgroup_vote;
   bool tessellation;
   bool transform_feedback;
   bool variable_pointers;
   bool vk_memory_model;
   bool vk_memory_model_device_scope;
   bool workgroup_memory_explicit_layout;
   bool float16;

   /* Members carrying the amd_ prefix. */
   bool amd_fragment_mask;
   bool amd_gcn_shader;
   bool amd_shader_ballot;
   bool amd_trinary_minmax;
   bool amd_image_read_write_lod;
   bool amd_shader_explicit_vertex_parameter;
   bool amd_image_gather_bias_lod;

   /* Members carrying the intel_ prefix. */
   bool intel_subgroup_shuffle;
   bool intel_subgroup_buffer_block_io;
};

125typedef struct shader_info {
126   const char *name;
127
128   /* Descriptive name provided by the client; may be NULL */
129   const char *label;
130
131   /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */
132   bool internal;
133
134   /* SHA1 of the original source, used by shader detection in drivers. */
135   uint8_t source_sha1[SHA1_DIGEST_LENGTH];
136
137   /** The shader stage, such as MESA_SHADER_VERTEX. */
138   gl_shader_stage stage:8;
139
140   /** The shader stage in a non SSO linked program that follows this stage,
141     * such as MESA_SHADER_FRAGMENT.
142     */
143   gl_shader_stage next_stage:8;
144
145   /* Number of textures used by this shader */
146   uint8_t num_textures;
147   /* Number of uniform buffers used by this shader */
148   uint8_t num_ubos;
149   /* Number of atomic buffers used by this shader */
150   uint8_t num_abos;
151   /* Number of shader storage buffers (max .driver_location + 1) used by this
152    * shader.  In the case of nir_lower_atomics_to_ssbo being used, this will
153    * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
154    * and atomic counters in nir_shader->info.
155    */
156   uint8_t num_ssbos;
157   /* Number of images used by this shader */
158   uint8_t num_images;
159
160   /* Which inputs are actually read */
161   uint64_t inputs_read;
162   /* Which outputs are actually written */
163   uint64_t outputs_written;
164   /* Which outputs are actually read */
165   uint64_t outputs_read;
166   /* Which system values are actually read */
167   BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
168
169   /* Which I/O is per-primitive, for read/written information combine with
170    * the fields above.
171    */
172   uint64_t per_primitive_inputs;
173   uint64_t per_primitive_outputs;
174
175   /* Which 16-bit inputs and outputs are used corresponding to
176    * VARYING_SLOT_VARn_16BIT.
177    */
178   uint16_t inputs_read_16bit;
179   uint16_t outputs_written_16bit;
180   uint16_t outputs_read_16bit;
181   uint16_t inputs_read_indirectly_16bit;
182   uint16_t outputs_accessed_indirectly_16bit;
183
184   /* Which patch inputs are actually read */
185   uint32_t patch_inputs_read;
186   /* Which patch outputs are actually written */
187   uint32_t patch_outputs_written;
188   /* Which patch outputs are read */
189   uint32_t patch_outputs_read;
190
191   /* Which inputs are read indirectly (subset of inputs_read) */
192   uint64_t inputs_read_indirectly;
193   /* Which outputs are read or written indirectly */
194   uint64_t outputs_accessed_indirectly;
195   /* Which patch inputs are read indirectly (subset of patch_inputs_read) */
196   uint64_t patch_inputs_read_indirectly;
197   /* Which patch outputs are read or written indirectly */
198   uint64_t patch_outputs_accessed_indirectly;
199
200   /** Bitfield of which textures are used */
201   BITSET_DECLARE(textures_used, 128);
202
203   /** Bitfield of which textures are used by texelFetch() */
204   BITSET_DECLARE(textures_used_by_txf, 128);
205
206   /** Bitfield of which samplers are used */
207   BITSET_DECLARE(samplers_used, 32);
208
209   /** Bitfield of which images are used */
210   BITSET_DECLARE(images_used, 64);
211   /** Bitfield of which images are buffers. */
212   BITSET_DECLARE(image_buffers, 64);
213   /** Bitfield of which images are MSAA. */
214   BITSET_DECLARE(msaa_images, 64);
215
216   /* SPV_KHR_float_controls: execution mode for floating point ops */
217   uint16_t float_controls_execution_mode;
218
219   /**
220    * Size of shared variables accessed by compute/task/mesh shaders.
221    */
222   unsigned shared_size;
223
224   /**
225    * Size of task payload variables accessed by task/mesh shaders.
226    */
227   unsigned task_payload_size;
228
229   /**
230    * Number of ray tracing queries in the shader (counts all elements of all
231    * variables).
232    */
233   unsigned ray_queries;
234
235   /**
236    * Local workgroup size used by compute/task/mesh shaders.
237    */
238   uint16_t workgroup_size[3];
239
240   enum gl_subgroup_size subgroup_size;
241
242   /* Transform feedback buffer strides in dwords, max. 1K - 4. */
243   uint8_t xfb_stride[MAX_XFB_BUFFERS];
244
245   uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
246   uint8_t num_inlinable_uniforms:4;
247
248   /* The size of the gl_ClipDistance[] array, if declared. */
249   uint8_t clip_distance_array_size:4;
250
251   /* The size of the gl_CullDistance[] array, if declared. */
252   uint8_t cull_distance_array_size:4;
253
254   /* Whether or not this shader ever uses textureGather() */
255   bool uses_texture_gather:1;
256
257   /**
258    * True if this shader uses the fddx/fddy opcodes.
259    *
260    * Note that this does not include the "fine" and "coarse" variants.
261    */
262   bool uses_fddx_fddy:1;
263
264   /** Has divergence analysis ever been run? */
265   bool divergence_analysis_run:1;
266
267   /* Bitmask of bit-sizes used with ALU instructions. */
268   uint8_t bit_sizes_float;
269   uint8_t bit_sizes_int;
270
271   /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
272   bool first_ubo_is_default_ubo:1;
273
274   /* Whether or not separate shader objects were used */
275   bool separate_shader:1;
276
277   /** Was this shader linked with any transform feedback varyings? */
278   bool has_transform_feedback_varyings:1;
279
280   /* Whether flrp has been lowered. */
281   bool flrp_lowered:1;
282
283   /* Whether nir_lower_io has been called to lower derefs.
284    * nir_variables for inputs and outputs might not be present in the IR.
285    */
286   bool io_lowered:1;
287
288   /* Whether the shader writes memory, including transform feedback. */
289   bool writes_memory:1;
290
291   /* Whether gl_Layer is viewport-relative */
292   bool layer_viewport_relative:1;
293
294   /* Whether explicit barriers are used */
295   bool uses_control_barrier : 1;
296   bool uses_memory_barrier : 1;
297
298   /**
299    * Shared memory types have explicit layout set.  Used for
300    * SPV_KHR_workgroup_storage_explicit_layout.
301    */
302   bool shared_memory_explicit_layout:1;
303
304   /**
305    * Used for VK_KHR_zero_initialize_workgroup_memory.
306    */
307   bool zero_initialize_shared_memory:1;
308
309   /**
310    * Used for ARB_compute_variable_group_size.
311    */
312   bool workgroup_size_variable:1;
313
314   /**
315     * Set if this shader uses legacy (DX9 or ARB assembly) math rules.
316     *
317     * From the ARB_fragment_program specification:
318     *
319     *    "The following rules apply to multiplication:
320     *
321     *      1. <x> * <y> == <y> * <x>, for all <x> and <y>.
322     *      2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to
323     *         *representable numbers (IEEE "not a number" and "infinity"
324     *         *encodings may be exceptions).
325     *      3. +1.0 * <x> = <x>, for all <x>.""
326     *
327     * However, in effect this was due to DX9 semantics implying that 0*x=0 even
328     * for inf/nan if the hardware generated them instead of float_min/max.  So,
329     * you should not have an exception for inf/nan to rule 2 above.
330     *
331     * One implementation of this behavior would be to flush all generated NaNs
332     * to zero, at which point 0*Inf=Nan=0.  Most DX9/ARB-asm hardware did not
333     * generate NaNs, and the only way the GPU saw one was to possibly feed it
334     * in as a uniform.
335     */
336   bool use_legacy_math_rules;
337
338   union {
339      struct {
340         /* Which inputs are doubles */
341         uint64_t double_inputs;
342
343         /* For AMD-specific driver-internal shaders. It replaces vertex
344          * buffer loads with code generating VS inputs from scalar registers.
345          *
346          * Valid values: SI_VS_BLIT_SGPRS_POS_*
347          */
348         uint8_t blit_sgprs_amd:4;
349
350         /* True if the shader writes position in window space coordinates pre-transform */
351         bool window_space_position:1;
352
353         /** Is an edge flag input needed? */
354         bool needs_edge_flag:1;
355      } vs;
356
357      struct {
358         /** The output primitive type */
359         uint16_t output_primitive;
360
361         /** The input primitive type */
362         uint16_t input_primitive;
363
364         /** The maximum number of vertices the geometry shader might write. */
365         uint16_t vertices_out;
366
367         /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
368         uint8_t invocations;
369
370         /** The number of vertices received per input primitive (max. 6) */
371         uint8_t vertices_in:3;
372
373         /** Whether or not this shader uses EndPrimitive */
374         bool uses_end_primitive:1;
375
376         /** The streams used in this shaders (max. 4) */
377         uint8_t active_stream_mask:4;
378      } gs;
379
380      struct {
381         bool uses_discard:1;
382         bool uses_demote:1;
383         bool uses_fbfetch_output:1;
384         bool color_is_dual_source:1;
385
386         /**
387          * True if this fragment shader requires helper invocations.  This
388          * can be caused by the use of ALU derivative ops, texture
389          * instructions which do implicit derivatives, and the use of quad
390          * subgroup operations.
391          */
392         bool needs_quad_helper_invocations:1;
393
394         /**
395          * True if this fragment shader requires helper invocations for
396          * all subgroup operations, not just quad ops and derivatives.
397          */
398         bool needs_all_helper_invocations:1;
399
400         /**
401          * Whether any inputs are declared with the "sample" qualifier.
402          */
403         bool uses_sample_qualifier:1;
404
405         /**
406          * Whether sample shading is used.
407          */
408         bool uses_sample_shading:1;
409
410         /**
411          * Whether early fragment tests are enabled as defined by
412          * ARB_shader_image_load_store.
413          */
414         bool early_fragment_tests:1;
415
416         /**
417          * Defined by INTEL_conservative_rasterization.
418          */
419         bool inner_coverage:1;
420
421         bool post_depth_coverage:1;
422
423         /**
424          * \name ARB_fragment_coord_conventions
425          * @{
426          */
427         bool pixel_center_integer:1;
428         bool origin_upper_left:1;
429         /*@}*/
430
431         bool pixel_interlock_ordered:1;
432         bool pixel_interlock_unordered:1;
433         bool sample_interlock_ordered:1;
434         bool sample_interlock_unordered:1;
435
436         /**
437          * Flags whether NIR's base types on the FS color outputs should be
438          * ignored.
439          *
440          * GLSL requires that fragment shader output base types match the
441          * render target's base types for the behavior to be defined.  From
442          * the GL 4.6 spec:
443          *
444          *     "If the values written by the fragment shader do not match the
445          *      format(s) of the corresponding color buffer(s), the result is
446          *      undefined."
447          *
448          * However, for NIR shaders translated from TGSI, we don't have the
449          * output types any more, so the driver will need to do whatever
450          * fixups are necessary to handle effectively untyped data being
451          * output from the FS.
452          */
453         bool untyped_color_outputs:1;
454
455         /** gl_FragDepth layout for ARB_conservative_depth. */
456         enum gl_frag_depth_layout depth_layout:3;
457
458         /**
459          * Interpolation qualifiers for drivers that lowers color inputs
460          * to system values.
461          */
462         unsigned color0_interp:3; /* glsl_interp_mode */
463         bool color0_sample:1;
464         bool color0_centroid:1;
465         unsigned color1_interp:3; /* glsl_interp_mode */
466         bool color1_sample:1;
467         bool color1_centroid:1;
468
469         /* Bitmask of gl_advanced_blend_mode values that may be used with this
470          * shader.
471          */
472         unsigned advanced_blend_modes;
473      } fs;
474
475      struct {
476         uint16_t workgroup_size_hint[3];
477
478         uint8_t user_data_components_amd:3;
479
480         /*
481          * Arrangement of invocations used to calculate derivatives in a compute
482          * shader.  From NV_compute_shader_derivatives.
483          */
484         enum gl_derivative_group derivative_group:2;
485
486         /**
487          * pointer size is:
488          *   AddressingModelLogical:    0    (default)
489          *   AddressingModelPhysical32: 32
490          *   AddressingModelPhysical64: 64
491          */
492         unsigned ptr_size;
493
494         /**
495          * Uses subgroup intrinsics which can communicate across a quad.
496          */
497         bool uses_wide_subgroup_intrinsics;
498      } cs;
499
500      /* Applies to both TCS and TES. */
501      struct {
502	 enum tess_primitive_mode _primitive_mode;
503
504         /** The number of vertices in the TCS output patch. */
505         uint8_t tcs_vertices_out;
506         unsigned spacing:2; /*gl_tess_spacing*/
507
508         /** Is the vertex order counterclockwise? */
509         bool ccw:1;
510         bool point_mode:1;
511
512         /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
513          * with a vertex index that is NOT the invocation id
514          */
515         uint64_t tcs_cross_invocation_inputs_read;
516
517         /* Bit mask of TCS per-vertex outputs that are used
518          * with a vertex index that is NOT the invocation id
519          */
520         uint64_t tcs_cross_invocation_outputs_read;
521      } tess;
522
523      /* Applies to MESH. */
524      struct {
525         /* Bit mask of MS outputs that are used
526          * with an index that is NOT the local invocation index.
527          */
528         uint64_t ms_cross_invocation_output_access;
529
530         uint16_t max_vertices_out;
531         uint16_t max_primitives_out;
532         uint16_t primitive_type;  /* GL_POINTS, GL_LINES or GL_TRIANGLES. */
533      } mesh;
534   };
535} shader_info;
536
537#ifdef __cplusplus
538}
539#endif
540
541#endif /* SHADER_INFO_H */
542