1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef BLORP_PRIV_H
25#define BLORP_PRIV_H
26
27#include <stdint.h>
28
29#include "common/intel_measure.h"
30#include "compiler/nir/nir.h"
31#include "compiler/brw_compiler.h"
32
33#include "blorp.h"
34
35#ifdef __cplusplus
36extern "C" {
37#endif
38
39/**
40 * Binding table indices used by BLORP.
41 */
42enum {
43   BLORP_RENDERBUFFER_BT_INDEX,
44   BLORP_TEXTURE_BT_INDEX,
45   BLORP_NUM_BT_ENTRIES
46};
47
48#define BLORP_SAMPLER_INDEX 0
49
50struct brw_blorp_surface_info
51{
52   bool enabled;
53
54   struct isl_surf surf;
55   struct blorp_address addr;
56
57   struct isl_surf aux_surf;
58   struct blorp_address aux_addr;
59   enum isl_aux_usage aux_usage;
60
61   union isl_color_value clear_color;
62   struct blorp_address clear_color_addr;
63
64   struct isl_view view;
65
66   /* Z offset into a 3-D texture or slice of a 2-D array texture. */
67   float z_offset;
68
69   uint32_t tile_x_sa, tile_y_sa;
70};
71
72void
73brw_blorp_surface_info_init(struct blorp_batch *batch,
74                            struct brw_blorp_surface_info *info,
75                            const struct blorp_surf *surf,
76                            unsigned int level, float layer,
77                            enum isl_format format, bool is_dest);
78void
79blorp_surf_convert_to_single_slice(const struct isl_device *isl_dev,
80                                   struct brw_blorp_surface_info *info);
81void
82surf_fake_rgb_with_red(const struct isl_device *isl_dev,
83                       struct brw_blorp_surface_info *info);
84void
85blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev,
86                                   struct brw_blorp_surface_info *info,
87                                   uint32_t *x, uint32_t *y,
88                                   uint32_t *width, uint32_t *height);
89void
90blorp_surf_fake_interleaved_msaa(const struct isl_device *isl_dev,
91                                 struct brw_blorp_surface_info *info);
92void
93blorp_surf_retile_w_to_y(const struct isl_device *isl_dev,
94                         struct brw_blorp_surface_info *info);
95
96
97struct brw_blorp_coord_transform
98{
99   float multiplier;
100   float offset;
101};
102
103/**
104 * Bounding rectangle telling pixel discard which pixels are to be touched.
105 * This is needed in when surfaces are configured as something else what they
106 * really are:
107 *
108 *    - writing W-tiled stencil as Y-tiled
109 *    - writing interleaved multisampled as single sampled.
110 *
111 * See blorp_check_in_bounds().
112 */
113struct brw_blorp_bounds_rect
114{
115   uint32_t x0;
116   uint32_t x1;
117   uint32_t y0;
118   uint32_t y1;
119};
120
121/**
122 * Grid needed for blended and scaled blits of integer formats, see
123 * blorp_nir_manual_blend_bilinear().
124 */
125struct brw_blorp_rect_grid
126{
127   float x1;
128   float y1;
129   float pad[2];
130};
131
132struct blorp_surf_offset {
133   uint32_t x;
134   uint32_t y;
135};
136
137struct brw_blorp_wm_inputs
138{
139   uint32_t clear_color[4];
140
141   struct brw_blorp_bounds_rect bounds_rect;
142   struct brw_blorp_rect_grid rect_grid;
143   struct brw_blorp_coord_transform coord_transform[2];
144
145   struct blorp_surf_offset src_offset;
146   struct blorp_surf_offset dst_offset;
147
148   /* (1/width, 1/height) for the source surface */
149   float src_inv_size[2];
150
151   /* Minimum layer setting works for all the textures types but texture_3d
152    * for which the setting has no effect. Use the z-coordinate instead.
153    */
154   float src_z;
155
156   /* Note: Pad out to an integral number of registers when extending, but
157    * make sure subgroup_id is the last 32-bit item.
158    */
159   /* uint32_t pad[?]; */
160   uint32_t subgroup_id;
161};
162
163static inline nir_variable *
164blorp_create_nir_input(struct nir_shader *nir,
165                       const char *name,
166                       const struct glsl_type *type,
167                       unsigned int offset)
168{
169   nir_variable *input;
170   if (nir->info.stage == MESA_SHADER_COMPUTE) {
171      input = nir_variable_create(nir, nir_var_uniform, type, name);
172      input->data.driver_location = offset;
173      input->data.location = offset;
174   } else {
175      input = nir_variable_create(nir, nir_var_shader_in, type, name);
176      input->data.location = VARYING_SLOT_VAR0 + offset / (4 * sizeof(float));
177      input->data.location_frac = (offset / sizeof(float)) % 4;
178   }
179   if (nir->info.stage == MESA_SHADER_FRAGMENT)
180      input->data.interpolation = INTERP_MODE_FLAT;
181   return input;
182}
183
184#define BLORP_CREATE_NIR_INPUT(shader, name, type) \
185   blorp_create_nir_input((shader), #name, (type), \
186                          offsetof(struct brw_blorp_wm_inputs, name))
187
188struct blorp_vs_inputs {
189   uint32_t base_layer;
190   uint32_t _instance_id; /* Set in hardware by SGVS */
191   uint32_t pad[2];
192};
193
194static inline unsigned
195brw_blorp_get_urb_length(const struct brw_wm_prog_data *prog_data)
196{
197   if (prog_data == NULL)
198      return 1;
199
200   /* From the BSpec: 3D Pipeline - Strips and Fans - 3DSTATE_SBE
201    *
202    * read_length = ceiling((max_source_attr+1)/2)
203    */
204   return MAX2((prog_data->num_varying_inputs + 1) / 2, 1);
205}
206
207enum blorp_shader_type {
208   BLORP_SHADER_TYPE_COPY,
209   BLORP_SHADER_TYPE_BLIT,
210   BLORP_SHADER_TYPE_CLEAR,
211   BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE,
212   BLORP_SHADER_TYPE_LAYER_OFFSET_VS,
213   BLORP_SHADER_TYPE_GFX4_SF,
214};
215
216enum blorp_shader_pipeline {
217   BLORP_SHADER_PIPELINE_RENDER,
218   BLORP_SHADER_PIPELINE_COMPUTE,
219};
220
221struct blorp_params
222{
223   uint32_t x0;
224   uint32_t y0;
225   uint32_t x1;
226   uint32_t y1;
227   float z;
228   uint8_t stencil_mask;
229   uint8_t stencil_ref;
230   struct brw_blorp_surface_info depth;
231   struct brw_blorp_surface_info stencil;
232   uint32_t depth_format;
233   struct brw_blorp_surface_info src;
234   struct brw_blorp_surface_info dst;
235   enum isl_aux_op hiz_op;
236   bool full_surface_hiz_op;
237   enum isl_aux_op fast_clear_op;
238   uint8_t color_write_disable;
239   struct brw_blorp_wm_inputs wm_inputs;
240   struct blorp_vs_inputs vs_inputs;
241   bool dst_clear_color_as_input;
242   unsigned num_samples;
243   unsigned num_draw_buffers;
244   unsigned num_layers;
245   uint32_t vs_prog_kernel;
246   struct brw_vs_prog_data *vs_prog_data;
247   uint32_t sf_prog_kernel;
248   struct brw_sf_prog_data *sf_prog_data;
249   uint32_t wm_prog_kernel;
250   struct brw_wm_prog_data *wm_prog_data;
251   uint32_t cs_prog_kernel;
252   struct brw_cs_prog_data *cs_prog_data;
253
254   bool use_pre_baked_binding_table;
255   uint32_t pre_baked_binding_table_offset;
256   enum blorp_shader_type shader_type;
257   enum blorp_shader_pipeline shader_pipeline;
258   enum intel_measure_snapshot_type snapshot_type;
259};
260
261void blorp_params_init(struct blorp_params *params);
262
263struct brw_blorp_base_key
264{
265   char name[8];
266   enum blorp_shader_type shader_type;
267   enum blorp_shader_pipeline shader_pipeline;
268};
269
270#define BRW_BLORP_BASE_KEY_INIT(_type)                  \
271   (struct brw_blorp_base_key) {                        \
272      .name = "blorp",                                  \
273      .shader_type = _type,                             \
274      .shader_pipeline = BLORP_SHADER_PIPELINE_RENDER,  \
275   }
276
277struct brw_blorp_blit_prog_key
278{
279   struct brw_blorp_base_key base;
280
281   /* Number of samples per pixel that have been configured in the surface
282    * state for texturing from.
283    */
284   unsigned tex_samples;
285
286   /* MSAA layout that has been configured in the surface state for texturing
287    * from.
288    */
289   enum isl_msaa_layout tex_layout;
290
291   enum isl_aux_usage tex_aux_usage;
292
293   /* Actual number of samples per pixel in the source image. */
294   unsigned src_samples;
295
296   /* Actual MSAA layout used by the source image. */
297   enum isl_msaa_layout src_layout;
298
299   /* The swizzle to apply to the source in the shader */
300   struct isl_swizzle src_swizzle;
301
302   /* The format of the source if format-specific workarounds are needed
303    * and 0 (ISL_FORMAT_R32G32B32A32_FLOAT) if the destination is natively
304    * renderable.
305    */
306   enum isl_format src_format;
307
308   /* True if the source requires normalized coordinates */
309   bool src_coords_normalized;
310
311   /* Number of samples per pixel that have been configured in the render
312    * target.
313    */
314   unsigned rt_samples;
315
316   /* MSAA layout that has been configured in the render target. */
317   enum isl_msaa_layout rt_layout;
318
319   /* Actual number of samples per pixel in the destination image. */
320   unsigned dst_samples;
321
322   /* Actual MSAA layout used by the destination image. */
323   enum isl_msaa_layout dst_layout;
324
325   /* The swizzle to apply to the destination in the shader */
326   struct isl_swizzle dst_swizzle;
327
328   /* The format of the destination if format-specific workarounds are needed
329    * and 0 (ISL_FORMAT_R32G32B32A32_FLOAT) if the destination is natively
330    * renderable.
331    */
332   enum isl_format dst_format;
333
334   /* Whether or not the format workarounds are a bitcast operation */
335   bool format_bit_cast;
336
337   /** True if we need to perform SINT -> UINT clamping. */
338   bool sint32_to_uint;
339
340   /** True if we need to perform UINT -> SINT clamping. */
341   bool uint32_to_sint;
342
343   /* Type of the data to be read from the texture (one of
344    * nir_type_(int|uint|float)).
345    */
346   nir_alu_type texture_data_type;
347
348   /* True if the source image is W tiled.  If true, the surface state for the
349    * source image must be configured as Y tiled, and tex_samples must be 0.
350    */
351   bool src_tiled_w;
352
353   /* True if the destination image is W tiled.  If true, the surface state
354    * for the render target must be configured as Y tiled, and rt_samples must
355    * be 0.
356    */
357   bool dst_tiled_w;
358
359   /* True if the destination is an RGB format.  If true, the surface state
360    * for the render target must be configured as red with three times the
361    * normal width.  We need to do this because you cannot render to
362    * non-power-of-two formats.
363    */
364   bool dst_rgb;
365
366   isl_surf_usage_flags_t dst_usage;
367
368   enum blorp_filter filter;
369
370   /* True if the rectangle being sent through the rendering pipeline might be
371    * larger than the destination rectangle, so the WM program should kill any
372    * pixels that are outside the destination rectangle.
373    */
374   bool use_kill;
375
376   /**
377    * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more
378    * than one sample per pixel.
379    */
380   bool persample_msaa_dispatch;
381
382   /* True if this blit operation may involve intratile offsets on the source.
383    * In this case, we need to add the offset before texturing.
384    */
385   bool need_src_offset;
386
387   /* True if this blit operation may involve intratile offsets on the
388    * destination.  In this case, we need to add the offset to gl_FragCoord.
389    */
390   bool need_dst_offset;
391
392   /* Scale factors between the pixel grid and the grid of samples. We're
393    * using grid of samples for bilinear filetring in multisample scaled blits.
394    */
395   float x_scale;
396   float y_scale;
397
398   /* If a compute shader is used, this is the local size y dimension.
399    */
400   uint8_t local_y;
401};
402
403/**
404 * \name BLORP internals
405 * \{
406 *
407 * Used internally by gfx6_blorp_exec() and gfx7_blorp_exec().
408 */
409
410void brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key);
411void brw_blorp_init_cs_prog_key(struct brw_cs_prog_key *cs_key);
412
413const char *blorp_shader_type_to_name(enum blorp_shader_type type);
414const char *blorp_shader_pipeline_to_name(enum blorp_shader_pipeline pipe);
415
416const unsigned *
417blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx,
418                 struct nir_shader *nir,
419                 struct brw_wm_prog_key *wm_key,
420                 bool use_repclear,
421                 struct brw_wm_prog_data *wm_prog_data);
422
423const unsigned *
424blorp_compile_vs(struct blorp_context *blorp, void *mem_ctx,
425                 struct nir_shader *nir,
426                 struct brw_vs_prog_data *vs_prog_data);
427
428bool
429blorp_ensure_sf_program(struct blorp_batch *batch,
430                        struct blorp_params *params);
431
432static inline uint8_t
433blorp_get_cs_local_y(struct blorp_params *params)
434{
435   uint32_t height = params->y1 - params->y0;
436   uint32_t or_ys = params->y0 | params->y1;
437   if (height > 32 || (or_ys & 3) == 0) {
438      return 4;
439   } else if ((or_ys & 1) == 0) {
440      return 2;
441   } else {
442      return 1;
443   }
444}
445
446static inline void
447blorp_set_cs_dims(struct nir_shader *nir, uint8_t local_y)
448{
449   assert(local_y != 0 && (16 % local_y == 0));
450   nir->info.workgroup_size[0] = 16 / local_y;
451   nir->info.workgroup_size[1] = local_y;
452   nir->info.workgroup_size[2] = 1;
453}
454
455const unsigned *
456blorp_compile_cs(struct blorp_context *blorp, void *mem_ctx,
457                 struct nir_shader *nir,
458                 struct brw_cs_prog_key *cs_key,
459                 struct brw_cs_prog_data *cs_prog_data);
460
461/** \} */
462
463#ifdef __cplusplus
464} /* end extern "C" */
465#endif /* __cplusplus */
466
467#endif /* BLORP_PRIV_H */
468