1/* 2 * Copyright © 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef BLORP_PRIV_H 25#define BLORP_PRIV_H 26 27#include <stdint.h> 28 29#include "common/intel_measure.h" 30#include "compiler/nir/nir.h" 31#include "compiler/brw_compiler.h" 32 33#include "blorp.h" 34 35#ifdef __cplusplus 36extern "C" { 37#endif 38 39/** 40 * Binding table indices used by BLORP. 41 */ 42enum { 43 BLORP_RENDERBUFFER_BT_INDEX, 44 BLORP_TEXTURE_BT_INDEX, 45 BLORP_NUM_BT_ENTRIES 46}; 47 48#define BLORP_SAMPLER_INDEX 0 49 50struct brw_blorp_surface_info 51{ 52 bool enabled; 53 54 struct isl_surf surf; 55 struct blorp_address addr; 56 57 struct isl_surf aux_surf; 58 struct blorp_address aux_addr; 59 enum isl_aux_usage aux_usage; 60 61 union isl_color_value clear_color; 62 struct blorp_address clear_color_addr; 63 64 struct isl_view view; 65 66 /* Z offset into a 3-D texture or slice of a 2-D array texture. */ 67 float z_offset; 68 69 uint32_t tile_x_sa, tile_y_sa; 70}; 71 72void 73brw_blorp_surface_info_init(struct blorp_batch *batch, 74 struct brw_blorp_surface_info *info, 75 const struct blorp_surf *surf, 76 unsigned int level, float layer, 77 enum isl_format format, bool is_dest); 78void 79blorp_surf_convert_to_single_slice(const struct isl_device *isl_dev, 80 struct brw_blorp_surface_info *info); 81void 82surf_fake_rgb_with_red(const struct isl_device *isl_dev, 83 struct brw_blorp_surface_info *info); 84void 85blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev, 86 struct brw_blorp_surface_info *info, 87 uint32_t *x, uint32_t *y, 88 uint32_t *width, uint32_t *height); 89void 90blorp_surf_fake_interleaved_msaa(const struct isl_device *isl_dev, 91 struct brw_blorp_surface_info *info); 92void 93blorp_surf_retile_w_to_y(const struct isl_device *isl_dev, 94 struct brw_blorp_surface_info *info); 95 96 97struct brw_blorp_coord_transform 98{ 99 float multiplier; 100 float offset; 101}; 102 103/** 104 * Bounding rectangle telling pixel discard which pixels are to be touched. 105 * This is needed in when surfaces are configured as something else what they 106 * really are: 107 * 108 * - writing W-tiled stencil as Y-tiled 109 * - writing interleaved multisampled as single sampled. 110 * 111 * See blorp_check_in_bounds(). 112 */ 113struct brw_blorp_bounds_rect 114{ 115 uint32_t x0; 116 uint32_t x1; 117 uint32_t y0; 118 uint32_t y1; 119}; 120 121/** 122 * Grid needed for blended and scaled blits of integer formats, see 123 * blorp_nir_manual_blend_bilinear(). 124 */ 125struct brw_blorp_rect_grid 126{ 127 float x1; 128 float y1; 129 float pad[2]; 130}; 131 132struct blorp_surf_offset { 133 uint32_t x; 134 uint32_t y; 135}; 136 137struct brw_blorp_wm_inputs 138{ 139 uint32_t clear_color[4]; 140 141 struct brw_blorp_bounds_rect bounds_rect; 142 struct brw_blorp_rect_grid rect_grid; 143 struct brw_blorp_coord_transform coord_transform[2]; 144 145 struct blorp_surf_offset src_offset; 146 struct blorp_surf_offset dst_offset; 147 148 /* (1/width, 1/height) for the source surface */ 149 float src_inv_size[2]; 150 151 /* Minimum layer setting works for all the textures types but texture_3d 152 * for which the setting has no effect. Use the z-coordinate instead. 153 */ 154 float src_z; 155 156 /* Note: Pad out to an integral number of registers when extending, but 157 * make sure subgroup_id is the last 32-bit item. 158 */ 159 /* uint32_t pad[?]; */ 160 uint32_t subgroup_id; 161}; 162 163static inline nir_variable * 164blorp_create_nir_input(struct nir_shader *nir, 165 const char *name, 166 const struct glsl_type *type, 167 unsigned int offset) 168{ 169 nir_variable *input; 170 if (nir->info.stage == MESA_SHADER_COMPUTE) { 171 input = nir_variable_create(nir, nir_var_uniform, type, name); 172 input->data.driver_location = offset; 173 input->data.location = offset; 174 } else { 175 input = nir_variable_create(nir, nir_var_shader_in, type, name); 176 input->data.location = VARYING_SLOT_VAR0 + offset / (4 * sizeof(float)); 177 input->data.location_frac = (offset / sizeof(float)) % 4; 178 } 179 if (nir->info.stage == MESA_SHADER_FRAGMENT) 180 input->data.interpolation = INTERP_MODE_FLAT; 181 return input; 182} 183 184#define BLORP_CREATE_NIR_INPUT(shader, name, type) \ 185 blorp_create_nir_input((shader), #name, (type), \ 186 offsetof(struct brw_blorp_wm_inputs, name)) 187 188struct blorp_vs_inputs { 189 uint32_t base_layer; 190 uint32_t _instance_id; /* Set in hardware by SGVS */ 191 uint32_t pad[2]; 192}; 193 194static inline unsigned 195brw_blorp_get_urb_length(const struct brw_wm_prog_data *prog_data) 196{ 197 if (prog_data == NULL) 198 return 1; 199 200 /* From the BSpec: 3D Pipeline - Strips and Fans - 3DSTATE_SBE 201 * 202 * read_length = ceiling((max_source_attr+1)/2) 203 */ 204 return MAX2((prog_data->num_varying_inputs + 1) / 2, 1); 205} 206 207enum blorp_shader_type { 208 BLORP_SHADER_TYPE_COPY, 209 BLORP_SHADER_TYPE_BLIT, 210 BLORP_SHADER_TYPE_CLEAR, 211 BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE, 212 BLORP_SHADER_TYPE_LAYER_OFFSET_VS, 213 BLORP_SHADER_TYPE_GFX4_SF, 214}; 215 216enum blorp_shader_pipeline { 217 BLORP_SHADER_PIPELINE_RENDER, 218 BLORP_SHADER_PIPELINE_COMPUTE, 219}; 220 221struct blorp_params 222{ 223 uint32_t x0; 224 uint32_t y0; 225 uint32_t x1; 226 uint32_t y1; 227 float z; 228 uint8_t stencil_mask; 229 uint8_t stencil_ref; 230 struct brw_blorp_surface_info depth; 231 struct brw_blorp_surface_info stencil; 232 uint32_t depth_format; 233 struct brw_blorp_surface_info src; 234 struct brw_blorp_surface_info dst; 235 enum isl_aux_op hiz_op; 236 bool full_surface_hiz_op; 237 enum isl_aux_op fast_clear_op; 238 uint8_t color_write_disable; 239 struct brw_blorp_wm_inputs wm_inputs; 240 struct blorp_vs_inputs vs_inputs; 241 bool dst_clear_color_as_input; 242 unsigned num_samples; 243 unsigned num_draw_buffers; 244 unsigned num_layers; 245 uint32_t vs_prog_kernel; 246 struct brw_vs_prog_data *vs_prog_data; 247 uint32_t sf_prog_kernel; 248 struct brw_sf_prog_data *sf_prog_data; 249 uint32_t wm_prog_kernel; 250 struct brw_wm_prog_data *wm_prog_data; 251 uint32_t cs_prog_kernel; 252 struct brw_cs_prog_data *cs_prog_data; 253 254 bool use_pre_baked_binding_table; 255 uint32_t pre_baked_binding_table_offset; 256 enum blorp_shader_type shader_type; 257 enum blorp_shader_pipeline shader_pipeline; 258 enum intel_measure_snapshot_type snapshot_type; 259}; 260 261void blorp_params_init(struct blorp_params *params); 262 263struct brw_blorp_base_key 264{ 265 char name[8]; 266 enum blorp_shader_type shader_type; 267 enum blorp_shader_pipeline shader_pipeline; 268}; 269 270#define BRW_BLORP_BASE_KEY_INIT(_type) \ 271 (struct brw_blorp_base_key) { \ 272 .name = "blorp", \ 273 .shader_type = _type, \ 274 .shader_pipeline = BLORP_SHADER_PIPELINE_RENDER, \ 275 } 276 277struct brw_blorp_blit_prog_key 278{ 279 struct brw_blorp_base_key base; 280 281 /* Number of samples per pixel that have been configured in the surface 282 * state for texturing from. 283 */ 284 unsigned tex_samples; 285 286 /* MSAA layout that has been configured in the surface state for texturing 287 * from. 288 */ 289 enum isl_msaa_layout tex_layout; 290 291 enum isl_aux_usage tex_aux_usage; 292 293 /* Actual number of samples per pixel in the source image. */ 294 unsigned src_samples; 295 296 /* Actual MSAA layout used by the source image. */ 297 enum isl_msaa_layout src_layout; 298 299 /* The swizzle to apply to the source in the shader */ 300 struct isl_swizzle src_swizzle; 301 302 /* The format of the source if format-specific workarounds are needed 303 * and 0 (ISL_FORMAT_R32G32B32A32_FLOAT) if the destination is natively 304 * renderable. 305 */ 306 enum isl_format src_format; 307 308 /* True if the source requires normalized coordinates */ 309 bool src_coords_normalized; 310 311 /* Number of samples per pixel that have been configured in the render 312 * target. 313 */ 314 unsigned rt_samples; 315 316 /* MSAA layout that has been configured in the render target. */ 317 enum isl_msaa_layout rt_layout; 318 319 /* Actual number of samples per pixel in the destination image. */ 320 unsigned dst_samples; 321 322 /* Actual MSAA layout used by the destination image. */ 323 enum isl_msaa_layout dst_layout; 324 325 /* The swizzle to apply to the destination in the shader */ 326 struct isl_swizzle dst_swizzle; 327 328 /* The format of the destination if format-specific workarounds are needed 329 * and 0 (ISL_FORMAT_R32G32B32A32_FLOAT) if the destination is natively 330 * renderable. 331 */ 332 enum isl_format dst_format; 333 334 /* Whether or not the format workarounds are a bitcast operation */ 335 bool format_bit_cast; 336 337 /** True if we need to perform SINT -> UINT clamping. */ 338 bool sint32_to_uint; 339 340 /** True if we need to perform UINT -> SINT clamping. */ 341 bool uint32_to_sint; 342 343 /* Type of the data to be read from the texture (one of 344 * nir_type_(int|uint|float)). 345 */ 346 nir_alu_type texture_data_type; 347 348 /* True if the source image is W tiled. If true, the surface state for the 349 * source image must be configured as Y tiled, and tex_samples must be 0. 350 */ 351 bool src_tiled_w; 352 353 /* True if the destination image is W tiled. If true, the surface state 354 * for the render target must be configured as Y tiled, and rt_samples must 355 * be 0. 356 */ 357 bool dst_tiled_w; 358 359 /* True if the destination is an RGB format. If true, the surface state 360 * for the render target must be configured as red with three times the 361 * normal width. We need to do this because you cannot render to 362 * non-power-of-two formats. 363 */ 364 bool dst_rgb; 365 366 isl_surf_usage_flags_t dst_usage; 367 368 enum blorp_filter filter; 369 370 /* True if the rectangle being sent through the rendering pipeline might be 371 * larger than the destination rectangle, so the WM program should kill any 372 * pixels that are outside the destination rectangle. 373 */ 374 bool use_kill; 375 376 /** 377 * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more 378 * than one sample per pixel. 379 */ 380 bool persample_msaa_dispatch; 381 382 /* True if this blit operation may involve intratile offsets on the source. 383 * In this case, we need to add the offset before texturing. 384 */ 385 bool need_src_offset; 386 387 /* True if this blit operation may involve intratile offsets on the 388 * destination. In this case, we need to add the offset to gl_FragCoord. 389 */ 390 bool need_dst_offset; 391 392 /* Scale factors between the pixel grid and the grid of samples. We're 393 * using grid of samples for bilinear filetring in multisample scaled blits. 394 */ 395 float x_scale; 396 float y_scale; 397 398 /* If a compute shader is used, this is the local size y dimension. 399 */ 400 uint8_t local_y; 401}; 402 403/** 404 * \name BLORP internals 405 * \{ 406 * 407 * Used internally by gfx6_blorp_exec() and gfx7_blorp_exec(). 408 */ 409 410void brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key); 411void brw_blorp_init_cs_prog_key(struct brw_cs_prog_key *cs_key); 412 413const char *blorp_shader_type_to_name(enum blorp_shader_type type); 414const char *blorp_shader_pipeline_to_name(enum blorp_shader_pipeline pipe); 415 416const unsigned * 417blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx, 418 struct nir_shader *nir, 419 struct brw_wm_prog_key *wm_key, 420 bool use_repclear, 421 struct brw_wm_prog_data *wm_prog_data); 422 423const unsigned * 424blorp_compile_vs(struct blorp_context *blorp, void *mem_ctx, 425 struct nir_shader *nir, 426 struct brw_vs_prog_data *vs_prog_data); 427 428bool 429blorp_ensure_sf_program(struct blorp_batch *batch, 430 struct blorp_params *params); 431 432static inline uint8_t 433blorp_get_cs_local_y(struct blorp_params *params) 434{ 435 uint32_t height = params->y1 - params->y0; 436 uint32_t or_ys = params->y0 | params->y1; 437 if (height > 32 || (or_ys & 3) == 0) { 438 return 4; 439 } else if ((or_ys & 1) == 0) { 440 return 2; 441 } else { 442 return 1; 443 } 444} 445 446static inline void 447blorp_set_cs_dims(struct nir_shader *nir, uint8_t local_y) 448{ 449 assert(local_y != 0 && (16 % local_y == 0)); 450 nir->info.workgroup_size[0] = 16 / local_y; 451 nir->info.workgroup_size[1] = local_y; 452 nir->info.workgroup_size[2] = 1; 453} 454 455const unsigned * 456blorp_compile_cs(struct blorp_context *blorp, void *mem_ctx, 457 struct nir_shader *nir, 458 struct brw_cs_prog_key *cs_key, 459 struct brw_cs_prog_data *cs_prog_data); 460 461/** \} */ 462 463#ifdef __cplusplus 464} /* end extern "C" */ 465#endif /* __cplusplus */ 466 467#endif /* BLORP_PRIV_H */ 468