1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2013 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "util/ralloc.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "util/macros.h" /* Needed for MAX3 and MAX2 for format_rgb9e5 */ 27bf215546Sopenharmony_ci#include "util/format_rgb9e5.h" 28bf215546Sopenharmony_ci#include "util/format_srgb.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "blorp_priv.h" 31bf215546Sopenharmony_ci#include "compiler/brw_eu_defines.h" 32bf215546Sopenharmony_ci#include "dev/intel_debug.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci#include "blorp_nir_builder.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#define FILE_DEBUG_FLAG DEBUG_BLORP 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#pragma pack(push, 1) 39bf215546Sopenharmony_cistruct brw_blorp_const_color_prog_key 40bf215546Sopenharmony_ci{ 41bf215546Sopenharmony_ci struct brw_blorp_base_key base; 42bf215546Sopenharmony_ci bool use_simd16_replicated_data; 43bf215546Sopenharmony_ci bool clear_rgb_as_red; 44bf215546Sopenharmony_ci uint8_t local_y; 45bf215546Sopenharmony_ci}; 46bf215546Sopenharmony_ci#pragma pack(pop) 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistatic bool 49bf215546Sopenharmony_ciblorp_params_get_clear_kernel_fs(struct blorp_batch *batch, 50bf215546Sopenharmony_ci struct blorp_params *params, 51bf215546Sopenharmony_ci bool use_replicated_data, 52bf215546Sopenharmony_ci bool clear_rgb_as_red) 53bf215546Sopenharmony_ci{ 54bf215546Sopenharmony_ci struct blorp_context *blorp = batch->blorp; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci const struct brw_blorp_const_color_prog_key blorp_key = { 57bf215546Sopenharmony_ci .base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR), 58bf215546Sopenharmony_ci .base.shader_pipeline = BLORP_SHADER_PIPELINE_RENDER, 59bf215546Sopenharmony_ci .use_simd16_replicated_data = use_replicated_data, 60bf215546Sopenharmony_ci .clear_rgb_as_red = clear_rgb_as_red, 61bf215546Sopenharmony_ci .local_y = 0, 62bf215546Sopenharmony_ci }; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci params->shader_type = blorp_key.base.shader_type; 65bf215546Sopenharmony_ci params->shader_pipeline = blorp_key.base.shader_pipeline; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), 68bf215546Sopenharmony_ci ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) 69bf215546Sopenharmony_ci return true; 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci nir_builder b; 74bf215546Sopenharmony_ci blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, 75bf215546Sopenharmony_ci blorp_shader_type_to_name(blorp_key.base.shader_type)); 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci nir_variable *v_color = 78bf215546Sopenharmony_ci BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type()); 79bf215546Sopenharmony_ci nir_ssa_def *color = nir_load_var(&b, v_color); 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci if (clear_rgb_as_red) { 82bf215546Sopenharmony_ci nir_ssa_def *pos = nir_f2i32(&b, nir_load_frag_coord(&b)); 83bf215546Sopenharmony_ci nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, pos, 0), 84bf215546Sopenharmony_ci nir_imm_int(&b, 3)); 85bf215546Sopenharmony_ci color = nir_pad_vec4(&b, nir_vector_extract(&b, color, comp)); 86bf215546Sopenharmony_ci } 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci nir_variable *frag_color = nir_variable_create(b.shader, nir_var_shader_out, 89bf215546Sopenharmony_ci glsl_vec4_type(), 90bf215546Sopenharmony_ci "gl_FragColor"); 91bf215546Sopenharmony_ci frag_color->data.location = FRAG_RESULT_COLOR; 92bf215546Sopenharmony_ci nir_store_var(&b, frag_color, color, 0xf); 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci struct brw_wm_prog_key wm_key; 95bf215546Sopenharmony_ci brw_blorp_init_wm_prog_key(&wm_key); 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci struct brw_wm_prog_data prog_data; 98bf215546Sopenharmony_ci const unsigned *program = 99bf215546Sopenharmony_ci blorp_compile_fs(blorp, mem_ctx, b.shader, &wm_key, use_replicated_data, 100bf215546Sopenharmony_ci &prog_data); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci bool result = 103bf215546Sopenharmony_ci blorp->upload_shader(batch, MESA_SHADER_FRAGMENT, 104bf215546Sopenharmony_ci &blorp_key, sizeof(blorp_key), 105bf215546Sopenharmony_ci program, prog_data.base.program_size, 106bf215546Sopenharmony_ci &prog_data.base, sizeof(prog_data), 107bf215546Sopenharmony_ci ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci ralloc_free(mem_ctx); 110bf215546Sopenharmony_ci return result; 111bf215546Sopenharmony_ci} 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_cistatic bool 114bf215546Sopenharmony_ciblorp_params_get_clear_kernel_cs(struct blorp_batch *batch, 115bf215546Sopenharmony_ci struct blorp_params *params, 116bf215546Sopenharmony_ci bool clear_rgb_as_red) 117bf215546Sopenharmony_ci{ 118bf215546Sopenharmony_ci struct blorp_context *blorp = batch->blorp; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci const struct brw_blorp_const_color_prog_key blorp_key = { 121bf215546Sopenharmony_ci .base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR), 122bf215546Sopenharmony_ci .base.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE, 123bf215546Sopenharmony_ci .use_simd16_replicated_data = false, 124bf215546Sopenharmony_ci .clear_rgb_as_red = clear_rgb_as_red, 125bf215546Sopenharmony_ci .local_y = blorp_get_cs_local_y(params), 126bf215546Sopenharmony_ci }; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci params->shader_type = blorp_key.base.shader_type; 129bf215546Sopenharmony_ci params->shader_pipeline = blorp_key.base.shader_pipeline; 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), 132bf215546Sopenharmony_ci ¶ms->cs_prog_kernel, ¶ms->cs_prog_data)) 133bf215546Sopenharmony_ci return true; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci nir_builder b; 138bf215546Sopenharmony_ci blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_COMPUTE, "BLORP-gpgpu-clear"); 139bf215546Sopenharmony_ci blorp_set_cs_dims(b.shader, blorp_key.local_y); 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci nir_ssa_def *dst_pos = nir_load_global_invocation_id(&b, 32); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci nir_variable *v_color = 144bf215546Sopenharmony_ci BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type()); 145bf215546Sopenharmony_ci nir_ssa_def *color = nir_load_var(&b, v_color); 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci nir_variable *v_bounds_rect = 148bf215546Sopenharmony_ci BLORP_CREATE_NIR_INPUT(b.shader, bounds_rect, glsl_vec4_type()); 149bf215546Sopenharmony_ci nir_ssa_def *bounds_rect = nir_load_var(&b, v_bounds_rect); 150bf215546Sopenharmony_ci nir_ssa_def *in_bounds = blorp_check_in_bounds(&b, bounds_rect, dst_pos); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci if (clear_rgb_as_red) { 153bf215546Sopenharmony_ci nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, dst_pos, 0), 154bf215546Sopenharmony_ci nir_imm_int(&b, 3)); 155bf215546Sopenharmony_ci color = nir_pad_vec4(&b, nir_vector_extract(&b, color, comp)); 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci nir_push_if(&b, in_bounds); 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci nir_image_store(&b, nir_imm_int(&b, 0), 161bf215546Sopenharmony_ci nir_pad_vector_imm_int(&b, dst_pos, 0, 4), 162bf215546Sopenharmony_ci nir_imm_int(&b, 0), 163bf215546Sopenharmony_ci nir_pad_vector_imm_int(&b, color, 0, 4), 164bf215546Sopenharmony_ci nir_imm_int(&b, 0), 165bf215546Sopenharmony_ci .image_dim = GLSL_SAMPLER_DIM_2D, 166bf215546Sopenharmony_ci .image_array = true, 167bf215546Sopenharmony_ci .access = ACCESS_NON_READABLE); 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci struct brw_cs_prog_key cs_key; 172bf215546Sopenharmony_ci brw_blorp_init_cs_prog_key(&cs_key); 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci struct brw_cs_prog_data prog_data; 175bf215546Sopenharmony_ci const unsigned *program = 176bf215546Sopenharmony_ci blorp_compile_cs(blorp, mem_ctx, b.shader, &cs_key, &prog_data); 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci bool result = 179bf215546Sopenharmony_ci blorp->upload_shader(batch, MESA_SHADER_COMPUTE, 180bf215546Sopenharmony_ci &blorp_key, sizeof(blorp_key), 181bf215546Sopenharmony_ci program, prog_data.base.program_size, 182bf215546Sopenharmony_ci &prog_data.base, sizeof(prog_data), 183bf215546Sopenharmony_ci ¶ms->cs_prog_kernel, ¶ms->cs_prog_data); 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci ralloc_free(mem_ctx); 186bf215546Sopenharmony_ci return result; 187bf215546Sopenharmony_ci} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_cistatic bool 190bf215546Sopenharmony_ciblorp_params_get_clear_kernel(struct blorp_batch *batch, 191bf215546Sopenharmony_ci struct blorp_params *params, 192bf215546Sopenharmony_ci bool use_replicated_data, 193bf215546Sopenharmony_ci bool clear_rgb_as_red) 194bf215546Sopenharmony_ci{ 195bf215546Sopenharmony_ci if (batch->flags & BLORP_BATCH_USE_COMPUTE) { 196bf215546Sopenharmony_ci assert(!use_replicated_data); 197bf215546Sopenharmony_ci return blorp_params_get_clear_kernel_cs(batch, params, clear_rgb_as_red); 198bf215546Sopenharmony_ci } else { 199bf215546Sopenharmony_ci return blorp_params_get_clear_kernel_fs(batch, params, 200bf215546Sopenharmony_ci use_replicated_data, 201bf215546Sopenharmony_ci clear_rgb_as_red); 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci} 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci#pragma pack(push, 1) 206bf215546Sopenharmony_cistruct layer_offset_vs_key { 207bf215546Sopenharmony_ci struct brw_blorp_base_key base; 208bf215546Sopenharmony_ci unsigned num_inputs; 209bf215546Sopenharmony_ci}; 210bf215546Sopenharmony_ci#pragma pack(pop) 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci/* In the case of doing attachment clears, we are using a surface state that 213bf215546Sopenharmony_ci * is handed to us so we can't set (and don't even know) the base array layer. 214bf215546Sopenharmony_ci * In order to do a layered clear in this scenario, we need some way of adding 215bf215546Sopenharmony_ci * the base array layer to the instance id. Unfortunately, our hardware has 216bf215546Sopenharmony_ci * no real concept of "base instance", so we have to do it manually in a 217bf215546Sopenharmony_ci * vertex shader. 218bf215546Sopenharmony_ci */ 219bf215546Sopenharmony_cistatic bool 220bf215546Sopenharmony_ciblorp_params_get_layer_offset_vs(struct blorp_batch *batch, 221bf215546Sopenharmony_ci struct blorp_params *params) 222bf215546Sopenharmony_ci{ 223bf215546Sopenharmony_ci struct blorp_context *blorp = batch->blorp; 224bf215546Sopenharmony_ci struct layer_offset_vs_key blorp_key = { 225bf215546Sopenharmony_ci .base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_LAYER_OFFSET_VS), 226bf215546Sopenharmony_ci }; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci if (params->wm_prog_data) 229bf215546Sopenharmony_ci blorp_key.num_inputs = params->wm_prog_data->num_varying_inputs; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), 232bf215546Sopenharmony_ci ¶ms->vs_prog_kernel, ¶ms->vs_prog_data)) 233bf215546Sopenharmony_ci return true; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci nir_builder b; 238bf215546Sopenharmony_ci blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_VERTEX, 239bf215546Sopenharmony_ci blorp_shader_type_to_name(blorp_key.base.shader_type)); 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4); 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci /* First we deal with the header which has instance and base instance */ 244bf215546Sopenharmony_ci nir_variable *a_header = nir_variable_create(b.shader, nir_var_shader_in, 245bf215546Sopenharmony_ci uvec4_type, "header"); 246bf215546Sopenharmony_ci a_header->data.location = VERT_ATTRIB_GENERIC0; 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci nir_variable *v_layer = nir_variable_create(b.shader, nir_var_shader_out, 249bf215546Sopenharmony_ci glsl_int_type(), "layer_id"); 250bf215546Sopenharmony_ci v_layer->data.location = VARYING_SLOT_LAYER; 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci /* Compute the layer id */ 253bf215546Sopenharmony_ci nir_ssa_def *header = nir_load_var(&b, a_header); 254bf215546Sopenharmony_ci nir_ssa_def *base_layer = nir_channel(&b, header, 0); 255bf215546Sopenharmony_ci nir_ssa_def *instance = nir_channel(&b, header, 1); 256bf215546Sopenharmony_ci nir_store_var(&b, v_layer, nir_iadd(&b, instance, base_layer), 0x1); 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci /* Then we copy the vertex from the next slot to VARYING_SLOT_POS */ 259bf215546Sopenharmony_ci nir_variable *a_vertex = nir_variable_create(b.shader, nir_var_shader_in, 260bf215546Sopenharmony_ci glsl_vec4_type(), "a_vertex"); 261bf215546Sopenharmony_ci a_vertex->data.location = VERT_ATTRIB_GENERIC1; 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci nir_variable *v_pos = nir_variable_create(b.shader, nir_var_shader_out, 264bf215546Sopenharmony_ci glsl_vec4_type(), "v_pos"); 265bf215546Sopenharmony_ci v_pos->data.location = VARYING_SLOT_POS; 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci nir_copy_var(&b, v_pos, a_vertex); 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci /* Then we copy everything else */ 270bf215546Sopenharmony_ci for (unsigned i = 0; i < blorp_key.num_inputs; i++) { 271bf215546Sopenharmony_ci nir_variable *a_in = nir_variable_create(b.shader, nir_var_shader_in, 272bf215546Sopenharmony_ci uvec4_type, "input"); 273bf215546Sopenharmony_ci a_in->data.location = VERT_ATTRIB_GENERIC2 + i; 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci nir_variable *v_out = nir_variable_create(b.shader, nir_var_shader_out, 276bf215546Sopenharmony_ci uvec4_type, "output"); 277bf215546Sopenharmony_ci v_out->data.location = VARYING_SLOT_VAR0 + i; 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci nir_copy_var(&b, v_out, a_in); 280bf215546Sopenharmony_ci } 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci struct brw_vs_prog_data vs_prog_data; 283bf215546Sopenharmony_ci memset(&vs_prog_data, 0, sizeof(vs_prog_data)); 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci const unsigned *program = 286bf215546Sopenharmony_ci blorp_compile_vs(blorp, mem_ctx, b.shader, &vs_prog_data); 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci bool result = 289bf215546Sopenharmony_ci blorp->upload_shader(batch, MESA_SHADER_VERTEX, 290bf215546Sopenharmony_ci &blorp_key, sizeof(blorp_key), 291bf215546Sopenharmony_ci program, vs_prog_data.base.base.program_size, 292bf215546Sopenharmony_ci &vs_prog_data.base.base, sizeof(vs_prog_data), 293bf215546Sopenharmony_ci ¶ms->vs_prog_kernel, ¶ms->vs_prog_data); 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci ralloc_free(mem_ctx); 296bf215546Sopenharmony_ci return result; 297bf215546Sopenharmony_ci} 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci/* The x0, y0, x1, and y1 parameters must already be populated with the render 300bf215546Sopenharmony_ci * area of the framebuffer to be cleared. 301bf215546Sopenharmony_ci */ 302bf215546Sopenharmony_cistatic void 303bf215546Sopenharmony_ciget_fast_clear_rect(const struct isl_device *dev, 304bf215546Sopenharmony_ci const struct isl_surf *surf, 305bf215546Sopenharmony_ci const struct isl_surf *aux_surf, 306bf215546Sopenharmony_ci unsigned *x0, unsigned *y0, 307bf215546Sopenharmony_ci unsigned *x1, unsigned *y1) 308bf215546Sopenharmony_ci{ 309bf215546Sopenharmony_ci unsigned int x_align, y_align; 310bf215546Sopenharmony_ci unsigned int x_scaledown, y_scaledown; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci /* Only single sampled surfaces need to (and actually can) be resolved. */ 313bf215546Sopenharmony_ci if (surf->samples == 1) { 314bf215546Sopenharmony_ci if (dev->info->verx10 >= 125) { 315bf215546Sopenharmony_ci assert(surf->tiling == ISL_TILING_4); 316bf215546Sopenharmony_ci /* From Bspec 47709, "MCS/CCS Buffer for Render Target(s)": 317bf215546Sopenharmony_ci * 318bf215546Sopenharmony_ci * SW must ensure that clearing rectangle dimensions cover the 319bf215546Sopenharmony_ci * entire area desired, to accomplish this task initial X/Y 320bf215546Sopenharmony_ci * dimensions need to be rounded up to next multiple of scaledown 321bf215546Sopenharmony_ci * factor before dividing by scale down factor: 322bf215546Sopenharmony_ci * 323bf215546Sopenharmony_ci * The X and Y scale down factors in the table that follows are used 324bf215546Sopenharmony_ci * for both alignment and scaling down. 325bf215546Sopenharmony_ci */ 326bf215546Sopenharmony_ci const uint32_t bs = isl_format_get_layout(surf->format)->bpb / 8; 327bf215546Sopenharmony_ci x_align = x_scaledown = 1024 / bs; 328bf215546Sopenharmony_ci y_align = y_scaledown = 16; 329bf215546Sopenharmony_ci } else { 330bf215546Sopenharmony_ci assert(aux_surf->usage == ISL_SURF_USAGE_CCS_BIT); 331bf215546Sopenharmony_ci /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 332bf215546Sopenharmony_ci * Target(s)", beneath the "Fast Color Clear" bullet (p327): 333bf215546Sopenharmony_ci * 334bf215546Sopenharmony_ci * Clear pass must have a clear rectangle that must follow 335bf215546Sopenharmony_ci * alignment rules in terms of pixels and lines as shown in the 336bf215546Sopenharmony_ci * table below. Further, the clear-rectangle height and width 337bf215546Sopenharmony_ci * must be multiple of the following dimensions. If the height 338bf215546Sopenharmony_ci * and width of the render target being cleared do not meet these 339bf215546Sopenharmony_ci * requirements, an MCS buffer can be created such that it 340bf215546Sopenharmony_ci * follows the requirement and covers the RT. 341bf215546Sopenharmony_ci * 342bf215546Sopenharmony_ci * The alignment size in the table that follows is related to the 343bf215546Sopenharmony_ci * alignment size that is baked into the CCS surface format but with X 344bf215546Sopenharmony_ci * alignment multiplied by 16 and Y alignment multiplied by 32. 345bf215546Sopenharmony_ci */ 346bf215546Sopenharmony_ci x_align = isl_format_get_layout(aux_surf->format)->bw; 347bf215546Sopenharmony_ci y_align = isl_format_get_layout(aux_surf->format)->bh; 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci x_align *= 16; 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci /* The line alignment requirement for Y-tiled is halved at SKL and again 352bf215546Sopenharmony_ci * at TGL. 353bf215546Sopenharmony_ci */ 354bf215546Sopenharmony_ci if (dev->info->ver >= 12) 355bf215546Sopenharmony_ci y_align *= 8; 356bf215546Sopenharmony_ci else if (dev->info->ver >= 9) 357bf215546Sopenharmony_ci y_align *= 16; 358bf215546Sopenharmony_ci else 359bf215546Sopenharmony_ci y_align *= 32; 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 362bf215546Sopenharmony_ci * Target(s)", beneath the "Fast Color Clear" bullet (p327): 363bf215546Sopenharmony_ci * 364bf215546Sopenharmony_ci * In order to optimize the performance MCS buffer (when bound to 365bf215546Sopenharmony_ci * 1X RT) clear similarly to MCS buffer clear for MSRT case, 366bf215546Sopenharmony_ci * clear rect is required to be scaled by the following factors 367bf215546Sopenharmony_ci * in the horizontal and vertical directions: 368bf215546Sopenharmony_ci * 369bf215546Sopenharmony_ci * The X and Y scale down factors in the table that follows are each 370bf215546Sopenharmony_ci * equal to half the alignment value computed above. 371bf215546Sopenharmony_ci */ 372bf215546Sopenharmony_ci x_scaledown = x_align / 2; 373bf215546Sopenharmony_ci y_scaledown = y_align / 2; 374bf215546Sopenharmony_ci } 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci if (ISL_DEV_IS_HASWELL(dev)) { 377bf215546Sopenharmony_ci /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel 378bf215546Sopenharmony_ci * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color 379bf215546Sopenharmony_ci * Clear of Non-MultiSampled Render Target Restrictions": 380bf215546Sopenharmony_ci * 381bf215546Sopenharmony_ci * Clear rectangle must be aligned to two times the number of 382bf215546Sopenharmony_ci * pixels in the table shown below due to 16x16 hashing across the 383bf215546Sopenharmony_ci * slice. 384bf215546Sopenharmony_ci * 385bf215546Sopenharmony_ci * This restriction is only documented to exist on HSW GT3 but 386bf215546Sopenharmony_ci * empirical evidence suggests that it's also needed GT2. 387bf215546Sopenharmony_ci */ 388bf215546Sopenharmony_ci x_align *= 2; 389bf215546Sopenharmony_ci y_align *= 2; 390bf215546Sopenharmony_ci } 391bf215546Sopenharmony_ci } else { 392bf215546Sopenharmony_ci assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT); 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 395bf215546Sopenharmony_ci * Target(s)", beneath the "MSAA Compression" bullet (p326): 396bf215546Sopenharmony_ci * 397bf215546Sopenharmony_ci * Clear pass for this case requires that scaled down primitive 398bf215546Sopenharmony_ci * is sent down with upper left coordinate to coincide with 399bf215546Sopenharmony_ci * actual rectangle being cleared. For MSAA, clear rectangle’s 400bf215546Sopenharmony_ci * height and width need to as show in the following table in 401bf215546Sopenharmony_ci * terms of (width,height) of the RT. 402bf215546Sopenharmony_ci * 403bf215546Sopenharmony_ci * MSAA Width of Clear Rect Height of Clear Rect 404bf215546Sopenharmony_ci * 2X Ceil(1/8*width) Ceil(1/2*height) 405bf215546Sopenharmony_ci * 4X Ceil(1/8*width) Ceil(1/2*height) 406bf215546Sopenharmony_ci * 8X Ceil(1/2*width) Ceil(1/2*height) 407bf215546Sopenharmony_ci * 16X width Ceil(1/2*height) 408bf215546Sopenharmony_ci * 409bf215546Sopenharmony_ci * The text "with upper left coordinate to coincide with actual 410bf215546Sopenharmony_ci * rectangle being cleared" is a little confusing--it seems to imply 411bf215546Sopenharmony_ci * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to 412bf215546Sopenharmony_ci * feed the pipeline using the rectangle (x,y) to 413bf215546Sopenharmony_ci * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on 414bf215546Sopenharmony_ci * the number of samples. Experiments indicate that this is not 415bf215546Sopenharmony_ci * quite correct; actually, what the hardware appears to do is to 416bf215546Sopenharmony_ci * align whatever rectangle is sent down the pipeline to the nearest 417bf215546Sopenharmony_ci * multiple of 2x2 blocks, and then scale it up by a factor of N 418bf215546Sopenharmony_ci * horizontally and 2 vertically. So the resulting alignment is 4 419bf215546Sopenharmony_ci * vertically and either 4 or 16 horizontally, and the scaledown 420bf215546Sopenharmony_ci * factor is 2 vertically and either 2 or 8 horizontally. 421bf215546Sopenharmony_ci */ 422bf215546Sopenharmony_ci switch (aux_surf->format) { 423bf215546Sopenharmony_ci case ISL_FORMAT_MCS_2X: 424bf215546Sopenharmony_ci case ISL_FORMAT_MCS_4X: 425bf215546Sopenharmony_ci x_scaledown = 8; 426bf215546Sopenharmony_ci break; 427bf215546Sopenharmony_ci case ISL_FORMAT_MCS_8X: 428bf215546Sopenharmony_ci x_scaledown = 2; 429bf215546Sopenharmony_ci break; 430bf215546Sopenharmony_ci case ISL_FORMAT_MCS_16X: 431bf215546Sopenharmony_ci x_scaledown = 1; 432bf215546Sopenharmony_ci break; 433bf215546Sopenharmony_ci default: 434bf215546Sopenharmony_ci unreachable("Unexpected MCS format for fast clear"); 435bf215546Sopenharmony_ci } 436bf215546Sopenharmony_ci y_scaledown = 2; 437bf215546Sopenharmony_ci x_align = x_scaledown * 2; 438bf215546Sopenharmony_ci y_align = y_scaledown * 2; 439bf215546Sopenharmony_ci } 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci *x0 = ROUND_DOWN_TO(*x0, x_align) / x_scaledown; 442bf215546Sopenharmony_ci *y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown; 443bf215546Sopenharmony_ci *x1 = ALIGN(*x1, x_align) / x_scaledown; 444bf215546Sopenharmony_ci *y1 = ALIGN(*y1, y_align) / y_scaledown; 445bf215546Sopenharmony_ci} 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_civoid 448bf215546Sopenharmony_ciblorp_fast_clear(struct blorp_batch *batch, 449bf215546Sopenharmony_ci const struct blorp_surf *surf, 450bf215546Sopenharmony_ci enum isl_format format, struct isl_swizzle swizzle, 451bf215546Sopenharmony_ci uint32_t level, uint32_t start_layer, uint32_t num_layers, 452bf215546Sopenharmony_ci uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1) 453bf215546Sopenharmony_ci{ 454bf215546Sopenharmony_ci struct blorp_params params; 455bf215546Sopenharmony_ci blorp_params_init(¶ms); 456bf215546Sopenharmony_ci params.num_layers = num_layers; 457bf215546Sopenharmony_ci assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci params.x0 = x0; 460bf215546Sopenharmony_ci params.y0 = y0; 461bf215546Sopenharmony_ci params.x1 = x1; 462bf215546Sopenharmony_ci params.y1 = y1; 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci memset(¶ms.wm_inputs.clear_color, 0xff, 4*sizeof(float)); 465bf215546Sopenharmony_ci params.fast_clear_op = ISL_AUX_OP_FAST_CLEAR; 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci get_fast_clear_rect(batch->blorp->isl_dev, surf->surf, surf->aux_surf, 468bf215546Sopenharmony_ci ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) 471bf215546Sopenharmony_ci return; 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.dst, surf, level, 474bf215546Sopenharmony_ci start_layer, format, true); 475bf215546Sopenharmony_ci params.num_samples = params.dst.surf.samples; 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci assert(params.num_samples != 0); 478bf215546Sopenharmony_ci if (params.num_samples == 1) 479bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_CCS_COLOR_CLEAR; 480bf215546Sopenharmony_ci else 481bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_MCS_COLOR_CLEAR; 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci /* If a swizzle was provided, we need to swizzle the clear color so that 484bf215546Sopenharmony_ci * the hardware color format conversion will work properly. 485bf215546Sopenharmony_ci */ 486bf215546Sopenharmony_ci params.dst.clear_color = 487bf215546Sopenharmony_ci isl_color_value_swizzle_inv(params.dst.clear_color, swizzle); 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 490bf215546Sopenharmony_ci} 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_cibool 493bf215546Sopenharmony_ciblorp_clear_supports_compute(struct blorp_context *blorp, 494bf215546Sopenharmony_ci uint8_t color_write_disable, bool blend_enabled, 495bf215546Sopenharmony_ci enum isl_aux_usage aux_usage) 496bf215546Sopenharmony_ci{ 497bf215546Sopenharmony_ci if (blorp->isl_dev->info->ver < 7) 498bf215546Sopenharmony_ci return false; 499bf215546Sopenharmony_ci if (color_write_disable != 0 || blend_enabled) 500bf215546Sopenharmony_ci return false; 501bf215546Sopenharmony_ci if (blorp->isl_dev->info->ver >= 12) { 502bf215546Sopenharmony_ci return aux_usage == ISL_AUX_USAGE_GFX12_CCS_E || 503bf215546Sopenharmony_ci aux_usage == ISL_AUX_USAGE_CCS_E || 504bf215546Sopenharmony_ci aux_usage == ISL_AUX_USAGE_NONE; 505bf215546Sopenharmony_ci } else { 506bf215546Sopenharmony_ci return aux_usage == ISL_AUX_USAGE_NONE; 507bf215546Sopenharmony_ci } 508bf215546Sopenharmony_ci} 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_civoid 511bf215546Sopenharmony_ciblorp_clear(struct blorp_batch *batch, 512bf215546Sopenharmony_ci const struct blorp_surf *surf, 513bf215546Sopenharmony_ci enum isl_format format, struct isl_swizzle swizzle, 514bf215546Sopenharmony_ci uint32_t level, uint32_t start_layer, uint32_t num_layers, 515bf215546Sopenharmony_ci uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 516bf215546Sopenharmony_ci union isl_color_value clear_color, 517bf215546Sopenharmony_ci uint8_t color_write_disable) 518bf215546Sopenharmony_ci{ 519bf215546Sopenharmony_ci struct blorp_params params; 520bf215546Sopenharmony_ci blorp_params_init(¶ms); 521bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_SLOW_COLOR_CLEAR; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE; 524bf215546Sopenharmony_ci if (compute) 525bf215546Sopenharmony_ci assert(blorp_clear_supports_compute(batch->blorp, color_write_disable, 526bf215546Sopenharmony_ci false, surf->aux_usage)); 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci /* Manually apply the clear destination swizzle. This way swizzled clears 529bf215546Sopenharmony_ci * will work for swizzles which we can't normally use for rendering and it 530bf215546Sopenharmony_ci * also ensures that they work on pre-Haswell hardware which can't swizlle 531bf215546Sopenharmony_ci * at all. 532bf215546Sopenharmony_ci */ 533bf215546Sopenharmony_ci clear_color = isl_color_value_swizzle_inv(clear_color, swizzle); 534bf215546Sopenharmony_ci swizzle = ISL_SWIZZLE_IDENTITY; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci bool clear_rgb_as_red = false; 537bf215546Sopenharmony_ci if (format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) { 538bf215546Sopenharmony_ci clear_color.u32[0] = float3_to_rgb9e5(clear_color.f32); 539bf215546Sopenharmony_ci format = ISL_FORMAT_R32_UINT; 540bf215546Sopenharmony_ci } else if (format == ISL_FORMAT_L8_UNORM_SRGB) { 541bf215546Sopenharmony_ci clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]); 542bf215546Sopenharmony_ci format = ISL_FORMAT_R8_UNORM; 543bf215546Sopenharmony_ci } else if (format == ISL_FORMAT_A4B4G4R4_UNORM) { 544bf215546Sopenharmony_ci /* Broadwell and earlier cannot render to this format so we need to work 545bf215546Sopenharmony_ci * around it by swapping the colors around and using B4G4R4A4 instead. 546bf215546Sopenharmony_ci */ 547bf215546Sopenharmony_ci const struct isl_swizzle ARGB = ISL_SWIZZLE(ALPHA, RED, GREEN, BLUE); 548bf215546Sopenharmony_ci clear_color = isl_color_value_swizzle_inv(clear_color, ARGB); 549bf215546Sopenharmony_ci format = ISL_FORMAT_B4G4R4A4_UNORM; 550bf215546Sopenharmony_ci } else if (isl_format_get_layout(format)->bpb % 3 == 0) { 551bf215546Sopenharmony_ci clear_rgb_as_red = true; 552bf215546Sopenharmony_ci if (format == ISL_FORMAT_R8G8B8_UNORM_SRGB) { 553bf215546Sopenharmony_ci clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]); 554bf215546Sopenharmony_ci clear_color.f32[1] = util_format_linear_to_srgb_float(clear_color.f32[1]); 555bf215546Sopenharmony_ci clear_color.f32[2] = util_format_linear_to_srgb_float(clear_color.f32[2]); 556bf215546Sopenharmony_ci } 557bf215546Sopenharmony_ci } 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci memcpy(¶ms.wm_inputs.clear_color, clear_color.f32, sizeof(float) * 4); 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci bool use_simd16_replicated_data = true; 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci /* From the SNB PRM (Vol4_Part1): 564bf215546Sopenharmony_ci * 565bf215546Sopenharmony_ci * "Replicated data (Message Type = 111) is only supported when 566bf215546Sopenharmony_ci * accessing tiled memory. Using this Message Type to access linear 567bf215546Sopenharmony_ci * (untiled) memory is UNDEFINED." 568bf215546Sopenharmony_ci */ 569bf215546Sopenharmony_ci if (surf->surf->tiling == ISL_TILING_LINEAR) 570bf215546Sopenharmony_ci use_simd16_replicated_data = false; 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci /* Replicated clears don't work yet before gfx6 */ 573bf215546Sopenharmony_ci if (batch->blorp->isl_dev->info->ver < 6) 574bf215546Sopenharmony_ci use_simd16_replicated_data = false; 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci if (compute) 577bf215546Sopenharmony_ci use_simd16_replicated_data = false; 578bf215546Sopenharmony_ci 579bf215546Sopenharmony_ci /* Constant color writes ignore everything in blend and color calculator 580bf215546Sopenharmony_ci * state. This is not documented. 581bf215546Sopenharmony_ci */ 582bf215546Sopenharmony_ci params.color_write_disable = color_write_disable & BITFIELD_MASK(4); 583bf215546Sopenharmony_ci if (color_write_disable) 584bf215546Sopenharmony_ci use_simd16_replicated_data = false; 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci if (!blorp_params_get_clear_kernel(batch, ¶ms, 587bf215546Sopenharmony_ci use_simd16_replicated_data, 588bf215546Sopenharmony_ci clear_rgb_as_red)) 589bf215546Sopenharmony_ci return; 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci if (!compute && !blorp_ensure_sf_program(batch, ¶ms)) 592bf215546Sopenharmony_ci return; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci while (num_layers > 0) { 595bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.dst, surf, level, 596bf215546Sopenharmony_ci start_layer, format, true); 597bf215546Sopenharmony_ci params.dst.view.swizzle = swizzle; 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci params.x0 = x0; 600bf215546Sopenharmony_ci params.y0 = y0; 601bf215546Sopenharmony_ci params.x1 = x1; 602bf215546Sopenharmony_ci params.y1 = y1; 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci if (compute) { 605bf215546Sopenharmony_ci params.wm_inputs.bounds_rect.x0 = x0; 606bf215546Sopenharmony_ci params.wm_inputs.bounds_rect.y0 = y0; 607bf215546Sopenharmony_ci params.wm_inputs.bounds_rect.x1 = x1; 608bf215546Sopenharmony_ci params.wm_inputs.bounds_rect.y1 = y1; 609bf215546Sopenharmony_ci } 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci if (params.dst.tile_x_sa || params.dst.tile_y_sa) { 612bf215546Sopenharmony_ci assert(params.dst.surf.samples == 1); 613bf215546Sopenharmony_ci assert(num_layers == 1); 614bf215546Sopenharmony_ci params.x0 += params.dst.tile_x_sa; 615bf215546Sopenharmony_ci params.y0 += params.dst.tile_y_sa; 616bf215546Sopenharmony_ci params.x1 += params.dst.tile_x_sa; 617bf215546Sopenharmony_ci params.y1 += params.dst.tile_y_sa; 618bf215546Sopenharmony_ci } 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci /* The MinLOD and MinimumArrayElement don't work properly for cube maps. 621bf215546Sopenharmony_ci * Convert them to a single slice on gfx4. 622bf215546Sopenharmony_ci */ 623bf215546Sopenharmony_ci if (batch->blorp->isl_dev->info->ver == 4 && 624bf215546Sopenharmony_ci (params.dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT)) { 625bf215546Sopenharmony_ci blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, ¶ms.dst); 626bf215546Sopenharmony_ci } 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci if (clear_rgb_as_red) { 629bf215546Sopenharmony_ci surf_fake_rgb_with_red(batch->blorp->isl_dev, ¶ms.dst); 630bf215546Sopenharmony_ci params.x0 *= 3; 631bf215546Sopenharmony_ci params.x1 *= 3; 632bf215546Sopenharmony_ci } 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci if (isl_format_is_compressed(params.dst.surf.format)) { 635bf215546Sopenharmony_ci blorp_surf_convert_to_uncompressed(batch->blorp->isl_dev, ¶ms.dst, 636bf215546Sopenharmony_ci NULL, NULL, NULL, NULL); 637bf215546Sopenharmony_ci //&dst_x, &dst_y, &dst_w, &dst_h); 638bf215546Sopenharmony_ci } 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci if (params.dst.tile_x_sa || params.dst.tile_y_sa) { 641bf215546Sopenharmony_ci /* Either we're on gfx4 where there is no multisampling or the 642bf215546Sopenharmony_ci * surface is compressed which also implies no multisampling. 643bf215546Sopenharmony_ci * Therefore, sa == px and we don't need to do a conversion. 644bf215546Sopenharmony_ci */ 645bf215546Sopenharmony_ci assert(params.dst.surf.samples == 1); 646bf215546Sopenharmony_ci params.x0 += params.dst.tile_x_sa; 647bf215546Sopenharmony_ci params.y0 += params.dst.tile_y_sa; 648bf215546Sopenharmony_ci params.x1 += params.dst.tile_x_sa; 649bf215546Sopenharmony_ci params.y1 += params.dst.tile_y_sa; 650bf215546Sopenharmony_ci } 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci params.num_samples = params.dst.surf.samples; 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci /* We may be restricted on the number of layers we can bind at any one 655bf215546Sopenharmony_ci * time. In particular, Sandy Bridge has a maximum number of layers of 656bf215546Sopenharmony_ci * 512 but a maximum 3D texture size is much larger. 657bf215546Sopenharmony_ci */ 658bf215546Sopenharmony_ci params.num_layers = MIN2(params.dst.view.array_len, num_layers); 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_ci const unsigned max_image_width = 16 * 1024; 661bf215546Sopenharmony_ci if (params.dst.surf.logical_level0_px.width > max_image_width) { 662bf215546Sopenharmony_ci /* Clearing an RGB image as red multiplies the surface width by 3 663bf215546Sopenharmony_ci * so it may now be too wide for the hardware surface limits. We 664bf215546Sopenharmony_ci * have to break the clear up into pieces in order to clear wide 665bf215546Sopenharmony_ci * images. 666bf215546Sopenharmony_ci */ 667bf215546Sopenharmony_ci assert(clear_rgb_as_red); 668bf215546Sopenharmony_ci assert(params.dst.surf.dim == ISL_SURF_DIM_2D); 669bf215546Sopenharmony_ci assert(params.dst.surf.tiling == ISL_TILING_LINEAR); 670bf215546Sopenharmony_ci assert(params.dst.surf.logical_level0_px.depth == 1); 671bf215546Sopenharmony_ci assert(params.dst.surf.logical_level0_px.array_len == 1); 672bf215546Sopenharmony_ci assert(params.dst.surf.levels == 1); 673bf215546Sopenharmony_ci assert(params.dst.surf.samples == 1); 674bf215546Sopenharmony_ci assert(params.dst.tile_x_sa == 0 || params.dst.tile_y_sa == 0); 675bf215546Sopenharmony_ci assert(params.dst.aux_usage == ISL_AUX_USAGE_NONE); 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci /* max_image_width rounded down to a multiple of 3 */ 678bf215546Sopenharmony_ci const unsigned max_fake_rgb_width = (max_image_width / 3) * 3; 679bf215546Sopenharmony_ci const unsigned cpp = 680bf215546Sopenharmony_ci isl_format_get_layout(params.dst.surf.format)->bpb / 8; 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci params.dst.surf.logical_level0_px.width = max_fake_rgb_width; 683bf215546Sopenharmony_ci params.dst.surf.phys_level0_sa.width = max_fake_rgb_width; 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci uint32_t orig_x0 = params.x0, orig_x1 = params.x1; 686bf215546Sopenharmony_ci uint64_t orig_offset = params.dst.addr.offset; 687bf215546Sopenharmony_ci for (uint32_t x = orig_x0; x < orig_x1; x += max_fake_rgb_width) { 688bf215546Sopenharmony_ci /* Offset to the surface. It's easy because we're linear */ 689bf215546Sopenharmony_ci params.dst.addr.offset = orig_offset + x * cpp; 690bf215546Sopenharmony_ci 691bf215546Sopenharmony_ci params.x0 = 0; 692bf215546Sopenharmony_ci params.x1 = MIN2(orig_x1 - x, max_image_width); 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 695bf215546Sopenharmony_ci } 696bf215546Sopenharmony_ci } else { 697bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 698bf215546Sopenharmony_ci } 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci start_layer += params.num_layers; 701bf215546Sopenharmony_ci num_layers -= params.num_layers; 702bf215546Sopenharmony_ci } 703bf215546Sopenharmony_ci} 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_cistatic bool 706bf215546Sopenharmony_ciblorp_clear_stencil_as_rgba(struct blorp_batch *batch, 707bf215546Sopenharmony_ci const struct blorp_surf *surf, 708bf215546Sopenharmony_ci uint32_t level, uint32_t start_layer, 709bf215546Sopenharmony_ci uint32_t num_layers, 710bf215546Sopenharmony_ci uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 711bf215546Sopenharmony_ci uint8_t stencil_mask, uint8_t stencil_value) 712bf215546Sopenharmony_ci{ 713bf215546Sopenharmony_ci assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0); 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci /* We only support separate W-tiled stencil for now */ 716bf215546Sopenharmony_ci if (surf->surf->format != ISL_FORMAT_R8_UINT || 717bf215546Sopenharmony_ci surf->surf->tiling != ISL_TILING_W) 718bf215546Sopenharmony_ci return false; 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci /* Stencil mask support would require piles of shader magic */ 721bf215546Sopenharmony_ci if (stencil_mask != 0xff) 722bf215546Sopenharmony_ci return false; 723bf215546Sopenharmony_ci 724bf215546Sopenharmony_ci if (surf->surf->samples > 1) { 725bf215546Sopenharmony_ci /* Adjust x0, y0, x1, and y1 to be in units of samples */ 726bf215546Sopenharmony_ci assert(surf->surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED); 727bf215546Sopenharmony_ci struct isl_extent2d msaa_px_size_sa = 728bf215546Sopenharmony_ci isl_get_interleaved_msaa_px_size_sa(surf->surf->samples); 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci x0 *= msaa_px_size_sa.w; 731bf215546Sopenharmony_ci y0 *= msaa_px_size_sa.h; 732bf215546Sopenharmony_ci x1 *= msaa_px_size_sa.w; 733bf215546Sopenharmony_ci y1 *= msaa_px_size_sa.h; 734bf215546Sopenharmony_ci } 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci /* W-tiles and Y-tiles have the same layout as far as cache lines are 737bf215546Sopenharmony_ci * concerned: both are 8x8 cache lines laid out Y-major. The difference is 738bf215546Sopenharmony_ci * entirely in how the data is arranged within the cache line. W-tiling 739bf215546Sopenharmony_ci * is 8x8 pixels in a swizzled pattern while Y-tiling is 16B by 4 rows 740bf215546Sopenharmony_ci * regardless of image format size. As long as everything is aligned to 8, 741bf215546Sopenharmony_ci * we can just treat the W-tiled image as Y-tiled, ignore the layout 742bf215546Sopenharmony_ci * difference within a cache line, and blast out data. 743bf215546Sopenharmony_ci */ 744bf215546Sopenharmony_ci if (x0 % 8 != 0 || y0 % 8 != 0 || x1 % 8 != 0 || y1 % 8 != 0) 745bf215546Sopenharmony_ci return false; 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci struct blorp_params params; 748bf215546Sopenharmony_ci blorp_params_init(¶ms); 749bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR; 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) 752bf215546Sopenharmony_ci return false; 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci memset(¶ms.wm_inputs.clear_color, stencil_value, 755bf215546Sopenharmony_ci sizeof(params.wm_inputs.clear_color)); 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci /* The Sandy Bridge PRM Vol. 4 Pt. 2, section 2.11.2.1.1 has the 758bf215546Sopenharmony_ci * following footnote to the format table: 759bf215546Sopenharmony_ci * 760bf215546Sopenharmony_ci * 128 BPE Formats cannot be Tiled Y when used as render targets 761bf215546Sopenharmony_ci * 762bf215546Sopenharmony_ci * We have to use RGBA16_UINT on SNB. 763bf215546Sopenharmony_ci */ 764bf215546Sopenharmony_ci enum isl_format wide_format; 765bf215546Sopenharmony_ci if (ISL_GFX_VER(batch->blorp->isl_dev) <= 6) { 766bf215546Sopenharmony_ci wide_format = ISL_FORMAT_R16G16B16A16_UINT; 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci /* For RGBA16_UINT, we need to mask the stencil value otherwise, we risk 769bf215546Sopenharmony_ci * clamping giving us the wrong values 770bf215546Sopenharmony_ci */ 771bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) 772bf215546Sopenharmony_ci params.wm_inputs.clear_color[i] &= 0xffff; 773bf215546Sopenharmony_ci } else { 774bf215546Sopenharmony_ci wide_format = ISL_FORMAT_R32G32B32A32_UINT; 775bf215546Sopenharmony_ci } 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci for (uint32_t a = 0; a < num_layers; a++) { 778bf215546Sopenharmony_ci uint32_t layer = start_layer + a; 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.dst, surf, level, 781bf215546Sopenharmony_ci layer, ISL_FORMAT_UNSUPPORTED, true); 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ci if (surf->surf->samples > 1) 784bf215546Sopenharmony_ci blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, ¶ms.dst); 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci /* Make it Y-tiled */ 787bf215546Sopenharmony_ci blorp_surf_retile_w_to_y(batch->blorp->isl_dev, ¶ms.dst); 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci unsigned wide_Bpp = 790bf215546Sopenharmony_ci isl_format_get_layout(wide_format)->bpb / 8; 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci params.dst.view.format = params.dst.surf.format = wide_format; 793bf215546Sopenharmony_ci assert(params.dst.surf.logical_level0_px.width % wide_Bpp == 0); 794bf215546Sopenharmony_ci params.dst.surf.logical_level0_px.width /= wide_Bpp; 795bf215546Sopenharmony_ci assert(params.dst.tile_x_sa % wide_Bpp == 0); 796bf215546Sopenharmony_ci params.dst.tile_x_sa /= wide_Bpp; 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci params.x0 = params.dst.tile_x_sa + x0 / (wide_Bpp / 2); 799bf215546Sopenharmony_ci params.y0 = params.dst.tile_y_sa + y0 / 2; 800bf215546Sopenharmony_ci params.x1 = params.dst.tile_x_sa + x1 / (wide_Bpp / 2); 801bf215546Sopenharmony_ci params.y1 = params.dst.tile_y_sa + y1 / 2; 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 804bf215546Sopenharmony_ci } 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci return true; 807bf215546Sopenharmony_ci} 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_civoid 810bf215546Sopenharmony_ciblorp_clear_depth_stencil(struct blorp_batch *batch, 811bf215546Sopenharmony_ci const struct blorp_surf *depth, 812bf215546Sopenharmony_ci const struct blorp_surf *stencil, 813bf215546Sopenharmony_ci uint32_t level, uint32_t start_layer, 814bf215546Sopenharmony_ci uint32_t num_layers, 815bf215546Sopenharmony_ci uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 816bf215546Sopenharmony_ci bool clear_depth, float depth_value, 817bf215546Sopenharmony_ci uint8_t stencil_mask, uint8_t stencil_value) 818bf215546Sopenharmony_ci{ 819bf215546Sopenharmony_ci assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0); 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci if (!clear_depth && blorp_clear_stencil_as_rgba(batch, stencil, level, 822bf215546Sopenharmony_ci start_layer, num_layers, 823bf215546Sopenharmony_ci x0, y0, x1, y1, 824bf215546Sopenharmony_ci stencil_mask, 825bf215546Sopenharmony_ci stencil_value)) 826bf215546Sopenharmony_ci return; 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci struct blorp_params params; 829bf215546Sopenharmony_ci blorp_params_init(¶ms); 830bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR; 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci params.x0 = x0; 833bf215546Sopenharmony_ci params.y0 = y0; 834bf215546Sopenharmony_ci params.x1 = x1; 835bf215546Sopenharmony_ci params.y1 = y1; 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci if (ISL_GFX_VER(batch->blorp->isl_dev) == 6) { 838bf215546Sopenharmony_ci /* For some reason, Sandy Bridge gets occlusion queries wrong if we 839bf215546Sopenharmony_ci * don't have a shader. In particular, it records samples even though 840bf215546Sopenharmony_ci * we disable statistics in 3DSTATE_WM. Give it the usual clear shader 841bf215546Sopenharmony_ci * to work around the issue. 842bf215546Sopenharmony_ci */ 843bf215546Sopenharmony_ci if (!blorp_params_get_clear_kernel(batch, ¶ms, false, false)) 844bf215546Sopenharmony_ci return; 845bf215546Sopenharmony_ci } 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci while (num_layers > 0) { 848bf215546Sopenharmony_ci params.num_layers = num_layers; 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci if (stencil_mask) { 851bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.stencil, stencil, 852bf215546Sopenharmony_ci level, start_layer, 853bf215546Sopenharmony_ci ISL_FORMAT_UNSUPPORTED, true); 854bf215546Sopenharmony_ci params.stencil_mask = stencil_mask; 855bf215546Sopenharmony_ci params.stencil_ref = stencil_value; 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci params.dst.surf.samples = params.stencil.surf.samples; 858bf215546Sopenharmony_ci params.dst.surf.logical_level0_px = 859bf215546Sopenharmony_ci params.stencil.surf.logical_level0_px; 860bf215546Sopenharmony_ci params.dst.view = params.stencil.view; 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci params.num_samples = params.stencil.surf.samples; 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci /* We may be restricted on the number of layers we can bind at any 865bf215546Sopenharmony_ci * one time. In particular, Sandy Bridge has a maximum number of 866bf215546Sopenharmony_ci * layers of 512 but a maximum 3D texture size is much larger. 867bf215546Sopenharmony_ci */ 868bf215546Sopenharmony_ci if (params.stencil.view.array_len < params.num_layers) 869bf215546Sopenharmony_ci params.num_layers = params.stencil.view.array_len; 870bf215546Sopenharmony_ci } 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci if (clear_depth) { 873bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.depth, depth, 874bf215546Sopenharmony_ci level, start_layer, 875bf215546Sopenharmony_ci ISL_FORMAT_UNSUPPORTED, true); 876bf215546Sopenharmony_ci params.z = depth_value; 877bf215546Sopenharmony_ci params.depth_format = 878bf215546Sopenharmony_ci isl_format_get_depth_format(depth->surf->format, false); 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_ci params.dst.surf.samples = params.depth.surf.samples; 881bf215546Sopenharmony_ci params.dst.surf.logical_level0_px = 882bf215546Sopenharmony_ci params.depth.surf.logical_level0_px; 883bf215546Sopenharmony_ci params.dst.view = params.depth.view; 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci params.num_samples = params.depth.surf.samples; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci /* We may be restricted on the number of layers we can bind at any 888bf215546Sopenharmony_ci * one time. In particular, Sandy Bridge has a maximum number of 889bf215546Sopenharmony_ci * layers of 512 but a maximum 3D texture size is much larger. 890bf215546Sopenharmony_ci */ 891bf215546Sopenharmony_ci if (params.depth.view.array_len < params.num_layers) 892bf215546Sopenharmony_ci params.num_layers = params.depth.view.array_len; 893bf215546Sopenharmony_ci } 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci start_layer += params.num_layers; 898bf215546Sopenharmony_ci num_layers -= params.num_layers; 899bf215546Sopenharmony_ci } 900bf215546Sopenharmony_ci} 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_cibool 903bf215546Sopenharmony_ciblorp_can_hiz_clear_depth(const struct intel_device_info *devinfo, 904bf215546Sopenharmony_ci const struct isl_surf *surf, 905bf215546Sopenharmony_ci enum isl_aux_usage aux_usage, 906bf215546Sopenharmony_ci uint32_t level, uint32_t layer, 907bf215546Sopenharmony_ci uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1) 908bf215546Sopenharmony_ci{ 909bf215546Sopenharmony_ci /* This function currently doesn't support any gen prior to gfx8 */ 910bf215546Sopenharmony_ci assert(devinfo->ver >= 8); 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci if (devinfo->ver == 8 && surf->format == ISL_FORMAT_R16_UNORM) { 913bf215546Sopenharmony_ci /* From the BDW PRM, Vol 7, "Depth Buffer Clear": 914bf215546Sopenharmony_ci * 915bf215546Sopenharmony_ci * The following restrictions apply only if the depth buffer surface 916bf215546Sopenharmony_ci * type is D16_UNORM and software does not use the “full surf clear”: 917bf215546Sopenharmony_ci * 918bf215546Sopenharmony_ci * If Number of Multisamples is NUMSAMPLES_1, the rectangle must be 919bf215546Sopenharmony_ci * aligned to an 8x4 pixel block relative to the upper left corner of 920bf215546Sopenharmony_ci * the depth buffer, and contain an integer number of these pixel 921bf215546Sopenharmony_ci * blocks, and all 8x4 pixels must be lit. 922bf215546Sopenharmony_ci * 923bf215546Sopenharmony_ci * Alignment requirements for other sample counts are listed, but they 924bf215546Sopenharmony_ci * can all be satisfied by the one mentioned above. 925bf215546Sopenharmony_ci */ 926bf215546Sopenharmony_ci if (x0 % 8 || y0 % 4 || x1 % 8 || y1 % 4) 927bf215546Sopenharmony_ci return false; 928bf215546Sopenharmony_ci } else if (aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) { 929bf215546Sopenharmony_ci /* We have to set the WM_HZ_OP::FullSurfaceDepthandStencilClear bit 930bf215546Sopenharmony_ci * whenever we clear an uninitialized HIZ buffer (as some drivers 931bf215546Sopenharmony_ci * currently do). However, this bit seems liable to clear 16x8 pixels in 932bf215546Sopenharmony_ci * the ZCS on Gfx12 - greater than the slice alignments for depth 933bf215546Sopenharmony_ci * buffers. 934bf215546Sopenharmony_ci */ 935bf215546Sopenharmony_ci assert(surf->image_alignment_el.w % 16 != 0 || 936bf215546Sopenharmony_ci surf->image_alignment_el.h % 8 != 0); 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ci /* This is the hypothesis behind some corruption that was seen with the 939bf215546Sopenharmony_ci * amd_vertex_shader_layer-layered-depth-texture-render piglit test. 940bf215546Sopenharmony_ci * 941bf215546Sopenharmony_ci * From the Compressed Depth Buffers section of the Bspec, under the 942bf215546Sopenharmony_ci * Gfx12 texture performant and ZCS columns: 943bf215546Sopenharmony_ci * 944bf215546Sopenharmony_ci * Update with clear at either 16x8 or 8x4 granularity, based on 945bf215546Sopenharmony_ci * fs_clr or otherwise. 946bf215546Sopenharmony_ci * 947bf215546Sopenharmony_ci * There are a number of ways to avoid full surface CCS clears that 948bf215546Sopenharmony_ci * overlap other slices, but for now we choose to disable fast-clears 949bf215546Sopenharmony_ci * when an initializing clear could hit another miplevel. 950bf215546Sopenharmony_ci * 951bf215546Sopenharmony_ci * NOTE: Because the CCS compresses the depth buffer and not a version 952bf215546Sopenharmony_ci * of it that has been rearranged with different alignments (like Gfx8+ 953bf215546Sopenharmony_ci * HIZ), we have to make sure that the x0 and y0 are at least 16x8 954bf215546Sopenharmony_ci * aligned in the context of the entire surface. 955bf215546Sopenharmony_ci */ 956bf215546Sopenharmony_ci uint32_t slice_x0, slice_y0, slice_z0, slice_a0; 957bf215546Sopenharmony_ci isl_surf_get_image_offset_el(surf, level, 958bf215546Sopenharmony_ci surf->dim == ISL_SURF_DIM_3D ? 0 : layer, 959bf215546Sopenharmony_ci surf->dim == ISL_SURF_DIM_3D ? layer: 0, 960bf215546Sopenharmony_ci &slice_x0, &slice_y0, &slice_z0, &slice_a0); 961bf215546Sopenharmony_ci assert(slice_z0 == 0 && slice_a0 == 0); 962bf215546Sopenharmony_ci const bool max_x1_y1 = 963bf215546Sopenharmony_ci x1 == u_minify(surf->logical_level0_px.width, level) && 964bf215546Sopenharmony_ci y1 == u_minify(surf->logical_level0_px.height, level); 965bf215546Sopenharmony_ci const uint32_t haligned_x1 = ALIGN(x1, surf->image_alignment_el.w); 966bf215546Sopenharmony_ci const uint32_t valigned_y1 = ALIGN(y1, surf->image_alignment_el.h); 967bf215546Sopenharmony_ci const bool unaligned = (slice_x0 + x0) % 16 || (slice_y0 + y0) % 8 || 968bf215546Sopenharmony_ci (max_x1_y1 ? haligned_x1 % 16 || valigned_y1 % 8 : 969bf215546Sopenharmony_ci x1 % 16 || y1 % 8); 970bf215546Sopenharmony_ci const bool partial_clear = x0 > 0 || y0 > 0 || !max_x1_y1; 971bf215546Sopenharmony_ci const bool multislice_surf = surf->levels > 1 || 972bf215546Sopenharmony_ci surf->logical_level0_px.depth > 1 || 973bf215546Sopenharmony_ci surf->logical_level0_px.array_len > 1; 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci if (unaligned && (partial_clear || multislice_surf)) 976bf215546Sopenharmony_ci return false; 977bf215546Sopenharmony_ci } 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci return isl_aux_usage_has_hiz(aux_usage); 980bf215546Sopenharmony_ci} 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_cistatic bool 983bf215546Sopenharmony_ciblorp_can_clear_full_surface(const struct blorp_surf *depth, 984bf215546Sopenharmony_ci const struct blorp_surf *stencil, 985bf215546Sopenharmony_ci uint32_t level, 986bf215546Sopenharmony_ci uint32_t x0, uint32_t y0, 987bf215546Sopenharmony_ci uint32_t x1, uint32_t y1, 988bf215546Sopenharmony_ci bool clear_depth, 989bf215546Sopenharmony_ci bool clear_stencil) 990bf215546Sopenharmony_ci{ 991bf215546Sopenharmony_ci uint32_t width = 0, height = 0; 992bf215546Sopenharmony_ci if (clear_stencil) { 993bf215546Sopenharmony_ci width = u_minify(stencil->surf->logical_level0_px.width, level); 994bf215546Sopenharmony_ci height = u_minify(stencil->surf->logical_level0_px.height, level); 995bf215546Sopenharmony_ci } 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_ci if (clear_depth && !(width || height)) { 998bf215546Sopenharmony_ci width = u_minify(depth->surf->logical_level0_px.width, level); 999bf215546Sopenharmony_ci height = u_minify(depth->surf->logical_level0_px.height, level); 1000bf215546Sopenharmony_ci } 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci return x0 == 0 && y0 == 0 && width == x1 && height == y1; 1003bf215546Sopenharmony_ci} 1004bf215546Sopenharmony_ci 1005bf215546Sopenharmony_civoid 1006bf215546Sopenharmony_ciblorp_hiz_clear_depth_stencil(struct blorp_batch *batch, 1007bf215546Sopenharmony_ci const struct blorp_surf *depth, 1008bf215546Sopenharmony_ci const struct blorp_surf *stencil, 1009bf215546Sopenharmony_ci uint32_t level, 1010bf215546Sopenharmony_ci uint32_t start_layer, uint32_t num_layers, 1011bf215546Sopenharmony_ci uint32_t x0, uint32_t y0, 1012bf215546Sopenharmony_ci uint32_t x1, uint32_t y1, 1013bf215546Sopenharmony_ci bool clear_depth, float depth_value, 1014bf215546Sopenharmony_ci bool clear_stencil, uint8_t stencil_value) 1015bf215546Sopenharmony_ci{ 1016bf215546Sopenharmony_ci struct blorp_params params; 1017bf215546Sopenharmony_ci blorp_params_init(¶ms); 1018bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_HIZ_CLEAR; 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_ci /* This requires WM_HZ_OP which only exists on gfx8+ */ 1021bf215546Sopenharmony_ci assert(ISL_GFX_VER(batch->blorp->isl_dev) >= 8); 1022bf215546Sopenharmony_ci 1023bf215546Sopenharmony_ci params.hiz_op = ISL_AUX_OP_FAST_CLEAR; 1024bf215546Sopenharmony_ci /* From BSpec: 3DSTATE_WM_HZ_OP_BODY >> Full Surface Depth and Stencil Clear 1025bf215546Sopenharmony_ci * 1026bf215546Sopenharmony_ci * "Software must set this only when the APP requires the entire Depth 1027bf215546Sopenharmony_ci * surface to be cleared." 1028bf215546Sopenharmony_ci */ 1029bf215546Sopenharmony_ci params.full_surface_hiz_op = 1030bf215546Sopenharmony_ci blorp_can_clear_full_surface(depth, stencil, level, x0, y0, x1, y1, 1031bf215546Sopenharmony_ci clear_depth, clear_stencil); 1032bf215546Sopenharmony_ci params.num_layers = 1; 1033bf215546Sopenharmony_ci 1034bf215546Sopenharmony_ci params.x0 = x0; 1035bf215546Sopenharmony_ci params.y0 = y0; 1036bf215546Sopenharmony_ci params.x1 = x1; 1037bf215546Sopenharmony_ci params.y1 = y1; 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_ci for (uint32_t l = 0; l < num_layers; l++) { 1040bf215546Sopenharmony_ci const uint32_t layer = start_layer + l; 1041bf215546Sopenharmony_ci if (clear_stencil) { 1042bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.stencil, stencil, 1043bf215546Sopenharmony_ci level, layer, 1044bf215546Sopenharmony_ci ISL_FORMAT_UNSUPPORTED, true); 1045bf215546Sopenharmony_ci params.stencil_mask = 0xff; 1046bf215546Sopenharmony_ci params.stencil_ref = stencil_value; 1047bf215546Sopenharmony_ci params.num_samples = params.stencil.surf.samples; 1048bf215546Sopenharmony_ci } 1049bf215546Sopenharmony_ci 1050bf215546Sopenharmony_ci if (clear_depth) { 1051bf215546Sopenharmony_ci /* If we're clearing depth, we must have HiZ */ 1052bf215546Sopenharmony_ci assert(depth && isl_aux_usage_has_hiz(depth->aux_usage)); 1053bf215546Sopenharmony_ci 1054bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.depth, depth, 1055bf215546Sopenharmony_ci level, layer, 1056bf215546Sopenharmony_ci ISL_FORMAT_UNSUPPORTED, true); 1057bf215546Sopenharmony_ci params.depth.clear_color.f32[0] = depth_value; 1058bf215546Sopenharmony_ci params.depth_format = 1059bf215546Sopenharmony_ci isl_format_get_depth_format(depth->surf->format, false); 1060bf215546Sopenharmony_ci params.num_samples = params.depth.surf.samples; 1061bf215546Sopenharmony_ci } 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 1064bf215546Sopenharmony_ci } 1065bf215546Sopenharmony_ci} 1066bf215546Sopenharmony_ci 1067bf215546Sopenharmony_ci/* Given a depth stencil attachment, this function performs a fast depth clear 1068bf215546Sopenharmony_ci * on a depth portion and a regular clear on the stencil portion. When 1069bf215546Sopenharmony_ci * performing a fast depth clear on the depth portion, the HiZ buffer is simply 1070bf215546Sopenharmony_ci * tagged as cleared so the depth clear value is not actually needed. 1071bf215546Sopenharmony_ci */ 1072bf215546Sopenharmony_civoid 1073bf215546Sopenharmony_ciblorp_gfx8_hiz_clear_attachments(struct blorp_batch *batch, 1074bf215546Sopenharmony_ci uint32_t num_samples, 1075bf215546Sopenharmony_ci uint32_t x0, uint32_t y0, 1076bf215546Sopenharmony_ci uint32_t x1, uint32_t y1, 1077bf215546Sopenharmony_ci bool clear_depth, bool clear_stencil, 1078bf215546Sopenharmony_ci uint8_t stencil_value) 1079bf215546Sopenharmony_ci{ 1080bf215546Sopenharmony_ci assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); 1081bf215546Sopenharmony_ci 1082bf215546Sopenharmony_ci struct blorp_params params; 1083bf215546Sopenharmony_ci blorp_params_init(¶ms); 1084bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_HIZ_CLEAR; 1085bf215546Sopenharmony_ci params.num_layers = 1; 1086bf215546Sopenharmony_ci params.hiz_op = ISL_AUX_OP_FAST_CLEAR; 1087bf215546Sopenharmony_ci params.x0 = x0; 1088bf215546Sopenharmony_ci params.y0 = y0; 1089bf215546Sopenharmony_ci params.x1 = x1; 1090bf215546Sopenharmony_ci params.y1 = y1; 1091bf215546Sopenharmony_ci params.num_samples = num_samples; 1092bf215546Sopenharmony_ci params.depth.enabled = clear_depth; 1093bf215546Sopenharmony_ci params.stencil.enabled = clear_stencil; 1094bf215546Sopenharmony_ci params.stencil_ref = stencil_value; 1095bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 1096bf215546Sopenharmony_ci} 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_ci/** Clear active color/depth/stencili attachments 1099bf215546Sopenharmony_ci * 1100bf215546Sopenharmony_ci * This function performs a clear operation on the currently bound 1101bf215546Sopenharmony_ci * color/depth/stencil attachments. It is assumed that any information passed 1102bf215546Sopenharmony_ci * in here is valid, consistent, and in-bounds relative to the currently 1103bf215546Sopenharmony_ci * attached depth/stencil. The binding_table_offset parameter is the 32-bit 1104bf215546Sopenharmony_ci * offset relative to surface state base address where pre-baked binding table 1105bf215546Sopenharmony_ci * that we are to use lives. If clear_color is false, binding_table_offset 1106bf215546Sopenharmony_ci * must point to a binding table with one entry which is a valid null surface 1107bf215546Sopenharmony_ci * that matches the currently bound depth and stencil. 1108bf215546Sopenharmony_ci */ 1109bf215546Sopenharmony_civoid 1110bf215546Sopenharmony_ciblorp_clear_attachments(struct blorp_batch *batch, 1111bf215546Sopenharmony_ci uint32_t binding_table_offset, 1112bf215546Sopenharmony_ci enum isl_format depth_format, 1113bf215546Sopenharmony_ci uint32_t num_samples, 1114bf215546Sopenharmony_ci uint32_t start_layer, uint32_t num_layers, 1115bf215546Sopenharmony_ci uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 1116bf215546Sopenharmony_ci bool clear_color, union isl_color_value color_value, 1117bf215546Sopenharmony_ci bool clear_depth, float depth_value, 1118bf215546Sopenharmony_ci uint8_t stencil_mask, uint8_t stencil_value) 1119bf215546Sopenharmony_ci{ 1120bf215546Sopenharmony_ci struct blorp_params params; 1121bf215546Sopenharmony_ci blorp_params_init(¶ms); 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0); 1124bf215546Sopenharmony_ci assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); 1125bf215546Sopenharmony_ci 1126bf215546Sopenharmony_ci params.x0 = x0; 1127bf215546Sopenharmony_ci params.y0 = y0; 1128bf215546Sopenharmony_ci params.x1 = x1; 1129bf215546Sopenharmony_ci params.y1 = y1; 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci params.use_pre_baked_binding_table = true; 1132bf215546Sopenharmony_ci params.pre_baked_binding_table_offset = binding_table_offset; 1133bf215546Sopenharmony_ci 1134bf215546Sopenharmony_ci params.num_layers = num_layers; 1135bf215546Sopenharmony_ci params.num_samples = num_samples; 1136bf215546Sopenharmony_ci 1137bf215546Sopenharmony_ci if (clear_color) { 1138bf215546Sopenharmony_ci params.dst.enabled = true; 1139bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_SLOW_COLOR_CLEAR; 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_ci memcpy(¶ms.wm_inputs.clear_color, color_value.f32, sizeof(float) * 4); 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci /* Unfortunately, without knowing whether or not our destination surface 1144bf215546Sopenharmony_ci * is tiled or not, we have to assume it may be linear. This means no 1145bf215546Sopenharmony_ci * SIMD16_REPDATA for us. :-( 1146bf215546Sopenharmony_ci */ 1147bf215546Sopenharmony_ci if (!blorp_params_get_clear_kernel(batch, ¶ms, false, false)) 1148bf215546Sopenharmony_ci return; 1149bf215546Sopenharmony_ci } 1150bf215546Sopenharmony_ci 1151bf215546Sopenharmony_ci if (clear_depth) { 1152bf215546Sopenharmony_ci params.depth.enabled = true; 1153bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR; 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci params.z = depth_value; 1156bf215546Sopenharmony_ci params.depth_format = isl_format_get_depth_format(depth_format, false); 1157bf215546Sopenharmony_ci } 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_ci if (stencil_mask) { 1160bf215546Sopenharmony_ci params.stencil.enabled = true; 1161bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR; 1162bf215546Sopenharmony_ci 1163bf215546Sopenharmony_ci params.stencil_mask = stencil_mask; 1164bf215546Sopenharmony_ci params.stencil_ref = stencil_value; 1165bf215546Sopenharmony_ci } 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_ci if (!blorp_params_get_layer_offset_vs(batch, ¶ms)) 1168bf215546Sopenharmony_ci return; 1169bf215546Sopenharmony_ci 1170bf215546Sopenharmony_ci params.vs_inputs.base_layer = start_layer; 1171bf215546Sopenharmony_ci 1172bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 1173bf215546Sopenharmony_ci} 1174bf215546Sopenharmony_ci 1175bf215546Sopenharmony_civoid 1176bf215546Sopenharmony_ciblorp_ccs_resolve(struct blorp_batch *batch, 1177bf215546Sopenharmony_ci struct blorp_surf *surf, uint32_t level, 1178bf215546Sopenharmony_ci uint32_t start_layer, uint32_t num_layers, 1179bf215546Sopenharmony_ci enum isl_format format, 1180bf215546Sopenharmony_ci enum isl_aux_op resolve_op) 1181bf215546Sopenharmony_ci{ 1182bf215546Sopenharmony_ci assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0); 1183bf215546Sopenharmony_ci struct blorp_params params; 1184bf215546Sopenharmony_ci 1185bf215546Sopenharmony_ci blorp_params_init(¶ms); 1186bf215546Sopenharmony_ci switch(resolve_op) { 1187bf215546Sopenharmony_ci case ISL_AUX_OP_AMBIGUATE: 1188bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_CCS_AMBIGUATE; 1189bf215546Sopenharmony_ci break; 1190bf215546Sopenharmony_ci case ISL_AUX_OP_FULL_RESOLVE: 1191bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_CCS_RESOLVE; 1192bf215546Sopenharmony_ci break; 1193bf215546Sopenharmony_ci case ISL_AUX_OP_PARTIAL_RESOLVE: 1194bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE; 1195bf215546Sopenharmony_ci break; 1196bf215546Sopenharmony_ci default: 1197bf215546Sopenharmony_ci assert(false); 1198bf215546Sopenharmony_ci } 1199bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.dst, surf, 1200bf215546Sopenharmony_ci level, start_layer, format, true); 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_ci params.x0 = params.y0 = 0; 1203bf215546Sopenharmony_ci params.x1 = u_minify(params.dst.surf.logical_level0_px.width, level); 1204bf215546Sopenharmony_ci params.y1 = u_minify(params.dst.surf.logical_level0_px.height, level); 1205bf215546Sopenharmony_ci if (ISL_GFX_VER(batch->blorp->isl_dev) >= 9) { 1206bf215546Sopenharmony_ci /* From Bspec 2424, "Render Target Resolve": 1207bf215546Sopenharmony_ci * 1208bf215546Sopenharmony_ci * The Resolve Rectangle size is same as Clear Rectangle size from 1209bf215546Sopenharmony_ci * SKL+. 1210bf215546Sopenharmony_ci * 1211bf215546Sopenharmony_ci * Note that this differs from Vol7 of the Sky Lake PRM, which only 1212bf215546Sopenharmony_ci * specifies aligning by the scaledown factors. 1213bf215546Sopenharmony_ci */ 1214bf215546Sopenharmony_ci get_fast_clear_rect(batch->blorp->isl_dev, surf->surf, surf->aux_surf, 1215bf215546Sopenharmony_ci ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); 1216bf215546Sopenharmony_ci } else { 1217bf215546Sopenharmony_ci /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve": 1218bf215546Sopenharmony_ci * 1219bf215546Sopenharmony_ci * A rectangle primitive must be scaled down by the following factors 1220bf215546Sopenharmony_ci * with respect to render target being resolved. 1221bf215546Sopenharmony_ci * 1222bf215546Sopenharmony_ci * The scaledown factors in the table that follows are related to the 1223bf215546Sopenharmony_ci * block size of the CCS format. For IVB and HSW, we divide by two, for 1224bf215546Sopenharmony_ci * BDW we multiply by 8 and 16. 1225bf215546Sopenharmony_ci */ 1226bf215546Sopenharmony_ci const struct isl_format_layout *aux_fmtl = 1227bf215546Sopenharmony_ci isl_format_get_layout(params.dst.aux_surf.format); 1228bf215546Sopenharmony_ci assert(aux_fmtl->txc == ISL_TXC_CCS); 1229bf215546Sopenharmony_ci 1230bf215546Sopenharmony_ci unsigned x_scaledown, y_scaledown; 1231bf215546Sopenharmony_ci if (ISL_GFX_VER(batch->blorp->isl_dev) >= 8) { 1232bf215546Sopenharmony_ci x_scaledown = aux_fmtl->bw * 8; 1233bf215546Sopenharmony_ci y_scaledown = aux_fmtl->bh * 16; 1234bf215546Sopenharmony_ci } else { 1235bf215546Sopenharmony_ci x_scaledown = aux_fmtl->bw / 2; 1236bf215546Sopenharmony_ci y_scaledown = aux_fmtl->bh / 2; 1237bf215546Sopenharmony_ci } 1238bf215546Sopenharmony_ci params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown; 1239bf215546Sopenharmony_ci params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown; 1240bf215546Sopenharmony_ci } 1241bf215546Sopenharmony_ci 1242bf215546Sopenharmony_ci if (batch->blorp->isl_dev->info->ver >= 10) { 1243bf215546Sopenharmony_ci assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE || 1244bf215546Sopenharmony_ci resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE || 1245bf215546Sopenharmony_ci resolve_op == ISL_AUX_OP_AMBIGUATE); 1246bf215546Sopenharmony_ci } else if (batch->blorp->isl_dev->info->ver >= 9) { 1247bf215546Sopenharmony_ci assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE || 1248bf215546Sopenharmony_ci resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE); 1249bf215546Sopenharmony_ci } else { 1250bf215546Sopenharmony_ci /* Broadwell and earlier do not have a partial resolve */ 1251bf215546Sopenharmony_ci assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE); 1252bf215546Sopenharmony_ci } 1253bf215546Sopenharmony_ci params.fast_clear_op = resolve_op; 1254bf215546Sopenharmony_ci params.num_layers = num_layers; 1255bf215546Sopenharmony_ci 1256bf215546Sopenharmony_ci /* Note: there is no need to initialize push constants because it doesn't 1257bf215546Sopenharmony_ci * matter what data gets dispatched to the render target. However, we must 1258bf215546Sopenharmony_ci * ensure that the fragment shader delivers the data using the "replicated 1259bf215546Sopenharmony_ci * color" message. 1260bf215546Sopenharmony_ci */ 1261bf215546Sopenharmony_ci 1262bf215546Sopenharmony_ci if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) 1263bf215546Sopenharmony_ci return; 1264bf215546Sopenharmony_ci 1265bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 1266bf215546Sopenharmony_ci} 1267bf215546Sopenharmony_ci 1268bf215546Sopenharmony_cistatic nir_ssa_def * 1269bf215546Sopenharmony_ciblorp_nir_bit(nir_builder *b, nir_ssa_def *src, unsigned bit) 1270bf215546Sopenharmony_ci{ 1271bf215546Sopenharmony_ci return nir_iand(b, nir_ushr(b, src, nir_imm_int(b, bit)), 1272bf215546Sopenharmony_ci nir_imm_int(b, 1)); 1273bf215546Sopenharmony_ci} 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci#pragma pack(push, 1) 1276bf215546Sopenharmony_cistruct blorp_mcs_partial_resolve_key 1277bf215546Sopenharmony_ci{ 1278bf215546Sopenharmony_ci struct brw_blorp_base_key base; 1279bf215546Sopenharmony_ci bool indirect_clear_color; 1280bf215546Sopenharmony_ci bool int_format; 1281bf215546Sopenharmony_ci uint32_t num_samples; 1282bf215546Sopenharmony_ci}; 1283bf215546Sopenharmony_ci#pragma pack(pop) 1284bf215546Sopenharmony_ci 1285bf215546Sopenharmony_cistatic bool 1286bf215546Sopenharmony_ciblorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch, 1287bf215546Sopenharmony_ci struct blorp_params *params) 1288bf215546Sopenharmony_ci{ 1289bf215546Sopenharmony_ci struct blorp_context *blorp = batch->blorp; 1290bf215546Sopenharmony_ci const struct blorp_mcs_partial_resolve_key blorp_key = { 1291bf215546Sopenharmony_ci .base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE), 1292bf215546Sopenharmony_ci .indirect_clear_color = params->dst.clear_color_addr.buffer != NULL, 1293bf215546Sopenharmony_ci .int_format = isl_format_has_int_channel(params->dst.view.format), 1294bf215546Sopenharmony_ci .num_samples = params->num_samples, 1295bf215546Sopenharmony_ci }; 1296bf215546Sopenharmony_ci 1297bf215546Sopenharmony_ci if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), 1298bf215546Sopenharmony_ci ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) 1299bf215546Sopenharmony_ci return true; 1300bf215546Sopenharmony_ci 1301bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 1302bf215546Sopenharmony_ci 1303bf215546Sopenharmony_ci nir_builder b; 1304bf215546Sopenharmony_ci blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, 1305bf215546Sopenharmony_ci blorp_shader_type_to_name(blorp_key.base.shader_type)); 1306bf215546Sopenharmony_ci 1307bf215546Sopenharmony_ci nir_variable *v_color = 1308bf215546Sopenharmony_ci BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type()); 1309bf215546Sopenharmony_ci 1310bf215546Sopenharmony_ci nir_variable *frag_color = 1311bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_out, 1312bf215546Sopenharmony_ci glsl_vec4_type(), "gl_FragColor"); 1313bf215546Sopenharmony_ci frag_color->data.location = FRAG_RESULT_COLOR; 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci /* Do an MCS fetch and check if it is equal to the magic clear value */ 1316bf215546Sopenharmony_ci nir_ssa_def *mcs = 1317bf215546Sopenharmony_ci blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, nir_load_frag_coord(&b)), 1318bf215546Sopenharmony_ci nir_load_layer_id(&b)); 1319bf215546Sopenharmony_ci nir_ssa_def *is_clear = 1320bf215546Sopenharmony_ci blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples); 1321bf215546Sopenharmony_ci 1322bf215546Sopenharmony_ci /* If we aren't the clear value, discard. */ 1323bf215546Sopenharmony_ci nir_discard_if(&b, nir_inot(&b, is_clear)); 1324bf215546Sopenharmony_ci 1325bf215546Sopenharmony_ci nir_ssa_def *clear_color = nir_load_var(&b, v_color); 1326bf215546Sopenharmony_ci if (blorp_key.indirect_clear_color && blorp->isl_dev->info->ver <= 8) { 1327bf215546Sopenharmony_ci /* Gfx7-8 clear colors are stored as single 0/1 bits */ 1328bf215546Sopenharmony_ci clear_color = nir_vec4(&b, blorp_nir_bit(&b, clear_color, 31), 1329bf215546Sopenharmony_ci blorp_nir_bit(&b, clear_color, 30), 1330bf215546Sopenharmony_ci blorp_nir_bit(&b, clear_color, 29), 1331bf215546Sopenharmony_ci blorp_nir_bit(&b, clear_color, 28)); 1332bf215546Sopenharmony_ci 1333bf215546Sopenharmony_ci if (!blorp_key.int_format) 1334bf215546Sopenharmony_ci clear_color = nir_i2f32(&b, clear_color); 1335bf215546Sopenharmony_ci } 1336bf215546Sopenharmony_ci nir_store_var(&b, frag_color, clear_color, 0xf); 1337bf215546Sopenharmony_ci 1338bf215546Sopenharmony_ci struct brw_wm_prog_key wm_key; 1339bf215546Sopenharmony_ci brw_blorp_init_wm_prog_key(&wm_key); 1340bf215546Sopenharmony_ci wm_key.base.tex.compressed_multisample_layout_mask = 1; 1341bf215546Sopenharmony_ci wm_key.base.tex.msaa_16 = blorp_key.num_samples == 16; 1342bf215546Sopenharmony_ci wm_key.multisample_fbo = true; 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_ci struct brw_wm_prog_data prog_data; 1345bf215546Sopenharmony_ci const unsigned *program = 1346bf215546Sopenharmony_ci blorp_compile_fs(blorp, mem_ctx, b.shader, &wm_key, false, 1347bf215546Sopenharmony_ci &prog_data); 1348bf215546Sopenharmony_ci 1349bf215546Sopenharmony_ci bool result = 1350bf215546Sopenharmony_ci blorp->upload_shader(batch, MESA_SHADER_FRAGMENT, 1351bf215546Sopenharmony_ci &blorp_key, sizeof(blorp_key), 1352bf215546Sopenharmony_ci program, prog_data.base.program_size, 1353bf215546Sopenharmony_ci &prog_data.base, sizeof(prog_data), 1354bf215546Sopenharmony_ci ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1357bf215546Sopenharmony_ci return result; 1358bf215546Sopenharmony_ci} 1359bf215546Sopenharmony_ci 1360bf215546Sopenharmony_civoid 1361bf215546Sopenharmony_ciblorp_mcs_partial_resolve(struct blorp_batch *batch, 1362bf215546Sopenharmony_ci struct blorp_surf *surf, 1363bf215546Sopenharmony_ci enum isl_format format, 1364bf215546Sopenharmony_ci uint32_t start_layer, uint32_t num_layers) 1365bf215546Sopenharmony_ci{ 1366bf215546Sopenharmony_ci struct blorp_params params; 1367bf215546Sopenharmony_ci blorp_params_init(¶ms); 1368bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE; 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci assert(batch->blorp->isl_dev->info->ver >= 7); 1371bf215546Sopenharmony_ci 1372bf215546Sopenharmony_ci params.x0 = 0; 1373bf215546Sopenharmony_ci params.y0 = 0; 1374bf215546Sopenharmony_ci params.x1 = surf->surf->logical_level0_px.width; 1375bf215546Sopenharmony_ci params.y1 = surf->surf->logical_level0_px.height; 1376bf215546Sopenharmony_ci 1377bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.src, surf, 0, 1378bf215546Sopenharmony_ci start_layer, format, false); 1379bf215546Sopenharmony_ci brw_blorp_surface_info_init(batch, ¶ms.dst, surf, 0, 1380bf215546Sopenharmony_ci start_layer, format, true); 1381bf215546Sopenharmony_ci 1382bf215546Sopenharmony_ci params.num_samples = params.dst.surf.samples; 1383bf215546Sopenharmony_ci params.num_layers = num_layers; 1384bf215546Sopenharmony_ci params.dst_clear_color_as_input = surf->clear_color_addr.buffer != NULL; 1385bf215546Sopenharmony_ci 1386bf215546Sopenharmony_ci memcpy(¶ms.wm_inputs.clear_color, 1387bf215546Sopenharmony_ci surf->clear_color.f32, sizeof(float) * 4); 1388bf215546Sopenharmony_ci 1389bf215546Sopenharmony_ci if (!blorp_params_get_mcs_partial_resolve_kernel(batch, ¶ms)) 1390bf215546Sopenharmony_ci return; 1391bf215546Sopenharmony_ci 1392bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 1393bf215546Sopenharmony_ci} 1394bf215546Sopenharmony_ci 1395bf215546Sopenharmony_ci/** Clear a CCS to the "uncompressed" state 1396bf215546Sopenharmony_ci * 1397bf215546Sopenharmony_ci * This pass is the CCS equivalent of a "HiZ resolve". It sets the CCS values 1398bf215546Sopenharmony_ci * for a given layer/level of a surface to 0x0 which is the "uncompressed" 1399bf215546Sopenharmony_ci * state which tells the sampler to go look at the main surface. 1400bf215546Sopenharmony_ci */ 1401bf215546Sopenharmony_civoid 1402bf215546Sopenharmony_ciblorp_ccs_ambiguate(struct blorp_batch *batch, 1403bf215546Sopenharmony_ci struct blorp_surf *surf, 1404bf215546Sopenharmony_ci uint32_t level, uint32_t layer) 1405bf215546Sopenharmony_ci{ 1406bf215546Sopenharmony_ci assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0); 1407bf215546Sopenharmony_ci 1408bf215546Sopenharmony_ci if (ISL_GFX_VER(batch->blorp->isl_dev) >= 10) { 1409bf215546Sopenharmony_ci /* On gfx10 and above, we have a hardware resolve op for this */ 1410bf215546Sopenharmony_ci return blorp_ccs_resolve(batch, surf, level, layer, 1, 1411bf215546Sopenharmony_ci surf->surf->format, ISL_AUX_OP_AMBIGUATE); 1412bf215546Sopenharmony_ci } 1413bf215546Sopenharmony_ci 1414bf215546Sopenharmony_ci struct blorp_params params; 1415bf215546Sopenharmony_ci blorp_params_init(¶ms); 1416bf215546Sopenharmony_ci params.snapshot_type = INTEL_SNAPSHOT_CCS_AMBIGUATE; 1417bf215546Sopenharmony_ci 1418bf215546Sopenharmony_ci assert(ISL_GFX_VER(batch->blorp->isl_dev) >= 7); 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci const struct isl_format_layout *aux_fmtl = 1421bf215546Sopenharmony_ci isl_format_get_layout(surf->aux_surf->format); 1422bf215546Sopenharmony_ci assert(aux_fmtl->txc == ISL_TXC_CCS); 1423bf215546Sopenharmony_ci 1424bf215546Sopenharmony_ci params.dst = (struct brw_blorp_surface_info) { 1425bf215546Sopenharmony_ci .enabled = true, 1426bf215546Sopenharmony_ci .addr = surf->aux_addr, 1427bf215546Sopenharmony_ci .view = { 1428bf215546Sopenharmony_ci .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT, 1429bf215546Sopenharmony_ci .format = ISL_FORMAT_R32G32B32A32_UINT, 1430bf215546Sopenharmony_ci .base_level = 0, 1431bf215546Sopenharmony_ci .base_array_layer = 0, 1432bf215546Sopenharmony_ci .levels = 1, 1433bf215546Sopenharmony_ci .array_len = 1, 1434bf215546Sopenharmony_ci .swizzle = ISL_SWIZZLE_IDENTITY, 1435bf215546Sopenharmony_ci }, 1436bf215546Sopenharmony_ci }; 1437bf215546Sopenharmony_ci 1438bf215546Sopenharmony_ci uint32_t z = 0; 1439bf215546Sopenharmony_ci if (surf->surf->dim == ISL_SURF_DIM_3D) { 1440bf215546Sopenharmony_ci z = layer; 1441bf215546Sopenharmony_ci layer = 0; 1442bf215546Sopenharmony_ci } 1443bf215546Sopenharmony_ci 1444bf215546Sopenharmony_ci uint64_t offset_B; 1445bf215546Sopenharmony_ci uint32_t x_offset_el, y_offset_el; 1446bf215546Sopenharmony_ci isl_surf_get_image_offset_B_tile_el(surf->aux_surf, level, layer, z, 1447bf215546Sopenharmony_ci &offset_B, &x_offset_el, &y_offset_el); 1448bf215546Sopenharmony_ci params.dst.addr.offset += offset_B; 1449bf215546Sopenharmony_ci 1450bf215546Sopenharmony_ci const uint32_t width_px = 1451bf215546Sopenharmony_ci u_minify(surf->aux_surf->logical_level0_px.width, level); 1452bf215546Sopenharmony_ci const uint32_t height_px = 1453bf215546Sopenharmony_ci u_minify(surf->aux_surf->logical_level0_px.height, level); 1454bf215546Sopenharmony_ci const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw); 1455bf215546Sopenharmony_ci const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh); 1456bf215546Sopenharmony_ci 1457bf215546Sopenharmony_ci struct isl_tile_info ccs_tile_info; 1458bf215546Sopenharmony_ci isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info); 1459bf215546Sopenharmony_ci 1460bf215546Sopenharmony_ci /* We're going to map it as a regular RGBA32_UINT surface. We need to 1461bf215546Sopenharmony_ci * downscale a good deal. We start by computing the area on the CCS to 1462bf215546Sopenharmony_ci * clear in units of Y-tiled cache lines. 1463bf215546Sopenharmony_ci */ 1464bf215546Sopenharmony_ci uint32_t x_offset_cl, y_offset_cl, width_cl, height_cl; 1465bf215546Sopenharmony_ci if (ISL_GFX_VER(batch->blorp->isl_dev) >= 8) { 1466bf215546Sopenharmony_ci /* From the Sky Lake PRM Vol. 12 in the section on planes: 1467bf215546Sopenharmony_ci * 1468bf215546Sopenharmony_ci * "The Color Control Surface (CCS) contains the compression status 1469bf215546Sopenharmony_ci * of the cache-line pairs. The compression state of the cache-line 1470bf215546Sopenharmony_ci * pair is specified by 2 bits in the CCS. Each CCS cache-line 1471bf215546Sopenharmony_ci * represents an area on the main surface of 16x16 sets of 128 byte 1472bf215546Sopenharmony_ci * Y-tiled cache-line-pairs. CCS is always Y tiled." 1473bf215546Sopenharmony_ci * 1474bf215546Sopenharmony_ci * Each 2-bit surface element in the CCS corresponds to a single 1475bf215546Sopenharmony_ci * cache-line pair in the main surface. This means that 16x16 el block 1476bf215546Sopenharmony_ci * in the CCS maps to a Y-tiled cache line. Fortunately, CCS layouts 1477bf215546Sopenharmony_ci * are calculated with a very large alignment so we can round up to a 1478bf215546Sopenharmony_ci * whole cache line without worrying about overdraw. 1479bf215546Sopenharmony_ci */ 1480bf215546Sopenharmony_ci 1481bf215546Sopenharmony_ci /* On Broadwell and above, a CCS tile is the same as a Y tile when 1482bf215546Sopenharmony_ci * viewed at the cache-line granularity. Fortunately, the horizontal 1483bf215546Sopenharmony_ci * and vertical alignment requirements of the CCS are such that we can 1484bf215546Sopenharmony_ci * align to an entire cache line without worrying about crossing over 1485bf215546Sopenharmony_ci * from one LOD to another. 1486bf215546Sopenharmony_ci */ 1487bf215546Sopenharmony_ci const uint32_t x_el_per_cl = ccs_tile_info.logical_extent_el.w / 8; 1488bf215546Sopenharmony_ci const uint32_t y_el_per_cl = ccs_tile_info.logical_extent_el.h / 8; 1489bf215546Sopenharmony_ci assert(surf->aux_surf->image_alignment_el.w % x_el_per_cl == 0); 1490bf215546Sopenharmony_ci assert(surf->aux_surf->image_alignment_el.h % y_el_per_cl == 0); 1491bf215546Sopenharmony_ci 1492bf215546Sopenharmony_ci assert(x_offset_el % x_el_per_cl == 0); 1493bf215546Sopenharmony_ci assert(y_offset_el % y_el_per_cl == 0); 1494bf215546Sopenharmony_ci x_offset_cl = x_offset_el / x_el_per_cl; 1495bf215546Sopenharmony_ci y_offset_cl = y_offset_el / y_el_per_cl; 1496bf215546Sopenharmony_ci width_cl = DIV_ROUND_UP(width_el, x_el_per_cl); 1497bf215546Sopenharmony_ci height_cl = DIV_ROUND_UP(height_el, y_el_per_cl); 1498bf215546Sopenharmony_ci } else { 1499bf215546Sopenharmony_ci /* On gfx7, the CCS tiling is not so nice. However, there we are 1500bf215546Sopenharmony_ci * guaranteed that we only have a single level and slice so we don't 1501bf215546Sopenharmony_ci * have to worry about it and can just align to a whole tile. 1502bf215546Sopenharmony_ci */ 1503bf215546Sopenharmony_ci assert(surf->aux_surf->logical_level0_px.depth == 1); 1504bf215546Sopenharmony_ci assert(surf->aux_surf->logical_level0_px.array_len == 1); 1505bf215546Sopenharmony_ci assert(x_offset_el == 0 && y_offset_el == 0); 1506bf215546Sopenharmony_ci const uint32_t width_tl = 1507bf215546Sopenharmony_ci DIV_ROUND_UP(width_el, ccs_tile_info.logical_extent_el.w); 1508bf215546Sopenharmony_ci const uint32_t height_tl = 1509bf215546Sopenharmony_ci DIV_ROUND_UP(height_el, ccs_tile_info.logical_extent_el.h); 1510bf215546Sopenharmony_ci x_offset_cl = 0; 1511bf215546Sopenharmony_ci y_offset_cl = 0; 1512bf215546Sopenharmony_ci width_cl = width_tl * 8; 1513bf215546Sopenharmony_ci height_cl = height_tl * 8; 1514bf215546Sopenharmony_ci } 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ci /* We're going to use a RGBA32 format so as to write data as quickly as 1517bf215546Sopenharmony_ci * possible. A y-tiled cache line will then be 1x4 px. 1518bf215546Sopenharmony_ci */ 1519bf215546Sopenharmony_ci const uint32_t x_offset_rgba_px = x_offset_cl; 1520bf215546Sopenharmony_ci const uint32_t y_offset_rgba_px = y_offset_cl * 4; 1521bf215546Sopenharmony_ci const uint32_t width_rgba_px = width_cl; 1522bf215546Sopenharmony_ci const uint32_t height_rgba_px = height_cl * 4; 1523bf215546Sopenharmony_ci 1524bf215546Sopenharmony_ci ASSERTED bool ok = 1525bf215546Sopenharmony_ci isl_surf_init(batch->blorp->isl_dev, ¶ms.dst.surf, 1526bf215546Sopenharmony_ci .dim = ISL_SURF_DIM_2D, 1527bf215546Sopenharmony_ci .format = ISL_FORMAT_R32G32B32A32_UINT, 1528bf215546Sopenharmony_ci .width = width_rgba_px + x_offset_rgba_px, 1529bf215546Sopenharmony_ci .height = height_rgba_px + y_offset_rgba_px, 1530bf215546Sopenharmony_ci .depth = 1, 1531bf215546Sopenharmony_ci .levels = 1, 1532bf215546Sopenharmony_ci .array_len = 1, 1533bf215546Sopenharmony_ci .samples = 1, 1534bf215546Sopenharmony_ci .row_pitch_B = surf->aux_surf->row_pitch_B, 1535bf215546Sopenharmony_ci .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT, 1536bf215546Sopenharmony_ci .tiling_flags = ISL_TILING_Y0_BIT); 1537bf215546Sopenharmony_ci assert(ok); 1538bf215546Sopenharmony_ci 1539bf215546Sopenharmony_ci params.x0 = x_offset_rgba_px; 1540bf215546Sopenharmony_ci params.y0 = y_offset_rgba_px; 1541bf215546Sopenharmony_ci params.x1 = x_offset_rgba_px + width_rgba_px; 1542bf215546Sopenharmony_ci params.y1 = y_offset_rgba_px + height_rgba_px; 1543bf215546Sopenharmony_ci 1544bf215546Sopenharmony_ci /* A CCS value of 0 means "uncompressed." */ 1545bf215546Sopenharmony_ci memset(¶ms.wm_inputs.clear_color, 0, 1546bf215546Sopenharmony_ci sizeof(params.wm_inputs.clear_color)); 1547bf215546Sopenharmony_ci 1548bf215546Sopenharmony_ci if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) 1549bf215546Sopenharmony_ci return; 1550bf215546Sopenharmony_ci 1551bf215546Sopenharmony_ci batch->blorp->exec(batch, ¶ms); 1552bf215546Sopenharmony_ci} 1553