/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 */ 23 24#include <assert.h> 25 26#include "anv_private.h" 27#include "anv_measure.h" 28 29/* These are defined in anv_private.h and blorp_genX_exec.h */ 30#undef __gen_address_type 31#undef __gen_user_data 32#undef __gen_combine_address 33 34#include "common/intel_l3_config.h" 35#include "blorp/blorp_genX_exec.h" 36 37#include "ds/intel_tracepoints.h" 38 39static void blorp_measure_start(struct blorp_batch *_batch, 40 const struct blorp_params *params) 41{ 42 struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch; 43 trace_intel_begin_blorp(&cmd_buffer->trace); 44 anv_measure_snapshot(cmd_buffer, 45 params->snapshot_type, 46 NULL, 0); 47} 48 49static void blorp_measure_end(struct blorp_batch *_batch, 50 const struct blorp_params *params) 51{ 52 struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch; 53 trace_intel_end_blorp(&cmd_buffer->trace, 54 params->x1 - params->x0, 55 params->y1 - params->y0, 56 params->hiz_op, 57 params->fast_clear_op, 58 params->shader_type, 59 params->shader_pipeline); 60} 61 62static void * 63blorp_emit_dwords(struct blorp_batch *batch, unsigned n) 64{ 65 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 66 return anv_batch_emit_dwords(&cmd_buffer->batch, n); 67} 68 69static uint64_t 70blorp_emit_reloc(struct blorp_batch *batch, 71 void *location, struct blorp_address address, uint32_t delta) 72{ 73 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 74 assert(cmd_buffer->batch.start <= location && 75 location < cmd_buffer->batch.end); 76 return anv_batch_emit_reloc(&cmd_buffer->batch, location, 77 address.buffer, address.offset + delta); 78} 79 80static void 81blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, 82 struct blorp_address address, uint32_t delta) 83{ 84 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 85 VkResult result; 86 87 if (ANV_ALWAYS_SOFTPIN) { 88 result = anv_reloc_list_add_bo(&cmd_buffer->surface_relocs, 89 &cmd_buffer->vk.pool->alloc, 90 address.buffer); 91 if 
(unlikely(result != VK_SUCCESS)) 92 anv_batch_set_error(&cmd_buffer->batch, result); 93 return; 94 } 95 96 uint64_t address_u64 = 0; 97 result = anv_reloc_list_add(&cmd_buffer->surface_relocs, 98 &cmd_buffer->vk.pool->alloc, 99 ss_offset, address.buffer, 100 address.offset + delta, 101 &address_u64); 102 if (result != VK_SUCCESS) 103 anv_batch_set_error(&cmd_buffer->batch, result); 104 105 void *dest = anv_block_pool_map( 106 &cmd_buffer->device->surface_state_pool.block_pool, ss_offset, 8); 107 write_reloc(cmd_buffer->device, dest, address_u64, false); 108} 109 110static uint64_t 111blorp_get_surface_address(struct blorp_batch *blorp_batch, 112 struct blorp_address address) 113{ 114 if (ANV_ALWAYS_SOFTPIN) { 115 struct anv_address anv_addr = { 116 .bo = address.buffer, 117 .offset = address.offset, 118 }; 119 return anv_address_physical(anv_addr); 120 } else { 121 /* We'll let blorp_surface_reloc write the address. */ 122 return 0; 123 } 124} 125 126#if GFX_VER >= 7 && GFX_VER < 10 127static struct blorp_address 128blorp_get_surface_base_address(struct blorp_batch *batch) 129{ 130 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 131 return (struct blorp_address) { 132 .buffer = cmd_buffer->device->surface_state_pool.block_pool.bo, 133 .offset = 0, 134 }; 135} 136#endif 137 138static void * 139blorp_alloc_dynamic_state(struct blorp_batch *batch, 140 uint32_t size, 141 uint32_t alignment, 142 uint32_t *offset) 143{ 144 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 145 146 struct anv_state state = 147 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); 148 149 *offset = state.offset; 150 return state.map; 151} 152 153UNUSED static void * 154blorp_alloc_general_state(struct blorp_batch *batch, 155 uint32_t size, 156 uint32_t alignment, 157 uint32_t *offset) 158{ 159 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 160 161 struct anv_state state = 162 anv_state_stream_alloc(&cmd_buffer->general_state_stream, size, 163 alignment); 
164 165 *offset = state.offset; 166 return state.map; 167} 168 169static void 170blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries, 171 unsigned state_size, unsigned state_alignment, 172 uint32_t *bt_offset, 173 uint32_t *surface_offsets, void **surface_maps) 174{ 175 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 176 177 uint32_t state_offset; 178 struct anv_state bt_state; 179 180 VkResult result = 181 anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, num_entries, 182 &state_offset, &bt_state); 183 if (result != VK_SUCCESS) 184 return; 185 186 uint32_t *bt_map = bt_state.map; 187 *bt_offset = bt_state.offset; 188 189 for (unsigned i = 0; i < num_entries; i++) { 190 struct anv_state surface_state = 191 anv_cmd_buffer_alloc_surface_state(cmd_buffer); 192 bt_map[i] = surface_state.offset + state_offset; 193 surface_offsets[i] = surface_state.offset; 194 surface_maps[i] = surface_state.map; 195 } 196} 197 198static uint32_t 199blorp_binding_table_offset_to_pointer(struct blorp_batch *batch, 200 uint32_t offset) 201{ 202 return offset; 203} 204 205static void * 206blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, 207 struct blorp_address *addr) 208{ 209 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 210 struct anv_state vb_state = 211 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64); 212 213 *addr = (struct blorp_address) { 214 .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo, 215 .offset = vb_state.offset, 216 .mocs = isl_mocs(&cmd_buffer->device->isl_dev, 217 ISL_SURF_USAGE_VERTEX_BUFFER_BIT, false), 218 }; 219 220 return vb_state.map; 221} 222 223static void 224blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch, 225 const struct blorp_address *addrs, 226 uint32_t *sizes, 227 unsigned num_vbs) 228{ 229 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 230 231 for (unsigned i = 0; i < num_vbs; i++) { 232 struct anv_address anv_addr = { 233 .bo = 
addrs[i].buffer, 234 .offset = addrs[i].offset, 235 }; 236 genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 237 i, anv_addr, sizes[i]); 238 } 239 240 genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); 241 242 /* Technically, we should call this *after* 3DPRIMITIVE but it doesn't 243 * really matter for blorp because we never call apply_pipe_flushes after 244 * this point. 245 */ 246 genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL, 247 (1 << num_vbs) - 1); 248} 249 250UNUSED static struct blorp_address 251blorp_get_workaround_address(struct blorp_batch *batch) 252{ 253 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 254 255 return (struct blorp_address) { 256 .buffer = cmd_buffer->device->workaround_address.bo, 257 .offset = cmd_buffer->device->workaround_address.offset, 258 }; 259} 260 261static void 262blorp_flush_range(struct blorp_batch *batch, void *start, size_t size) 263{ 264 /* We don't need to flush states anymore, since everything will be snooped. 265 */ 266} 267 268static const struct intel_l3_config * 269blorp_get_l3_config(struct blorp_batch *batch) 270{ 271 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 272 return cmd_buffer->state.current_l3_config; 273} 274 275static void 276blorp_exec_on_render(struct blorp_batch *batch, 277 const struct blorp_params *params) 278{ 279 assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0); 280 281 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 282 assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT); 283 284 const unsigned scale = params->fast_clear_op ? 
UINT_MAX : 1; 285 genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0, 286 params->y1 - params->y0, scale); 287 288#if GFX_VER >= 11 289 /* The PIPE_CONTROL command description says: 290 * 291 * "Whenever a Binding Table Index (BTI) used by a Render Target Message 292 * points to a different RENDER_SURFACE_STATE, SW must issue a Render 293 * Target Cache Flush by enabling this bit. When render target flush 294 * is set due to new association of BTI, PS Scoreboard Stall bit must 295 * be set in this packet." 296 */ 297 anv_add_pending_pipe_bits(cmd_buffer, 298 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | 299 ANV_PIPE_STALL_AT_SCOREBOARD_BIT, 300 "before blorp BTI change"); 301#endif 302 303 if (params->depth.enabled && 304 !(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)) 305 genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, ¶ms->depth.surf); 306 307 genX(flush_pipeline_select_3d)(cmd_buffer); 308 309 /* Apply any outstanding flushes in case pipeline select haven't. */ 310 genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); 311 312 genX(cmd_buffer_emit_gfx7_depth_flush)(cmd_buffer); 313 314 /* BLORP doesn't do anything fancy with depth such as discards, so we want 315 * the PMA fix off. Also, off is always the safe option. 316 */ 317 genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false); 318 319 blorp_exec(batch, params); 320 321#if GFX_VER >= 11 322 /* The PIPE_CONTROL command description says: 323 * 324 * "Whenever a Binding Table Index (BTI) used by a Render Target Message 325 * points to a different RENDER_SURFACE_STATE, SW must issue a Render 326 * Target Cache Flush by enabling this bit. When render target flush 327 * is set due to new association of BTI, PS Scoreboard Stall bit must 328 * be set in this packet." 329 */ 330 anv_add_pending_pipe_bits(cmd_buffer, 331 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | 332 ANV_PIPE_STALL_AT_SCOREBOARD_BIT, 333 "after blorp BTI change"); 334#endif 335 336 /* Calculate state that does not get touched by blorp. 
337 * Flush everything else. 338 */ 339 anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER | 340 ANV_CMD_DIRTY_XFB_ENABLE); 341 342 BITSET_DECLARE(dyn_dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); 343 BITSET_ONES(dyn_dirty); 344 BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE); 345 BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT); 346 BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSORS); 347 BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE); 348 BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_FSR); 349 BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS); 350 if (!params->wm_prog_data) { 351 BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES); 352 BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP); 353 } 354 355 cmd_buffer->state.gfx.vb_dirty = ~0; 356 cmd_buffer->state.gfx.dirty |= dirty; 357 BITSET_OR(cmd_buffer->vk.dynamic_graphics_state.dirty, 358 cmd_buffer->vk.dynamic_graphics_state.dirty, dyn_dirty); 359 cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; 360} 361 362static void 363blorp_exec_on_compute(struct blorp_batch *batch, 364 const struct blorp_params *params) 365{ 366 assert(batch->flags & BLORP_BATCH_USE_COMPUTE); 367 368 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 369 assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT); 370 371 genX(flush_pipeline_select_gpgpu)(cmd_buffer); 372 373 /* Apply any outstanding flushes in case pipeline select haven't. 
*/ 374 genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); 375 376 blorp_exec(batch, params); 377 378 cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; 379} 380 381void 382genX(blorp_exec)(struct blorp_batch *batch, 383 const struct blorp_params *params) 384{ 385 struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; 386 387 if (!cmd_buffer->state.current_l3_config) { 388 const struct intel_l3_config *cfg = 389 intel_get_default_l3_config(&cmd_buffer->device->info); 390 genX(cmd_buffer_config_l3)(cmd_buffer, cfg); 391 } 392 393#if GFX_VER == 7 394 /* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement 395 * indirect fast-clear colors can cause GPU hangs if we don't stall first. 396 * See genX(cmd_buffer_mi_memcpy) for more details. 397 */ 398 if (params->src.clear_color_addr.buffer || 399 params->dst.clear_color_addr.buffer) { 400 anv_add_pending_pipe_bits(cmd_buffer, 401 ANV_PIPE_CS_STALL_BIT, 402 "before blorp prep fast clear"); 403 } 404#endif 405 406 if (batch->flags & BLORP_BATCH_USE_COMPUTE) 407 blorp_exec_on_compute(batch, params); 408 else 409 blorp_exec_on_render(batch, params); 410} 411