1/* 2 * Copyright © 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23/** 24 * @file crocus_blorp.c 25 * 26 * ============================= GENXML CODE ============================= 27 * [This file is compiled once per generation.] 28 * ======================================================================= 29 * 30 * GenX specific code for working with BLORP (blitting, resolves, clears 31 * on the 3D engine). This provides the driver-specific hooks needed to 32 * implement the BLORP API. 33 * 34 * See crocus_blit.c, crocus_clear.c, and so on. 35 */ 36 37#include <assert.h> 38 39#include "crocus_batch.h" 40#include "crocus_resource.h" 41#include "crocus_context.h" 42 43#include "util/u_upload_mgr.h" 44#include "intel/common/intel_l3_config.h" 45 46#include "blorp/blorp_genX_exec.h" 47 48#if GFX_VER <= 5 49#include "gen4_blorp_exec.h" 50#endif 51 52static uint32_t * 53stream_state(struct crocus_batch *batch, 54 unsigned size, 55 unsigned alignment, 56 uint32_t *out_offset, 57 struct crocus_bo **out_bo) 58{ 59 uint32_t offset = ALIGN(batch->state.used, alignment); 60 61 if (offset + size >= STATE_SZ && !batch->no_wrap) { 62 crocus_batch_flush(batch); 63 offset = ALIGN(batch->state.used, alignment); 64 } else if (offset + size >= batch->state.bo->size) { 65 const unsigned new_size = 66 MIN2(batch->state.bo->size + batch->state.bo->size / 2, 67 MAX_STATE_SIZE); 68 crocus_grow_buffer(batch, true, batch->state.used, new_size); 69 assert(offset + size < batch->state.bo->size); 70 } 71 72 crocus_record_state_size(batch->state_sizes, offset, size); 73 74 batch->state.used = offset + size; 75 *out_offset = offset; 76 77 /* If the caller has asked for a BO, we leave them the responsibility of 78 * adding bo->gtt_offset (say, by handing an address to genxml). If not, 79 * we assume they want the offset from a base address. 80 */ 81 if (out_bo) 82 *out_bo = batch->state.bo; 83 84 return (uint32_t *)batch->state.map + (offset >> 2); 85} 86 87static void * 88blorp_emit_dwords(struct blorp_batch *blorp_batch, unsigned n) 89{ 90 struct crocus_batch *batch = blorp_batch->driver_batch; 91 return crocus_get_command_space(batch, n * sizeof(uint32_t)); 92} 93 94static uint64_t 95blorp_emit_reloc(struct blorp_batch *blorp_batch, UNUSED void *location, 96 struct blorp_address addr, uint32_t delta) 97{ 98 struct crocus_batch *batch = blorp_batch->driver_batch; 99 uint32_t offset; 100 101 if (GFX_VER < 6 && crocus_ptr_in_state_buffer(batch, location)) { 102 offset = (char *)location - (char *)batch->state.map; 103 return crocus_state_reloc(batch, offset, 104 addr.buffer, addr.offset + delta, 105 addr.reloc_flags); 106 } 107 108 assert(!crocus_ptr_in_state_buffer(batch, location)); 109 110 offset = (char *)location - (char *)batch->command.map; 111 return crocus_command_reloc(batch, offset, 112 addr.buffer, addr.offset + delta, 113 addr.reloc_flags); 114} 115 116static void 117blorp_surface_reloc(struct blorp_batch *blorp_batch, uint32_t ss_offset, 118 struct blorp_address addr, uint32_t delta) 119{ 120 struct crocus_batch *batch = blorp_batch->driver_batch; 121 struct crocus_bo *bo = addr.buffer; 122 123 uint64_t reloc_val = 124 crocus_state_reloc(batch, ss_offset, bo, addr.offset + delta, 125 addr.reloc_flags); 126 127 void *reloc_ptr = (void *)batch->state.map + ss_offset; 128 *(uint32_t *)reloc_ptr = reloc_val; 129} 130 131static uint64_t 132blorp_get_surface_address(struct blorp_batch *blorp_batch, 133 struct blorp_address addr) 134{ 135 /* We'll let blorp_surface_reloc write the address. */ 136 return 0ull; 137} 138 139#if GFX_VER >= 7 140static struct blorp_address 141blorp_get_surface_base_address(struct blorp_batch *blorp_batch) 142{ 143 struct crocus_batch *batch = blorp_batch->driver_batch; 144 return (struct blorp_address) { 145 .buffer = batch->state.bo, 146 .offset = 0 147 }; 148} 149#endif 150 151static void * 152blorp_alloc_dynamic_state(struct blorp_batch *blorp_batch, 153 uint32_t size, 154 uint32_t alignment, 155 uint32_t *offset) 156{ 157 struct crocus_batch *batch = blorp_batch->driver_batch; 158 159 return stream_state(batch, size, alignment, offset, NULL); 160} 161 162UNUSED static void * 163blorp_alloc_general_state(struct blorp_batch *blorp_batch, 164 uint32_t size, 165 uint32_t alignment, 166 uint32_t *offset) 167{ 168 /* Use dynamic state range for general state on crocus. */ 169 return blorp_alloc_dynamic_state(blorp_batch, size, alignment, offset); 170} 171 172static void 173blorp_alloc_binding_table(struct blorp_batch *blorp_batch, 174 unsigned num_entries, 175 unsigned state_size, 176 unsigned state_alignment, 177 uint32_t *bt_offset, 178 uint32_t *surface_offsets, 179 void **surface_maps) 180{ 181 struct crocus_batch *batch = blorp_batch->driver_batch; 182 uint32_t *bt_map = stream_state(batch, num_entries * sizeof(uint32_t), 32, 183 bt_offset, NULL); 184 185 for (unsigned i = 0; i < num_entries; i++) { 186 surface_maps[i] = stream_state(batch, 187 state_size, state_alignment, 188 &(surface_offsets)[i], NULL); 189 bt_map[i] = surface_offsets[i]; 190 } 191} 192 193static uint32_t 194blorp_binding_table_offset_to_pointer(struct blorp_batch *batch, 195 uint32_t offset) 196{ 197 return offset; 198} 199 200static void * 201blorp_alloc_vertex_buffer(struct blorp_batch *blorp_batch, 202 uint32_t size, 203 struct blorp_address *addr) 204{ 205 struct crocus_batch *batch = blorp_batch->driver_batch; 206 struct crocus_bo *bo; 207 uint32_t offset; 208 209 void *map = stream_state(batch, size, 64, 210 &offset, &bo); 211 212 *addr = (struct blorp_address) { 213 .buffer = bo, 214 .offset = offset, 215 .reloc_flags = RELOC_32BIT, 216#if GFX_VER >= 7 217 .mocs = crocus_mocs(bo, &batch->screen->isl_dev), 218#endif 219 }; 220 221 return map; 222} 223 224/** 225 */ 226static void 227blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch, 228 const struct blorp_address *addrs, 229 UNUSED uint32_t *sizes, 230 unsigned num_vbs) 231{ 232} 233 234static struct blorp_address 235blorp_get_workaround_address(struct blorp_batch *blorp_batch) 236{ 237 struct crocus_batch *batch = blorp_batch->driver_batch; 238 239 return (struct blorp_address) { 240 .buffer = batch->ice->workaround_bo, 241 .offset = batch->ice->workaround_offset, 242 }; 243} 244 245static void 246blorp_flush_range(UNUSED struct blorp_batch *blorp_batch, 247 UNUSED void *start, 248 UNUSED size_t size) 249{ 250 /* All allocated states come from the batch which we will flush before we 251 * submit it. There's nothing for us to do here. 252 */ 253} 254 255#if GFX_VER >= 7 256static const struct intel_l3_config * 257blorp_get_l3_config(struct blorp_batch *blorp_batch) 258{ 259 struct crocus_batch *batch = blorp_batch->driver_batch; 260 return batch->screen->l3_config_3d; 261} 262#else /* GFX_VER < 7 */ 263static void 264blorp_emit_urb_config(struct blorp_batch *blorp_batch, 265 unsigned vs_entry_size, 266 UNUSED unsigned sf_entry_size) 267{ 268 struct crocus_batch *batch = blorp_batch->driver_batch; 269#if GFX_VER <= 5 270 batch->screen->vtbl.calculate_urb_fence(batch, 0, vs_entry_size, sf_entry_size); 271#else 272 genX(crocus_upload_urb)(batch, vs_entry_size, false, vs_entry_size); 273#endif 274} 275#endif 276 277static void 278crocus_blorp_exec(struct blorp_batch *blorp_batch, 279 const struct blorp_params *params) 280{ 281 struct crocus_context *ice = blorp_batch->blorp->driver_ctx; 282 struct crocus_batch *batch = blorp_batch->driver_batch; 283 284 /* Flush the sampler and render caches. We definitely need to flush the 285 * sampler cache so that we get updated contents from the render cache for 286 * the glBlitFramebuffer() source. Also, we are sometimes warned in the 287 * docs to flush the cache between reinterpretations of the same surface 288 * data with different formats, which blorp does for stencil and depth 289 * data. 290 */ 291 if (params->src.enabled) 292 crocus_cache_flush_for_read(batch, params->src.addr.buffer); 293 if (params->dst.enabled) { 294 crocus_cache_flush_for_render(batch, params->dst.addr.buffer, 295 params->dst.view.format, 296 params->dst.aux_usage); 297 } 298 if (params->depth.enabled) 299 crocus_cache_flush_for_depth(batch, params->depth.addr.buffer); 300 if (params->stencil.enabled) 301 crocus_cache_flush_for_depth(batch, params->stencil.addr.buffer); 302 303 crocus_require_command_space(batch, 1400); 304 crocus_require_statebuffer_space(batch, 600); 305 batch->no_wrap = true; 306 307#if GFX_VER == 8 308 genX(crocus_update_pma_fix)(ice, batch, false); 309#endif 310 311#if GFX_VER == 6 312 /* Emit workaround flushes when we switch from drawing to blorping. */ 313 crocus_emit_post_sync_nonzero_flush(batch); 314#endif 315 316#if GFX_VER >= 6 317 crocus_emit_depth_stall_flushes(batch); 318#endif 319 320 blorp_emit(blorp_batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { 321 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; 322 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; 323 } 324 325 batch->screen->vtbl.update_surface_base_address(batch); 326 crocus_handle_always_flush_cache(batch); 327 328 batch->contains_draw = true; 329 blorp_exec(blorp_batch, params); 330 331 batch->no_wrap = false; 332 crocus_handle_always_flush_cache(batch); 333 334 /* We've smashed all state compared to what the normal 3D pipeline 335 * rendering tracks for GL. 336 */ 337 338 uint64_t skip_bits = (CROCUS_DIRTY_POLYGON_STIPPLE | 339 CROCUS_DIRTY_GEN7_SO_BUFFERS | 340 CROCUS_DIRTY_SO_DECL_LIST | 341 CROCUS_DIRTY_LINE_STIPPLE | 342 CROCUS_ALL_DIRTY_FOR_COMPUTE | 343 CROCUS_DIRTY_GEN6_SCISSOR_RECT | 344 CROCUS_DIRTY_GEN75_VF | 345 CROCUS_DIRTY_SF_CL_VIEWPORT); 346 347 uint64_t skip_stage_bits = (CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE | 348 CROCUS_STAGE_DIRTY_UNCOMPILED_VS | 349 CROCUS_STAGE_DIRTY_UNCOMPILED_TCS | 350 CROCUS_STAGE_DIRTY_UNCOMPILED_TES | 351 CROCUS_STAGE_DIRTY_UNCOMPILED_GS | 352 CROCUS_STAGE_DIRTY_UNCOMPILED_FS | 353 CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS | 354 CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS | 355 CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES | 356 CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS); 357 358 if (!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) { 359 /* BLORP disabled tessellation, that's fine for the next draw */ 360 skip_stage_bits |= CROCUS_STAGE_DIRTY_TCS | 361 CROCUS_STAGE_DIRTY_TES | 362 CROCUS_STAGE_DIRTY_CONSTANTS_TCS | 363 CROCUS_STAGE_DIRTY_CONSTANTS_TES | 364 CROCUS_STAGE_DIRTY_BINDINGS_TCS | 365 CROCUS_STAGE_DIRTY_BINDINGS_TES; 366 } 367 368 if (!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) { 369 /* BLORP disabled geometry shaders, that's fine for the next draw */ 370 skip_stage_bits |= CROCUS_STAGE_DIRTY_GS | 371 CROCUS_STAGE_DIRTY_CONSTANTS_GS | 372 CROCUS_STAGE_DIRTY_BINDINGS_GS; 373 } 374 375 /* we can skip flagging CROCUS_DIRTY_DEPTH_BUFFER, if 376 * BLORP_BATCH_NO_EMIT_DEPTH_STENCIL is set. 377 */ 378 if (blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL) 379 skip_bits |= CROCUS_DIRTY_DEPTH_BUFFER; 380 381 if (!params->wm_prog_data) 382 skip_bits |= CROCUS_DIRTY_GEN6_BLEND_STATE; 383 384 ice->state.dirty |= ~skip_bits; 385 ice->state.stage_dirty |= ~skip_stage_bits; 386 387 ice->urb.vsize = 0; 388 ice->urb.gs_present = false; 389 ice->urb.gsize = 0; 390 ice->urb.tess_present = false; 391 ice->urb.hsize = 0; 392 ice->urb.dsize = 0; 393 394 if (params->dst.enabled) { 395 crocus_render_cache_add_bo(batch, params->dst.addr.buffer, 396 params->dst.view.format, 397 params->dst.aux_usage); 398 } 399 if (params->depth.enabled) 400 crocus_depth_cache_add_bo(batch, params->depth.addr.buffer); 401 if (params->stencil.enabled) 402 crocus_depth_cache_add_bo(batch, params->stencil.addr.buffer); 403} 404 405static void 406blorp_measure_start(struct blorp_batch *blorp_batch, 407 const struct blorp_params *params) 408{ 409} 410 411static void 412blorp_measure_end(struct blorp_batch *blorp_batch, 413 const struct blorp_params *params) 414{ 415} 416 417void 418genX(crocus_init_blorp)(struct crocus_context *ice) 419{ 420 struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 421 422 blorp_init(&ice->blorp, ice, &screen->isl_dev, NULL); 423 ice->blorp.compiler = screen->compiler; 424 ice->blorp.lookup_shader = crocus_blorp_lookup_shader; 425 ice->blorp.upload_shader = crocus_blorp_upload_shader; 426 ice->blorp.exec = crocus_blorp_exec; 427} 428