/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>

#include "anv_private.h"
#include "anv_measure.h"

/* These are defined in anv_private.h and blorp_genX_exec.h */
#undef __gen_address_type
#undef __gen_user_data
#undef __gen_combine_address

#include "common/intel_l3_config.h"
#include "blorp/blorp_genX_exec.h"

#include "ds/intel_tracepoints.h"

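/* BLORP hooks bracketing each BLORP operation: emit begin/end tracepoints
 * and an anv_measure snapshot so blits and clears show up in profiling.
 */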
static void blorp_measure_start(struct blorp_batch *_batch,
                                const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
   trace_intel_begin_blorp(&cmd_buffer->trace);
   anv_measure_snapshot(cmd_buffer,
                        params->snapshot_type,
                        NULL, 0);
}

static void blorp_measure_end(struct blorp_batch *_batch,
                              const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
   trace_intel_end_blorp(&cmd_buffer->trace,
                         params->x1 - params->x0,
                         params->y1 - params->y0,
                         params->hiz_op,
                         params->fast_clear_op,
                         params->shader_type,
                         params->shader_pipeline);
}

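/* Allocate space for 'n' dwords in the command buffer's batch and return a
 * pointer BLORP can write its commands into.
 */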
static void *
blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return anv_batch_emit_dwords(&cmd_buffer->batch, n);
}

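/* Record a relocation for an address emitted directly into the batch and
 * return the value to write at 'location'.
 */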
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
                 void *location, struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->batch.start <= location &&
          location < cmd_buffer->batch.end);
   return anv_batch_emit_reloc(&cmd_buffer->batch, location,
                               address.buffer, address.offset + delta);
}

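/* Record a relocation for a pointer inside a surface state at ss_offset in
 * the surface state pool.  With softpin we only need to track the BO;
 * otherwise the relocation is recorded and the presumed address is written
 * in place.
 */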
static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
                    struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   VkResult result;

   if (ANV_ALWAYS_SOFTPIN) {
      result = anv_reloc_list_add_bo(&cmd_buffer->surface_relocs,
                                     &cmd_buffer->vk.pool->alloc,
                                     address.buffer);
      if (unlikely(result != VK_SUCCESS))
         anv_batch_set_error(&cmd_buffer->batch, result);
      return;
   }

   uint64_t address_u64 = 0;
   result = anv_reloc_list_add(&cmd_buffer->surface_relocs,
                               &cmd_buffer->vk.pool->alloc,
                               ss_offset, address.buffer,
                               address.offset + delta,
                               &address_u64);
   if (result != VK_SUCCESS)
      anv_batch_set_error(&cmd_buffer->batch, result);

   void *dest = anv_block_pool_map(
      &cmd_buffer->device->surface_state_pool.block_pool, ss_offset, 8);
   write_reloc(cmd_buffer->device, dest, address_u64, false);
}

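/* Return the address BLORP should bake into a surface state.  Without
 * softpin this returns 0 and blorp_surface_reloc patches the address in
 * afterwards.
 */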
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
                          struct blorp_address address)
{
   if (ANV_ALWAYS_SOFTPIN) {
      struct anv_address anv_addr = {
         .bo = address.buffer,
         .offset = address.offset,
      };
      return anv_address_physical(anv_addr);
   } else {
      /* We'll let blorp_surface_reloc write the address. */
      return 0;
   }
}

#if GFX_VER >= 7 && GFX_VER < 10
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return (struct blorp_address) {
      .buffer = cmd_buffer->device->surface_state_pool.block_pool.bo,
      .offset = 0,
   };
}
#endif

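/* Suballocate dynamic state for BLORP out of the command buffer and return
 * its offset within the dynamic state pool.
 */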
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   struct anv_state state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);

   *offset = state.offset;
   return state.map;
}

UNUSED static void *
blorp_alloc_general_state(struct blorp_batch *batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   struct anv_state state =
      anv_state_stream_alloc(&cmd_buffer->general_state_stream, size,
                             alignment);

   *offset = state.offset;
   return state.map;
}

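/* Allocate a binding table plus one surface state per entry.  bt_offset and
 * surface_offsets receive pool-relative offsets, and surface_maps receives
 * CPU pointers for BLORP to fill with RENDER_SURFACE_STATE.
 */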
static void
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
                          unsigned state_size, unsigned state_alignment,
                          uint32_t *bt_offset,
                          uint32_t *surface_offsets, void **surface_maps)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, num_entries,
                                               &state_offset, &bt_state);
   if (result != VK_SUCCESS)
      return;

   uint32_t *bt_map = bt_state.map;
   *bt_offset = bt_state.offset;

   for (unsigned i = 0; i < num_entries; i++) {
      struct anv_state surface_state =
         anv_cmd_buffer_alloc_surface_state(cmd_buffer);
      bt_map[i] = surface_state.offset + state_offset;
      surface_offsets[i] = surface_state.offset;
      surface_maps[i] = surface_state.map;
   }
}

static uint32_t
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
                                      uint32_t offset)
{
   return offset;
}

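/* Stream BLORP's vertex data out of the dynamic state pool rather than a
 * separate buffer object.
 */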
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
                          struct blorp_address *addr)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   struct anv_state vb_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64);

   *addr = (struct blorp_address) {
      .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
      .offset = vb_state.offset,
      .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                       ISL_SURF_USAGE_VERTEX_BUFFER_BIT, false),
   };

   return vb_state.map;
}

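/* Gfx8+ VF cache workaround: tell the tracking code which vertex buffers
 * BLORP is about to use so that stale cache entries that could alias the
 * new addresses get invalidated first.
 */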
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                           const struct blorp_address *addrs,
                                           uint32_t *sizes,
                                           unsigned num_vbs)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   for (unsigned i = 0; i < num_vbs; i++) {
      struct anv_address anv_addr = {
         .bo = addrs[i].buffer,
         .offset = addrs[i].offset,
      };
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer,
                                                     i, anv_addr, sizes[i]);
   }

   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   /* Technically, we should call this *after* 3DPRIMITIVE but it doesn't
    * really matter for blorp because we never call apply_pipe_flushes after
    * this point.
    */
   genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
                                                       (1 << num_vbs) - 1);
}

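/* Return the device's workaround BO, used e.g. as a dummy write target for
 * PIPE_CONTROL workarounds.
 */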
UNUSED static struct blorp_address
blorp_get_workaround_address(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   return (struct blorp_address) {
      .buffer = cmd_buffer->device->workaround_address.bo,
      .offset = cmd_buffer->device->workaround_address.offset,
   };
}

static void
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
{
   /* We don't need to flush states anymore, since everything will be
    * snooped.
    */
}

static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return cmd_buffer->state.current_l3_config;
}

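/* Execute a BLORP operation on the 3D pipeline, then mark all the render
 * state BLORP may have clobbered as dirty so it gets re-emitted on the next
 * draw.
 */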
static void
blorp_exec_on_render(struct blorp_batch *batch,
                     const struct blorp_params *params)
{
   assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT);

   const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
   genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0,
                                      params->y1 - params->y0, scale);

#if GFX_VER >= 11
   /* The PIPE_CONTROL command description says:
    *
    *    "Whenever a Binding Table Index (BTI) used by a Render Target Message
    *     points to a different RENDER_SURFACE_STATE, SW must issue a Render
    *     Target Cache Flush by enabling this bit. When render target flush
    *     is set due to new association of BTI, PS Scoreboard Stall bit must
    *     be set in this packet."
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
                             "before blorp BTI change");
#endif

   if (params->depth.enabled &&
       !(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
      genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf);

   genX(flush_pipeline_select_3d)(cmd_buffer);

   /* Apply any outstanding flushes in case the pipeline select didn't. */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   genX(cmd_buffer_emit_gfx7_depth_flush)(cmd_buffer);

   /* BLORP doesn't do anything fancy with depth such as discards, so we want
    * the PMA fix off.  Also, off is always the safe option.
    */
   genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);

   blorp_exec(batch, params);

#if GFX_VER >= 11
   /* The PIPE_CONTROL command description says:
    *
    *    "Whenever a Binding Table Index (BTI) used by a Render Target Message
    *     points to a different RENDER_SURFACE_STATE, SW must issue a Render
    *     Target Cache Flush by enabling this bit. When render target flush
    *     is set due to new association of BTI, PS Scoreboard Stall bit must
    *     be set in this packet."
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
                             "after blorp BTI change");
#endif

   /* Calculate the state that does not get touched by blorp and mark
    * everything else dirty.
    */
   anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER |
                                  ANV_CMD_DIRTY_XFB_ENABLE);

   BITSET_DECLARE(dyn_dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
   BITSET_ONES(dyn_dirty);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSORS);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_FSR);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS);
   if (!params->wm_prog_data) {
      BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
      BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP);
   }

   cmd_buffer->state.gfx.vb_dirty = ~0;
   cmd_buffer->state.gfx.dirty |= dirty;
   BITSET_OR(cmd_buffer->vk.dynamic_graphics_state.dirty,
             cmd_buffer->vk.dynamic_graphics_state.dirty, dyn_dirty);
   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}

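/* Execute a BLORP operation on the compute pipeline; only push constants
 * need to be marked dirty afterwards.
 */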
static void
blorp_exec_on_compute(struct blorp_batch *batch,
                      const struct blorp_params *params)
{
   assert(batch->flags & BLORP_BATCH_USE_COMPUTE);

   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);

   genX(flush_pipeline_select_gpgpu)(cmd_buffer);

   /* Apply any outstanding flushes in case the pipeline select didn't. */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   blorp_exec(batch, params);

   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}

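/* Driver entry point called by the BLORP core.  Ensures an L3 configuration
 * has been programmed, applies the gfx7 fast-clear stall workaround, and
 * dispatches to the render or compute path.
 */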
void
genX(blorp_exec)(struct blorp_batch *batch,
                 const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   if (!cmd_buffer->state.current_l3_config) {
      const struct intel_l3_config *cfg =
         intel_get_default_l3_config(&cmd_buffer->device->info);
      genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
   }

#if GFX_VER == 7
   /* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
    * indirect fast-clear colors can cause GPU hangs if we don't stall first.
    * See genX(cmd_buffer_mi_memcpy) for more details.
    */
   if (params->src.clear_color_addr.buffer ||
       params->dst.clear_color_addr.buffer) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CS_STALL_BIT,
                                "before blorp prep fast clear");
   }
#endif

   if (batch->flags & BLORP_BATCH_USE_COMPUTE)
      blorp_exec_on_compute(batch, params);
   else
      blorp_exec_on_render(batch, params);
}