1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation
3bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT
4bf215546Sopenharmony_ci */
5bf215546Sopenharmony_ci
6bf215546Sopenharmony_ci#include "tu_util.h"
7bf215546Sopenharmony_ci
8bf215546Sopenharmony_ci#include <errno.h>
9bf215546Sopenharmony_ci#include <stdarg.h>
10bf215546Sopenharmony_ci
11bf215546Sopenharmony_ci#include "util/u_math.h"
12bf215546Sopenharmony_ci#include "util/timespec.h"
13bf215546Sopenharmony_ci#include "vk_enum_to_str.h"
14bf215546Sopenharmony_ci
15bf215546Sopenharmony_ci#include "tu_device.h"
16bf215546Sopenharmony_ci#include "tu_pass.h"
17bf215546Sopenharmony_ci
18bf215546Sopenharmony_civoid PRINTFLIKE(3, 4)
19bf215546Sopenharmony_ci   __tu_finishme(const char *file, int line, const char *format, ...)
20bf215546Sopenharmony_ci{
21bf215546Sopenharmony_ci   va_list ap;
22bf215546Sopenharmony_ci   char buffer[256];
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci   va_start(ap, format);
25bf215546Sopenharmony_ci   vsnprintf(buffer, sizeof(buffer), format, ap);
26bf215546Sopenharmony_ci   va_end(ap);
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci   mesa_loge("%s:%d: FINISHME: %s\n", file, line, buffer);
29bf215546Sopenharmony_ci}
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ciVkResult
32bf215546Sopenharmony_ci__vk_startup_errorf(struct tu_instance *instance,
33bf215546Sopenharmony_ci                    VkResult error,
34bf215546Sopenharmony_ci                    bool always_print,
35bf215546Sopenharmony_ci                    const char *file,
36bf215546Sopenharmony_ci                    int line,
37bf215546Sopenharmony_ci                    const char *format,
38bf215546Sopenharmony_ci                    ...)
39bf215546Sopenharmony_ci{
40bf215546Sopenharmony_ci   va_list ap;
41bf215546Sopenharmony_ci   char buffer[256];
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci   const char *error_str = vk_Result_to_str(error);
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ci#ifndef DEBUG
46bf215546Sopenharmony_ci   if (!always_print)
47bf215546Sopenharmony_ci      return error;
48bf215546Sopenharmony_ci#endif
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_ci   if (format) {
51bf215546Sopenharmony_ci      va_start(ap, format);
52bf215546Sopenharmony_ci      vsnprintf(buffer, sizeof(buffer), format, ap);
53bf215546Sopenharmony_ci      va_end(ap);
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci      mesa_loge("%s:%d: %s (%s)\n", file, line, buffer, error_str);
56bf215546Sopenharmony_ci   } else {
57bf215546Sopenharmony_ci      mesa_loge("%s:%d: %s\n", file, line, error_str);
58bf215546Sopenharmony_ci   }
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci   return error;
61bf215546Sopenharmony_ci}
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_cistatic void
64bf215546Sopenharmony_citu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
65bf215546Sopenharmony_ci                                    const struct tu_device *dev,
66bf215546Sopenharmony_ci                                    const struct tu_render_pass *pass,
67bf215546Sopenharmony_ci                                    enum tu_gmem_layout gmem_layout)
68bf215546Sopenharmony_ci{
69bf215546Sopenharmony_ci   const uint32_t tile_align_w = pass->tile_align_w;
70bf215546Sopenharmony_ci   const uint32_t tile_align_h = dev->physical_device->info->tile_align_h;
71bf215546Sopenharmony_ci   const uint32_t max_tile_width = dev->physical_device->info->tile_max_w;
72bf215546Sopenharmony_ci   const uint32_t max_tile_height = dev->physical_device->info->tile_max_h;
73bf215546Sopenharmony_ci   struct tu_tiling_config *tiling = &fb->tiling[gmem_layout];
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   /* start from 1 tile */
76bf215546Sopenharmony_ci   tiling->tile_count = (VkExtent2D) {
77bf215546Sopenharmony_ci      .width = 1,
78bf215546Sopenharmony_ci      .height = 1,
79bf215546Sopenharmony_ci   };
80bf215546Sopenharmony_ci   tiling->tile0 = (VkExtent2D) {
81bf215546Sopenharmony_ci      .width = util_align_npot(fb->width, tile_align_w),
82bf215546Sopenharmony_ci      .height = align(fb->height, tile_align_h),
83bf215546Sopenharmony_ci   };
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   /* will force to sysmem, don't bother trying to have a valid tile config
86bf215546Sopenharmony_ci    * TODO: just skip all GMEM stuff when sysmem is forced?
87bf215546Sopenharmony_ci    */
88bf215546Sopenharmony_ci   if (!pass->gmem_pixels[gmem_layout])
89bf215546Sopenharmony_ci      return;
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
92bf215546Sopenharmony_ci      /* start with 2x2 tiles */
93bf215546Sopenharmony_ci      tiling->tile_count.width = 2;
94bf215546Sopenharmony_ci      tiling->tile_count.height = 2;
95bf215546Sopenharmony_ci      tiling->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w);
96bf215546Sopenharmony_ci      tiling->tile0.height = align(DIV_ROUND_UP(fb->height, 2), tile_align_h);
97bf215546Sopenharmony_ci   }
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci   /* do not exceed max tile width */
100bf215546Sopenharmony_ci   while (tiling->tile0.width > max_tile_width) {
101bf215546Sopenharmony_ci      tiling->tile_count.width++;
102bf215546Sopenharmony_ci      tiling->tile0.width =
103bf215546Sopenharmony_ci         util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
104bf215546Sopenharmony_ci   }
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   /* do not exceed max tile height */
107bf215546Sopenharmony_ci   while (tiling->tile0.height > max_tile_height) {
108bf215546Sopenharmony_ci      tiling->tile_count.height++;
109bf215546Sopenharmony_ci      tiling->tile0.height =
110bf215546Sopenharmony_ci         util_align_npot(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
111bf215546Sopenharmony_ci   }
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   /* do not exceed gmem size */
114bf215546Sopenharmony_ci   while (tiling->tile0.width * tiling->tile0.height > pass->gmem_pixels[gmem_layout]) {
115bf215546Sopenharmony_ci      if (tiling->tile0.width > MAX2(tile_align_w, tiling->tile0.height)) {
116bf215546Sopenharmony_ci         tiling->tile_count.width++;
117bf215546Sopenharmony_ci         tiling->tile0.width =
118bf215546Sopenharmony_ci            util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
119bf215546Sopenharmony_ci      } else {
120bf215546Sopenharmony_ci         /* if this assert fails then layout is impossible.. */
121bf215546Sopenharmony_ci         assert(tiling->tile0.height > tile_align_h);
122bf215546Sopenharmony_ci         tiling->tile_count.height++;
123bf215546Sopenharmony_ci         tiling->tile0.height =
124bf215546Sopenharmony_ci            align(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
125bf215546Sopenharmony_ci      }
126bf215546Sopenharmony_ci   }
127bf215546Sopenharmony_ci}
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_cistatic void
130bf215546Sopenharmony_citu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
131bf215546Sopenharmony_ci                                    const struct tu_device *dev)
132bf215546Sopenharmony_ci{
133bf215546Sopenharmony_ci   const uint32_t max_pipe_count = 32; /* A6xx */
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci   /* start from 1 tile per pipe */
136bf215546Sopenharmony_ci   tiling->pipe0 = (VkExtent2D) {
137bf215546Sopenharmony_ci      .width = 1,
138bf215546Sopenharmony_ci      .height = 1,
139bf215546Sopenharmony_ci   };
140bf215546Sopenharmony_ci   tiling->pipe_count = tiling->tile_count;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   while (tiling->pipe_count.width * tiling->pipe_count.height > max_pipe_count) {
143bf215546Sopenharmony_ci      if (tiling->pipe0.width < tiling->pipe0.height) {
144bf215546Sopenharmony_ci         tiling->pipe0.width += 1;
145bf215546Sopenharmony_ci         tiling->pipe_count.width =
146bf215546Sopenharmony_ci            DIV_ROUND_UP(tiling->tile_count.width, tiling->pipe0.width);
147bf215546Sopenharmony_ci      } else {
148bf215546Sopenharmony_ci         tiling->pipe0.height += 1;
149bf215546Sopenharmony_ci         tiling->pipe_count.height =
150bf215546Sopenharmony_ci            DIV_ROUND_UP(tiling->tile_count.height, tiling->pipe0.height);
151bf215546Sopenharmony_ci      }
152bf215546Sopenharmony_ci   }
153bf215546Sopenharmony_ci}
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_cistatic void
156bf215546Sopenharmony_citu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
157bf215546Sopenharmony_ci                              const struct tu_device *dev)
158bf215546Sopenharmony_ci{
159bf215546Sopenharmony_ci   const uint32_t max_pipe_count = 32; /* A6xx */
160bf215546Sopenharmony_ci   const uint32_t used_pipe_count =
161bf215546Sopenharmony_ci      tiling->pipe_count.width * tiling->pipe_count.height;
162bf215546Sopenharmony_ci   const VkExtent2D last_pipe = {
163bf215546Sopenharmony_ci      .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
164bf215546Sopenharmony_ci      .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
165bf215546Sopenharmony_ci   };
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci   assert(used_pipe_count <= max_pipe_count);
168bf215546Sopenharmony_ci   assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci   for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
171bf215546Sopenharmony_ci      for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
172bf215546Sopenharmony_ci         const uint32_t pipe_x = tiling->pipe0.width * x;
173bf215546Sopenharmony_ci         const uint32_t pipe_y = tiling->pipe0.height * y;
174bf215546Sopenharmony_ci         const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
175bf215546Sopenharmony_ci                                    ? last_pipe.width
176bf215546Sopenharmony_ci                                    : tiling->pipe0.width;
177bf215546Sopenharmony_ci         const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
178bf215546Sopenharmony_ci                                    ? last_pipe.height
179bf215546Sopenharmony_ci                                    : tiling->pipe0.height;
180bf215546Sopenharmony_ci         const uint32_t n = tiling->pipe_count.width * y + x;
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci         tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
183bf215546Sopenharmony_ci                                  A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
184bf215546Sopenharmony_ci                                  A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
185bf215546Sopenharmony_ci                                  A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
186bf215546Sopenharmony_ci         tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
187bf215546Sopenharmony_ci      }
188bf215546Sopenharmony_ci   }
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   memset(tiling->pipe_config + used_pipe_count, 0,
191bf215546Sopenharmony_ci          sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
192bf215546Sopenharmony_ci}
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_cistatic bool
195bf215546Sopenharmony_ciis_hw_binning_possible(const struct tu_tiling_config *tiling)
196bf215546Sopenharmony_ci{
197bf215546Sopenharmony_ci   /* Similar to older gens, # of tiles per pipe cannot be more than 32.
198bf215546Sopenharmony_ci    * But there are no hangs with 16 or more tiles per pipe in either
199bf215546Sopenharmony_ci    * X or Y direction, so that limit does not seem to apply.
200bf215546Sopenharmony_ci    */
201bf215546Sopenharmony_ci   uint32_t tiles_per_pipe = tiling->pipe0.width * tiling->pipe0.height;
202bf215546Sopenharmony_ci   return tiles_per_pipe <= 32;
203bf215546Sopenharmony_ci}
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_cistatic void
206bf215546Sopenharmony_citu_tiling_config_update_binning(struct tu_tiling_config *tiling, const struct tu_device *device)
207bf215546Sopenharmony_ci{
208bf215546Sopenharmony_ci   tiling->binning_possible = is_hw_binning_possible(tiling);
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   if (tiling->binning_possible) {
211bf215546Sopenharmony_ci      tiling->binning = (tiling->tile_count.width * tiling->tile_count.height) > 2;
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci      if (unlikely(device->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN))
214bf215546Sopenharmony_ci         tiling->binning = true;
215bf215546Sopenharmony_ci      if (unlikely(device->physical_device->instance->debug_flags &
216bf215546Sopenharmony_ci                   TU_DEBUG_NOBIN))
217bf215546Sopenharmony_ci         tiling->binning = false;
218bf215546Sopenharmony_ci   } else {
219bf215546Sopenharmony_ci      tiling->binning = false;
220bf215546Sopenharmony_ci   }
221bf215546Sopenharmony_ci}
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_civoid
224bf215546Sopenharmony_citu_framebuffer_tiling_config(struct tu_framebuffer *fb,
225bf215546Sopenharmony_ci                             const struct tu_device *device,
226bf215546Sopenharmony_ci                             const struct tu_render_pass *pass)
227bf215546Sopenharmony_ci{
228bf215546Sopenharmony_ci   for (int gmem_layout = 0; gmem_layout < TU_GMEM_LAYOUT_COUNT; gmem_layout++) {
229bf215546Sopenharmony_ci      struct tu_tiling_config *tiling = &fb->tiling[gmem_layout];
230bf215546Sopenharmony_ci      tu_tiling_config_update_tile_layout(fb, device, pass, gmem_layout);
231bf215546Sopenharmony_ci      tu_tiling_config_update_pipe_layout(tiling, device);
232bf215546Sopenharmony_ci      tu_tiling_config_update_pipes(tiling, device);
233bf215546Sopenharmony_ci      tu_tiling_config_update_binning(tiling, device);
234bf215546Sopenharmony_ci   }
235bf215546Sopenharmony_ci}
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_civoid
238bf215546Sopenharmony_citu_dbg_log_gmem_load_store_skips(struct tu_device *device)
239bf215546Sopenharmony_ci{
240bf215546Sopenharmony_ci   static uint32_t last_skipped_loads = 0;
241bf215546Sopenharmony_ci   static uint32_t last_skipped_stores = 0;
242bf215546Sopenharmony_ci   static uint32_t last_total_loads = 0;
243bf215546Sopenharmony_ci   static uint32_t last_total_stores = 0;
244bf215546Sopenharmony_ci   static struct timespec last_time = {};
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci   pthread_mutex_lock(&device->submit_mutex);
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci   struct timespec current_time;
249bf215546Sopenharmony_ci   clock_gettime(CLOCK_MONOTONIC, &current_time);
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci   if (timespec_sub_to_nsec(&current_time, &last_time) > 1000 * 1000 * 1000) {
252bf215546Sopenharmony_ci      last_time = current_time;
253bf215546Sopenharmony_ci   } else {
254bf215546Sopenharmony_ci      pthread_mutex_unlock(&device->submit_mutex);
255bf215546Sopenharmony_ci      return;
256bf215546Sopenharmony_ci   }
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci   struct tu6_global *global = device->global_bo->map;
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci   uint32_t current_taken_loads = global->dbg_gmem_taken_loads;
261bf215546Sopenharmony_ci   uint32_t current_taken_stores = global->dbg_gmem_taken_stores;
262bf215546Sopenharmony_ci   uint32_t current_total_loads = global->dbg_gmem_total_loads;
263bf215546Sopenharmony_ci   uint32_t current_total_stores = global->dbg_gmem_total_stores;
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci   uint32_t skipped_loads = current_total_loads - current_taken_loads;
266bf215546Sopenharmony_ci   uint32_t skipped_stores = current_total_stores - current_taken_stores;
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci   uint32_t current_time_frame_skipped_loads = skipped_loads - last_skipped_loads;
269bf215546Sopenharmony_ci   uint32_t current_time_frame_skipped_stores = skipped_stores - last_skipped_stores;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   uint32_t current_time_frame_total_loads = current_total_loads - last_total_loads;
272bf215546Sopenharmony_ci   uint32_t current_time_frame_total_stores = current_total_stores - last_total_stores;
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci   mesa_logi("[GMEM] loads total: %u skipped: %.1f%%\n",
275bf215546Sopenharmony_ci         current_time_frame_total_loads,
276bf215546Sopenharmony_ci         current_time_frame_skipped_loads / (float) current_time_frame_total_loads * 100.f);
277bf215546Sopenharmony_ci   mesa_logi("[GMEM] stores total: %u skipped: %.1f%%\n",
278bf215546Sopenharmony_ci         current_time_frame_total_stores,
279bf215546Sopenharmony_ci         current_time_frame_skipped_stores / (float) current_time_frame_total_stores * 100.f);
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci   last_skipped_loads = skipped_loads;
282bf215546Sopenharmony_ci   last_skipped_stores = skipped_stores;
283bf215546Sopenharmony_ci   last_total_loads = current_total_loads;
284bf215546Sopenharmony_ci   last_total_stores = current_total_stores;
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci   pthread_mutex_unlock(&device->submit_mutex);
287bf215546Sopenharmony_ci}
288