1/*
2 * Copyright © 2015 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6#include "tu_util.h"
7
8#include <errno.h>
9#include <stdarg.h>
10
11#include "util/u_math.h"
12#include "util/timespec.h"
13#include "vk_enum_to_str.h"
14
15#include "tu_device.h"
16#include "tu_pass.h"
17
18void PRINTFLIKE(3, 4)
19   __tu_finishme(const char *file, int line, const char *format, ...)
20{
21   va_list ap;
22   char buffer[256];
23
24   va_start(ap, format);
25   vsnprintf(buffer, sizeof(buffer), format, ap);
26   va_end(ap);
27
28   mesa_loge("%s:%d: FINISHME: %s\n", file, line, buffer);
29}
30
31VkResult
32__vk_startup_errorf(struct tu_instance *instance,
33                    VkResult error,
34                    bool always_print,
35                    const char *file,
36                    int line,
37                    const char *format,
38                    ...)
39{
40   va_list ap;
41   char buffer[256];
42
43   const char *error_str = vk_Result_to_str(error);
44
45#ifndef DEBUG
46   if (!always_print)
47      return error;
48#endif
49
50   if (format) {
51      va_start(ap, format);
52      vsnprintf(buffer, sizeof(buffer), format, ap);
53      va_end(ap);
54
55      mesa_loge("%s:%d: %s (%s)\n", file, line, buffer, error_str);
56   } else {
57      mesa_loge("%s:%d: %s\n", file, line, error_str);
58   }
59
60   return error;
61}
62
63static void
64tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
65                                    const struct tu_device *dev,
66                                    const struct tu_render_pass *pass,
67                                    enum tu_gmem_layout gmem_layout)
68{
69   const uint32_t tile_align_w = pass->tile_align_w;
70   const uint32_t tile_align_h = dev->physical_device->info->tile_align_h;
71   const uint32_t max_tile_width = dev->physical_device->info->tile_max_w;
72   const uint32_t max_tile_height = dev->physical_device->info->tile_max_h;
73   struct tu_tiling_config *tiling = &fb->tiling[gmem_layout];
74
75   /* start from 1 tile */
76   tiling->tile_count = (VkExtent2D) {
77      .width = 1,
78      .height = 1,
79   };
80   tiling->tile0 = (VkExtent2D) {
81      .width = util_align_npot(fb->width, tile_align_w),
82      .height = align(fb->height, tile_align_h),
83   };
84
85   /* will force to sysmem, don't bother trying to have a valid tile config
86    * TODO: just skip all GMEM stuff when sysmem is forced?
87    */
88   if (!pass->gmem_pixels[gmem_layout])
89      return;
90
91   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
92      /* start with 2x2 tiles */
93      tiling->tile_count.width = 2;
94      tiling->tile_count.height = 2;
95      tiling->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w);
96      tiling->tile0.height = align(DIV_ROUND_UP(fb->height, 2), tile_align_h);
97   }
98
99   /* do not exceed max tile width */
100   while (tiling->tile0.width > max_tile_width) {
101      tiling->tile_count.width++;
102      tiling->tile0.width =
103         util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
104   }
105
106   /* do not exceed max tile height */
107   while (tiling->tile0.height > max_tile_height) {
108      tiling->tile_count.height++;
109      tiling->tile0.height =
110         util_align_npot(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
111   }
112
113   /* do not exceed gmem size */
114   while (tiling->tile0.width * tiling->tile0.height > pass->gmem_pixels[gmem_layout]) {
115      if (tiling->tile0.width > MAX2(tile_align_w, tiling->tile0.height)) {
116         tiling->tile_count.width++;
117         tiling->tile0.width =
118            util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
119      } else {
120         /* if this assert fails then layout is impossible.. */
121         assert(tiling->tile0.height > tile_align_h);
122         tiling->tile_count.height++;
123         tiling->tile0.height =
124            align(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
125      }
126   }
127}
128
129static void
130tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
131                                    const struct tu_device *dev)
132{
133   const uint32_t max_pipe_count = 32; /* A6xx */
134
135   /* start from 1 tile per pipe */
136   tiling->pipe0 = (VkExtent2D) {
137      .width = 1,
138      .height = 1,
139   };
140   tiling->pipe_count = tiling->tile_count;
141
142   while (tiling->pipe_count.width * tiling->pipe_count.height > max_pipe_count) {
143      if (tiling->pipe0.width < tiling->pipe0.height) {
144         tiling->pipe0.width += 1;
145         tiling->pipe_count.width =
146            DIV_ROUND_UP(tiling->tile_count.width, tiling->pipe0.width);
147      } else {
148         tiling->pipe0.height += 1;
149         tiling->pipe_count.height =
150            DIV_ROUND_UP(tiling->tile_count.height, tiling->pipe0.height);
151      }
152   }
153}
154
155static void
156tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
157                              const struct tu_device *dev)
158{
159   const uint32_t max_pipe_count = 32; /* A6xx */
160   const uint32_t used_pipe_count =
161      tiling->pipe_count.width * tiling->pipe_count.height;
162   const VkExtent2D last_pipe = {
163      .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
164      .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
165   };
166
167   assert(used_pipe_count <= max_pipe_count);
168   assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
169
170   for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
171      for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
172         const uint32_t pipe_x = tiling->pipe0.width * x;
173         const uint32_t pipe_y = tiling->pipe0.height * y;
174         const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
175                                    ? last_pipe.width
176                                    : tiling->pipe0.width;
177         const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
178                                    ? last_pipe.height
179                                    : tiling->pipe0.height;
180         const uint32_t n = tiling->pipe_count.width * y + x;
181
182         tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
183                                  A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
184                                  A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
185                                  A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
186         tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
187      }
188   }
189
190   memset(tiling->pipe_config + used_pipe_count, 0,
191          sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
192}
193
194static bool
195is_hw_binning_possible(const struct tu_tiling_config *tiling)
196{
197   /* Similar to older gens, # of tiles per pipe cannot be more than 32.
198    * But there are no hangs with 16 or more tiles per pipe in either
199    * X or Y direction, so that limit does not seem to apply.
200    */
201   uint32_t tiles_per_pipe = tiling->pipe0.width * tiling->pipe0.height;
202   return tiles_per_pipe <= 32;
203}
204
205static void
206tu_tiling_config_update_binning(struct tu_tiling_config *tiling, const struct tu_device *device)
207{
208   tiling->binning_possible = is_hw_binning_possible(tiling);
209
210   if (tiling->binning_possible) {
211      tiling->binning = (tiling->tile_count.width * tiling->tile_count.height) > 2;
212
213      if (unlikely(device->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN))
214         tiling->binning = true;
215      if (unlikely(device->physical_device->instance->debug_flags &
216                   TU_DEBUG_NOBIN))
217         tiling->binning = false;
218   } else {
219      tiling->binning = false;
220   }
221}
222
223void
224tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
225                             const struct tu_device *device,
226                             const struct tu_render_pass *pass)
227{
228   for (int gmem_layout = 0; gmem_layout < TU_GMEM_LAYOUT_COUNT; gmem_layout++) {
229      struct tu_tiling_config *tiling = &fb->tiling[gmem_layout];
230      tu_tiling_config_update_tile_layout(fb, device, pass, gmem_layout);
231      tu_tiling_config_update_pipe_layout(tiling, device);
232      tu_tiling_config_update_pipes(tiling, device);
233      tu_tiling_config_update_binning(tiling, device);
234   }
235}
236
237void
238tu_dbg_log_gmem_load_store_skips(struct tu_device *device)
239{
240   static uint32_t last_skipped_loads = 0;
241   static uint32_t last_skipped_stores = 0;
242   static uint32_t last_total_loads = 0;
243   static uint32_t last_total_stores = 0;
244   static struct timespec last_time = {};
245
246   pthread_mutex_lock(&device->submit_mutex);
247
248   struct timespec current_time;
249   clock_gettime(CLOCK_MONOTONIC, &current_time);
250
251   if (timespec_sub_to_nsec(&current_time, &last_time) > 1000 * 1000 * 1000) {
252      last_time = current_time;
253   } else {
254      pthread_mutex_unlock(&device->submit_mutex);
255      return;
256   }
257
258   struct tu6_global *global = device->global_bo->map;
259
260   uint32_t current_taken_loads = global->dbg_gmem_taken_loads;
261   uint32_t current_taken_stores = global->dbg_gmem_taken_stores;
262   uint32_t current_total_loads = global->dbg_gmem_total_loads;
263   uint32_t current_total_stores = global->dbg_gmem_total_stores;
264
265   uint32_t skipped_loads = current_total_loads - current_taken_loads;
266   uint32_t skipped_stores = current_total_stores - current_taken_stores;
267
268   uint32_t current_time_frame_skipped_loads = skipped_loads - last_skipped_loads;
269   uint32_t current_time_frame_skipped_stores = skipped_stores - last_skipped_stores;
270
271   uint32_t current_time_frame_total_loads = current_total_loads - last_total_loads;
272   uint32_t current_time_frame_total_stores = current_total_stores - last_total_stores;
273
274   mesa_logi("[GMEM] loads total: %u skipped: %.1f%%\n",
275         current_time_frame_total_loads,
276         current_time_frame_skipped_loads / (float) current_time_frame_total_loads * 100.f);
277   mesa_logi("[GMEM] stores total: %u skipped: %.1f%%\n",
278         current_time_frame_total_stores,
279         current_time_frame_skipped_stores / (float) current_time_frame_total_stores * 100.f);
280
281   last_skipped_loads = skipped_loads;
282   last_skipped_stores = skipped_stores;
283   last_total_loads = current_total_loads;
284   last_total_stores = current_total_stores;
285
286   pthread_mutex_unlock(&device->submit_mutex);
287}
288