1/* 2 * Copyright © 2015 Intel Corporation 3 * SPDX-License-Identifier: MIT 4 */ 5 6#include "tu_util.h" 7 8#include <errno.h> 9#include <stdarg.h> 10 11#include "util/u_math.h" 12#include "util/timespec.h" 13#include "vk_enum_to_str.h" 14 15#include "tu_device.h" 16#include "tu_pass.h" 17 18void PRINTFLIKE(3, 4) 19 __tu_finishme(const char *file, int line, const char *format, ...) 20{ 21 va_list ap; 22 char buffer[256]; 23 24 va_start(ap, format); 25 vsnprintf(buffer, sizeof(buffer), format, ap); 26 va_end(ap); 27 28 mesa_loge("%s:%d: FINISHME: %s\n", file, line, buffer); 29} 30 31VkResult 32__vk_startup_errorf(struct tu_instance *instance, 33 VkResult error, 34 bool always_print, 35 const char *file, 36 int line, 37 const char *format, 38 ...) 39{ 40 va_list ap; 41 char buffer[256]; 42 43 const char *error_str = vk_Result_to_str(error); 44 45#ifndef DEBUG 46 if (!always_print) 47 return error; 48#endif 49 50 if (format) { 51 va_start(ap, format); 52 vsnprintf(buffer, sizeof(buffer), format, ap); 53 va_end(ap); 54 55 mesa_loge("%s:%d: %s (%s)\n", file, line, buffer, error_str); 56 } else { 57 mesa_loge("%s:%d: %s\n", file, line, error_str); 58 } 59 60 return error; 61} 62 63static void 64tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb, 65 const struct tu_device *dev, 66 const struct tu_render_pass *pass, 67 enum tu_gmem_layout gmem_layout) 68{ 69 const uint32_t tile_align_w = pass->tile_align_w; 70 const uint32_t tile_align_h = dev->physical_device->info->tile_align_h; 71 const uint32_t max_tile_width = dev->physical_device->info->tile_max_w; 72 const uint32_t max_tile_height = dev->physical_device->info->tile_max_h; 73 struct tu_tiling_config *tiling = &fb->tiling[gmem_layout]; 74 75 /* start from 1 tile */ 76 tiling->tile_count = (VkExtent2D) { 77 .width = 1, 78 .height = 1, 79 }; 80 tiling->tile0 = (VkExtent2D) { 81 .width = util_align_npot(fb->width, tile_align_w), 82 .height = align(fb->height, tile_align_h), 83 }; 84 85 /* will force to sysmem, don't bother trying to have a valid tile config 86 * TODO: just skip all GMEM stuff when sysmem is forced? 87 */ 88 if (!pass->gmem_pixels[gmem_layout]) 89 return; 90 91 if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) { 92 /* start with 2x2 tiles */ 93 tiling->tile_count.width = 2; 94 tiling->tile_count.height = 2; 95 tiling->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w); 96 tiling->tile0.height = align(DIV_ROUND_UP(fb->height, 2), tile_align_h); 97 } 98 99 /* do not exceed max tile width */ 100 while (tiling->tile0.width > max_tile_width) { 101 tiling->tile_count.width++; 102 tiling->tile0.width = 103 util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w); 104 } 105 106 /* do not exceed max tile height */ 107 while (tiling->tile0.height > max_tile_height) { 108 tiling->tile_count.height++; 109 tiling->tile0.height = 110 util_align_npot(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h); 111 } 112 113 /* do not exceed gmem size */ 114 while (tiling->tile0.width * tiling->tile0.height > pass->gmem_pixels[gmem_layout]) { 115 if (tiling->tile0.width > MAX2(tile_align_w, tiling->tile0.height)) { 116 tiling->tile_count.width++; 117 tiling->tile0.width = 118 util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w); 119 } else { 120 /* if this assert fails then layout is impossible.. */ 121 assert(tiling->tile0.height > tile_align_h); 122 tiling->tile_count.height++; 123 tiling->tile0.height = 124 align(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h); 125 } 126 } 127} 128 129static void 130tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling, 131 const struct tu_device *dev) 132{ 133 const uint32_t max_pipe_count = 32; /* A6xx */ 134 135 /* start from 1 tile per pipe */ 136 tiling->pipe0 = (VkExtent2D) { 137 .width = 1, 138 .height = 1, 139 }; 140 tiling->pipe_count = tiling->tile_count; 141 142 while (tiling->pipe_count.width * tiling->pipe_count.height > max_pipe_count) { 143 if (tiling->pipe0.width < tiling->pipe0.height) { 144 tiling->pipe0.width += 1; 145 tiling->pipe_count.width = 146 DIV_ROUND_UP(tiling->tile_count.width, tiling->pipe0.width); 147 } else { 148 tiling->pipe0.height += 1; 149 tiling->pipe_count.height = 150 DIV_ROUND_UP(tiling->tile_count.height, tiling->pipe0.height); 151 } 152 } 153} 154 155static void 156tu_tiling_config_update_pipes(struct tu_tiling_config *tiling, 157 const struct tu_device *dev) 158{ 159 const uint32_t max_pipe_count = 32; /* A6xx */ 160 const uint32_t used_pipe_count = 161 tiling->pipe_count.width * tiling->pipe_count.height; 162 const VkExtent2D last_pipe = { 163 .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1, 164 .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1, 165 }; 166 167 assert(used_pipe_count <= max_pipe_count); 168 assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config)); 169 170 for (uint32_t y = 0; y < tiling->pipe_count.height; y++) { 171 for (uint32_t x = 0; x < tiling->pipe_count.width; x++) { 172 const uint32_t pipe_x = tiling->pipe0.width * x; 173 const uint32_t pipe_y = tiling->pipe0.height * y; 174 const uint32_t pipe_w = (x == tiling->pipe_count.width - 1) 175 ? last_pipe.width 176 : tiling->pipe0.width; 177 const uint32_t pipe_h = (y == tiling->pipe_count.height - 1) 178 ? last_pipe.height 179 : tiling->pipe0.height; 180 const uint32_t n = tiling->pipe_count.width * y + x; 181 182 tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) | 183 A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) | 184 A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) | 185 A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h); 186 tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h); 187 } 188 } 189 190 memset(tiling->pipe_config + used_pipe_count, 0, 191 sizeof(uint32_t) * (max_pipe_count - used_pipe_count)); 192} 193 194static bool 195is_hw_binning_possible(const struct tu_tiling_config *tiling) 196{ 197 /* Similar to older gens, # of tiles per pipe cannot be more than 32. 198 * But there are no hangs with 16 or more tiles per pipe in either 199 * X or Y direction, so that limit does not seem to apply. 200 */ 201 uint32_t tiles_per_pipe = tiling->pipe0.width * tiling->pipe0.height; 202 return tiles_per_pipe <= 32; 203} 204 205static void 206tu_tiling_config_update_binning(struct tu_tiling_config *tiling, const struct tu_device *device) 207{ 208 tiling->binning_possible = is_hw_binning_possible(tiling); 209 210 if (tiling->binning_possible) { 211 tiling->binning = (tiling->tile_count.width * tiling->tile_count.height) > 2; 212 213 if (unlikely(device->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) 214 tiling->binning = true; 215 if (unlikely(device->physical_device->instance->debug_flags & 216 TU_DEBUG_NOBIN)) 217 tiling->binning = false; 218 } else { 219 tiling->binning = false; 220 } 221} 222 223void 224tu_framebuffer_tiling_config(struct tu_framebuffer *fb, 225 const struct tu_device *device, 226 const struct tu_render_pass *pass) 227{ 228 for (int gmem_layout = 0; gmem_layout < TU_GMEM_LAYOUT_COUNT; gmem_layout++) { 229 struct tu_tiling_config *tiling = &fb->tiling[gmem_layout]; 230 tu_tiling_config_update_tile_layout(fb, device, pass, gmem_layout); 231 tu_tiling_config_update_pipe_layout(tiling, device); 232 tu_tiling_config_update_pipes(tiling, device); 233 tu_tiling_config_update_binning(tiling, device); 234 } 235} 236 237void 238tu_dbg_log_gmem_load_store_skips(struct tu_device *device) 239{ 240 static uint32_t last_skipped_loads = 0; 241 static uint32_t last_skipped_stores = 0; 242 static uint32_t last_total_loads = 0; 243 static uint32_t last_total_stores = 0; 244 static struct timespec last_time = {}; 245 246 pthread_mutex_lock(&device->submit_mutex); 247 248 struct timespec current_time; 249 clock_gettime(CLOCK_MONOTONIC, ¤t_time); 250 251 if (timespec_sub_to_nsec(¤t_time, &last_time) > 1000 * 1000 * 1000) { 252 last_time = current_time; 253 } else { 254 pthread_mutex_unlock(&device->submit_mutex); 255 return; 256 } 257 258 struct tu6_global *global = device->global_bo->map; 259 260 uint32_t current_taken_loads = global->dbg_gmem_taken_loads; 261 uint32_t current_taken_stores = global->dbg_gmem_taken_stores; 262 uint32_t current_total_loads = global->dbg_gmem_total_loads; 263 uint32_t current_total_stores = global->dbg_gmem_total_stores; 264 265 uint32_t skipped_loads = current_total_loads - current_taken_loads; 266 uint32_t skipped_stores = current_total_stores - current_taken_stores; 267 268 uint32_t current_time_frame_skipped_loads = skipped_loads - last_skipped_loads; 269 uint32_t current_time_frame_skipped_stores = skipped_stores - last_skipped_stores; 270 271 uint32_t current_time_frame_total_loads = current_total_loads - last_total_loads; 272 uint32_t current_time_frame_total_stores = current_total_stores - last_total_stores; 273 274 mesa_logi("[GMEM] loads total: %u skipped: %.1f%%\n", 275 current_time_frame_total_loads, 276 current_time_frame_skipped_loads / (float) current_time_frame_total_loads * 100.f); 277 mesa_logi("[GMEM] stores total: %u skipped: %.1f%%\n", 278 current_time_frame_total_stores, 279 current_time_frame_skipped_stores / (float) current_time_frame_total_stores * 100.f); 280 281 last_skipped_loads = skipped_loads; 282 last_skipped_stores = skipped_stores; 283 last_total_loads = current_total_loads; 284 last_total_stores = current_total_stores; 285 286 pthread_mutex_unlock(&device->submit_mutex); 287} 288