1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 3bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT 4bf215546Sopenharmony_ci */ 5bf215546Sopenharmony_ci 6bf215546Sopenharmony_ci#include "tu_util.h" 7bf215546Sopenharmony_ci 8bf215546Sopenharmony_ci#include <errno.h> 9bf215546Sopenharmony_ci#include <stdarg.h> 10bf215546Sopenharmony_ci 11bf215546Sopenharmony_ci#include "util/u_math.h" 12bf215546Sopenharmony_ci#include "util/timespec.h" 13bf215546Sopenharmony_ci#include "vk_enum_to_str.h" 14bf215546Sopenharmony_ci 15bf215546Sopenharmony_ci#include "tu_device.h" 16bf215546Sopenharmony_ci#include "tu_pass.h" 17bf215546Sopenharmony_ci 18bf215546Sopenharmony_civoid PRINTFLIKE(3, 4) 19bf215546Sopenharmony_ci __tu_finishme(const char *file, int line, const char *format, ...) 20bf215546Sopenharmony_ci{ 21bf215546Sopenharmony_ci va_list ap; 22bf215546Sopenharmony_ci char buffer[256]; 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci va_start(ap, format); 25bf215546Sopenharmony_ci vsnprintf(buffer, sizeof(buffer), format, ap); 26bf215546Sopenharmony_ci va_end(ap); 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci mesa_loge("%s:%d: FINISHME: %s\n", file, line, buffer); 29bf215546Sopenharmony_ci} 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ciVkResult 32bf215546Sopenharmony_ci__vk_startup_errorf(struct tu_instance *instance, 33bf215546Sopenharmony_ci VkResult error, 34bf215546Sopenharmony_ci bool always_print, 35bf215546Sopenharmony_ci const char *file, 36bf215546Sopenharmony_ci int line, 37bf215546Sopenharmony_ci const char *format, 38bf215546Sopenharmony_ci ...) 39bf215546Sopenharmony_ci{ 40bf215546Sopenharmony_ci va_list ap; 41bf215546Sopenharmony_ci char buffer[256]; 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci const char *error_str = vk_Result_to_str(error); 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci#ifndef DEBUG 46bf215546Sopenharmony_ci if (!always_print) 47bf215546Sopenharmony_ci return error; 48bf215546Sopenharmony_ci#endif 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci if (format) { 51bf215546Sopenharmony_ci va_start(ap, format); 52bf215546Sopenharmony_ci vsnprintf(buffer, sizeof(buffer), format, ap); 53bf215546Sopenharmony_ci va_end(ap); 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci mesa_loge("%s:%d: %s (%s)\n", file, line, buffer, error_str); 56bf215546Sopenharmony_ci } else { 57bf215546Sopenharmony_ci mesa_loge("%s:%d: %s\n", file, line, error_str); 58bf215546Sopenharmony_ci } 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci return error; 61bf215546Sopenharmony_ci} 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_cistatic void 64bf215546Sopenharmony_citu_tiling_config_update_tile_layout(struct tu_framebuffer *fb, 65bf215546Sopenharmony_ci const struct tu_device *dev, 66bf215546Sopenharmony_ci const struct tu_render_pass *pass, 67bf215546Sopenharmony_ci enum tu_gmem_layout gmem_layout) 68bf215546Sopenharmony_ci{ 69bf215546Sopenharmony_ci const uint32_t tile_align_w = pass->tile_align_w; 70bf215546Sopenharmony_ci const uint32_t tile_align_h = dev->physical_device->info->tile_align_h; 71bf215546Sopenharmony_ci const uint32_t max_tile_width = dev->physical_device->info->tile_max_w; 72bf215546Sopenharmony_ci const uint32_t max_tile_height = dev->physical_device->info->tile_max_h; 73bf215546Sopenharmony_ci struct tu_tiling_config *tiling = &fb->tiling[gmem_layout]; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci /* start from 1 tile */ 76bf215546Sopenharmony_ci tiling->tile_count = (VkExtent2D) { 77bf215546Sopenharmony_ci .width = 1, 78bf215546Sopenharmony_ci .height = 1, 79bf215546Sopenharmony_ci }; 80bf215546Sopenharmony_ci tiling->tile0 = (VkExtent2D) { 81bf215546Sopenharmony_ci .width = util_align_npot(fb->width, tile_align_w), 82bf215546Sopenharmony_ci .height = align(fb->height, tile_align_h), 83bf215546Sopenharmony_ci }; 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci /* will force to sysmem, don't bother trying to have a valid tile config 86bf215546Sopenharmony_ci * TODO: just skip all GMEM stuff when sysmem is forced? 87bf215546Sopenharmony_ci */ 88bf215546Sopenharmony_ci if (!pass->gmem_pixels[gmem_layout]) 89bf215546Sopenharmony_ci return; 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) { 92bf215546Sopenharmony_ci /* start with 2x2 tiles */ 93bf215546Sopenharmony_ci tiling->tile_count.width = 2; 94bf215546Sopenharmony_ci tiling->tile_count.height = 2; 95bf215546Sopenharmony_ci tiling->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w); 96bf215546Sopenharmony_ci tiling->tile0.height = align(DIV_ROUND_UP(fb->height, 2), tile_align_h); 97bf215546Sopenharmony_ci } 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci /* do not exceed max tile width */ 100bf215546Sopenharmony_ci while (tiling->tile0.width > max_tile_width) { 101bf215546Sopenharmony_ci tiling->tile_count.width++; 102bf215546Sopenharmony_ci tiling->tile0.width = 103bf215546Sopenharmony_ci util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w); 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci /* do not exceed max tile height */ 107bf215546Sopenharmony_ci while (tiling->tile0.height > max_tile_height) { 108bf215546Sopenharmony_ci tiling->tile_count.height++; 109bf215546Sopenharmony_ci tiling->tile0.height = 110bf215546Sopenharmony_ci util_align_npot(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h); 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci /* do not exceed gmem size */ 114bf215546Sopenharmony_ci while (tiling->tile0.width * tiling->tile0.height > pass->gmem_pixels[gmem_layout]) { 115bf215546Sopenharmony_ci if (tiling->tile0.width > MAX2(tile_align_w, tiling->tile0.height)) { 116bf215546Sopenharmony_ci tiling->tile_count.width++; 117bf215546Sopenharmony_ci tiling->tile0.width = 118bf215546Sopenharmony_ci util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w); 119bf215546Sopenharmony_ci } else { 120bf215546Sopenharmony_ci /* if this assert fails then layout is impossible.. */ 121bf215546Sopenharmony_ci assert(tiling->tile0.height > tile_align_h); 122bf215546Sopenharmony_ci tiling->tile_count.height++; 123bf215546Sopenharmony_ci tiling->tile0.height = 124bf215546Sopenharmony_ci align(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h); 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci } 127bf215546Sopenharmony_ci} 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_cistatic void 130bf215546Sopenharmony_citu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling, 131bf215546Sopenharmony_ci const struct tu_device *dev) 132bf215546Sopenharmony_ci{ 133bf215546Sopenharmony_ci const uint32_t max_pipe_count = 32; /* A6xx */ 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci /* start from 1 tile per pipe */ 136bf215546Sopenharmony_ci tiling->pipe0 = (VkExtent2D) { 137bf215546Sopenharmony_ci .width = 1, 138bf215546Sopenharmony_ci .height = 1, 139bf215546Sopenharmony_ci }; 140bf215546Sopenharmony_ci tiling->pipe_count = tiling->tile_count; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci while (tiling->pipe_count.width * tiling->pipe_count.height > max_pipe_count) { 143bf215546Sopenharmony_ci if (tiling->pipe0.width < tiling->pipe0.height) { 144bf215546Sopenharmony_ci tiling->pipe0.width += 1; 145bf215546Sopenharmony_ci tiling->pipe_count.width = 146bf215546Sopenharmony_ci DIV_ROUND_UP(tiling->tile_count.width, tiling->pipe0.width); 147bf215546Sopenharmony_ci } else { 148bf215546Sopenharmony_ci tiling->pipe0.height += 1; 149bf215546Sopenharmony_ci tiling->pipe_count.height = 150bf215546Sopenharmony_ci DIV_ROUND_UP(tiling->tile_count.height, tiling->pipe0.height); 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci } 153bf215546Sopenharmony_ci} 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_cistatic void 156bf215546Sopenharmony_citu_tiling_config_update_pipes(struct tu_tiling_config *tiling, 157bf215546Sopenharmony_ci const struct tu_device *dev) 158bf215546Sopenharmony_ci{ 159bf215546Sopenharmony_ci const uint32_t max_pipe_count = 32; /* A6xx */ 160bf215546Sopenharmony_ci const uint32_t used_pipe_count = 161bf215546Sopenharmony_ci tiling->pipe_count.width * tiling->pipe_count.height; 162bf215546Sopenharmony_ci const VkExtent2D last_pipe = { 163bf215546Sopenharmony_ci .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1, 164bf215546Sopenharmony_ci .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1, 165bf215546Sopenharmony_ci }; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci assert(used_pipe_count <= max_pipe_count); 168bf215546Sopenharmony_ci assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config)); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci for (uint32_t y = 0; y < tiling->pipe_count.height; y++) { 171bf215546Sopenharmony_ci for (uint32_t x = 0; x < tiling->pipe_count.width; x++) { 172bf215546Sopenharmony_ci const uint32_t pipe_x = tiling->pipe0.width * x; 173bf215546Sopenharmony_ci const uint32_t pipe_y = tiling->pipe0.height * y; 174bf215546Sopenharmony_ci const uint32_t pipe_w = (x == tiling->pipe_count.width - 1) 175bf215546Sopenharmony_ci ? last_pipe.width 176bf215546Sopenharmony_ci : tiling->pipe0.width; 177bf215546Sopenharmony_ci const uint32_t pipe_h = (y == tiling->pipe_count.height - 1) 178bf215546Sopenharmony_ci ? last_pipe.height 179bf215546Sopenharmony_ci : tiling->pipe0.height; 180bf215546Sopenharmony_ci const uint32_t n = tiling->pipe_count.width * y + x; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) | 183bf215546Sopenharmony_ci A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) | 184bf215546Sopenharmony_ci A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) | 185bf215546Sopenharmony_ci A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h); 186bf215546Sopenharmony_ci tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h); 187bf215546Sopenharmony_ci } 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci memset(tiling->pipe_config + used_pipe_count, 0, 191bf215546Sopenharmony_ci sizeof(uint32_t) * (max_pipe_count - used_pipe_count)); 192bf215546Sopenharmony_ci} 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_cistatic bool 195bf215546Sopenharmony_ciis_hw_binning_possible(const struct tu_tiling_config *tiling) 196bf215546Sopenharmony_ci{ 197bf215546Sopenharmony_ci /* Similar to older gens, # of tiles per pipe cannot be more than 32. 198bf215546Sopenharmony_ci * But there are no hangs with 16 or more tiles per pipe in either 199bf215546Sopenharmony_ci * X or Y direction, so that limit does not seem to apply. 200bf215546Sopenharmony_ci */ 201bf215546Sopenharmony_ci uint32_t tiles_per_pipe = tiling->pipe0.width * tiling->pipe0.height; 202bf215546Sopenharmony_ci return tiles_per_pipe <= 32; 203bf215546Sopenharmony_ci} 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_cistatic void 206bf215546Sopenharmony_citu_tiling_config_update_binning(struct tu_tiling_config *tiling, const struct tu_device *device) 207bf215546Sopenharmony_ci{ 208bf215546Sopenharmony_ci tiling->binning_possible = is_hw_binning_possible(tiling); 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci if (tiling->binning_possible) { 211bf215546Sopenharmony_ci tiling->binning = (tiling->tile_count.width * tiling->tile_count.height) > 2; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci if (unlikely(device->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) 214bf215546Sopenharmony_ci tiling->binning = true; 215bf215546Sopenharmony_ci if (unlikely(device->physical_device->instance->debug_flags & 216bf215546Sopenharmony_ci TU_DEBUG_NOBIN)) 217bf215546Sopenharmony_ci tiling->binning = false; 218bf215546Sopenharmony_ci } else { 219bf215546Sopenharmony_ci tiling->binning = false; 220bf215546Sopenharmony_ci } 221bf215546Sopenharmony_ci} 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_civoid 224bf215546Sopenharmony_citu_framebuffer_tiling_config(struct tu_framebuffer *fb, 225bf215546Sopenharmony_ci const struct tu_device *device, 226bf215546Sopenharmony_ci const struct tu_render_pass *pass) 227bf215546Sopenharmony_ci{ 228bf215546Sopenharmony_ci for (int gmem_layout = 0; gmem_layout < TU_GMEM_LAYOUT_COUNT; gmem_layout++) { 229bf215546Sopenharmony_ci struct tu_tiling_config *tiling = &fb->tiling[gmem_layout]; 230bf215546Sopenharmony_ci tu_tiling_config_update_tile_layout(fb, device, pass, gmem_layout); 231bf215546Sopenharmony_ci tu_tiling_config_update_pipe_layout(tiling, device); 232bf215546Sopenharmony_ci tu_tiling_config_update_pipes(tiling, device); 233bf215546Sopenharmony_ci tu_tiling_config_update_binning(tiling, device); 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci} 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_civoid 238bf215546Sopenharmony_citu_dbg_log_gmem_load_store_skips(struct tu_device *device) 239bf215546Sopenharmony_ci{ 240bf215546Sopenharmony_ci static uint32_t last_skipped_loads = 0; 241bf215546Sopenharmony_ci static uint32_t last_skipped_stores = 0; 242bf215546Sopenharmony_ci static uint32_t last_total_loads = 0; 243bf215546Sopenharmony_ci static uint32_t last_total_stores = 0; 244bf215546Sopenharmony_ci static struct timespec last_time = {}; 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci pthread_mutex_lock(&device->submit_mutex); 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci struct timespec current_time; 249bf215546Sopenharmony_ci clock_gettime(CLOCK_MONOTONIC, ¤t_time); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci if (timespec_sub_to_nsec(¤t_time, &last_time) > 1000 * 1000 * 1000) { 252bf215546Sopenharmony_ci last_time = current_time; 253bf215546Sopenharmony_ci } else { 254bf215546Sopenharmony_ci pthread_mutex_unlock(&device->submit_mutex); 255bf215546Sopenharmony_ci return; 256bf215546Sopenharmony_ci } 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci struct tu6_global *global = device->global_bo->map; 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci uint32_t current_taken_loads = global->dbg_gmem_taken_loads; 261bf215546Sopenharmony_ci uint32_t current_taken_stores = global->dbg_gmem_taken_stores; 262bf215546Sopenharmony_ci uint32_t current_total_loads = global->dbg_gmem_total_loads; 263bf215546Sopenharmony_ci uint32_t current_total_stores = global->dbg_gmem_total_stores; 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci uint32_t skipped_loads = current_total_loads - current_taken_loads; 266bf215546Sopenharmony_ci uint32_t skipped_stores = current_total_stores - current_taken_stores; 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci uint32_t current_time_frame_skipped_loads = skipped_loads - last_skipped_loads; 269bf215546Sopenharmony_ci uint32_t current_time_frame_skipped_stores = skipped_stores - last_skipped_stores; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci uint32_t current_time_frame_total_loads = current_total_loads - last_total_loads; 272bf215546Sopenharmony_ci uint32_t current_time_frame_total_stores = current_total_stores - last_total_stores; 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci mesa_logi("[GMEM] loads total: %u skipped: %.1f%%\n", 275bf215546Sopenharmony_ci current_time_frame_total_loads, 276bf215546Sopenharmony_ci current_time_frame_skipped_loads / (float) current_time_frame_total_loads * 100.f); 277bf215546Sopenharmony_ci mesa_logi("[GMEM] stores total: %u skipped: %.1f%%\n", 278bf215546Sopenharmony_ci current_time_frame_total_stores, 279bf215546Sopenharmony_ci current_time_frame_skipped_stores / (float) current_time_frame_total_stores * 100.f); 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci last_skipped_loads = skipped_loads; 282bf215546Sopenharmony_ci last_skipped_stores = skipped_stores; 283bf215546Sopenharmony_ci last_total_loads = current_total_loads; 284bf215546Sopenharmony_ci last_total_stores = current_total_stores; 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci pthread_mutex_unlock(&device->submit_mutex); 287bf215546Sopenharmony_ci} 288