1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Google, Inc. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "freedreno_autotune.h" 25bf215546Sopenharmony_ci#include "freedreno_batch.h" 26bf215546Sopenharmony_ci#include "freedreno_util.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/** 29bf215546Sopenharmony_ci * Tracks, for a given batch key (which maps to a FBO/framebuffer state), 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * ralloc parent is fd_autotune::ht 32bf215546Sopenharmony_ci */ 33bf215546Sopenharmony_cistruct fd_batch_history { 34bf215546Sopenharmony_ci struct fd_batch_key *key; 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci /* Entry in fd_autotune::lru: */ 37bf215546Sopenharmony_ci struct list_head node; 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci unsigned num_results; 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_ci /** 42bf215546Sopenharmony_ci * List of recent fd_batch_result's 43bf215546Sopenharmony_ci */ 44bf215546Sopenharmony_ci struct list_head results; 45bf215546Sopenharmony_ci#define MAX_RESULTS 5 46bf215546Sopenharmony_ci}; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistatic struct fd_batch_history * 49bf215546Sopenharmony_ciget_history(struct fd_autotune *at, struct fd_batch *batch) 50bf215546Sopenharmony_ci{ 51bf215546Sopenharmony_ci struct fd_batch_history *history; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci /* draw batches should still have their key at this point. */ 54bf215546Sopenharmony_ci assert(batch->key || batch->nondraw); 55bf215546Sopenharmony_ci if (!batch->key) 56bf215546Sopenharmony_ci return NULL; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci struct hash_entry *entry = 59bf215546Sopenharmony_ci _mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key); 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci if (entry) { 62bf215546Sopenharmony_ci history = entry->data; 63bf215546Sopenharmony_ci goto found; 64bf215546Sopenharmony_ci } 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci history = rzalloc_size(at->ht, sizeof(*history)); 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci history->key = fd_batch_key_clone(history, batch->key); 69bf215546Sopenharmony_ci list_inithead(&history->node); 70bf215546Sopenharmony_ci list_inithead(&history->results); 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci /* Note: We cap # of cached GMEM states at 20.. so assuming double- 73bf215546Sopenharmony_ci * buffering, 40 should be a good place to cap cached autotune state 74bf215546Sopenharmony_ci */ 75bf215546Sopenharmony_ci if (at->ht->entries >= 40) { 76bf215546Sopenharmony_ci struct fd_batch_history *last = 77bf215546Sopenharmony_ci list_last_entry(&at->lru, struct fd_batch_history, node); 78bf215546Sopenharmony_ci _mesa_hash_table_remove_key(at->ht, last->key); 79bf215546Sopenharmony_ci list_del(&last->node); 80bf215546Sopenharmony_ci ralloc_free(last); 81bf215546Sopenharmony_ci } 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci _mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key, 84bf215546Sopenharmony_ci history); 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_cifound: 87bf215546Sopenharmony_ci /* Move to the head of the LRU: */ 88bf215546Sopenharmony_ci list_delinit(&history->node); 89bf215546Sopenharmony_ci list_add(&history->node, &at->lru); 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci return history; 92bf215546Sopenharmony_ci} 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cistatic void 95bf215546Sopenharmony_ciresult_destructor(void *r) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci struct fd_batch_result *result = r; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci /* Just in case we manage to somehow still be on the pending_results list: */ 100bf215546Sopenharmony_ci list_del(&result->node); 101bf215546Sopenharmony_ci} 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_cistatic struct fd_batch_result * 104bf215546Sopenharmony_ciget_result(struct fd_autotune *at, struct fd_batch_history *history) 105bf215546Sopenharmony_ci{ 106bf215546Sopenharmony_ci struct fd_batch_result *result = rzalloc_size(history, sizeof(*result)); 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci result->fence = 109bf215546Sopenharmony_ci ++at->fence_counter; /* pre-increment so zero isn't valid fence */ 110bf215546Sopenharmony_ci result->idx = at->idx_counter++; 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci if (at->idx_counter >= ARRAY_SIZE(at->results->result)) 113bf215546Sopenharmony_ci at->idx_counter = 0; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci result->history = history; 116bf215546Sopenharmony_ci list_addtail(&result->node, &at->pending_results); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci ralloc_set_destructor(result, result_destructor); 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci return result; 121bf215546Sopenharmony_ci} 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_cistatic void 124bf215546Sopenharmony_ciprocess_results(struct fd_autotune *at) 125bf215546Sopenharmony_ci{ 126bf215546Sopenharmony_ci uint32_t current_fence = at->results->fence; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci list_for_each_entry_safe (struct fd_batch_result, result, 129bf215546Sopenharmony_ci &at->pending_results, node) { 130bf215546Sopenharmony_ci if (result->fence > current_fence) 131bf215546Sopenharmony_ci break; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci struct fd_batch_history *history = result->history; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci result->samples_passed = at->results->result[result->idx].samples_end - 136bf215546Sopenharmony_ci at->results->result[result->idx].samples_start; 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci list_delinit(&result->node); 139bf215546Sopenharmony_ci list_add(&result->node, &history->results); 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci if (history->num_results < MAX_RESULTS) { 142bf215546Sopenharmony_ci history->num_results++; 143bf215546Sopenharmony_ci } else { 144bf215546Sopenharmony_ci /* Once above a limit, start popping old results off the 145bf215546Sopenharmony_ci * tail of the list: 146bf215546Sopenharmony_ci */ 147bf215546Sopenharmony_ci struct fd_batch_result *old_result = 148bf215546Sopenharmony_ci list_last_entry(&history->results, struct fd_batch_result, node); 149bf215546Sopenharmony_ci list_delinit(&old_result->node); 150bf215546Sopenharmony_ci ralloc_free(old_result); 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci } 153bf215546Sopenharmony_ci} 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_cistatic bool 156bf215546Sopenharmony_cifallback_use_bypass(struct fd_batch *batch) 157bf215546Sopenharmony_ci{ 158bf215546Sopenharmony_ci struct pipe_framebuffer_state *pfb = &batch->framebuffer; 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci /* Fallback logic if we have no historical data about the rendertarget: */ 161bf215546Sopenharmony_ci if (batch->cleared || batch->gmem_reason || 162bf215546Sopenharmony_ci (batch->num_draws > 5) || (pfb->samples > 1)) { 163bf215546Sopenharmony_ci return false; 164bf215546Sopenharmony_ci } 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci return true; 167bf215546Sopenharmony_ci} 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci/** 170bf215546Sopenharmony_ci * A magic 8-ball that tells the gmem code whether we should do bypass mode 171bf215546Sopenharmony_ci * for moar fps. 172bf215546Sopenharmony_ci */ 173bf215546Sopenharmony_cibool 174bf215546Sopenharmony_cifd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch) 175bf215546Sopenharmony_ci{ 176bf215546Sopenharmony_ci struct pipe_framebuffer_state *pfb = &batch->framebuffer; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci process_results(at); 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci /* Only enable on gen's that opt-in (and actually have sample-passed 181bf215546Sopenharmony_ci * collection wired up: 182bf215546Sopenharmony_ci */ 183bf215546Sopenharmony_ci if (!batch->ctx->screen->gmem_reason_mask) 184bf215546Sopenharmony_ci return fallback_use_bypass(batch); 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask) 187bf215546Sopenharmony_ci return fallback_use_bypass(batch); 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci for (unsigned i = 0; i < pfb->nr_cbufs; i++) { 190bf215546Sopenharmony_ci /* If ms-rtt is involved, force GMEM, as we don't currently 191bf215546Sopenharmony_ci * implement a temporary render target that we can MSAA resolve 192bf215546Sopenharmony_ci * from 193bf215546Sopenharmony_ci */ 194bf215546Sopenharmony_ci if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples) 195bf215546Sopenharmony_ci return fallback_use_bypass(batch); 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci struct fd_batch_history *history = get_history(at, batch); 199bf215546Sopenharmony_ci if (!history) 200bf215546Sopenharmony_ci return fallback_use_bypass(batch); 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci batch->autotune_result = get_result(at, history); 203bf215546Sopenharmony_ci batch->autotune_result->cost = batch->cost; 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci bool use_bypass = fallback_use_bypass(batch); 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci if (use_bypass) 208bf215546Sopenharmony_ci return true; 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci if (history->num_results > 0) { 211bf215546Sopenharmony_ci uint32_t total_samples = 0; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci // TODO we should account for clears somehow 214bf215546Sopenharmony_ci // TODO should we try to notice if there is a drastic change from 215bf215546Sopenharmony_ci // frame to frame? 216bf215546Sopenharmony_ci list_for_each_entry (struct fd_batch_result, result, &history->results, 217bf215546Sopenharmony_ci node) { 218bf215546Sopenharmony_ci total_samples += result->samples_passed; 219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci float avg_samples = (float)total_samples / (float)history->num_results; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci /* Low sample count could mean there was only a clear.. or there was 224bf215546Sopenharmony_ci * a clear plus draws that touch no or few samples 225bf215546Sopenharmony_ci */ 226bf215546Sopenharmony_ci if (avg_samples < 500.0f) 227bf215546Sopenharmony_ci return true; 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci /* Cost-per-sample is an estimate for the average number of reads+ 230bf215546Sopenharmony_ci * writes for a given passed sample. 231bf215546Sopenharmony_ci */ 232bf215546Sopenharmony_ci float sample_cost = batch->cost; 233bf215546Sopenharmony_ci sample_cost /= batch->num_draws; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws; 236bf215546Sopenharmony_ci DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, " 237bf215546Sopenharmony_ci "total_draw_cost=%f\n", 238bf215546Sopenharmony_ci batch->hash, batch->num_draws, total_samples, avg_samples, 239bf215546Sopenharmony_ci sample_cost, total_draw_cost); 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci if (total_draw_cost < 3000.0f) 242bf215546Sopenharmony_ci return true; 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci return use_bypass; 246bf215546Sopenharmony_ci} 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_civoid 249bf215546Sopenharmony_cifd_autotune_init(struct fd_autotune *at, struct fd_device *dev) 250bf215546Sopenharmony_ci{ 251bf215546Sopenharmony_ci at->ht = 252bf215546Sopenharmony_ci _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals); 253bf215546Sopenharmony_ci list_inithead(&at->lru); 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results), 256bf215546Sopenharmony_ci 0, "autotune"); 257bf215546Sopenharmony_ci at->results = fd_bo_map(at->results_mem); 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci list_inithead(&at->pending_results); 260bf215546Sopenharmony_ci} 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_civoid 263bf215546Sopenharmony_cifd_autotune_fini(struct fd_autotune *at) 264bf215546Sopenharmony_ci{ 265bf215546Sopenharmony_ci _mesa_hash_table_destroy(at->ht, NULL); 266bf215546Sopenharmony_ci fd_bo_del(at->results_mem); 267bf215546Sopenharmony_ci} 268