1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Google, Inc.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "freedreno_autotune.h"
25bf215546Sopenharmony_ci#include "freedreno_batch.h"
26bf215546Sopenharmony_ci#include "freedreno_util.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/**
29bf215546Sopenharmony_ci * Tracks, for a given batch key (which maps to a FBO/framebuffer state),
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci * ralloc parent is fd_autotune::ht
32bf215546Sopenharmony_ci */
33bf215546Sopenharmony_cistruct fd_batch_history {
34bf215546Sopenharmony_ci   struct fd_batch_key *key;
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci   /* Entry in fd_autotune::lru: */
37bf215546Sopenharmony_ci   struct list_head node;
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci   unsigned num_results;
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_ci   /**
42bf215546Sopenharmony_ci    * List of recent fd_batch_result's
43bf215546Sopenharmony_ci    */
44bf215546Sopenharmony_ci   struct list_head results;
45bf215546Sopenharmony_ci#define MAX_RESULTS 5
46bf215546Sopenharmony_ci};
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_cistatic struct fd_batch_history *
49bf215546Sopenharmony_ciget_history(struct fd_autotune *at, struct fd_batch *batch)
50bf215546Sopenharmony_ci{
51bf215546Sopenharmony_ci   struct fd_batch_history *history;
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   /* draw batches should still have their key at this point. */
54bf215546Sopenharmony_ci   assert(batch->key || batch->nondraw);
55bf215546Sopenharmony_ci   if (!batch->key)
56bf215546Sopenharmony_ci      return NULL;
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci   struct hash_entry *entry =
59bf215546Sopenharmony_ci      _mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key);
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci   if (entry) {
62bf215546Sopenharmony_ci      history = entry->data;
63bf215546Sopenharmony_ci      goto found;
64bf215546Sopenharmony_ci   }
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci   history = rzalloc_size(at->ht, sizeof(*history));
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   history->key = fd_batch_key_clone(history, batch->key);
69bf215546Sopenharmony_ci   list_inithead(&history->node);
70bf215546Sopenharmony_ci   list_inithead(&history->results);
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   /* Note: We cap # of cached GMEM states at 20.. so assuming double-
73bf215546Sopenharmony_ci    * buffering, 40 should be a good place to cap cached autotune state
74bf215546Sopenharmony_ci    */
75bf215546Sopenharmony_ci   if (at->ht->entries >= 40) {
76bf215546Sopenharmony_ci      struct fd_batch_history *last =
77bf215546Sopenharmony_ci         list_last_entry(&at->lru, struct fd_batch_history, node);
78bf215546Sopenharmony_ci      _mesa_hash_table_remove_key(at->ht, last->key);
79bf215546Sopenharmony_ci      list_del(&last->node);
80bf215546Sopenharmony_ci      ralloc_free(last);
81bf215546Sopenharmony_ci   }
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   _mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key,
84bf215546Sopenharmony_ci                                      history);
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_cifound:
87bf215546Sopenharmony_ci   /* Move to the head of the LRU: */
88bf215546Sopenharmony_ci   list_delinit(&history->node);
89bf215546Sopenharmony_ci   list_add(&history->node, &at->lru);
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   return history;
92bf215546Sopenharmony_ci}
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_cistatic void
95bf215546Sopenharmony_ciresult_destructor(void *r)
96bf215546Sopenharmony_ci{
97bf215546Sopenharmony_ci   struct fd_batch_result *result = r;
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci   /* Just in case we manage to somehow still be on the pending_results list: */
100bf215546Sopenharmony_ci   list_del(&result->node);
101bf215546Sopenharmony_ci}
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_cistatic struct fd_batch_result *
104bf215546Sopenharmony_ciget_result(struct fd_autotune *at, struct fd_batch_history *history)
105bf215546Sopenharmony_ci{
106bf215546Sopenharmony_ci   struct fd_batch_result *result = rzalloc_size(history, sizeof(*result));
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci   result->fence =
109bf215546Sopenharmony_ci      ++at->fence_counter; /* pre-increment so zero isn't valid fence */
110bf215546Sopenharmony_ci   result->idx = at->idx_counter++;
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   if (at->idx_counter >= ARRAY_SIZE(at->results->result))
113bf215546Sopenharmony_ci      at->idx_counter = 0;
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci   result->history = history;
116bf215546Sopenharmony_ci   list_addtail(&result->node, &at->pending_results);
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   ralloc_set_destructor(result, result_destructor);
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   return result;
121bf215546Sopenharmony_ci}
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_cistatic void
124bf215546Sopenharmony_ciprocess_results(struct fd_autotune *at)
125bf215546Sopenharmony_ci{
126bf215546Sopenharmony_ci   uint32_t current_fence = at->results->fence;
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci   list_for_each_entry_safe (struct fd_batch_result, result,
129bf215546Sopenharmony_ci                             &at->pending_results, node) {
130bf215546Sopenharmony_ci      if (result->fence > current_fence)
131bf215546Sopenharmony_ci         break;
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci      struct fd_batch_history *history = result->history;
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci      result->samples_passed = at->results->result[result->idx].samples_end -
136bf215546Sopenharmony_ci                               at->results->result[result->idx].samples_start;
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci      list_delinit(&result->node);
139bf215546Sopenharmony_ci      list_add(&result->node, &history->results);
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci      if (history->num_results < MAX_RESULTS) {
142bf215546Sopenharmony_ci         history->num_results++;
143bf215546Sopenharmony_ci      } else {
144bf215546Sopenharmony_ci         /* Once above a limit, start popping old results off the
145bf215546Sopenharmony_ci          * tail of the list:
146bf215546Sopenharmony_ci          */
147bf215546Sopenharmony_ci         struct fd_batch_result *old_result =
148bf215546Sopenharmony_ci            list_last_entry(&history->results, struct fd_batch_result, node);
149bf215546Sopenharmony_ci         list_delinit(&old_result->node);
150bf215546Sopenharmony_ci         ralloc_free(old_result);
151bf215546Sopenharmony_ci      }
152bf215546Sopenharmony_ci   }
153bf215546Sopenharmony_ci}
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_cistatic bool
156bf215546Sopenharmony_cifallback_use_bypass(struct fd_batch *batch)
157bf215546Sopenharmony_ci{
158bf215546Sopenharmony_ci   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   /* Fallback logic if we have no historical data about the rendertarget: */
161bf215546Sopenharmony_ci   if (batch->cleared || batch->gmem_reason ||
162bf215546Sopenharmony_ci       (batch->num_draws > 5) || (pfb->samples > 1)) {
163bf215546Sopenharmony_ci      return false;
164bf215546Sopenharmony_ci   }
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   return true;
167bf215546Sopenharmony_ci}
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci/**
170bf215546Sopenharmony_ci * A magic 8-ball that tells the gmem code whether we should do bypass mode
171bf215546Sopenharmony_ci * for moar fps.
172bf215546Sopenharmony_ci */
173bf215546Sopenharmony_cibool
174bf215546Sopenharmony_cifd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch)
175bf215546Sopenharmony_ci{
176bf215546Sopenharmony_ci   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci   process_results(at);
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_ci   /* Only enable on gen's that opt-in (and actually have sample-passed
181bf215546Sopenharmony_ci    * collection wired up:
182bf215546Sopenharmony_ci    */
183bf215546Sopenharmony_ci   if (!batch->ctx->screen->gmem_reason_mask)
184bf215546Sopenharmony_ci      return fallback_use_bypass(batch);
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask)
187bf215546Sopenharmony_ci      return fallback_use_bypass(batch);
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
190bf215546Sopenharmony_ci      /* If ms-rtt is involved, force GMEM, as we don't currently
191bf215546Sopenharmony_ci       * implement a temporary render target that we can MSAA resolve
192bf215546Sopenharmony_ci       * from
193bf215546Sopenharmony_ci       */
194bf215546Sopenharmony_ci      if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples)
195bf215546Sopenharmony_ci         return fallback_use_bypass(batch);
196bf215546Sopenharmony_ci   }
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   struct fd_batch_history *history = get_history(at, batch);
199bf215546Sopenharmony_ci   if (!history)
200bf215546Sopenharmony_ci      return fallback_use_bypass(batch);
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   batch->autotune_result = get_result(at, history);
203bf215546Sopenharmony_ci   batch->autotune_result->cost = batch->cost;
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   bool use_bypass = fallback_use_bypass(batch);
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci   if (use_bypass)
208bf215546Sopenharmony_ci      return true;
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   if (history->num_results > 0) {
211bf215546Sopenharmony_ci      uint32_t total_samples = 0;
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci      // TODO we should account for clears somehow
214bf215546Sopenharmony_ci      // TODO should we try to notice if there is a drastic change from
215bf215546Sopenharmony_ci      // frame to frame?
216bf215546Sopenharmony_ci      list_for_each_entry (struct fd_batch_result, result, &history->results,
217bf215546Sopenharmony_ci                           node) {
218bf215546Sopenharmony_ci         total_samples += result->samples_passed;
219bf215546Sopenharmony_ci      }
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci      float avg_samples = (float)total_samples / (float)history->num_results;
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci      /* Low sample count could mean there was only a clear.. or there was
224bf215546Sopenharmony_ci       * a clear plus draws that touch no or few samples
225bf215546Sopenharmony_ci       */
226bf215546Sopenharmony_ci      if (avg_samples < 500.0f)
227bf215546Sopenharmony_ci         return true;
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci      /* Cost-per-sample is an estimate for the average number of reads+
230bf215546Sopenharmony_ci       * writes for a given passed sample.
231bf215546Sopenharmony_ci       */
232bf215546Sopenharmony_ci      float sample_cost = batch->cost;
233bf215546Sopenharmony_ci      sample_cost /= batch->num_draws;
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci      float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws;
236bf215546Sopenharmony_ci      DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, "
237bf215546Sopenharmony_ci          "total_draw_cost=%f\n",
238bf215546Sopenharmony_ci          batch->hash, batch->num_draws, total_samples, avg_samples,
239bf215546Sopenharmony_ci          sample_cost, total_draw_cost);
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci      if (total_draw_cost < 3000.0f)
242bf215546Sopenharmony_ci         return true;
243bf215546Sopenharmony_ci   }
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci   return use_bypass;
246bf215546Sopenharmony_ci}
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_civoid
249bf215546Sopenharmony_cifd_autotune_init(struct fd_autotune *at, struct fd_device *dev)
250bf215546Sopenharmony_ci{
251bf215546Sopenharmony_ci   at->ht =
252bf215546Sopenharmony_ci      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
253bf215546Sopenharmony_ci   list_inithead(&at->lru);
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci   at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results),
256bf215546Sopenharmony_ci                               0, "autotune");
257bf215546Sopenharmony_ci   at->results = fd_bo_map(at->results_mem);
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   list_inithead(&at->pending_results);
260bf215546Sopenharmony_ci}
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_civoid
263bf215546Sopenharmony_cifd_autotune_fini(struct fd_autotune *at)
264bf215546Sopenharmony_ci{
265bf215546Sopenharmony_ci   _mesa_hash_table_destroy(at->ht, NULL);
266bf215546Sopenharmony_ci   fd_bo_del(at->results_mem);
267bf215546Sopenharmony_ci}
268