1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2017 Advanced Micro Devices, Inc.
3bf215546Sopenharmony_ci * All Rights Reserved.
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
9bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
10bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci/* This file handles register programming of primitive binning. */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "si_build_pm4.h"
28bf215546Sopenharmony_ci#include "sid.h"
29bf215546Sopenharmony_ci
/* Pair of unsigned values; used in this file to carry a bin size (x, y). */
struct uvec2 {
   unsigned x;
   unsigned y;
};
33bf215546Sopenharmony_ci
/* One row of a bin-size lookup table.
 *
 * A row applies to lookup values in the range [start, next row's start).
 * A row whose bin_size_x is 0 terminates the list.
 */
struct si_bin_size_map {
   unsigned start;      /* first lookup value covered by this row */
   unsigned bin_size_x; /* bin size in X; 0 marks the terminating row */
   unsigned bin_size_y; /* bin size in Y */
};

/* A subtable holds 3 row lists of up to 10 rows each (including the
 * terminating row). Unused trailing rows are zero-initialized.
 */
typedef struct si_bin_size_map si_bin_size_subtable[3][10];
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ci/* Find the bin size where sum is >= table[i].start and < table[i + 1].start. */
43bf215546Sopenharmony_cistatic struct uvec2 si_find_bin_size(struct si_screen *sscreen, const si_bin_size_subtable table[],
44bf215546Sopenharmony_ci                                     unsigned sum)
45bf215546Sopenharmony_ci{
46bf215546Sopenharmony_ci   unsigned log_num_rb_per_se =
47bf215546Sopenharmony_ci      util_logbase2_ceil(sscreen->info.max_render_backends / sscreen->info.max_se);
48bf215546Sopenharmony_ci   unsigned log_num_se = util_logbase2_ceil(sscreen->info.max_se);
49bf215546Sopenharmony_ci   unsigned i;
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   /* Get the chip-specific subtable. */
52bf215546Sopenharmony_ci   const struct si_bin_size_map *subtable = &table[log_num_rb_per_se][log_num_se][0];
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci   for (i = 0; subtable[i].bin_size_x != 0; i++) {
55bf215546Sopenharmony_ci      if (sum >= subtable[i].start && sum < subtable[i + 1].start)
56bf215546Sopenharmony_ci         break;
57bf215546Sopenharmony_ci   }
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ci   struct uvec2 size = {subtable[i].bin_size_x, subtable[i].bin_size_y};
60bf215546Sopenharmony_ci   return size;
61bf215546Sopenharmony_ci}
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_cistatic struct uvec2 si_get_color_bin_size(struct si_context *sctx, unsigned cb_target_enabled_4bit)
64bf215546Sopenharmony_ci{
65bf215546Sopenharmony_ci   unsigned num_fragments = sctx->framebuffer.nr_color_samples;
66bf215546Sopenharmony_ci   unsigned sum = 0;
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   /* Compute the sum of all Bpp. */
69bf215546Sopenharmony_ci   for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
70bf215546Sopenharmony_ci      if (!(cb_target_enabled_4bit & (0xf << (i * 4))))
71bf215546Sopenharmony_ci         continue;
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci      struct si_texture *tex = (struct si_texture *)sctx->framebuffer.state.cbufs[i]->texture;
74bf215546Sopenharmony_ci      sum += tex->surface.bpe;
75bf215546Sopenharmony_ci   }
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   /* Multiply the sum by some function of the number of samples. */
78bf215546Sopenharmony_ci   if (num_fragments >= 2) {
79bf215546Sopenharmony_ci      if (si_get_ps_iter_samples(sctx) >= 2)
80bf215546Sopenharmony_ci         sum *= num_fragments;
81bf215546Sopenharmony_ci      else
82bf215546Sopenharmony_ci         sum *= 2;
83bf215546Sopenharmony_ci   }
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   static const si_bin_size_subtable table[] = {
86bf215546Sopenharmony_ci      {
87bf215546Sopenharmony_ci         /* One RB / SE */
88bf215546Sopenharmony_ci         {
89bf215546Sopenharmony_ci            /* One shader engine */
90bf215546Sopenharmony_ci            {0, 128, 128},
91bf215546Sopenharmony_ci            {1, 64, 128},
92bf215546Sopenharmony_ci            {2, 32, 128},
93bf215546Sopenharmony_ci            {3, 16, 128},
94bf215546Sopenharmony_ci            {17, 0, 0},
95bf215546Sopenharmony_ci         },
96bf215546Sopenharmony_ci         {
97bf215546Sopenharmony_ci            /* Two shader engines */
98bf215546Sopenharmony_ci            {0, 128, 128},
99bf215546Sopenharmony_ci            {2, 64, 128},
100bf215546Sopenharmony_ci            {3, 32, 128},
101bf215546Sopenharmony_ci            {5, 16, 128},
102bf215546Sopenharmony_ci            {17, 0, 0},
103bf215546Sopenharmony_ci         },
104bf215546Sopenharmony_ci         {
105bf215546Sopenharmony_ci            /* Four shader engines */
106bf215546Sopenharmony_ci            {0, 128, 128},
107bf215546Sopenharmony_ci            {3, 64, 128},
108bf215546Sopenharmony_ci            {5, 16, 128},
109bf215546Sopenharmony_ci            {17, 0, 0},
110bf215546Sopenharmony_ci         },
111bf215546Sopenharmony_ci      },
112bf215546Sopenharmony_ci      {
113bf215546Sopenharmony_ci         /* Two RB / SE */
114bf215546Sopenharmony_ci         {
115bf215546Sopenharmony_ci            /* One shader engine */
116bf215546Sopenharmony_ci            {0, 128, 128},
117bf215546Sopenharmony_ci            {2, 64, 128},
118bf215546Sopenharmony_ci            {3, 32, 128},
119bf215546Sopenharmony_ci            {9, 16, 128},
120bf215546Sopenharmony_ci            {33, 0, 0},
121bf215546Sopenharmony_ci         },
122bf215546Sopenharmony_ci         {
123bf215546Sopenharmony_ci            /* Two shader engines */
124bf215546Sopenharmony_ci            {0, 128, 128},
125bf215546Sopenharmony_ci            {3, 64, 128},
126bf215546Sopenharmony_ci            {5, 32, 128},
127bf215546Sopenharmony_ci            {9, 16, 128},
128bf215546Sopenharmony_ci            {33, 0, 0},
129bf215546Sopenharmony_ci         },
130bf215546Sopenharmony_ci         {
131bf215546Sopenharmony_ci            /* Four shader engines */
132bf215546Sopenharmony_ci            {0, 256, 256},
133bf215546Sopenharmony_ci            {2, 128, 256},
134bf215546Sopenharmony_ci            {3, 128, 128},
135bf215546Sopenharmony_ci            {5, 64, 128},
136bf215546Sopenharmony_ci            {9, 16, 128},
137bf215546Sopenharmony_ci            {33, 0, 0},
138bf215546Sopenharmony_ci         },
139bf215546Sopenharmony_ci      },
140bf215546Sopenharmony_ci      {
141bf215546Sopenharmony_ci         /* Four RB / SE */
142bf215546Sopenharmony_ci         {
143bf215546Sopenharmony_ci            /* One shader engine */
144bf215546Sopenharmony_ci            {0, 128, 256},
145bf215546Sopenharmony_ci            {2, 128, 128},
146bf215546Sopenharmony_ci            {3, 64, 128},
147bf215546Sopenharmony_ci            {5, 32, 128},
148bf215546Sopenharmony_ci            {9, 16, 128},
149bf215546Sopenharmony_ci            {17, 0, 0},
150bf215546Sopenharmony_ci         },
151bf215546Sopenharmony_ci         {
152bf215546Sopenharmony_ci            /* Two shader engines */
153bf215546Sopenharmony_ci            {0, 256, 256},
154bf215546Sopenharmony_ci            {2, 128, 256},
155bf215546Sopenharmony_ci            {3, 128, 128},
156bf215546Sopenharmony_ci            {5, 64, 128},
157bf215546Sopenharmony_ci            {9, 32, 128},
158bf215546Sopenharmony_ci            {17, 16, 128},
159bf215546Sopenharmony_ci            {33, 0, 0},
160bf215546Sopenharmony_ci         },
161bf215546Sopenharmony_ci         {
162bf215546Sopenharmony_ci            /* Four shader engines */
163bf215546Sopenharmony_ci            {0, 256, 512},
164bf215546Sopenharmony_ci            {2, 128, 512},
165bf215546Sopenharmony_ci            {3, 64, 512},
166bf215546Sopenharmony_ci            {5, 32, 512},
167bf215546Sopenharmony_ci            {9, 32, 256},
168bf215546Sopenharmony_ci            {17, 32, 128},
169bf215546Sopenharmony_ci            {33, 0, 0},
170bf215546Sopenharmony_ci         },
171bf215546Sopenharmony_ci      },
172bf215546Sopenharmony_ci   };
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci   return si_find_bin_size(sctx->screen, table, sum);
175bf215546Sopenharmony_ci}
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_cistatic struct uvec2 si_get_depth_bin_size(struct si_context *sctx)
178bf215546Sopenharmony_ci{
179bf215546Sopenharmony_ci   struct si_state_dsa *dsa = sctx->queued.named.dsa;
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   if (!sctx->framebuffer.state.zsbuf || (!dsa->depth_enabled && !dsa->stencil_enabled)) {
182bf215546Sopenharmony_ci      /* Return the max size. */
183bf215546Sopenharmony_ci      struct uvec2 size = {512, 512};
184bf215546Sopenharmony_ci      return size;
185bf215546Sopenharmony_ci   }
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci   struct si_texture *tex = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture;
188bf215546Sopenharmony_ci   unsigned depth_coeff = dsa->depth_enabled ? 5 : 0;
189bf215546Sopenharmony_ci   unsigned stencil_coeff = tex->surface.has_stencil && dsa->stencil_enabled ? 1 : 0;
190bf215546Sopenharmony_ci   unsigned sum = 4 * (depth_coeff + stencil_coeff) * MAX2(tex->buffer.b.b.nr_samples, 1);
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   static const si_bin_size_subtable table[] = {
193bf215546Sopenharmony_ci      {
194bf215546Sopenharmony_ci         // One RB / SE
195bf215546Sopenharmony_ci         {
196bf215546Sopenharmony_ci            // One shader engine
197bf215546Sopenharmony_ci            {0, 64, 512},
198bf215546Sopenharmony_ci            {2, 64, 256},
199bf215546Sopenharmony_ci            {4, 64, 128},
200bf215546Sopenharmony_ci            {7, 32, 128},
201bf215546Sopenharmony_ci            {13, 16, 128},
202bf215546Sopenharmony_ci            {49, 0, 0},
203bf215546Sopenharmony_ci         },
204bf215546Sopenharmony_ci         {
205bf215546Sopenharmony_ci            // Two shader engines
206bf215546Sopenharmony_ci            {0, 128, 512},
207bf215546Sopenharmony_ci            {2, 64, 512},
208bf215546Sopenharmony_ci            {4, 64, 256},
209bf215546Sopenharmony_ci            {7, 64, 128},
210bf215546Sopenharmony_ci            {13, 32, 128},
211bf215546Sopenharmony_ci            {25, 16, 128},
212bf215546Sopenharmony_ci            {49, 0, 0},
213bf215546Sopenharmony_ci         },
214bf215546Sopenharmony_ci         {
215bf215546Sopenharmony_ci            // Four shader engines
216bf215546Sopenharmony_ci            {0, 256, 512},
217bf215546Sopenharmony_ci            {2, 128, 512},
218bf215546Sopenharmony_ci            {4, 64, 512},
219bf215546Sopenharmony_ci            {7, 64, 256},
220bf215546Sopenharmony_ci            {13, 64, 128},
221bf215546Sopenharmony_ci            {25, 16, 128},
222bf215546Sopenharmony_ci            {49, 0, 0},
223bf215546Sopenharmony_ci         },
224bf215546Sopenharmony_ci      },
225bf215546Sopenharmony_ci      {
226bf215546Sopenharmony_ci         // Two RB / SE
227bf215546Sopenharmony_ci         {
228bf215546Sopenharmony_ci            // One shader engine
229bf215546Sopenharmony_ci            {0, 128, 512},
230bf215546Sopenharmony_ci            {2, 64, 512},
231bf215546Sopenharmony_ci            {4, 64, 256},
232bf215546Sopenharmony_ci            {7, 64, 128},
233bf215546Sopenharmony_ci            {13, 32, 128},
234bf215546Sopenharmony_ci            {25, 16, 128},
235bf215546Sopenharmony_ci            {97, 0, 0},
236bf215546Sopenharmony_ci         },
237bf215546Sopenharmony_ci         {
238bf215546Sopenharmony_ci            // Two shader engines
239bf215546Sopenharmony_ci            {0, 256, 512},
240bf215546Sopenharmony_ci            {2, 128, 512},
241bf215546Sopenharmony_ci            {4, 64, 512},
242bf215546Sopenharmony_ci            {7, 64, 256},
243bf215546Sopenharmony_ci            {13, 64, 128},
244bf215546Sopenharmony_ci            {25, 32, 128},
245bf215546Sopenharmony_ci            {49, 16, 128},
246bf215546Sopenharmony_ci            {97, 0, 0},
247bf215546Sopenharmony_ci         },
248bf215546Sopenharmony_ci         {
249bf215546Sopenharmony_ci            // Four shader engines
250bf215546Sopenharmony_ci            {0, 512, 512},
251bf215546Sopenharmony_ci            {2, 256, 512},
252bf215546Sopenharmony_ci            {4, 128, 512},
253bf215546Sopenharmony_ci            {7, 64, 512},
254bf215546Sopenharmony_ci            {13, 64, 256},
255bf215546Sopenharmony_ci            {25, 64, 128},
256bf215546Sopenharmony_ci            {49, 16, 128},
257bf215546Sopenharmony_ci            {97, 0, 0},
258bf215546Sopenharmony_ci         },
259bf215546Sopenharmony_ci      },
260bf215546Sopenharmony_ci      {
261bf215546Sopenharmony_ci         // Four RB / SE
262bf215546Sopenharmony_ci         {
263bf215546Sopenharmony_ci            // One shader engine
264bf215546Sopenharmony_ci            {0, 256, 512},
265bf215546Sopenharmony_ci            {2, 128, 512},
266bf215546Sopenharmony_ci            {4, 64, 512},
267bf215546Sopenharmony_ci            {7, 64, 256},
268bf215546Sopenharmony_ci            {13, 64, 128},
269bf215546Sopenharmony_ci            {25, 32, 128},
270bf215546Sopenharmony_ci            {49, 16, 128},
271bf215546Sopenharmony_ci            {193, 0, 0},
272bf215546Sopenharmony_ci         },
273bf215546Sopenharmony_ci         {
274bf215546Sopenharmony_ci            // Two shader engines
275bf215546Sopenharmony_ci            {0, 512, 512},
276bf215546Sopenharmony_ci            {2, 256, 512},
277bf215546Sopenharmony_ci            {4, 128, 512},
278bf215546Sopenharmony_ci            {7, 64, 512},
279bf215546Sopenharmony_ci            {13, 64, 256},
280bf215546Sopenharmony_ci            {25, 64, 128},
281bf215546Sopenharmony_ci            {49, 32, 128},
282bf215546Sopenharmony_ci            {97, 16, 128},
283bf215546Sopenharmony_ci            {193, 0, 0},
284bf215546Sopenharmony_ci         },
285bf215546Sopenharmony_ci         {
286bf215546Sopenharmony_ci            // Four shader engines
287bf215546Sopenharmony_ci            {0, 512, 512},
288bf215546Sopenharmony_ci            {4, 256, 512},
289bf215546Sopenharmony_ci            {7, 128, 512},
290bf215546Sopenharmony_ci            {13, 64, 512},
291bf215546Sopenharmony_ci            {25, 32, 512},
292bf215546Sopenharmony_ci            {49, 32, 256},
293bf215546Sopenharmony_ci            {97, 16, 128},
294bf215546Sopenharmony_ci            {193, 0, 0},
295bf215546Sopenharmony_ci         },
296bf215546Sopenharmony_ci      },
297bf215546Sopenharmony_ci   };
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci   return si_find_bin_size(sctx->screen, table, sum);
300bf215546Sopenharmony_ci}
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_cistatic void gfx10_get_bin_sizes(struct si_context *sctx, unsigned cb_target_enabled_4bit,
303bf215546Sopenharmony_ci                                struct uvec2 *color_bin_size, struct uvec2 *depth_bin_size)
304bf215546Sopenharmony_ci{
305bf215546Sopenharmony_ci   const unsigned ZsTagSize = 64;
306bf215546Sopenharmony_ci   const unsigned ZsNumTags = 312;
307bf215546Sopenharmony_ci   const unsigned CcTagSize = 1024;
308bf215546Sopenharmony_ci   const unsigned CcReadTags = 31;
309bf215546Sopenharmony_ci   const unsigned FcTagSize = 256;
310bf215546Sopenharmony_ci   const unsigned FcReadTags = 44;
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   const unsigned num_rbs = sctx->screen->info.max_render_backends;
313bf215546Sopenharmony_ci   const unsigned num_pipes = MAX2(num_rbs, sctx->screen->info.num_tcc_blocks);
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   const unsigned depthBinSizeTagPart =
316bf215546Sopenharmony_ci      ((ZsNumTags * num_rbs / num_pipes) * (ZsTagSize * num_pipes));
317bf215546Sopenharmony_ci   const unsigned colorBinSizeTagPart =
318bf215546Sopenharmony_ci      ((CcReadTags * num_rbs / num_pipes) * (CcTagSize * num_pipes));
319bf215546Sopenharmony_ci   const unsigned fmaskBinSizeTagPart =
320bf215546Sopenharmony_ci      ((FcReadTags * num_rbs / num_pipes) * (FcTagSize * num_pipes));
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci   const unsigned minBinSizeX = 128;
323bf215546Sopenharmony_ci   const unsigned minBinSizeY = 64;
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci   const unsigned num_fragments = sctx->framebuffer.nr_color_samples;
326bf215546Sopenharmony_ci   const unsigned num_samples = sctx->framebuffer.nr_samples;
327bf215546Sopenharmony_ci   const bool ps_iter_sample = si_get_ps_iter_samples(sctx) >= 2;
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci   /* Calculate cColor and cFmask(if applicable) */
330bf215546Sopenharmony_ci   unsigned cColor = 0;
331bf215546Sopenharmony_ci   unsigned cFmask = 0;
332bf215546Sopenharmony_ci   bool has_fmask = false;
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci   for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
335bf215546Sopenharmony_ci      if (!sctx->framebuffer.state.cbufs[i])
336bf215546Sopenharmony_ci         continue;
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci      struct si_texture *tex = (struct si_texture *)sctx->framebuffer.state.cbufs[i]->texture;
339bf215546Sopenharmony_ci      const unsigned mmrt = num_fragments == 1 ? 1 : (ps_iter_sample ? num_fragments : 2);
340bf215546Sopenharmony_ci
341bf215546Sopenharmony_ci      cColor += tex->surface.bpe * mmrt;
342bf215546Sopenharmony_ci      if (num_samples >= 2 /* if FMASK is bound */) {
343bf215546Sopenharmony_ci         const unsigned fragmentsLog2 = util_logbase2(num_fragments);
344bf215546Sopenharmony_ci         const unsigned samplesLog2 = util_logbase2(num_samples);
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci         static const unsigned cFmaskMrt[4 /* fragments */][5 /* samples */] = {
347bf215546Sopenharmony_ci            {0, 1, 1, 1, 2}, /* fragments = 1 */
348bf215546Sopenharmony_ci            {0, 1, 1, 2, 4}, /* fragments = 2 */
349bf215546Sopenharmony_ci            {0, 1, 1, 4, 8}, /* fragments = 4 */
350bf215546Sopenharmony_ci            {0, 1, 2, 4, 8}  /* fragments = 8 */
351bf215546Sopenharmony_ci         };
352bf215546Sopenharmony_ci         cFmask += cFmaskMrt[fragmentsLog2][samplesLog2];
353bf215546Sopenharmony_ci         has_fmask = true;
354bf215546Sopenharmony_ci      }
355bf215546Sopenharmony_ci   }
356bf215546Sopenharmony_ci   cColor = MAX2(cColor, 1u);
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci   const unsigned colorLog2Pixels = util_logbase2(colorBinSizeTagPart / cColor);
359bf215546Sopenharmony_ci   const unsigned colorBinSizeX = 1 << ((colorLog2Pixels + 1) / 2); /* round up width */
360bf215546Sopenharmony_ci   const unsigned colorBinSizeY = 1 << (colorLog2Pixels / 2);       /* round down height */
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   unsigned binSizeX = colorBinSizeX;
363bf215546Sopenharmony_ci   unsigned binSizeY = colorBinSizeY;
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_ci   if (has_fmask) {
366bf215546Sopenharmony_ci      cFmask = MAX2(cFmask, 1u);
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci      const unsigned fmaskLog2Pixels = util_logbase2(fmaskBinSizeTagPart / cFmask);
369bf215546Sopenharmony_ci      const unsigned fmaskBinSizeX = 1 << ((fmaskLog2Pixels + 1) / 2); /* round up width */
370bf215546Sopenharmony_ci      const unsigned fmaskBinSizeY = 1 << (fmaskLog2Pixels / 2);       /* round down height */
371bf215546Sopenharmony_ci
372bf215546Sopenharmony_ci      /* use the smaller of the Color vs. Fmask bin sizes */
373bf215546Sopenharmony_ci      if (fmaskLog2Pixels < colorLog2Pixels) {
374bf215546Sopenharmony_ci         binSizeX = fmaskBinSizeX;
375bf215546Sopenharmony_ci         binSizeY = fmaskBinSizeY;
376bf215546Sopenharmony_ci      }
377bf215546Sopenharmony_ci   }
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci   /* Return size adjusted for minimum bin size */
380bf215546Sopenharmony_ci   color_bin_size->x = MAX2(binSizeX, minBinSizeX);
381bf215546Sopenharmony_ci   color_bin_size->y = MAX2(binSizeY, minBinSizeY);
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   if (!sctx->framebuffer.state.zsbuf) {
384bf215546Sopenharmony_ci      /* Set to max sizes when no depth buffer is bound. */
385bf215546Sopenharmony_ci      depth_bin_size->x = 512;
386bf215546Sopenharmony_ci      depth_bin_size->y = 512;
387bf215546Sopenharmony_ci   } else {
388bf215546Sopenharmony_ci      struct si_texture *zstex = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture;
389bf215546Sopenharmony_ci      struct si_state_dsa *dsa = sctx->queued.named.dsa;
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_ci      const unsigned cPerDepthSample = dsa->depth_enabled ? 5 : 0;
392bf215546Sopenharmony_ci      const unsigned cPerStencilSample = dsa->stencil_enabled ? 1 : 0;
393bf215546Sopenharmony_ci      const unsigned cDepth =
394bf215546Sopenharmony_ci         (cPerDepthSample + cPerStencilSample) * MAX2(zstex->buffer.b.b.nr_samples, 1);
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci      const unsigned depthLog2Pixels = util_logbase2(depthBinSizeTagPart / MAX2(cDepth, 1u));
397bf215546Sopenharmony_ci      unsigned depthBinSizeX = 1 << ((depthLog2Pixels + 1) / 2);
398bf215546Sopenharmony_ci      unsigned depthBinSizeY = 1 << (depthLog2Pixels / 2);
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_ci      depth_bin_size->x = MAX2(depthBinSizeX, minBinSizeX);
401bf215546Sopenharmony_ci      depth_bin_size->y = MAX2(depthBinSizeY, minBinSizeY);
402bf215546Sopenharmony_ci   }
403bf215546Sopenharmony_ci}
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_cistatic void si_emit_dpbb_disable(struct si_context *sctx)
406bf215546Sopenharmony_ci{
407bf215546Sopenharmony_ci   radeon_begin(&sctx->gfx_cs);
408bf215546Sopenharmony_ci
409bf215546Sopenharmony_ci   if (sctx->gfx_level >= GFX10) {
410bf215546Sopenharmony_ci      struct uvec2 bin_size = {};
411bf215546Sopenharmony_ci      struct uvec2 bin_size_extend = {};
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci      bin_size.x = 128;
414bf215546Sopenharmony_ci      bin_size.y = sctx->framebuffer.min_bytes_per_pixel <= 4 ? 128 : 64;
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci      if (bin_size.x >= 32)
417bf215546Sopenharmony_ci         bin_size_extend.x = util_logbase2(bin_size.x) - 5;
418bf215546Sopenharmony_ci      if (bin_size.y >= 32)
419bf215546Sopenharmony_ci         bin_size_extend.y = util_logbase2(bin_size.y) - 5;
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci      radeon_opt_set_context_reg(
422bf215546Sopenharmony_ci         sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0,
423bf215546Sopenharmony_ci         S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) |
424bf215546Sopenharmony_ci            S_028C44_BIN_SIZE_X(bin_size.x == 16) | S_028C44_BIN_SIZE_Y(bin_size.y == 16) |
425bf215546Sopenharmony_ci            S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend.x) |
426bf215546Sopenharmony_ci            S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) | S_028C44_DISABLE_START_OF_PRIM(1) |
427bf215546Sopenharmony_ci            S_028C44_FPOVS_PER_BATCH(63) |
428bf215546Sopenharmony_ci            S_028C44_OPTIMAL_BIN_SELECTION(1) |
429bf215546Sopenharmony_ci            S_028C44_FLUSH_ON_BINNING_TRANSITION(1));
430bf215546Sopenharmony_ci   } else {
431bf215546Sopenharmony_ci      radeon_opt_set_context_reg(
432bf215546Sopenharmony_ci         sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0,
433bf215546Sopenharmony_ci         S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
434bf215546Sopenharmony_ci            S_028C44_DISABLE_START_OF_PRIM(1) |
435bf215546Sopenharmony_ci            S_028C44_FLUSH_ON_BINNING_TRANSITION(sctx->family == CHIP_VEGA12 ||
436bf215546Sopenharmony_ci                                                 sctx->family == CHIP_VEGA20 ||
437bf215546Sopenharmony_ci                                                 sctx->family >= CHIP_RAVEN2));
438bf215546Sopenharmony_ci   }
439bf215546Sopenharmony_ci   radeon_end_update_context_roll(sctx);
440bf215546Sopenharmony_ci}
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_civoid si_emit_dpbb_state(struct si_context *sctx)
443bf215546Sopenharmony_ci{
444bf215546Sopenharmony_ci   struct si_screen *sscreen = sctx->screen;
445bf215546Sopenharmony_ci   struct si_state_blend *blend = sctx->queued.named.blend;
446bf215546Sopenharmony_ci   struct si_state_dsa *dsa = sctx->queued.named.dsa;
447bf215546Sopenharmony_ci   unsigned db_shader_control = sctx->ps_db_shader_control;
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci   assert(sctx->gfx_level >= GFX9);
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   if (!sscreen->dpbb_allowed || sctx->dpbb_force_off ||
452bf215546Sopenharmony_ci       sctx->dpbb_force_off_profile_vs || sctx->dpbb_force_off_profile_ps) {
453bf215546Sopenharmony_ci      si_emit_dpbb_disable(sctx);
454bf215546Sopenharmony_ci      return;
455bf215546Sopenharmony_ci   }
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci   bool ps_can_kill =
458bf215546Sopenharmony_ci      G_02880C_KILL_ENABLE(db_shader_control) || G_02880C_MASK_EXPORT_ENABLE(db_shader_control) ||
459bf215546Sopenharmony_ci      G_02880C_COVERAGE_TO_MASK_ENABLE(db_shader_control) || blend->alpha_to_coverage;
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci   bool db_can_reject_z_trivially = !G_02880C_Z_EXPORT_ENABLE(db_shader_control) ||
462bf215546Sopenharmony_ci                                    G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control) ||
463bf215546Sopenharmony_ci                                    G_02880C_DEPTH_BEFORE_SHADER(db_shader_control);
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   /* Disable DPBB when it's believed to be inefficient. */
466bf215546Sopenharmony_ci   if (sscreen->info.max_render_backends > 4 && ps_can_kill && db_can_reject_z_trivially &&
467bf215546Sopenharmony_ci       sctx->framebuffer.state.zsbuf && dsa->db_can_write) {
468bf215546Sopenharmony_ci      si_emit_dpbb_disable(sctx);
469bf215546Sopenharmony_ci      return;
470bf215546Sopenharmony_ci   }
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci   /* Compute the bin size. */
473bf215546Sopenharmony_ci   /* TODO: We could also look at enabled pixel shader outputs. */
474bf215546Sopenharmony_ci   unsigned cb_target_enabled_4bit =
475bf215546Sopenharmony_ci      sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_enabled_4bit;
476bf215546Sopenharmony_ci   struct uvec2 color_bin_size, depth_bin_size;
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci   if (sctx->gfx_level >= GFX10) {
479bf215546Sopenharmony_ci      gfx10_get_bin_sizes(sctx, cb_target_enabled_4bit, &color_bin_size, &depth_bin_size);
480bf215546Sopenharmony_ci   } else {
481bf215546Sopenharmony_ci      color_bin_size = si_get_color_bin_size(sctx, cb_target_enabled_4bit);
482bf215546Sopenharmony_ci      depth_bin_size = si_get_depth_bin_size(sctx);
483bf215546Sopenharmony_ci   }
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci   unsigned color_area = color_bin_size.x * color_bin_size.y;
486bf215546Sopenharmony_ci   unsigned depth_area = depth_bin_size.x * depth_bin_size.y;
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   struct uvec2 bin_size = color_area < depth_area ? color_bin_size : depth_bin_size;
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci   if (!bin_size.x || !bin_size.y) {
491bf215546Sopenharmony_ci      si_emit_dpbb_disable(sctx);
492bf215546Sopenharmony_ci      return;
493bf215546Sopenharmony_ci   }
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci   /* Tunable parameters. */
496bf215546Sopenharmony_ci   /* Allowed range:
497bf215546Sopenharmony_ci    *    gfx9-10: [0, 255] (0 = unlimited)
498bf215546Sopenharmony_ci    *    gfx11: [1, 255] (255 = unlimited)
499bf215546Sopenharmony_ci    */
500bf215546Sopenharmony_ci   unsigned fpovs_per_batch = 63;
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci   /* Emit registers. */
503bf215546Sopenharmony_ci   struct uvec2 bin_size_extend = {};
504bf215546Sopenharmony_ci   if (bin_size.x >= 32)
505bf215546Sopenharmony_ci      bin_size_extend.x = util_logbase2(bin_size.x) - 5;
506bf215546Sopenharmony_ci   if (bin_size.y >= 32)
507bf215546Sopenharmony_ci      bin_size_extend.y = util_logbase2(bin_size.y) - 5;
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_ci   radeon_begin(&sctx->gfx_cs);
510bf215546Sopenharmony_ci   radeon_opt_set_context_reg(
511bf215546Sopenharmony_ci      sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0,
512bf215546Sopenharmony_ci      S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BIN_SIZE_X(bin_size.x == 16) |
513bf215546Sopenharmony_ci         S_028C44_BIN_SIZE_Y(bin_size.y == 16) | S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend.x) |
514bf215546Sopenharmony_ci         S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) |
515bf215546Sopenharmony_ci         S_028C44_CONTEXT_STATES_PER_BIN(sscreen->pbb_context_states_per_bin - 1) |
516bf215546Sopenharmony_ci         S_028C44_PERSISTENT_STATES_PER_BIN(sscreen->pbb_persistent_states_per_bin - 1) |
517bf215546Sopenharmony_ci         S_028C44_DISABLE_START_OF_PRIM(1) |
518bf215546Sopenharmony_ci         S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1) |
519bf215546Sopenharmony_ci         S_028C44_FLUSH_ON_BINNING_TRANSITION(sctx->family == CHIP_VEGA12 ||
520bf215546Sopenharmony_ci                                              sctx->family == CHIP_VEGA20 ||
521bf215546Sopenharmony_ci                                              sctx->family >= CHIP_RAVEN2));
522bf215546Sopenharmony_ci   radeon_end_update_context_roll(sctx);
523bf215546Sopenharmony_ci}
524