1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3bf215546Sopenharmony_ci * Copyright © 2018-2019 Google, Inc.
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22bf215546Sopenharmony_ci * SOFTWARE.
23bf215546Sopenharmony_ci *
24bf215546Sopenharmony_ci * Authors:
25bf215546Sopenharmony_ci *    Rob Clark <robclark@freedesktop.org>
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include <stdio.h>
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "freedreno_layout.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_cistatic bool
33bf215546Sopenharmony_ciis_r8g8(const struct fdl_layout *layout)
34bf215546Sopenharmony_ci{
35bf215546Sopenharmony_ci   return layout->cpp == 2 &&
36bf215546Sopenharmony_ci          util_format_get_nr_components(layout->format) == 2;
37bf215546Sopenharmony_ci}
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_civoid
40bf215546Sopenharmony_cifdl6_get_ubwc_blockwidth(const struct fdl_layout *layout,
41bf215546Sopenharmony_ci                         uint32_t *blockwidth, uint32_t *blockheight)
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci   static const struct {
44bf215546Sopenharmony_ci      uint8_t width;
45bf215546Sopenharmony_ci      uint8_t height;
46bf215546Sopenharmony_ci   } blocksize[] = {
47bf215546Sopenharmony_ci      { 16, 4 }, /* cpp = 1 */
48bf215546Sopenharmony_ci      { 16, 4 }, /* cpp = 2 */
49bf215546Sopenharmony_ci      { 16, 4 }, /* cpp = 4 */
50bf215546Sopenharmony_ci      {  8, 4 }, /* cpp = 8 */
51bf215546Sopenharmony_ci      {  4, 4 }, /* cpp = 16 */
52bf215546Sopenharmony_ci      {  4, 2 }, /* cpp = 32 */
53bf215546Sopenharmony_ci      {  0, 0 }, /* cpp = 64 (TODO) */
54bf215546Sopenharmony_ci   };
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci   /* special case for r8g8: */
57bf215546Sopenharmony_ci   if (is_r8g8(layout)) {
58bf215546Sopenharmony_ci      *blockwidth = 16;
59bf215546Sopenharmony_ci      *blockheight = 8;
60bf215546Sopenharmony_ci      return;
61bf215546Sopenharmony_ci   } else if (layout->format == PIPE_FORMAT_Y8_UNORM) {
62bf215546Sopenharmony_ci      *blockwidth = 32;
63bf215546Sopenharmony_ci      *blockheight = 8;
64bf215546Sopenharmony_ci      return;
65bf215546Sopenharmony_ci   }
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   uint32_t cpp = fdl_cpp_shift(layout);
68bf215546Sopenharmony_ci   assert(cpp < ARRAY_SIZE(blocksize));
69bf215546Sopenharmony_ci   *blockwidth = blocksize[cpp].width;
70bf215546Sopenharmony_ci   *blockheight = blocksize[cpp].height;
71bf215546Sopenharmony_ci}
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_cistatic void
74bf215546Sopenharmony_cifdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
75bf215546Sopenharmony_ci{
76bf215546Sopenharmony_ci   layout->pitchalign = fdl_cpp_shift(layout);
77bf215546Sopenharmony_ci   *heightalign = 16;
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci   if (is_r8g8(layout) || layout->cpp == 1) {
80bf215546Sopenharmony_ci      layout->pitchalign = 1;
81bf215546Sopenharmony_ci      *heightalign = 32;
82bf215546Sopenharmony_ci   } else if (layout->cpp == 2) {
83bf215546Sopenharmony_ci      layout->pitchalign = 2;
84bf215546Sopenharmony_ci   }
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci   /* Empirical evidence suggests that images with UBWC could have much
87bf215546Sopenharmony_ci    * looser alignment requirements, however the validity of alignment is
88bf215546Sopenharmony_ci    * heavily undertested and the "officially" supported alignment is 4096b.
89bf215546Sopenharmony_ci    */
90bf215546Sopenharmony_ci   if (layout->ubwc)
91bf215546Sopenharmony_ci      layout->base_align = 4096;
92bf215546Sopenharmony_ci   else if (layout->cpp == 1)
93bf215546Sopenharmony_ci      layout->base_align = 64;
94bf215546Sopenharmony_ci   else if (layout->cpp == 2)
95bf215546Sopenharmony_ci      layout->base_align = 128;
96bf215546Sopenharmony_ci   else
97bf215546Sopenharmony_ci      layout->base_align = 256;
98bf215546Sopenharmony_ci}
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci/* NOTE: good way to test this is:  (for example)
101bf215546Sopenharmony_ci *  piglit/bin/texelFetch fs sampler3D 100x100x8
102bf215546Sopenharmony_ci */
103bf215546Sopenharmony_cibool
104bf215546Sopenharmony_cifdl6_layout(struct fdl_layout *layout, enum pipe_format format,
105bf215546Sopenharmony_ci            uint32_t nr_samples, uint32_t width0, uint32_t height0,
106bf215546Sopenharmony_ci            uint32_t depth0, uint32_t mip_levels, uint32_t array_size,
107bf215546Sopenharmony_ci            bool is_3d, struct fdl_explicit_layout *explicit_layout)
108bf215546Sopenharmony_ci{
109bf215546Sopenharmony_ci   uint32_t offset = 0, heightalign;
110bf215546Sopenharmony_ci   uint32_t ubwc_blockwidth, ubwc_blockheight;
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   assert(nr_samples > 0);
113bf215546Sopenharmony_ci   layout->width0 = width0;
114bf215546Sopenharmony_ci   layout->height0 = height0;
115bf215546Sopenharmony_ci   layout->depth0 = depth0;
116bf215546Sopenharmony_ci   layout->mip_levels = mip_levels;
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   layout->cpp = util_format_get_blocksize(format);
119bf215546Sopenharmony_ci   layout->cpp *= nr_samples;
120bf215546Sopenharmony_ci   layout->cpp_shift = ffs(layout->cpp) - 1;
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci   layout->format = format;
123bf215546Sopenharmony_ci   layout->nr_samples = nr_samples;
124bf215546Sopenharmony_ci   layout->layer_first = !is_3d;
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci   fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci   if (depth0 > 1 || ubwc_blockwidth == 0)
129bf215546Sopenharmony_ci      layout->ubwc = false;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   if (layout->ubwc || util_format_is_depth_or_stencil(format))
132bf215546Sopenharmony_ci      layout->tile_all = true;
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   /* in layer_first layout, the level (slice) contains just one
135bf215546Sopenharmony_ci    * layer (since in fact the layer contains the slices)
136bf215546Sopenharmony_ci    */
137bf215546Sopenharmony_ci   uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci   /* note: for tiled+noubwc layouts, we can use a lower pitchalign
140bf215546Sopenharmony_ci    * which will affect the linear levels only, (the hardware will still
141bf215546Sopenharmony_ci    * expect the tiled alignment on the tiled levels)
142bf215546Sopenharmony_ci    */
143bf215546Sopenharmony_ci   if (layout->tile_mode) {
144bf215546Sopenharmony_ci      fdl6_tile_alignment(layout, &heightalign);
145bf215546Sopenharmony_ci   } else {
146bf215546Sopenharmony_ci      layout->base_align = 64;
147bf215546Sopenharmony_ci      layout->pitchalign = 0;
148bf215546Sopenharmony_ci      /* align pitch to at least 16 pixels:
149bf215546Sopenharmony_ci       * both turnip and galium assume there is enough alignment for 16x4
150bf215546Sopenharmony_ci       * aligned gmem store. turnip can use CP_BLIT to work without this
151bf215546Sopenharmony_ci       * extra alignment, but gallium driver doesn't implement it yet
152bf215546Sopenharmony_ci       */
153bf215546Sopenharmony_ci      if (layout->cpp > 4)
154bf215546Sopenharmony_ci         layout->pitchalign = fdl_cpp_shift(layout) - 2;
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci      /* when possible, use a bit more alignment than necessary
157bf215546Sopenharmony_ci       * presumably this is better for performance?
158bf215546Sopenharmony_ci       */
159bf215546Sopenharmony_ci      if (!explicit_layout)
160bf215546Sopenharmony_ci         layout->pitchalign = fdl_cpp_shift(layout);
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci      /* not used, avoid "may be used uninitialized" warning */
163bf215546Sopenharmony_ci      heightalign = 1;
164bf215546Sopenharmony_ci   }
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   fdl_set_pitchalign(layout, layout->pitchalign + 6);
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   if (explicit_layout) {
169bf215546Sopenharmony_ci      offset = explicit_layout->offset;
170bf215546Sopenharmony_ci      layout->pitch0 = explicit_layout->pitch;
171bf215546Sopenharmony_ci      if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0)
172bf215546Sopenharmony_ci         return false;
173bf215546Sopenharmony_ci   }
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   uint32_t ubwc_width0 = width0;
176bf215546Sopenharmony_ci   uint32_t ubwc_height0 = height0;
177bf215546Sopenharmony_ci   uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
178bf215546Sopenharmony_ci   if (mip_levels > 1) {
179bf215546Sopenharmony_ci      /* With mipmapping enabled, UBWC layout is power-of-two sized,
180bf215546Sopenharmony_ci       * specified in log2 width/height in the descriptors.  The height
181bf215546Sopenharmony_ci       * alignment is 64 for mipmapping, but for buffer sharing (always
182bf215546Sopenharmony_ci       * single level) other participants expect 16.
183bf215546Sopenharmony_ci       */
184bf215546Sopenharmony_ci      ubwc_width0 = util_next_power_of_two(width0);
185bf215546Sopenharmony_ci      ubwc_height0 = util_next_power_of_two(height0);
186bf215546Sopenharmony_ci      ubwc_tile_height_alignment = 64;
187bf215546Sopenharmony_ci   }
188bf215546Sopenharmony_ci   layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
189bf215546Sopenharmony_ci                               RGB_TILE_WIDTH_ALIGNMENT);
190bf215546Sopenharmony_ci   ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
191bf215546Sopenharmony_ci                        ubwc_tile_height_alignment);
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci   uint32_t min_3d_layer_size = 0;
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   for (uint32_t level = 0; level < mip_levels; level++) {
196bf215546Sopenharmony_ci      uint32_t depth = u_minify(depth0, level);
197bf215546Sopenharmony_ci      struct fdl_slice *slice = &layout->slices[level];
198bf215546Sopenharmony_ci      struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
199bf215546Sopenharmony_ci      uint32_t tile_mode = fdl_tile_mode(layout, level);
200bf215546Sopenharmony_ci      uint32_t pitch = fdl_pitch(layout, level);
201bf215546Sopenharmony_ci      uint32_t height = u_minify(height0, level);
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci      uint32_t nblocksy = util_format_get_nblocksy(format, height);
204bf215546Sopenharmony_ci      if (tile_mode)
205bf215546Sopenharmony_ci         nblocksy = align(nblocksy, heightalign);
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci      /* The blits used for mem<->gmem work at a granularity of
208bf215546Sopenharmony_ci       * 16x4, which can cause faults due to over-fetch on the
209bf215546Sopenharmony_ci       * last level.  The simple solution is to over-allocate a
210bf215546Sopenharmony_ci       * bit the last level to ensure any over-fetch is harmless.
211bf215546Sopenharmony_ci       * The pitch is already sufficiently aligned, but height
212bf215546Sopenharmony_ci       * may not be. note this only matters if last level is linear
213bf215546Sopenharmony_ci       */
214bf215546Sopenharmony_ci      if (level == mip_levels - 1)
215bf215546Sopenharmony_ci         nblocksy = align(nblocksy, 4);
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci      slice->offset = offset + layout->size;
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci      /* 1d array and 2d array textures must all have the same layer size for
220bf215546Sopenharmony_ci       * each miplevel on a6xx.  For 3D, the layer size automatically reduces
221bf215546Sopenharmony_ci       * until the value we specify in TEX_CONST_3_MIN_LAYERSZ, which is used to
222bf215546Sopenharmony_ci       * make sure that we follow alignment requirements after minification.
223bf215546Sopenharmony_ci       */
224bf215546Sopenharmony_ci      if (is_3d) {
225bf215546Sopenharmony_ci         if (level == 0) {
226bf215546Sopenharmony_ci            slice->size0 = align(nblocksy * pitch, 4096);
227bf215546Sopenharmony_ci         } else if (min_3d_layer_size) {
228bf215546Sopenharmony_ci            slice->size0 = min_3d_layer_size;
229bf215546Sopenharmony_ci         } else {
230bf215546Sopenharmony_ci            /* Note: level * 2 for minifying in both X and Y. */
231bf215546Sopenharmony_ci            slice->size0 = u_minify(layout->slices[0].size0, level * 2);
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci            /* If this level didn't reduce the pitch by half, then fix it up,
234bf215546Sopenharmony_ci             * and this is the end of layer size reduction.
235bf215546Sopenharmony_ci             */
236bf215546Sopenharmony_ci            uint32_t pitch = fdl_pitch(layout, level);
237bf215546Sopenharmony_ci            if (pitch != fdl_pitch(layout, level - 1) / 2)
238bf215546Sopenharmony_ci               min_3d_layer_size = slice->size0 = nblocksy * pitch;
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci            /* If the height is now less than the alignment requirement, then
241bf215546Sopenharmony_ci             * scale it up and let this be the minimum layer size.
242bf215546Sopenharmony_ci             */
243bf215546Sopenharmony_ci            if (tile_mode && util_format_get_nblocksy(format, height) < heightalign)
244bf215546Sopenharmony_ci               min_3d_layer_size = slice->size0 = nblocksy * pitch;
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci            /* If the size would become un-page-aligned, stay aligned instead. */
247bf215546Sopenharmony_ci            if (align(slice->size0, 4096) != slice->size0)
248bf215546Sopenharmony_ci               min_3d_layer_size = slice->size0 = align(slice->size0, 4096);
249bf215546Sopenharmony_ci         }
250bf215546Sopenharmony_ci      } else {
251bf215546Sopenharmony_ci         slice->size0 = nblocksy * pitch;
252bf215546Sopenharmony_ci      }
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci      layout->size += slice->size0 * depth * layers_in_level;
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci      if (layout->ubwc) {
257bf215546Sopenharmony_ci         /* with UBWC every level is aligned to 4K */
258bf215546Sopenharmony_ci         layout->size = align(layout->size, 4096);
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci         uint32_t meta_pitch = fdl_ubwc_pitch(layout, level);
261bf215546Sopenharmony_ci         uint32_t meta_height =
262bf215546Sopenharmony_ci            align(u_minify(ubwc_height0, level), ubwc_tile_height_alignment);
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci         ubwc_slice->size0 =
265bf215546Sopenharmony_ci            align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
266bf215546Sopenharmony_ci         ubwc_slice->offset = offset + layout->ubwc_layer_size;
267bf215546Sopenharmony_ci         layout->ubwc_layer_size += ubwc_slice->size0;
268bf215546Sopenharmony_ci      }
269bf215546Sopenharmony_ci   }
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   if (layout->layer_first) {
272bf215546Sopenharmony_ci      layout->layer_size = align(layout->size, 4096);
273bf215546Sopenharmony_ci      layout->size = layout->layer_size * array_size;
274bf215546Sopenharmony_ci   }
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci   /* Place the UBWC slices before the uncompressed slices, because the
277bf215546Sopenharmony_ci    * kernel expects UBWC to be at the start of the buffer.  In the HW, we
278bf215546Sopenharmony_ci    * get to program the UBWC and non-UBWC offset/strides
279bf215546Sopenharmony_ci    * independently.
280bf215546Sopenharmony_ci    */
281bf215546Sopenharmony_ci   if (layout->ubwc) {
282bf215546Sopenharmony_ci      for (uint32_t level = 0; level < mip_levels; level++)
283bf215546Sopenharmony_ci         layout->slices[level].offset += layout->ubwc_layer_size * array_size;
284bf215546Sopenharmony_ci      layout->size += layout->ubwc_layer_size * array_size;
285bf215546Sopenharmony_ci   }
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci   /* include explicit offset in size */
288bf215546Sopenharmony_ci   layout->size += offset;
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_ci   return true;
291bf215546Sopenharmony_ci}
292