1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2022 Collabora, Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "pan_tiling.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include <gtest/gtest.h>
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/*
29bf215546Sopenharmony_ci * Reference tiling algorithm, written for clarity rather than performance. See
30bf215546Sopenharmony_ci * docs/drivers/panfrost.rst for details on the format.
31bf215546Sopenharmony_ci */
32bf215546Sopenharmony_ci
/*
 * Index of the block at (x, y) inside a single tile. The low four bits of
 * (x ^ y) and of y are interleaved: bit i of (x ^ y) lands at bit 2i of the
 * result and bit i of y lands at bit 2i + 1. Both coordinates must be below 16.
 */
static unsigned
u_order(unsigned x, unsigned y)
{
   assert(x < 16 && y < 16);

   const unsigned x_xor_y = x ^ y;
   unsigned index = 0;

   for (unsigned bit = 0; bit < 4; ++bit) {
      /* Even output bits come from x ^ y, odd output bits from y */
      index |= ((x_xor_y >> bit) & 1u) << (2 * bit);
      index |= ((y >> bit) & 1u) << (2 * bit + 1);
   }

   return index;
}
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci/* x/y are in blocks */
53bf215546Sopenharmony_cistatic unsigned
54bf215546Sopenharmony_citiled_offset(unsigned x, unsigned y, unsigned stride, unsigned tilesize, unsigned blocksize)
55bf215546Sopenharmony_ci{
56bf215546Sopenharmony_ci   unsigned tile_x = x / tilesize;
57bf215546Sopenharmony_ci   unsigned tile_y = y / tilesize;
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ci   unsigned x_in_tile = x % tilesize;
60bf215546Sopenharmony_ci   unsigned y_in_tile = y % tilesize;
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci   unsigned index_in_tile = u_order(x_in_tile, y_in_tile);
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci   unsigned row_offset = tile_y * stride;
65bf215546Sopenharmony_ci   unsigned col_offset = (tile_x * tilesize * tilesize) * blocksize;
66bf215546Sopenharmony_ci   unsigned block_offset = index_in_tile * blocksize;
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   return row_offset + col_offset + block_offset;
69bf215546Sopenharmony_ci}
70bf215546Sopenharmony_ci
/*
 * Byte offset of the block at (x, y) in a linear image whose rows are stride
 * bytes apart and whose blocks are blocksize bytes each.
 */
static unsigned
linear_offset(unsigned x, unsigned y, unsigned stride, unsigned blocksize)
{
   const unsigned row_start = stride * y;
   const unsigned within_row = x * blocksize;

   return row_start + within_row;
}
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_cistatic void
78bf215546Sopenharmony_ciref_access_tiled(void *dst, const void *src,
79bf215546Sopenharmony_ci                 unsigned region_x, unsigned region_y,
80bf215546Sopenharmony_ci                 unsigned w, unsigned h,
81bf215546Sopenharmony_ci                 uint32_t dst_stride,
82bf215546Sopenharmony_ci                 uint32_t src_stride,
83bf215546Sopenharmony_ci                 enum pipe_format format,
84bf215546Sopenharmony_ci                 bool dst_is_tiled)
85bf215546Sopenharmony_ci{
86bf215546Sopenharmony_ci   const struct util_format_description *desc = util_format_description(format);;
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci   unsigned tilesize = (desc->block.width > 1) ? 4 : 16;
89bf215546Sopenharmony_ci   unsigned blocksize = (desc->block.bits / 8);
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   unsigned w_block = w / desc->block.width;
92bf215546Sopenharmony_ci   unsigned h_block = h / desc->block.height;
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci   unsigned region_x_block = region_x / desc->block.width;
95bf215546Sopenharmony_ci   unsigned region_y_block = region_y / desc->block.height;
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci   for (unsigned linear_y_block = 0; linear_y_block < h_block; ++linear_y_block) {
98bf215546Sopenharmony_ci      for (unsigned linear_x_block = 0; linear_x_block < w_block; ++linear_x_block) {
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci         unsigned tiled_x_block = region_x_block + linear_x_block;
101bf215546Sopenharmony_ci         unsigned tiled_y_block = region_y_block + linear_y_block;
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci         unsigned dst_offset, src_offset;
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci         if (dst_is_tiled) {
106bf215546Sopenharmony_ci            dst_offset = tiled_offset(tiled_x_block, tiled_y_block, dst_stride, tilesize, blocksize);
107bf215546Sopenharmony_ci            src_offset = linear_offset(linear_x_block, linear_y_block, src_stride, blocksize);
108bf215546Sopenharmony_ci         } else {
109bf215546Sopenharmony_ci            dst_offset = linear_offset(linear_x_block, linear_y_block, dst_stride, blocksize);
110bf215546Sopenharmony_ci            src_offset = tiled_offset(tiled_x_block, tiled_y_block, src_stride, tilesize, blocksize);
111bf215546Sopenharmony_ci         }
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci         memcpy((uint8_t *) dst + dst_offset,
114bf215546Sopenharmony_ci                (const uint8_t *) src + src_offset,
115bf215546Sopenharmony_ci                desc->block.bits / 8);
116bf215546Sopenharmony_ci      }
117bf215546Sopenharmony_ci   }
118bf215546Sopenharmony_ci}
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci/*
121bf215546Sopenharmony_ci * Helper to build test cases for tiled texture access. This test suite compares
122bf215546Sopenharmony_ci * the above reference tiling algorithm to the optimized algorithm used in
123bf215546Sopenharmony_ci * production.
124bf215546Sopenharmony_ci */
125bf215546Sopenharmony_cistatic void
126bf215546Sopenharmony_citest(unsigned width, unsigned height, unsigned rx, unsigned ry,
127bf215546Sopenharmony_ci     unsigned rw, unsigned rh, unsigned linear_stride,
128bf215546Sopenharmony_ci     enum pipe_format format, bool store)
129bf215546Sopenharmony_ci{
130bf215546Sopenharmony_ci   unsigned bpp = util_format_get_blocksize(format);
131bf215546Sopenharmony_ci   unsigned tile_height = util_format_is_compressed(format) ? 4 : 16;
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci   unsigned tiled_width  = ALIGN_POT(width, 16);
134bf215546Sopenharmony_ci   unsigned tiled_height = ALIGN_POT(height, 16);
135bf215546Sopenharmony_ci   unsigned tiled_stride = tiled_width * tile_height * bpp;
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci   unsigned dst_stride = store ? tiled_stride : linear_stride;
138bf215546Sopenharmony_ci   unsigned src_stride = store ? linear_stride : tiled_stride;
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   void *tiled = calloc(bpp, tiled_width * tiled_height);
141bf215546Sopenharmony_ci   void *linear = calloc(bpp, rw * linear_stride);
142bf215546Sopenharmony_ci   void *ref = calloc(bpp, store ? (tiled_width * tiled_height) : (rw * linear_stride));
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci   if (store) {
145bf215546Sopenharmony_ci      for (unsigned i = 0; i < bpp * rw * linear_stride; ++i) {
146bf215546Sopenharmony_ci         ((uint8_t *) linear)[i] = (i & 0xFF);
147bf215546Sopenharmony_ci      }
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci      panfrost_store_tiled_image(tiled, linear, rx, ry, rw, rh,
150bf215546Sopenharmony_ci                                 dst_stride, src_stride, format);
151bf215546Sopenharmony_ci   } else {
152bf215546Sopenharmony_ci      for (unsigned i = 0; i < bpp * tiled_width * tiled_height; ++i) {
153bf215546Sopenharmony_ci         ((uint8_t *) tiled)[i] = (i & 0xFF);
154bf215546Sopenharmony_ci      }
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci      panfrost_load_tiled_image(linear, tiled, rx, ry, rw, rh,
157bf215546Sopenharmony_ci                                dst_stride, src_stride, format);
158bf215546Sopenharmony_ci   }
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   ref_access_tiled(ref, store ? linear : tiled, rx, ry, rw, rh,
161bf215546Sopenharmony_ci                    dst_stride, src_stride, format, store);
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   if (store)
164bf215546Sopenharmony_ci      EXPECT_EQ(memcmp(ref, tiled, bpp * tiled_width * tiled_height), 0);
165bf215546Sopenharmony_ci   else
166bf215546Sopenharmony_ci      EXPECT_EQ(memcmp(ref, linear, bpp * rw * linear_stride), 0);
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   free(ref);
169bf215546Sopenharmony_ci   free(tiled);
170bf215546Sopenharmony_ci   free(linear);
171bf215546Sopenharmony_ci}
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_cistatic void
174bf215546Sopenharmony_citest_ldst(unsigned width, unsigned height, unsigned rx, unsigned ry,
175bf215546Sopenharmony_ci          unsigned rw, unsigned rh, unsigned linear_stride,
176bf215546Sopenharmony_ci          enum pipe_format format)
177bf215546Sopenharmony_ci{
178bf215546Sopenharmony_ci   test(width, height, rx, ry, rw, rh, linear_stride, format, true);
179bf215546Sopenharmony_ci   test(width, height, rx, ry, rw, rh, linear_stride, format, false);
180bf215546Sopenharmony_ci}
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ciTEST(UInterleavedTiling, RegulatFormats)
183bf215546Sopenharmony_ci{
184bf215546Sopenharmony_ci   /* 8-bit */
185bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 23, PIPE_FORMAT_R8_UINT);
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci   /* 16-bit */
188bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 23 * 2, PIPE_FORMAT_R8G8_UINT);
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   /* 24-bit */
191bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 23 * 3, PIPE_FORMAT_R8G8B8_UINT);
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci   /* 32-bit */
194bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 23 * 4, PIPE_FORMAT_R32_UINT);
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   /* 48-bit */
197bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 23 * 6, PIPE_FORMAT_R16G16B16_UINT);
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   /* 64-bit */
200bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 23 * 8, PIPE_FORMAT_R32G32_UINT);
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   /* 96-bit */
203bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 23 * 12, PIPE_FORMAT_R32G32B32_UINT);
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   /* 128-bit */
206bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 23 * 16, PIPE_FORMAT_R32G32B32A32_UINT);
207bf215546Sopenharmony_ci}
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ciTEST(UInterleavedTiling, UnpackedStrides)
210bf215546Sopenharmony_ci{
211bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 369 * 1, PIPE_FORMAT_R8_SINT);
212bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 369 * 2, PIPE_FORMAT_R8G8_SINT);
213bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 369 * 3, PIPE_FORMAT_R8G8B8_SINT);
214bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 369 * 4, PIPE_FORMAT_R32_SINT);
215bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 369 * 6, PIPE_FORMAT_R16G16B16_SINT);
216bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 369 * 8, PIPE_FORMAT_R32G32_SINT);
217bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 369 * 12, PIPE_FORMAT_R32G32B32_SINT);
218bf215546Sopenharmony_ci   test_ldst(23, 17, 0, 0, 23, 17, 369 * 16, PIPE_FORMAT_R32G32B32A32_SINT);
219bf215546Sopenharmony_ci}
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ciTEST(UInterleavedTiling, PartialAccess)
222bf215546Sopenharmony_ci{
223bf215546Sopenharmony_ci   test_ldst(23, 17, 3, 1, 13, 7, 369 * 1, PIPE_FORMAT_R8_UNORM);
224bf215546Sopenharmony_ci   test_ldst(23, 17, 3, 1, 13, 7, 369 * 2, PIPE_FORMAT_R8G8_UNORM);
225bf215546Sopenharmony_ci   test_ldst(23, 17, 3, 1, 13, 7, 369 * 3, PIPE_FORMAT_R8G8B8_UNORM);
226bf215546Sopenharmony_ci   test_ldst(23, 17, 3, 1, 13, 7, 369 * 4, PIPE_FORMAT_R32_UNORM);
227bf215546Sopenharmony_ci   test_ldst(23, 17, 3, 1, 13, 7, 369 * 6, PIPE_FORMAT_R16G16B16_UNORM);
228bf215546Sopenharmony_ci   test_ldst(23, 17, 3, 1, 13, 7, 369 * 8, PIPE_FORMAT_R32G32_UNORM);
229bf215546Sopenharmony_ci   test_ldst(23, 17, 3, 1, 13, 7, 369 * 12, PIPE_FORMAT_R32G32B32_UNORM);
230bf215546Sopenharmony_ci   test_ldst(23, 17, 3, 1, 13, 7, 369 * 16, PIPE_FORMAT_R32G32B32A32_UNORM);
231bf215546Sopenharmony_ci}
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ciTEST(UInterleavedTiling, ETC)
234bf215546Sopenharmony_ci{
235bf215546Sopenharmony_ci   /* Block alignment assumed */
236bf215546Sopenharmony_ci   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC1_RGB8);
237bf215546Sopenharmony_ci   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RGB8A1);
238bf215546Sopenharmony_ci   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RG11_SNORM);
239bf215546Sopenharmony_ci}
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ciTEST(UInterleavedTiling, PartialETC)
242bf215546Sopenharmony_ci{
243bf215546Sopenharmony_ci   /* Block alignment assumed */
244bf215546Sopenharmony_ci   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC1_RGB8);
245bf215546Sopenharmony_ci   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RGB8A1);
246bf215546Sopenharmony_ci   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RG11_SNORM);
247bf215546Sopenharmony_ci}
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ciTEST(UInterleavedTiling, DXT)
250bf215546Sopenharmony_ci{
251bf215546Sopenharmony_ci   /* Block alignment assumed */
252bf215546Sopenharmony_ci   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT1_RGB);
253bf215546Sopenharmony_ci   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT3_RGBA);
254bf215546Sopenharmony_ci   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT5_RGBA);
255bf215546Sopenharmony_ci}
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_ciTEST(UInterleavedTiling, PartialDXT)
258bf215546Sopenharmony_ci{
259bf215546Sopenharmony_ci   /* Block alignment assumed */
260bf215546Sopenharmony_ci   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT1_RGB);
261bf215546Sopenharmony_ci   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT3_RGBA);
262bf215546Sopenharmony_ci   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT5_RGBA);
263bf215546Sopenharmony_ci}
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ciTEST(UInterleavedTiling, ASTC)
266bf215546Sopenharmony_ci{
267bf215546Sopenharmony_ci   /* Block alignment assumed */
268bf215546Sopenharmony_ci   test_ldst(40, 40, 0, 0, 40, 40, 512, PIPE_FORMAT_ASTC_4x4);
269bf215546Sopenharmony_ci   test_ldst(50, 40, 0, 0, 50, 40, 512, PIPE_FORMAT_ASTC_5x4);
270bf215546Sopenharmony_ci   test_ldst(50, 50, 0, 0, 50, 50, 512, PIPE_FORMAT_ASTC_5x5);
271bf215546Sopenharmony_ci}
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ciTEST(UInterleavedTiling, PartialASTC)
274bf215546Sopenharmony_ci{
275bf215546Sopenharmony_ci   /* Block alignment assumed */
276bf215546Sopenharmony_ci   test_ldst(40, 40, 4, 4, 16,  8, 512, PIPE_FORMAT_ASTC_4x4);
277bf215546Sopenharmony_ci   test_ldst(50, 40, 5, 4, 10,  8, 512, PIPE_FORMAT_ASTC_5x4);
278bf215546Sopenharmony_ci   test_ldst(50, 50, 5, 5, 10, 10, 512, PIPE_FORMAT_ASTC_5x5);
279bf215546Sopenharmony_ci}
280