1/*
2 * Copyright (C) 2022 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "pan_tiling.h"
25
26#include <gtest/gtest.h>
27
28/*
29 * Reference tiling algorithm, written for clarity rather than performance. See
30 * docs/drivers/panfrost.rst for details on the format.
31 */
32
/*
 * Compute the index of a block inside a tile from its in-tile coordinates.
 *
 * The result interleaves two 4-bit values: bit i of (x ^ y) lands in even
 * position 2i, and bit i of y lands in odd position 2i + 1. Both coordinates
 * must be below 16 (asserted), so the index always fits in 8 bits.
 */
static unsigned
u_order(unsigned x, unsigned y)
{
   assert(x < 16 && y < 16);

   const unsigned x_xor_y = x ^ y;
   unsigned index = 0;

   for (unsigned bit = 0; bit < 4; ++bit) {
      /* Even output bits come from x ^ y... */
      index |= ((x_xor_y >> bit) & 1) << (2 * bit);

      /* ...and odd output bits come from y alone */
      index |= ((y >> bit) & 1) << ((2 * bit) + 1);
   }

   return index;
}
51
52/* x/y are in blocks */
53static unsigned
54tiled_offset(unsigned x, unsigned y, unsigned stride, unsigned tilesize, unsigned blocksize)
55{
56   unsigned tile_x = x / tilesize;
57   unsigned tile_y = y / tilesize;
58
59   unsigned x_in_tile = x % tilesize;
60   unsigned y_in_tile = y % tilesize;
61
62   unsigned index_in_tile = u_order(x_in_tile, y_in_tile);
63
64   unsigned row_offset = tile_y * stride;
65   unsigned col_offset = (tile_x * tilesize * tilesize) * blocksize;
66   unsigned block_offset = index_in_tile * blocksize;
67
68   return row_offset + col_offset + block_offset;
69}
70
/*
 * Byte offset of block (x, y) in a linear image, where stride is the byte
 * distance between consecutive rows and blocksize is the size of one block in
 * bytes.
 */
static unsigned
linear_offset(unsigned x, unsigned y, unsigned stride, unsigned blocksize)
{
   const unsigned row_start = y * stride;
   const unsigned offset_in_row = blocksize * x;

   return row_start + offset_in_row;
}
76
77static void
78ref_access_tiled(void *dst, const void *src,
79                 unsigned region_x, unsigned region_y,
80                 unsigned w, unsigned h,
81                 uint32_t dst_stride,
82                 uint32_t src_stride,
83                 enum pipe_format format,
84                 bool dst_is_tiled)
85{
86   const struct util_format_description *desc = util_format_description(format);;
87
88   unsigned tilesize = (desc->block.width > 1) ? 4 : 16;
89   unsigned blocksize = (desc->block.bits / 8);
90
91   unsigned w_block = w / desc->block.width;
92   unsigned h_block = h / desc->block.height;
93
94   unsigned region_x_block = region_x / desc->block.width;
95   unsigned region_y_block = region_y / desc->block.height;
96
97   for (unsigned linear_y_block = 0; linear_y_block < h_block; ++linear_y_block) {
98      for (unsigned linear_x_block = 0; linear_x_block < w_block; ++linear_x_block) {
99
100         unsigned tiled_x_block = region_x_block + linear_x_block;
101         unsigned tiled_y_block = region_y_block + linear_y_block;
102
103         unsigned dst_offset, src_offset;
104
105         if (dst_is_tiled) {
106            dst_offset = tiled_offset(tiled_x_block, tiled_y_block, dst_stride, tilesize, blocksize);
107            src_offset = linear_offset(linear_x_block, linear_y_block, src_stride, blocksize);
108         } else {
109            dst_offset = linear_offset(linear_x_block, linear_y_block, dst_stride, blocksize);
110            src_offset = tiled_offset(tiled_x_block, tiled_y_block, src_stride, tilesize, blocksize);
111         }
112
113         memcpy((uint8_t *) dst + dst_offset,
114                (const uint8_t *) src + src_offset,
115                desc->block.bits / 8);
116      }
117   }
118}
119
120/*
121 * Helper to build test cases for tiled texture access. This test suite compares
122 * the above reference tiling algorithm to the optimized algorithm used in
123 * production.
124 */
125static void
126test(unsigned width, unsigned height, unsigned rx, unsigned ry,
127     unsigned rw, unsigned rh, unsigned linear_stride,
128     enum pipe_format format, bool store)
129{
130   unsigned bpp = util_format_get_blocksize(format);
131   unsigned tile_height = util_format_is_compressed(format) ? 4 : 16;
132
133   unsigned tiled_width  = ALIGN_POT(width, 16);
134   unsigned tiled_height = ALIGN_POT(height, 16);
135   unsigned tiled_stride = tiled_width * tile_height * bpp;
136
137   unsigned dst_stride = store ? tiled_stride : linear_stride;
138   unsigned src_stride = store ? linear_stride : tiled_stride;
139
140   void *tiled = calloc(bpp, tiled_width * tiled_height);
141   void *linear = calloc(bpp, rw * linear_stride);
142   void *ref = calloc(bpp, store ? (tiled_width * tiled_height) : (rw * linear_stride));
143
144   if (store) {
145      for (unsigned i = 0; i < bpp * rw * linear_stride; ++i) {
146         ((uint8_t *) linear)[i] = (i & 0xFF);
147      }
148
149      panfrost_store_tiled_image(tiled, linear, rx, ry, rw, rh,
150                                 dst_stride, src_stride, format);
151   } else {
152      for (unsigned i = 0; i < bpp * tiled_width * tiled_height; ++i) {
153         ((uint8_t *) tiled)[i] = (i & 0xFF);
154      }
155
156      panfrost_load_tiled_image(linear, tiled, rx, ry, rw, rh,
157                                dst_stride, src_stride, format);
158   }
159
160   ref_access_tiled(ref, store ? linear : tiled, rx, ry, rw, rh,
161                    dst_stride, src_stride, format, store);
162
163   if (store)
164      EXPECT_EQ(memcmp(ref, tiled, bpp * tiled_width * tiled_height), 0);
165   else
166      EXPECT_EQ(memcmp(ref, linear, bpp * rw * linear_stride), 0);
167
168   free(ref);
169   free(tiled);
170   free(linear);
171}
172
173static void
174test_ldst(unsigned width, unsigned height, unsigned rx, unsigned ry,
175          unsigned rw, unsigned rh, unsigned linear_stride,
176          enum pipe_format format)
177{
178   test(width, height, rx, ry, rw, rh, linear_stride, format, true);
179   test(width, height, rx, ry, rw, rh, linear_stride, format, false);
180}
181
182TEST(UInterleavedTiling, RegulatFormats)
183{
184   /* 8-bit */
185   test_ldst(23, 17, 0, 0, 23, 17, 23, PIPE_FORMAT_R8_UINT);
186
187   /* 16-bit */
188   test_ldst(23, 17, 0, 0, 23, 17, 23 * 2, PIPE_FORMAT_R8G8_UINT);
189
190   /* 24-bit */
191   test_ldst(23, 17, 0, 0, 23, 17, 23 * 3, PIPE_FORMAT_R8G8B8_UINT);
192
193   /* 32-bit */
194   test_ldst(23, 17, 0, 0, 23, 17, 23 * 4, PIPE_FORMAT_R32_UINT);
195
196   /* 48-bit */
197   test_ldst(23, 17, 0, 0, 23, 17, 23 * 6, PIPE_FORMAT_R16G16B16_UINT);
198
199   /* 64-bit */
200   test_ldst(23, 17, 0, 0, 23, 17, 23 * 8, PIPE_FORMAT_R32G32_UINT);
201
202   /* 96-bit */
203   test_ldst(23, 17, 0, 0, 23, 17, 23 * 12, PIPE_FORMAT_R32G32B32_UINT);
204
205   /* 128-bit */
206   test_ldst(23, 17, 0, 0, 23, 17, 23 * 16, PIPE_FORMAT_R32G32B32A32_UINT);
207}
208
209TEST(UInterleavedTiling, UnpackedStrides)
210{
211   test_ldst(23, 17, 0, 0, 23, 17, 369 * 1, PIPE_FORMAT_R8_SINT);
212   test_ldst(23, 17, 0, 0, 23, 17, 369 * 2, PIPE_FORMAT_R8G8_SINT);
213   test_ldst(23, 17, 0, 0, 23, 17, 369 * 3, PIPE_FORMAT_R8G8B8_SINT);
214   test_ldst(23, 17, 0, 0, 23, 17, 369 * 4, PIPE_FORMAT_R32_SINT);
215   test_ldst(23, 17, 0, 0, 23, 17, 369 * 6, PIPE_FORMAT_R16G16B16_SINT);
216   test_ldst(23, 17, 0, 0, 23, 17, 369 * 8, PIPE_FORMAT_R32G32_SINT);
217   test_ldst(23, 17, 0, 0, 23, 17, 369 * 12, PIPE_FORMAT_R32G32B32_SINT);
218   test_ldst(23, 17, 0, 0, 23, 17, 369 * 16, PIPE_FORMAT_R32G32B32A32_SINT);
219}
220
221TEST(UInterleavedTiling, PartialAccess)
222{
223   test_ldst(23, 17, 3, 1, 13, 7, 369 * 1, PIPE_FORMAT_R8_UNORM);
224   test_ldst(23, 17, 3, 1, 13, 7, 369 * 2, PIPE_FORMAT_R8G8_UNORM);
225   test_ldst(23, 17, 3, 1, 13, 7, 369 * 3, PIPE_FORMAT_R8G8B8_UNORM);
226   test_ldst(23, 17, 3, 1, 13, 7, 369 * 4, PIPE_FORMAT_R32_UNORM);
227   test_ldst(23, 17, 3, 1, 13, 7, 369 * 6, PIPE_FORMAT_R16G16B16_UNORM);
228   test_ldst(23, 17, 3, 1, 13, 7, 369 * 8, PIPE_FORMAT_R32G32_UNORM);
229   test_ldst(23, 17, 3, 1, 13, 7, 369 * 12, PIPE_FORMAT_R32G32B32_UNORM);
230   test_ldst(23, 17, 3, 1, 13, 7, 369 * 16, PIPE_FORMAT_R32G32B32A32_UNORM);
231}
232
233TEST(UInterleavedTiling, ETC)
234{
235   /* Block alignment assumed */
236   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC1_RGB8);
237   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RGB8A1);
238   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RG11_SNORM);
239}
240
241TEST(UInterleavedTiling, PartialETC)
242{
243   /* Block alignment assumed */
244   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC1_RGB8);
245   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RGB8A1);
246   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RG11_SNORM);
247}
248
249TEST(UInterleavedTiling, DXT)
250{
251   /* Block alignment assumed */
252   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT1_RGB);
253   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT3_RGBA);
254   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT5_RGBA);
255}
256
257TEST(UInterleavedTiling, PartialDXT)
258{
259   /* Block alignment assumed */
260   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT1_RGB);
261   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT3_RGBA);
262   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT5_RGBA);
263}
264
265TEST(UInterleavedTiling, ASTC)
266{
267   /* Block alignment assumed */
268   test_ldst(40, 40, 0, 0, 40, 40, 512, PIPE_FORMAT_ASTC_4x4);
269   test_ldst(50, 40, 0, 0, 50, 40, 512, PIPE_FORMAT_ASTC_5x4);
270   test_ldst(50, 50, 0, 0, 50, 50, 512, PIPE_FORMAT_ASTC_5x5);
271}
272
273TEST(UInterleavedTiling, PartialASTC)
274{
275   /* Block alignment assumed */
276   test_ldst(40, 40, 4, 4, 16,  8, 512, PIPE_FORMAT_ASTC_4x4);
277   test_ldst(50, 40, 5, 4, 10,  8, 512, PIPE_FORMAT_ASTC_5x4);
278   test_ldst(50, 50, 5, 5, 10, 10, 512, PIPE_FORMAT_ASTC_5x5);
279}
280