1/* 2 * Copyright (C) 2022 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "pan_tiling.h" 25 26#include <gtest/gtest.h> 27 28/* 29 * Reference tiling algorithm, written for clarity rather than performance. See 30 * docs/drivers/panfrost.rst for details on the format. 31 */ 32 33static unsigned 34u_order(unsigned x, unsigned y) 35{ 36 assert(x < 16 && y < 16); 37 38 unsigned xy0 = ((x ^ y) & 1) ? 1 : 0; 39 unsigned xy1 = ((x ^ y) & 2) ? 1 : 0; 40 unsigned xy2 = ((x ^ y) & 4) ? 1 : 0; 41 unsigned xy3 = ((x ^ y) & 8) ? 1 : 0; 42 43 unsigned y0 = (y & 1) ? 1 : 0; 44 unsigned y1 = (y & 2) ? 1 : 0; 45 unsigned y2 = (y & 4) ? 1 : 0; 46 unsigned y3 = (y & 8) ? 1 : 0; 47 48 return (xy0 << 0) | (y0 << 1) | (xy1 << 2) | (y1 << 3) | 49 (xy2 << 4) | (y2 << 5) | (xy3 << 6) | (y3 << 7); 50} 51 52/* x/y are in blocks */ 53static unsigned 54tiled_offset(unsigned x, unsigned y, unsigned stride, unsigned tilesize, unsigned blocksize) 55{ 56 unsigned tile_x = x / tilesize; 57 unsigned tile_y = y / tilesize; 58 59 unsigned x_in_tile = x % tilesize; 60 unsigned y_in_tile = y % tilesize; 61 62 unsigned index_in_tile = u_order(x_in_tile, y_in_tile); 63 64 unsigned row_offset = tile_y * stride; 65 unsigned col_offset = (tile_x * tilesize * tilesize) * blocksize; 66 unsigned block_offset = index_in_tile * blocksize; 67 68 return row_offset + col_offset + block_offset; 69} 70 71static unsigned 72linear_offset(unsigned x, unsigned y, unsigned stride, unsigned blocksize) 73{ 74 return (stride * y) + (x * blocksize); 75} 76 77static void 78ref_access_tiled(void *dst, const void *src, 79 unsigned region_x, unsigned region_y, 80 unsigned w, unsigned h, 81 uint32_t dst_stride, 82 uint32_t src_stride, 83 enum pipe_format format, 84 bool dst_is_tiled) 85{ 86 const struct util_format_description *desc = util_format_description(format);; 87 88 unsigned tilesize = (desc->block.width > 1) ? 4 : 16; 89 unsigned blocksize = (desc->block.bits / 8); 90 91 unsigned w_block = w / desc->block.width; 92 unsigned h_block = h / desc->block.height; 93 94 unsigned region_x_block = region_x / desc->block.width; 95 unsigned region_y_block = region_y / desc->block.height; 96 97 for (unsigned linear_y_block = 0; linear_y_block < h_block; ++linear_y_block) { 98 for (unsigned linear_x_block = 0; linear_x_block < w_block; ++linear_x_block) { 99 100 unsigned tiled_x_block = region_x_block + linear_x_block; 101 unsigned tiled_y_block = region_y_block + linear_y_block; 102 103 unsigned dst_offset, src_offset; 104 105 if (dst_is_tiled) { 106 dst_offset = tiled_offset(tiled_x_block, tiled_y_block, dst_stride, tilesize, blocksize); 107 src_offset = linear_offset(linear_x_block, linear_y_block, src_stride, blocksize); 108 } else { 109 dst_offset = linear_offset(linear_x_block, linear_y_block, dst_stride, blocksize); 110 src_offset = tiled_offset(tiled_x_block, tiled_y_block, src_stride, tilesize, blocksize); 111 } 112 113 memcpy((uint8_t *) dst + dst_offset, 114 (const uint8_t *) src + src_offset, 115 desc->block.bits / 8); 116 } 117 } 118} 119 120/* 121 * Helper to build test cases for tiled texture access. This test suite compares 122 * the above reference tiling algorithm to the optimized algorithm used in 123 * production. 124 */ 125static void 126test(unsigned width, unsigned height, unsigned rx, unsigned ry, 127 unsigned rw, unsigned rh, unsigned linear_stride, 128 enum pipe_format format, bool store) 129{ 130 unsigned bpp = util_format_get_blocksize(format); 131 unsigned tile_height = util_format_is_compressed(format) ? 4 : 16; 132 133 unsigned tiled_width = ALIGN_POT(width, 16); 134 unsigned tiled_height = ALIGN_POT(height, 16); 135 unsigned tiled_stride = tiled_width * tile_height * bpp; 136 137 unsigned dst_stride = store ? tiled_stride : linear_stride; 138 unsigned src_stride = store ? linear_stride : tiled_stride; 139 140 void *tiled = calloc(bpp, tiled_width * tiled_height); 141 void *linear = calloc(bpp, rw * linear_stride); 142 void *ref = calloc(bpp, store ? (tiled_width * tiled_height) : (rw * linear_stride)); 143 144 if (store) { 145 for (unsigned i = 0; i < bpp * rw * linear_stride; ++i) { 146 ((uint8_t *) linear)[i] = (i & 0xFF); 147 } 148 149 panfrost_store_tiled_image(tiled, linear, rx, ry, rw, rh, 150 dst_stride, src_stride, format); 151 } else { 152 for (unsigned i = 0; i < bpp * tiled_width * tiled_height; ++i) { 153 ((uint8_t *) tiled)[i] = (i & 0xFF); 154 } 155 156 panfrost_load_tiled_image(linear, tiled, rx, ry, rw, rh, 157 dst_stride, src_stride, format); 158 } 159 160 ref_access_tiled(ref, store ? linear : tiled, rx, ry, rw, rh, 161 dst_stride, src_stride, format, store); 162 163 if (store) 164 EXPECT_EQ(memcmp(ref, tiled, bpp * tiled_width * tiled_height), 0); 165 else 166 EXPECT_EQ(memcmp(ref, linear, bpp * rw * linear_stride), 0); 167 168 free(ref); 169 free(tiled); 170 free(linear); 171} 172 173static void 174test_ldst(unsigned width, unsigned height, unsigned rx, unsigned ry, 175 unsigned rw, unsigned rh, unsigned linear_stride, 176 enum pipe_format format) 177{ 178 test(width, height, rx, ry, rw, rh, linear_stride, format, true); 179 test(width, height, rx, ry, rw, rh, linear_stride, format, false); 180} 181 182TEST(UInterleavedTiling, RegulatFormats) 183{ 184 /* 8-bit */ 185 test_ldst(23, 17, 0, 0, 23, 17, 23, PIPE_FORMAT_R8_UINT); 186 187 /* 16-bit */ 188 test_ldst(23, 17, 0, 0, 23, 17, 23 * 2, PIPE_FORMAT_R8G8_UINT); 189 190 /* 24-bit */ 191 test_ldst(23, 17, 0, 0, 23, 17, 23 * 3, PIPE_FORMAT_R8G8B8_UINT); 192 193 /* 32-bit */ 194 test_ldst(23, 17, 0, 0, 23, 17, 23 * 4, PIPE_FORMAT_R32_UINT); 195 196 /* 48-bit */ 197 test_ldst(23, 17, 0, 0, 23, 17, 23 * 6, PIPE_FORMAT_R16G16B16_UINT); 198 199 /* 64-bit */ 200 test_ldst(23, 17, 0, 0, 23, 17, 23 * 8, PIPE_FORMAT_R32G32_UINT); 201 202 /* 96-bit */ 203 test_ldst(23, 17, 0, 0, 23, 17, 23 * 12, PIPE_FORMAT_R32G32B32_UINT); 204 205 /* 128-bit */ 206 test_ldst(23, 17, 0, 0, 23, 17, 23 * 16, PIPE_FORMAT_R32G32B32A32_UINT); 207} 208 209TEST(UInterleavedTiling, UnpackedStrides) 210{ 211 test_ldst(23, 17, 0, 0, 23, 17, 369 * 1, PIPE_FORMAT_R8_SINT); 212 test_ldst(23, 17, 0, 0, 23, 17, 369 * 2, PIPE_FORMAT_R8G8_SINT); 213 test_ldst(23, 17, 0, 0, 23, 17, 369 * 3, PIPE_FORMAT_R8G8B8_SINT); 214 test_ldst(23, 17, 0, 0, 23, 17, 369 * 4, PIPE_FORMAT_R32_SINT); 215 test_ldst(23, 17, 0, 0, 23, 17, 369 * 6, PIPE_FORMAT_R16G16B16_SINT); 216 test_ldst(23, 17, 0, 0, 23, 17, 369 * 8, PIPE_FORMAT_R32G32_SINT); 217 test_ldst(23, 17, 0, 0, 23, 17, 369 * 12, PIPE_FORMAT_R32G32B32_SINT); 218 test_ldst(23, 17, 0, 0, 23, 17, 369 * 16, PIPE_FORMAT_R32G32B32A32_SINT); 219} 220 221TEST(UInterleavedTiling, PartialAccess) 222{ 223 test_ldst(23, 17, 3, 1, 13, 7, 369 * 1, PIPE_FORMAT_R8_UNORM); 224 test_ldst(23, 17, 3, 1, 13, 7, 369 * 2, PIPE_FORMAT_R8G8_UNORM); 225 test_ldst(23, 17, 3, 1, 13, 7, 369 * 3, PIPE_FORMAT_R8G8B8_UNORM); 226 test_ldst(23, 17, 3, 1, 13, 7, 369 * 4, PIPE_FORMAT_R32_UNORM); 227 test_ldst(23, 17, 3, 1, 13, 7, 369 * 6, PIPE_FORMAT_R16G16B16_UNORM); 228 test_ldst(23, 17, 3, 1, 13, 7, 369 * 8, PIPE_FORMAT_R32G32_UNORM); 229 test_ldst(23, 17, 3, 1, 13, 7, 369 * 12, PIPE_FORMAT_R32G32B32_UNORM); 230 test_ldst(23, 17, 3, 1, 13, 7, 369 * 16, PIPE_FORMAT_R32G32B32A32_UNORM); 231} 232 233TEST(UInterleavedTiling, ETC) 234{ 235 /* Block alignment assumed */ 236 test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC1_RGB8); 237 test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RGB8A1); 238 test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RG11_SNORM); 239} 240 241TEST(UInterleavedTiling, PartialETC) 242{ 243 /* Block alignment assumed */ 244 test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC1_RGB8); 245 test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RGB8A1); 246 test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RG11_SNORM); 247} 248 249TEST(UInterleavedTiling, DXT) 250{ 251 /* Block alignment assumed */ 252 test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT1_RGB); 253 test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT3_RGBA); 254 test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT5_RGBA); 255} 256 257TEST(UInterleavedTiling, PartialDXT) 258{ 259 /* Block alignment assumed */ 260 test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT1_RGB); 261 test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT3_RGBA); 262 test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT5_RGBA); 263} 264 265TEST(UInterleavedTiling, ASTC) 266{ 267 /* Block alignment assumed */ 268 test_ldst(40, 40, 0, 0, 40, 40, 512, PIPE_FORMAT_ASTC_4x4); 269 test_ldst(50, 40, 0, 0, 50, 40, 512, PIPE_FORMAT_ASTC_5x4); 270 test_ldst(50, 50, 0, 0, 50, 50, 512, PIPE_FORMAT_ASTC_5x5); 271} 272 273TEST(UInterleavedTiling, PartialASTC) 274{ 275 /* Block alignment assumed */ 276 test_ldst(40, 40, 4, 4, 16, 8, 512, PIPE_FORMAT_ASTC_4x4); 277 test_ldst(50, 40, 5, 4, 10, 8, 512, PIPE_FORMAT_ASTC_5x4); 278 test_ldst(50, 50, 5, 5, 10, 10, 512, PIPE_FORMAT_ASTC_5x5); 279} 280