1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2022 Collabora, Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "pan_tiling.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include <gtest/gtest.h> 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* 29bf215546Sopenharmony_ci * Reference tiling algorithm, written for clarity rather than performance. See 30bf215546Sopenharmony_ci * docs/drivers/panfrost.rst for details on the format. 31bf215546Sopenharmony_ci */ 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cistatic unsigned 34bf215546Sopenharmony_ciu_order(unsigned x, unsigned y) 35bf215546Sopenharmony_ci{ 36bf215546Sopenharmony_ci assert(x < 16 && y < 16); 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci unsigned xy0 = ((x ^ y) & 1) ? 1 : 0; 39bf215546Sopenharmony_ci unsigned xy1 = ((x ^ y) & 2) ? 1 : 0; 40bf215546Sopenharmony_ci unsigned xy2 = ((x ^ y) & 4) ? 1 : 0; 41bf215546Sopenharmony_ci unsigned xy3 = ((x ^ y) & 8) ? 1 : 0; 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci unsigned y0 = (y & 1) ? 1 : 0; 44bf215546Sopenharmony_ci unsigned y1 = (y & 2) ? 1 : 0; 45bf215546Sopenharmony_ci unsigned y2 = (y & 4) ? 1 : 0; 46bf215546Sopenharmony_ci unsigned y3 = (y & 8) ? 1 : 0; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci return (xy0 << 0) | (y0 << 1) | (xy1 << 2) | (y1 << 3) | 49bf215546Sopenharmony_ci (xy2 << 4) | (y2 << 5) | (xy3 << 6) | (y3 << 7); 50bf215546Sopenharmony_ci} 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci/* x/y are in blocks */ 53bf215546Sopenharmony_cistatic unsigned 54bf215546Sopenharmony_citiled_offset(unsigned x, unsigned y, unsigned stride, unsigned tilesize, unsigned blocksize) 55bf215546Sopenharmony_ci{ 56bf215546Sopenharmony_ci unsigned tile_x = x / tilesize; 57bf215546Sopenharmony_ci unsigned tile_y = y / tilesize; 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci unsigned x_in_tile = x % tilesize; 60bf215546Sopenharmony_ci unsigned y_in_tile = y % tilesize; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci unsigned index_in_tile = u_order(x_in_tile, y_in_tile); 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci unsigned row_offset = tile_y * stride; 65bf215546Sopenharmony_ci unsigned col_offset = (tile_x * tilesize * tilesize) * blocksize; 66bf215546Sopenharmony_ci unsigned block_offset = index_in_tile * blocksize; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci return row_offset + col_offset + block_offset; 69bf215546Sopenharmony_ci} 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_cistatic unsigned 72bf215546Sopenharmony_cilinear_offset(unsigned x, unsigned y, unsigned stride, unsigned blocksize) 73bf215546Sopenharmony_ci{ 74bf215546Sopenharmony_ci return (stride * y) + (x * blocksize); 75bf215546Sopenharmony_ci} 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_cistatic void 78bf215546Sopenharmony_ciref_access_tiled(void *dst, const void *src, 79bf215546Sopenharmony_ci unsigned region_x, unsigned region_y, 80bf215546Sopenharmony_ci unsigned w, unsigned h, 81bf215546Sopenharmony_ci uint32_t dst_stride, 82bf215546Sopenharmony_ci uint32_t src_stride, 83bf215546Sopenharmony_ci enum pipe_format format, 84bf215546Sopenharmony_ci bool dst_is_tiled) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci const struct util_format_description *desc = util_format_description(format);; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci unsigned tilesize = (desc->block.width > 1) ? 4 : 16; 89bf215546Sopenharmony_ci unsigned blocksize = (desc->block.bits / 8); 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci unsigned w_block = w / desc->block.width; 92bf215546Sopenharmony_ci unsigned h_block = h / desc->block.height; 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci unsigned region_x_block = region_x / desc->block.width; 95bf215546Sopenharmony_ci unsigned region_y_block = region_y / desc->block.height; 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci for (unsigned linear_y_block = 0; linear_y_block < h_block; ++linear_y_block) { 98bf215546Sopenharmony_ci for (unsigned linear_x_block = 0; linear_x_block < w_block; ++linear_x_block) { 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci unsigned tiled_x_block = region_x_block + linear_x_block; 101bf215546Sopenharmony_ci unsigned tiled_y_block = region_y_block + linear_y_block; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci unsigned dst_offset, src_offset; 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci if (dst_is_tiled) { 106bf215546Sopenharmony_ci dst_offset = tiled_offset(tiled_x_block, tiled_y_block, dst_stride, tilesize, blocksize); 107bf215546Sopenharmony_ci src_offset = linear_offset(linear_x_block, linear_y_block, src_stride, blocksize); 108bf215546Sopenharmony_ci } else { 109bf215546Sopenharmony_ci dst_offset = linear_offset(linear_x_block, linear_y_block, dst_stride, blocksize); 110bf215546Sopenharmony_ci src_offset = tiled_offset(tiled_x_block, tiled_y_block, src_stride, tilesize, blocksize); 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci memcpy((uint8_t *) dst + dst_offset, 114bf215546Sopenharmony_ci (const uint8_t *) src + src_offset, 115bf215546Sopenharmony_ci desc->block.bits / 8); 116bf215546Sopenharmony_ci } 117bf215546Sopenharmony_ci } 118bf215546Sopenharmony_ci} 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci/* 121bf215546Sopenharmony_ci * Helper to build test cases for tiled texture access. This test suite compares 122bf215546Sopenharmony_ci * the above reference tiling algorithm to the optimized algorithm used in 123bf215546Sopenharmony_ci * production. 124bf215546Sopenharmony_ci */ 125bf215546Sopenharmony_cistatic void 126bf215546Sopenharmony_citest(unsigned width, unsigned height, unsigned rx, unsigned ry, 127bf215546Sopenharmony_ci unsigned rw, unsigned rh, unsigned linear_stride, 128bf215546Sopenharmony_ci enum pipe_format format, bool store) 129bf215546Sopenharmony_ci{ 130bf215546Sopenharmony_ci unsigned bpp = util_format_get_blocksize(format); 131bf215546Sopenharmony_ci unsigned tile_height = util_format_is_compressed(format) ? 4 : 16; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci unsigned tiled_width = ALIGN_POT(width, 16); 134bf215546Sopenharmony_ci unsigned tiled_height = ALIGN_POT(height, 16); 135bf215546Sopenharmony_ci unsigned tiled_stride = tiled_width * tile_height * bpp; 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci unsigned dst_stride = store ? tiled_stride : linear_stride; 138bf215546Sopenharmony_ci unsigned src_stride = store ? linear_stride : tiled_stride; 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci void *tiled = calloc(bpp, tiled_width * tiled_height); 141bf215546Sopenharmony_ci void *linear = calloc(bpp, rw * linear_stride); 142bf215546Sopenharmony_ci void *ref = calloc(bpp, store ? (tiled_width * tiled_height) : (rw * linear_stride)); 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci if (store) { 145bf215546Sopenharmony_ci for (unsigned i = 0; i < bpp * rw * linear_stride; ++i) { 146bf215546Sopenharmony_ci ((uint8_t *) linear)[i] = (i & 0xFF); 147bf215546Sopenharmony_ci } 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci panfrost_store_tiled_image(tiled, linear, rx, ry, rw, rh, 150bf215546Sopenharmony_ci dst_stride, src_stride, format); 151bf215546Sopenharmony_ci } else { 152bf215546Sopenharmony_ci for (unsigned i = 0; i < bpp * tiled_width * tiled_height; ++i) { 153bf215546Sopenharmony_ci ((uint8_t *) tiled)[i] = (i & 0xFF); 154bf215546Sopenharmony_ci } 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci panfrost_load_tiled_image(linear, tiled, rx, ry, rw, rh, 157bf215546Sopenharmony_ci dst_stride, src_stride, format); 158bf215546Sopenharmony_ci } 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci ref_access_tiled(ref, store ? linear : tiled, rx, ry, rw, rh, 161bf215546Sopenharmony_ci dst_stride, src_stride, format, store); 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci if (store) 164bf215546Sopenharmony_ci EXPECT_EQ(memcmp(ref, tiled, bpp * tiled_width * tiled_height), 0); 165bf215546Sopenharmony_ci else 166bf215546Sopenharmony_ci EXPECT_EQ(memcmp(ref, linear, bpp * rw * linear_stride), 0); 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci free(ref); 169bf215546Sopenharmony_ci free(tiled); 170bf215546Sopenharmony_ci free(linear); 171bf215546Sopenharmony_ci} 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_cistatic void 174bf215546Sopenharmony_citest_ldst(unsigned width, unsigned height, unsigned rx, unsigned ry, 175bf215546Sopenharmony_ci unsigned rw, unsigned rh, unsigned linear_stride, 176bf215546Sopenharmony_ci enum pipe_format format) 177bf215546Sopenharmony_ci{ 178bf215546Sopenharmony_ci test(width, height, rx, ry, rw, rh, linear_stride, format, true); 179bf215546Sopenharmony_ci test(width, height, rx, ry, rw, rh, linear_stride, format, false); 180bf215546Sopenharmony_ci} 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ciTEST(UInterleavedTiling, RegulatFormats) 183bf215546Sopenharmony_ci{ 184bf215546Sopenharmony_ci /* 8-bit */ 185bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 23, PIPE_FORMAT_R8_UINT); 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci /* 16-bit */ 188bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 23 * 2, PIPE_FORMAT_R8G8_UINT); 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci /* 24-bit */ 191bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 23 * 3, PIPE_FORMAT_R8G8B8_UINT); 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci /* 32-bit */ 194bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 23 * 4, PIPE_FORMAT_R32_UINT); 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* 48-bit */ 197bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 23 * 6, PIPE_FORMAT_R16G16B16_UINT); 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci /* 64-bit */ 200bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 23 * 8, PIPE_FORMAT_R32G32_UINT); 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci /* 96-bit */ 203bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 23 * 12, PIPE_FORMAT_R32G32B32_UINT); 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci /* 128-bit */ 206bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 23 * 16, PIPE_FORMAT_R32G32B32A32_UINT); 207bf215546Sopenharmony_ci} 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ciTEST(UInterleavedTiling, UnpackedStrides) 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 369 * 1, PIPE_FORMAT_R8_SINT); 212bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 369 * 2, PIPE_FORMAT_R8G8_SINT); 213bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 369 * 3, PIPE_FORMAT_R8G8B8_SINT); 214bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 369 * 4, PIPE_FORMAT_R32_SINT); 215bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 369 * 6, PIPE_FORMAT_R16G16B16_SINT); 216bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 369 * 8, PIPE_FORMAT_R32G32_SINT); 217bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 369 * 12, PIPE_FORMAT_R32G32B32_SINT); 218bf215546Sopenharmony_ci test_ldst(23, 17, 0, 0, 23, 17, 369 * 16, PIPE_FORMAT_R32G32B32A32_SINT); 219bf215546Sopenharmony_ci} 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ciTEST(UInterleavedTiling, PartialAccess) 222bf215546Sopenharmony_ci{ 223bf215546Sopenharmony_ci test_ldst(23, 17, 3, 1, 13, 7, 369 * 1, PIPE_FORMAT_R8_UNORM); 224bf215546Sopenharmony_ci test_ldst(23, 17, 3, 1, 13, 7, 369 * 2, PIPE_FORMAT_R8G8_UNORM); 225bf215546Sopenharmony_ci test_ldst(23, 17, 3, 1, 13, 7, 369 * 3, PIPE_FORMAT_R8G8B8_UNORM); 226bf215546Sopenharmony_ci test_ldst(23, 17, 3, 1, 13, 7, 369 * 4, PIPE_FORMAT_R32_UNORM); 227bf215546Sopenharmony_ci test_ldst(23, 17, 3, 1, 13, 7, 369 * 6, PIPE_FORMAT_R16G16B16_UNORM); 228bf215546Sopenharmony_ci test_ldst(23, 17, 3, 1, 13, 7, 369 * 8, PIPE_FORMAT_R32G32_UNORM); 229bf215546Sopenharmony_ci test_ldst(23, 17, 3, 1, 13, 7, 369 * 12, PIPE_FORMAT_R32G32B32_UNORM); 230bf215546Sopenharmony_ci test_ldst(23, 17, 3, 1, 13, 7, 369 * 16, PIPE_FORMAT_R32G32B32A32_UNORM); 231bf215546Sopenharmony_ci} 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ciTEST(UInterleavedTiling, ETC) 234bf215546Sopenharmony_ci{ 235bf215546Sopenharmony_ci /* Block alignment assumed */ 236bf215546Sopenharmony_ci test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC1_RGB8); 237bf215546Sopenharmony_ci test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RGB8A1); 238bf215546Sopenharmony_ci test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RG11_SNORM); 239bf215546Sopenharmony_ci} 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ciTEST(UInterleavedTiling, PartialETC) 242bf215546Sopenharmony_ci{ 243bf215546Sopenharmony_ci /* Block alignment assumed */ 244bf215546Sopenharmony_ci test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC1_RGB8); 245bf215546Sopenharmony_ci test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RGB8A1); 246bf215546Sopenharmony_ci test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RG11_SNORM); 247bf215546Sopenharmony_ci} 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ciTEST(UInterleavedTiling, DXT) 250bf215546Sopenharmony_ci{ 251bf215546Sopenharmony_ci /* Block alignment assumed */ 252bf215546Sopenharmony_ci test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT1_RGB); 253bf215546Sopenharmony_ci test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT3_RGBA); 254bf215546Sopenharmony_ci test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT5_RGBA); 255bf215546Sopenharmony_ci} 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ciTEST(UInterleavedTiling, PartialDXT) 258bf215546Sopenharmony_ci{ 259bf215546Sopenharmony_ci /* Block alignment assumed */ 260bf215546Sopenharmony_ci test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT1_RGB); 261bf215546Sopenharmony_ci test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT3_RGBA); 262bf215546Sopenharmony_ci test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT5_RGBA); 263bf215546Sopenharmony_ci} 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ciTEST(UInterleavedTiling, ASTC) 266bf215546Sopenharmony_ci{ 267bf215546Sopenharmony_ci /* Block alignment assumed */ 268bf215546Sopenharmony_ci test_ldst(40, 40, 0, 0, 40, 40, 512, PIPE_FORMAT_ASTC_4x4); 269bf215546Sopenharmony_ci test_ldst(50, 40, 0, 0, 50, 40, 512, PIPE_FORMAT_ASTC_5x4); 270bf215546Sopenharmony_ci test_ldst(50, 50, 0, 0, 50, 50, 512, PIPE_FORMAT_ASTC_5x5); 271bf215546Sopenharmony_ci} 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ciTEST(UInterleavedTiling, PartialASTC) 274bf215546Sopenharmony_ci{ 275bf215546Sopenharmony_ci /* Block alignment assumed */ 276bf215546Sopenharmony_ci test_ldst(40, 40, 4, 4, 16, 8, 512, PIPE_FORMAT_ASTC_4x4); 277bf215546Sopenharmony_ci test_ldst(50, 40, 5, 4, 10, 8, 512, PIPE_FORMAT_ASTC_5x4); 278bf215546Sopenharmony_ci test_ldst(50, 50, 5, 5, 10, 10, 512, PIPE_FORMAT_ASTC_5x5); 279bf215546Sopenharmony_ci} 280