1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be> 3bf215546Sopenharmony_ci * Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io> 4bf215546Sopenharmony_ci * Copyright (c) 2018 Vasily Khoruzhick <anarsoul@gmail.com> 5bf215546Sopenharmony_ci * Copyright (c) 2019 Collabora, Ltd. 6bf215546Sopenharmony_ci * 7bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 8bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 9bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 10bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license, 11bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 12bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 16bf215546Sopenharmony_ci * of the Software. 17bf215546Sopenharmony_ci * 18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "pan_tiling.h" 29bf215546Sopenharmony_ci#include <stdbool.h> 30bf215546Sopenharmony_ci#include "util/macros.h" 31bf215546Sopenharmony_ci#include "util/bitscan.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci/* 34bf215546Sopenharmony_ci * This file implements software encode/decode of u-interleaved textures. 35bf215546Sopenharmony_ci * See docs/drivers/panfrost.rst for details on the format. 36bf215546Sopenharmony_ci * 37bf215546Sopenharmony_ci * The tricky bit is ordering along the space-filling curve: 38bf215546Sopenharmony_ci * 39bf215546Sopenharmony_ci * | y3 | (x3 ^ y3) | y2 | (y2 ^ x2) | y1 | (y1 ^ x1) | y0 | (y0 ^ x0) | 40bf215546Sopenharmony_ci * 41bf215546Sopenharmony_ci * While interleaving bits is trivial in hardware, it is nontrivial in software. 42bf215546Sopenharmony_ci * The trick is to divide the pattern up: 43bf215546Sopenharmony_ci * 44bf215546Sopenharmony_ci * | y3 | y3 | y2 | y2 | y1 | y1 | y0 | y0 | 45bf215546Sopenharmony_ci * ^ | 0 | x3 | 0 | x2 | 0 | x1 | 0 | x0 | 46bf215546Sopenharmony_ci * 47bf215546Sopenharmony_ci * That is, duplicate the bits of the Y and space out the bits of the X. The top 48bf215546Sopenharmony_ci * line is a function only of Y, so it can be calculated once per row and stored 49bf215546Sopenharmony_ci * in a register. The bottom line is simply X with the bits spaced out. Spacing 50bf215546Sopenharmony_ci * out the X is easy enough with a LUT, or by subtracting+ANDing the mask 51bf215546Sopenharmony_ci * pattern (abusing carry bits). 52bf215546Sopenharmony_ci * 53bf215546Sopenharmony_ci */ 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci/* Given the lower 4-bits of the Y coordinate, we would like to 56bf215546Sopenharmony_ci * duplicate every bit over. So instead of 0b1010, we would like 57bf215546Sopenharmony_ci * 0b11001100. The idea is that for the bits in the solely Y place, we 58bf215546Sopenharmony_ci * get a Y place, and the bits in the XOR place *also* get a Y. */ 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ciconst uint32_t bit_duplication[16] = { 61bf215546Sopenharmony_ci 0b00000000, 62bf215546Sopenharmony_ci 0b00000011, 63bf215546Sopenharmony_ci 0b00001100, 64bf215546Sopenharmony_ci 0b00001111, 65bf215546Sopenharmony_ci 0b00110000, 66bf215546Sopenharmony_ci 0b00110011, 67bf215546Sopenharmony_ci 0b00111100, 68bf215546Sopenharmony_ci 0b00111111, 69bf215546Sopenharmony_ci 0b11000000, 70bf215546Sopenharmony_ci 0b11000011, 71bf215546Sopenharmony_ci 0b11001100, 72bf215546Sopenharmony_ci 0b11001111, 73bf215546Sopenharmony_ci 0b11110000, 74bf215546Sopenharmony_ci 0b11110011, 75bf215546Sopenharmony_ci 0b11111100, 76bf215546Sopenharmony_ci 0b11111111, 77bf215546Sopenharmony_ci}; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci/* Space the bits out of a 4-bit nibble */ 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ciconst unsigned space_4[16] = { 82bf215546Sopenharmony_ci 0b0000000, 83bf215546Sopenharmony_ci 0b0000001, 84bf215546Sopenharmony_ci 0b0000100, 85bf215546Sopenharmony_ci 0b0000101, 86bf215546Sopenharmony_ci 0b0010000, 87bf215546Sopenharmony_ci 0b0010001, 88bf215546Sopenharmony_ci 0b0010100, 89bf215546Sopenharmony_ci 0b0010101, 90bf215546Sopenharmony_ci 0b1000000, 91bf215546Sopenharmony_ci 0b1000001, 92bf215546Sopenharmony_ci 0b1000100, 93bf215546Sopenharmony_ci 0b1000101, 94bf215546Sopenharmony_ci 0b1010000, 95bf215546Sopenharmony_ci 0b1010001, 96bf215546Sopenharmony_ci 0b1010100, 97bf215546Sopenharmony_ci 0b1010101 98bf215546Sopenharmony_ci}; 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci/* The scheme uses 16x16 tiles */ 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci#define TILE_WIDTH 16 103bf215546Sopenharmony_ci#define TILE_HEIGHT 16 104bf215546Sopenharmony_ci#define PIXELS_PER_TILE (TILE_WIDTH * TILE_HEIGHT) 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci/* We need a 128-bit type for idiomatically tiling bpp128 formats. The type must 107bf215546Sopenharmony_ci * only support copies and sizeof, so emulating with a packed structure works 108bf215546Sopenharmony_ci * well enough, but if there's a native 128-bit type we may we well prefer 109bf215546Sopenharmony_ci * that. */ 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci#ifdef __SIZEOF_INT128__ 112bf215546Sopenharmony_citypedef __uint128_t pan_uint128_t; 113bf215546Sopenharmony_ci#else 114bf215546Sopenharmony_citypedef struct { 115bf215546Sopenharmony_ci uint64_t lo; 116bf215546Sopenharmony_ci uint64_t hi; 117bf215546Sopenharmony_ci} __attribute__((packed)) pan_uint128_t; 118bf215546Sopenharmony_ci#endif 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_citypedef struct { 121bf215546Sopenharmony_ci uint16_t lo; 122bf215546Sopenharmony_ci uint8_t hi; 123bf215546Sopenharmony_ci} __attribute__((packed)) pan_uint24_t; 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_citypedef struct { 126bf215546Sopenharmony_ci uint32_t lo; 127bf215546Sopenharmony_ci uint16_t hi; 128bf215546Sopenharmony_ci} __attribute__((packed)) pan_uint48_t; 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_citypedef struct { 131bf215546Sopenharmony_ci uint64_t lo; 132bf215546Sopenharmony_ci uint32_t hi; 133bf215546Sopenharmony_ci} __attribute__((packed)) pan_uint96_t; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci/* Optimized routine to tile an aligned (w & 0xF == 0) texture. Explanation: 136bf215546Sopenharmony_ci * 137bf215546Sopenharmony_ci * dest_start precomputes the offset to the beginning of the first horizontal 138bf215546Sopenharmony_ci * tile we're writing to, knowing that x is 16-aligned. Tiles themselves are 139bf215546Sopenharmony_ci * stored linearly, so we get the X tile number by shifting and then multiply 140bf215546Sopenharmony_ci * by the bytes per tile . 141bf215546Sopenharmony_ci * 142bf215546Sopenharmony_ci * We iterate across the pixels we're trying to store in source-order. For each 143bf215546Sopenharmony_ci * row in the destination image, we figure out which row of 16x16 block we're 144bf215546Sopenharmony_ci * in, by slicing off the lower 4-bits (block_y). 145bf215546Sopenharmony_ci * 146bf215546Sopenharmony_ci * dest then precomputes the location of the top-left corner of the block the 147bf215546Sopenharmony_ci * row starts in. In pixel coordinates (where the origin is the top-left), 148bf215546Sopenharmony_ci * (block_y, 0) is the top-left corner of the leftmost tile in this row. While 149bf215546Sopenharmony_ci * pixels are reordered within a block, the blocks themselves are stored 150bf215546Sopenharmony_ci * linearly, so multiplying block_y by the pixel stride of the destination 151bf215546Sopenharmony_ci * image equals the byte offset of that top-left corner of the block this row 152bf215546Sopenharmony_ci * is in. 153bf215546Sopenharmony_ci * 154bf215546Sopenharmony_ci * On the other hand, the source is linear so we compute the locations of the 155bf215546Sopenharmony_ci * start and end of the row in the source by a simple linear addressing. 156bf215546Sopenharmony_ci * 157bf215546Sopenharmony_ci * For indexing within the tile, we need to XOR with the [y3 y3 y2 y2 y1 y1 y0 158bf215546Sopenharmony_ci * y0] value. Since this is constant across a row, we look it up per-row and 159bf215546Sopenharmony_ci * store in expanded_y. 160bf215546Sopenharmony_ci * 161bf215546Sopenharmony_ci * Finally, we iterate each row in source order. In the outer loop, we iterate 162bf215546Sopenharmony_ci * each 16 pixel tile. Within each tile, we iterate the 16 pixels (this should 163bf215546Sopenharmony_ci * be unrolled), calculating the index within the tile and writing. 164bf215546Sopenharmony_ci */ 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci#define TILED_ACCESS_TYPE(pixel_t, shift) \ 167bf215546Sopenharmony_cistatic ALWAYS_INLINE void \ 168bf215546Sopenharmony_cipanfrost_access_tiled_image_##pixel_t \ 169bf215546Sopenharmony_ci (void *dst, void *src, \ 170bf215546Sopenharmony_ci uint16_t sx, uint16_t sy, \ 171bf215546Sopenharmony_ci uint16_t w, uint16_t h, \ 172bf215546Sopenharmony_ci uint32_t dst_stride, \ 173bf215546Sopenharmony_ci uint32_t src_stride, \ 174bf215546Sopenharmony_ci bool is_store) \ 175bf215546Sopenharmony_ci{ \ 176bf215546Sopenharmony_ci uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * sizeof(pixel_t)); \ 177bf215546Sopenharmony_ci for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \ 178bf215546Sopenharmony_ci uint8_t *dest = (uint8_t *) (dest_start + ((y >> 4) * dst_stride)); \ 179bf215546Sopenharmony_ci pixel_t *source = src + (src_y * src_stride); \ 180bf215546Sopenharmony_ci pixel_t *source_end = source + w; \ 181bf215546Sopenharmony_ci unsigned expanded_y = bit_duplication[y & 0xF] << shift; \ 182bf215546Sopenharmony_ci for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) { \ 183bf215546Sopenharmony_ci for (uint8_t i = 0; i < 16; ++i) { \ 184bf215546Sopenharmony_ci unsigned index = expanded_y ^ (space_4[i] << shift); \ 185bf215546Sopenharmony_ci if (is_store) \ 186bf215546Sopenharmony_ci *((pixel_t *) (dest + index)) = *(source++); \ 187bf215546Sopenharmony_ci else \ 188bf215546Sopenharmony_ci *(source++) = *((pixel_t *) (dest + index)); \ 189bf215546Sopenharmony_ci } \ 190bf215546Sopenharmony_ci } \ 191bf215546Sopenharmony_ci } \ 192bf215546Sopenharmony_ci} \ 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ciTILED_ACCESS_TYPE(uint8_t, 0); 195bf215546Sopenharmony_ciTILED_ACCESS_TYPE(uint16_t, 1); 196bf215546Sopenharmony_ciTILED_ACCESS_TYPE(uint32_t, 2); 197bf215546Sopenharmony_ciTILED_ACCESS_TYPE(uint64_t, 3); 198bf215546Sopenharmony_ciTILED_ACCESS_TYPE(pan_uint128_t, 4); 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci#define TILED_UNALIGNED_TYPE(pixel_t, is_store, tile_shift) { \ 201bf215546Sopenharmony_ci const unsigned mask = (1 << tile_shift) - 1; \ 202bf215546Sopenharmony_ci for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \ 203bf215546Sopenharmony_ci unsigned block_start_s = (y >> tile_shift) * dst_stride; \ 204bf215546Sopenharmony_ci unsigned source_start = src_y * src_stride; \ 205bf215546Sopenharmony_ci unsigned expanded_y = bit_duplication[y & mask]; \ 206bf215546Sopenharmony_ci \ 207bf215546Sopenharmony_ci for (int x = sx, src_x = 0; src_x < w; ++x, ++src_x) { \ 208bf215546Sopenharmony_ci unsigned block_x_s = (x >> tile_shift) * (1 << (tile_shift * 2)); \ 209bf215546Sopenharmony_ci unsigned index = expanded_y ^ space_4[x & mask]; \ 210bf215546Sopenharmony_ci uint8_t *source = src + source_start + sizeof(pixel_t) * src_x; \ 211bf215546Sopenharmony_ci uint8_t *dest = dst + block_start_s + sizeof(pixel_t) * (block_x_s + index); \ 212bf215546Sopenharmony_ci \ 213bf215546Sopenharmony_ci pixel_t *outp = (pixel_t *) (is_store ? dest : source); \ 214bf215546Sopenharmony_ci pixel_t *inp = (pixel_t *) (is_store ? source : dest); \ 215bf215546Sopenharmony_ci *outp = *inp; \ 216bf215546Sopenharmony_ci } \ 217bf215546Sopenharmony_ci } \ 218bf215546Sopenharmony_ci} 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci#define TILED_UNALIGNED_TYPES(store, shift) { \ 221bf215546Sopenharmony_ci if (bpp == 8) \ 222bf215546Sopenharmony_ci TILED_UNALIGNED_TYPE(uint8_t, store, shift) \ 223bf215546Sopenharmony_ci else if (bpp == 16) \ 224bf215546Sopenharmony_ci TILED_UNALIGNED_TYPE(uint16_t, store, shift) \ 225bf215546Sopenharmony_ci else if (bpp == 24) \ 226bf215546Sopenharmony_ci TILED_UNALIGNED_TYPE(pan_uint24_t, store, shift) \ 227bf215546Sopenharmony_ci else if (bpp == 32) \ 228bf215546Sopenharmony_ci TILED_UNALIGNED_TYPE(uint32_t, store, shift) \ 229bf215546Sopenharmony_ci else if (bpp == 48) \ 230bf215546Sopenharmony_ci TILED_UNALIGNED_TYPE(pan_uint48_t, store, shift) \ 231bf215546Sopenharmony_ci else if (bpp == 64) \ 232bf215546Sopenharmony_ci TILED_UNALIGNED_TYPE(uint64_t, store, shift) \ 233bf215546Sopenharmony_ci else if (bpp == 96) \ 234bf215546Sopenharmony_ci TILED_UNALIGNED_TYPE(pan_uint96_t, store, shift) \ 235bf215546Sopenharmony_ci else if (bpp == 128) \ 236bf215546Sopenharmony_ci TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift) \ 237bf215546Sopenharmony_ci} 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci/* 240bf215546Sopenharmony_ci * Perform a generic access to a tiled image with a given format. This works 241bf215546Sopenharmony_ci * even for block-compressed images on entire blocks at a time. sx/sy/w/h are 242bf215546Sopenharmony_ci * specified in pixels, not blocks, but our internal routines work in blocks, 243bf215546Sopenharmony_ci * so we divide here. Alignment is assumed. 244bf215546Sopenharmony_ci */ 245bf215546Sopenharmony_cistatic void 246bf215546Sopenharmony_cipanfrost_access_tiled_image_generic(void *dst, void *src, 247bf215546Sopenharmony_ci unsigned sx, unsigned sy, 248bf215546Sopenharmony_ci unsigned w, unsigned h, 249bf215546Sopenharmony_ci uint32_t dst_stride, 250bf215546Sopenharmony_ci uint32_t src_stride, 251bf215546Sopenharmony_ci const struct util_format_description *desc, 252bf215546Sopenharmony_ci bool _is_store) 253bf215546Sopenharmony_ci{ 254bf215546Sopenharmony_ci unsigned bpp = desc->block.bits; 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci /* Convert units */ 257bf215546Sopenharmony_ci sx /= desc->block.width; 258bf215546Sopenharmony_ci sy /= desc->block.height; 259bf215546Sopenharmony_ci w = DIV_ROUND_UP(w, desc->block.width); 260bf215546Sopenharmony_ci h = DIV_ROUND_UP(h, desc->block.height); 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci if (desc->block.width > 1) { 263bf215546Sopenharmony_ci if (_is_store) 264bf215546Sopenharmony_ci TILED_UNALIGNED_TYPES(true, 2) 265bf215546Sopenharmony_ci else 266bf215546Sopenharmony_ci TILED_UNALIGNED_TYPES(false, 2) 267bf215546Sopenharmony_ci } else { 268bf215546Sopenharmony_ci if (_is_store) 269bf215546Sopenharmony_ci TILED_UNALIGNED_TYPES(true, 4) 270bf215546Sopenharmony_ci else 271bf215546Sopenharmony_ci TILED_UNALIGNED_TYPES(false, 4) 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci} 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci#define OFFSET(src, _x, _y) (void *) ((uint8_t *) src + ((_y) - orig_y) * src_stride + (((_x) - orig_x) * (bpp / 8))) 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_cistatic ALWAYS_INLINE void 278bf215546Sopenharmony_cipanfrost_access_tiled_image(void *dst, void *src, 279bf215546Sopenharmony_ci unsigned x, unsigned y, 280bf215546Sopenharmony_ci unsigned w, unsigned h, 281bf215546Sopenharmony_ci uint32_t dst_stride, 282bf215546Sopenharmony_ci uint32_t src_stride, 283bf215546Sopenharmony_ci enum pipe_format format, 284bf215546Sopenharmony_ci bool is_store) 285bf215546Sopenharmony_ci{ 286bf215546Sopenharmony_ci const struct util_format_description *desc = util_format_description(format); 287bf215546Sopenharmony_ci unsigned bpp = desc->block.bits; 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci /* Our optimized routines cannot handle unaligned blocks (without depending 290bf215546Sopenharmony_ci * on platform-specific behaviour), and there is no good reason to do so. If 291bf215546Sopenharmony_ci * these assertions fail, there is either a driver bug or a non-portable unit 292bf215546Sopenharmony_ci * test. 293bf215546Sopenharmony_ci */ 294bf215546Sopenharmony_ci assert((dst_stride % (bpp / 8)) == 0 && "unaligned destination stride"); 295bf215546Sopenharmony_ci assert((src_stride % (bpp / 8)) == 0 && "unaligned source stride"); 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci if (desc->block.width > 1 || !util_is_power_of_two_nonzero(desc->block.bits)) { 298bf215546Sopenharmony_ci panfrost_access_tiled_image_generic(dst, (void *) src, 299bf215546Sopenharmony_ci x, y, w, h, 300bf215546Sopenharmony_ci dst_stride, src_stride, desc, is_store); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci return; 303bf215546Sopenharmony_ci } 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci unsigned first_full_tile_x = DIV_ROUND_UP(x, TILE_WIDTH) * TILE_WIDTH; 306bf215546Sopenharmony_ci unsigned first_full_tile_y = DIV_ROUND_UP(y, TILE_HEIGHT) * TILE_HEIGHT; 307bf215546Sopenharmony_ci unsigned last_full_tile_x = ((x + w) / TILE_WIDTH) * TILE_WIDTH; 308bf215546Sopenharmony_ci unsigned last_full_tile_y = ((y + h) / TILE_HEIGHT) * TILE_HEIGHT; 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci /* First, tile the top portion */ 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci unsigned orig_x = x, orig_y = y; 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci if (first_full_tile_y != y) { 315bf215546Sopenharmony_ci unsigned dist = MIN2(first_full_tile_y - y, h); 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), 318bf215546Sopenharmony_ci x, y, w, dist, 319bf215546Sopenharmony_ci dst_stride, src_stride, desc, is_store); 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci if (dist == h) 322bf215546Sopenharmony_ci return; 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci y += dist; 325bf215546Sopenharmony_ci h -= dist; 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci /* Next, the bottom portion */ 329bf215546Sopenharmony_ci if (last_full_tile_y != (y + h)) { 330bf215546Sopenharmony_ci unsigned dist = (y + h) - last_full_tile_y; 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci panfrost_access_tiled_image_generic(dst, OFFSET(src, x, last_full_tile_y), 333bf215546Sopenharmony_ci x, last_full_tile_y, w, dist, 334bf215546Sopenharmony_ci dst_stride, src_stride, desc, is_store); 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci h -= dist; 337bf215546Sopenharmony_ci } 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci /* The left portion */ 340bf215546Sopenharmony_ci if (first_full_tile_x != x) { 341bf215546Sopenharmony_ci unsigned dist = MIN2(first_full_tile_x - x, w); 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), 344bf215546Sopenharmony_ci x, y, dist, h, 345bf215546Sopenharmony_ci dst_stride, src_stride, desc, is_store); 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci if (dist == w) 348bf215546Sopenharmony_ci return; 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci x += dist; 351bf215546Sopenharmony_ci w -= dist; 352bf215546Sopenharmony_ci } 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci /* Finally, the right portion */ 355bf215546Sopenharmony_ci if (last_full_tile_x != (x + w)) { 356bf215546Sopenharmony_ci unsigned dist = (x + w) - last_full_tile_x; 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci panfrost_access_tiled_image_generic(dst, OFFSET(src, last_full_tile_x, y), 359bf215546Sopenharmony_ci last_full_tile_x, y, dist, h, 360bf215546Sopenharmony_ci dst_stride, src_stride, desc, is_store); 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci w -= dist; 363bf215546Sopenharmony_ci } 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci if (bpp == 8) 366bf215546Sopenharmony_ci panfrost_access_tiled_image_uint8_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 367bf215546Sopenharmony_ci else if (bpp == 16) 368bf215546Sopenharmony_ci panfrost_access_tiled_image_uint16_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 369bf215546Sopenharmony_ci else if (bpp == 32) 370bf215546Sopenharmony_ci panfrost_access_tiled_image_uint32_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 371bf215546Sopenharmony_ci else if (bpp == 64) 372bf215546Sopenharmony_ci panfrost_access_tiled_image_uint64_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 373bf215546Sopenharmony_ci else if (bpp == 128) 374bf215546Sopenharmony_ci panfrost_access_tiled_image_pan_uint128_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 375bf215546Sopenharmony_ci} 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci/** 378bf215546Sopenharmony_ci * Access a tiled image (load or store). Note: the region of interest (x, y, w, 379bf215546Sopenharmony_ci * h) is specified in pixels, not blocks. It is expected that these quantities 380bf215546Sopenharmony_ci * are aligned to the block size. 381bf215546Sopenharmony_ci */ 382bf215546Sopenharmony_civoid 383bf215546Sopenharmony_cipanfrost_store_tiled_image(void *dst, const void *src, 384bf215546Sopenharmony_ci unsigned x, unsigned y, 385bf215546Sopenharmony_ci unsigned w, unsigned h, 386bf215546Sopenharmony_ci uint32_t dst_stride, 387bf215546Sopenharmony_ci uint32_t src_stride, 388bf215546Sopenharmony_ci enum pipe_format format) 389bf215546Sopenharmony_ci{ 390bf215546Sopenharmony_ci panfrost_access_tiled_image(dst, (void *) src, 391bf215546Sopenharmony_ci x, y, w, h, 392bf215546Sopenharmony_ci dst_stride, src_stride, format, true); 393bf215546Sopenharmony_ci} 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_civoid 396bf215546Sopenharmony_cipanfrost_load_tiled_image(void *dst, const void *src, 397bf215546Sopenharmony_ci unsigned x, unsigned y, 398bf215546Sopenharmony_ci unsigned w, unsigned h, 399bf215546Sopenharmony_ci uint32_t dst_stride, 400bf215546Sopenharmony_ci uint32_t src_stride, 401bf215546Sopenharmony_ci enum pipe_format format) 402bf215546Sopenharmony_ci{ 403bf215546Sopenharmony_ci panfrost_access_tiled_image((void *) src, dst, 404bf215546Sopenharmony_ci x, y, w, h, 405bf215546Sopenharmony_ci src_stride, dst_stride, format, false); 406bf215546Sopenharmony_ci} 407