1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be>
3bf215546Sopenharmony_ci * Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
4bf215546Sopenharmony_ci * Copyright (c) 2018 Vasily Khoruzhick <anarsoul@gmail.com>
5bf215546Sopenharmony_ci * Copyright (c) 2019 Collabora, Ltd.
6bf215546Sopenharmony_ci *
7bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
8bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
9bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
10bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license,
11bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
12bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
16bf215546Sopenharmony_ci * of the Software.
17bf215546Sopenharmony_ci *
18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "pan_tiling.h"
29bf215546Sopenharmony_ci#include <stdbool.h>
30bf215546Sopenharmony_ci#include "util/macros.h"
31bf215546Sopenharmony_ci#include "util/bitscan.h"
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci/*
34bf215546Sopenharmony_ci * This file implements software encode/decode of u-interleaved textures.
35bf215546Sopenharmony_ci * See docs/drivers/panfrost.rst for details on the format.
36bf215546Sopenharmony_ci *
37bf215546Sopenharmony_ci * The tricky bit is ordering along the space-filling curve:
38bf215546Sopenharmony_ci *
39bf215546Sopenharmony_ci *    | y3 | (x3 ^ y3) | y2 | (y2 ^ x2) | y1 | (y1 ^ x1) | y0 | (y0 ^ x0) |
40bf215546Sopenharmony_ci *
41bf215546Sopenharmony_ci * While interleaving bits is trivial in hardware, it is nontrivial in software.
42bf215546Sopenharmony_ci * The trick is to divide the pattern up:
43bf215546Sopenharmony_ci *
44bf215546Sopenharmony_ci *    | y3 | y3 | y2 | y2 | y1 | y1 | y0 | y0 |
45bf215546Sopenharmony_ci *  ^ |  0 | x3 |  0 | x2 |  0 | x1 |  0 | x0 |
46bf215546Sopenharmony_ci *
47bf215546Sopenharmony_ci * That is, duplicate the bits of the Y and space out the bits of the X. The top
48bf215546Sopenharmony_ci * line is a function only of Y, so it can be calculated once per row and stored
49bf215546Sopenharmony_ci * in a register. The bottom line is simply X with the bits spaced out. Spacing
50bf215546Sopenharmony_ci * out the X is easy enough with a LUT, or by subtracting+ANDing the mask
51bf215546Sopenharmony_ci * pattern (abusing carry bits).
52bf215546Sopenharmony_ci *
53bf215546Sopenharmony_ci */
54bf215546Sopenharmony_ci
/* Lookup table that doubles every bit of a 4-bit value: bit n of the index is
 * copied to bits 2n and 2n+1 of the entry, so 0b1010 maps to 0b11001100. It is
 * indexed with the low nibble of the Y coordinate; the doubled bits provide Y
 * both for the pure-Y positions and for the (Y ^ X) positions of the index
 * within a tile. */

const uint32_t bit_duplication[16] = {
   0x00, 0x03, 0x0C, 0x0F,
   0x30, 0x33, 0x3C, 0x3F,
   0xC0, 0xC3, 0xCC, 0xCF,
   0xF0, 0xF3, 0xFC, 0xFF,
};
78bf215546Sopenharmony_ci
/* Lookup table that spreads a 4-bit value over the even bit positions: bit n
 * of the index moves to bit 2n of the entry and the odd bits stay clear. */

const unsigned space_4[16] = {
   0x00, 0x01, 0x04, 0x05,
   0x10, 0x11, 0x14, 0x15,
   0x40, 0x41, 0x44, 0x45,
   0x50, 0x51, 0x54, 0x55
};
99bf215546Sopenharmony_ci
/* The scheme uses 16x16 tiles */

#define TILE_WIDTH 16
#define TILE_HEIGHT 16
/* Number of pixels covered by one tile */
#define PIXELS_PER_TILE (TILE_WIDTH * TILE_HEIGHT)
105bf215546Sopenharmony_ci
/* bpp128 formats are tiled through a 128-bit pixel type. Only assignment and
 * sizeof are ever applied to it, so a packed pair of 64-bit halves is a
 * sufficient stand-in; when the compiler offers a native 128-bit integer we
 * may as well prefer that. */

#ifndef __SIZEOF_INT128__
typedef struct __attribute__((packed)) {
   uint64_t lo;
   uint64_t hi;
} pan_uint128_t;
#else
typedef __uint128_t pan_uint128_t;
#endif
119bf215546Sopenharmony_ci
/* 3-byte pixel used for copy/sizeof only; packed so that no padding is added */
typedef struct __attribute__((packed)) {
   uint16_t lo;
   uint8_t hi;
} pan_uint24_t;
124bf215546Sopenharmony_ci
/* 6-byte pixel used for copy/sizeof only; packed so that no padding is added */
typedef struct __attribute__((packed)) {
   uint32_t lo;
   uint16_t hi;
} pan_uint48_t;
129bf215546Sopenharmony_ci
/* 12-byte pixel used for copy/sizeof only; packed so that no padding is added */
typedef struct __attribute__((packed)) {
   uint64_t lo;
   uint32_t hi;
} pan_uint96_t;
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci/* Optimized routine to tile an aligned (w & 0xF == 0) texture. Explanation:
136bf215546Sopenharmony_ci *
137bf215546Sopenharmony_ci * dest_start precomputes the offset to the beginning of the first horizontal
138bf215546Sopenharmony_ci * tile we're writing to, knowing that x is 16-aligned. Tiles themselves are
139bf215546Sopenharmony_ci * stored linearly, so we get the X tile number by shifting and then multiply
 * by the bytes per tile.
141bf215546Sopenharmony_ci *
142bf215546Sopenharmony_ci * We iterate across the pixels we're trying to store in source-order. For each
143bf215546Sopenharmony_ci * row in the destination image, we figure out which row of 16x16 block we're
144bf215546Sopenharmony_ci * in, by slicing off the lower 4-bits (block_y).
145bf215546Sopenharmony_ci *
146bf215546Sopenharmony_ci * dest then precomputes the location of the top-left corner of the block the
147bf215546Sopenharmony_ci * row starts in. In pixel coordinates (where the origin is the top-left),
148bf215546Sopenharmony_ci * (block_y, 0) is the top-left corner of the leftmost tile in this row.  While
149bf215546Sopenharmony_ci * pixels are reordered within a block, the blocks themselves are stored
150bf215546Sopenharmony_ci * linearly, so multiplying block_y by the pixel stride of the destination
151bf215546Sopenharmony_ci * image equals the byte offset of that top-left corner of the block this row
152bf215546Sopenharmony_ci * is in.
153bf215546Sopenharmony_ci *
154bf215546Sopenharmony_ci * On the other hand, the source is linear so we compute the locations of the
155bf215546Sopenharmony_ci * start and end of the row in the source by a simple linear addressing.
156bf215546Sopenharmony_ci *
157bf215546Sopenharmony_ci * For indexing within the tile, we need to XOR with the [y3 y3 y2 y2 y1 y1 y0
158bf215546Sopenharmony_ci * y0] value. Since this is constant across a row, we look it up per-row and
159bf215546Sopenharmony_ci * store in expanded_y.
160bf215546Sopenharmony_ci *
161bf215546Sopenharmony_ci * Finally, we iterate each row in source order. In the outer loop, we iterate
162bf215546Sopenharmony_ci * each 16 pixel tile. Within each tile, we iterate the 16 pixels (this should
163bf215546Sopenharmony_ci * be unrolled), calculating the index within the tile and writing.
164bf215546Sopenharmony_ci */
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci#define TILED_ACCESS_TYPE(pixel_t, shift) \
167bf215546Sopenharmony_cistatic ALWAYS_INLINE void \
168bf215546Sopenharmony_cipanfrost_access_tiled_image_##pixel_t \
169bf215546Sopenharmony_ci                              (void *dst, void *src, \
170bf215546Sopenharmony_ci                               uint16_t sx, uint16_t sy, \
171bf215546Sopenharmony_ci                               uint16_t w, uint16_t h, \
172bf215546Sopenharmony_ci                               uint32_t dst_stride, \
173bf215546Sopenharmony_ci                               uint32_t src_stride, \
174bf215546Sopenharmony_ci                               bool is_store) \
175bf215546Sopenharmony_ci{ \
176bf215546Sopenharmony_ci   uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * sizeof(pixel_t)); \
177bf215546Sopenharmony_ci   for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \
178bf215546Sopenharmony_ci      uint8_t *dest = (uint8_t *) (dest_start + ((y >> 4) * dst_stride)); \
179bf215546Sopenharmony_ci      pixel_t *source = src + (src_y * src_stride); \
180bf215546Sopenharmony_ci      pixel_t *source_end = source + w; \
181bf215546Sopenharmony_ci      unsigned expanded_y = bit_duplication[y & 0xF] << shift; \
182bf215546Sopenharmony_ci      for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) { \
183bf215546Sopenharmony_ci         for (uint8_t i = 0; i < 16; ++i) { \
184bf215546Sopenharmony_ci            unsigned index = expanded_y ^ (space_4[i] << shift); \
185bf215546Sopenharmony_ci            if (is_store) \
186bf215546Sopenharmony_ci                *((pixel_t *) (dest + index)) = *(source++); \
187bf215546Sopenharmony_ci            else \
188bf215546Sopenharmony_ci                *(source++) = *((pixel_t *) (dest + index)); \
189bf215546Sopenharmony_ci         } \
190bf215546Sopenharmony_ci      } \
191bf215546Sopenharmony_ci   } \
192bf215546Sopenharmony_ci} \
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ciTILED_ACCESS_TYPE(uint8_t, 0);
195bf215546Sopenharmony_ciTILED_ACCESS_TYPE(uint16_t, 1);
196bf215546Sopenharmony_ciTILED_ACCESS_TYPE(uint32_t, 2);
197bf215546Sopenharmony_ciTILED_ACCESS_TYPE(uint64_t, 3);
198bf215546Sopenharmony_ciTILED_ACCESS_TYPE(pan_uint128_t, 4);
199bf215546Sopenharmony_ci
/*
 * Statement-block macro that copies a region pixel by pixel between a tiled
 * image and a linear image, with no alignment requirement on the region.
 *
 *   pixel_t     type whose size is one pixel (or one block)
 *   is_store    nonzero copies linear -> tiled, zero copies tiled -> linear
 *   tile_shift  log2 of the tile edge; coordinates are masked with
 *               (1 << tile_shift) - 1 before indexing the lookup tables
 *
 * It reads dst (tiled base), src (first linear pixel), sx, sy, w, h,
 * dst_stride (bytes per row of tiles) and src_stride (bytes per linear row)
 * from the scope it is expanded in. The base pointers are cast to uint8_t *
 * up front because arithmetic on void * is a compiler extension.
 */
#define TILED_UNALIGNED_TYPE(pixel_t, is_store, tile_shift) { \
   const unsigned mask = (1 << (tile_shift)) - 1; \
   uint8_t *tiled_base = (uint8_t *) dst; \
   uint8_t *linear_base = (uint8_t *) src; \
   for (unsigned y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \
      /* Byte offsets of the current row of tiles and of the linear row */ \
      unsigned block_start_s = (y >> (tile_shift)) * dst_stride; \
      unsigned source_start = src_y * src_stride; \
      unsigned expanded_y = bit_duplication[y & mask]; \
 \
      for (unsigned x = sx, src_x = 0; src_x < w; ++x, ++src_x) { \
         /* Pixel index of the tile start, then the index within the tile */ \
         unsigned block_x_s = (x >> (tile_shift)) * (1 << ((tile_shift) * 2)); \
         unsigned index = expanded_y ^ space_4[x & mask]; \
         uint8_t *source = linear_base + source_start + sizeof(pixel_t) * src_x; \
         uint8_t *dest = tiled_base + block_start_s + sizeof(pixel_t) * (block_x_s + index); \
 \
         pixel_t *outp = (pixel_t *) ((is_store) ? dest : source); \
         pixel_t *inp = (pixel_t *) ((is_store) ? source : dest); \
         *outp = *inp; \
      } \
   } \
}
219bf215546Sopenharmony_ci
/*
 * Statement-block macro that expands TILED_UNALIGNED_TYPE with the pixel type
 * matching bpp (bits per block, read from the expansion scope). store and
 * shift are forwarded unchanged. A bpp with no matching case does nothing.
 */
#define TILED_UNALIGNED_TYPES(store, shift) { \
   switch (bpp) { \
   case 8: \
      TILED_UNALIGNED_TYPE(uint8_t, store, shift) \
      break; \
   case 16: \
      TILED_UNALIGNED_TYPE(uint16_t, store, shift) \
      break; \
   case 24: \
      TILED_UNALIGNED_TYPE(pan_uint24_t, store, shift) \
      break; \
   case 32: \
      TILED_UNALIGNED_TYPE(uint32_t, store, shift) \
      break; \
   case 48: \
      TILED_UNALIGNED_TYPE(pan_uint48_t, store, shift) \
      break; \
   case 64: \
      TILED_UNALIGNED_TYPE(uint64_t, store, shift) \
      break; \
   case 96: \
      TILED_UNALIGNED_TYPE(pan_uint96_t, store, shift) \
      break; \
   case 128: \
      TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift) \
      break; \
   default: \
      break; \
   } \
}
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci/*
240bf215546Sopenharmony_ci * Perform a generic access to a tiled image with a given format. This works
241bf215546Sopenharmony_ci * even for block-compressed images on entire blocks at a time. sx/sy/w/h are
242bf215546Sopenharmony_ci * specified in pixels, not blocks, but our internal routines work in blocks,
243bf215546Sopenharmony_ci * so we divide here. Alignment is assumed.
244bf215546Sopenharmony_ci */
245bf215546Sopenharmony_cistatic void
246bf215546Sopenharmony_cipanfrost_access_tiled_image_generic(void *dst, void *src,
247bf215546Sopenharmony_ci                               unsigned sx, unsigned sy,
248bf215546Sopenharmony_ci                               unsigned w, unsigned h,
249bf215546Sopenharmony_ci                               uint32_t dst_stride,
250bf215546Sopenharmony_ci                               uint32_t src_stride,
251bf215546Sopenharmony_ci                               const struct util_format_description *desc,
252bf215546Sopenharmony_ci                               bool _is_store)
253bf215546Sopenharmony_ci{
254bf215546Sopenharmony_ci   unsigned bpp = desc->block.bits;
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci   /* Convert units */
257bf215546Sopenharmony_ci   sx /= desc->block.width;
258bf215546Sopenharmony_ci   sy /= desc->block.height;
259bf215546Sopenharmony_ci   w = DIV_ROUND_UP(w, desc->block.width);
260bf215546Sopenharmony_ci   h = DIV_ROUND_UP(h, desc->block.height);
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci   if (desc->block.width > 1) {
263bf215546Sopenharmony_ci      if (_is_store)
264bf215546Sopenharmony_ci         TILED_UNALIGNED_TYPES(true, 2)
265bf215546Sopenharmony_ci      else
266bf215546Sopenharmony_ci         TILED_UNALIGNED_TYPES(false, 2)
267bf215546Sopenharmony_ci   } else {
268bf215546Sopenharmony_ci      if (_is_store)
269bf215546Sopenharmony_ci         TILED_UNALIGNED_TYPES(true, 4)
270bf215546Sopenharmony_ci      else
271bf215546Sopenharmony_ci         TILED_UNALIGNED_TYPES(false, 4)
272bf215546Sopenharmony_ci   }
273bf215546Sopenharmony_ci}
274bf215546Sopenharmony_ci
/* Address of pixel (_x, _y) in the linear image at src, whose first pixel has
 * coordinates (orig_x, orig_y). Reads orig_x, orig_y, src_stride (bytes per
 * row) and bpp (bits per pixel) from the expansion scope. The base argument
 * and the whole expansion are parenthesized so that a compound expression can
 * be passed as src and the result can be used inside a larger expression. */
#define OFFSET(src, _x, _y) \
   ((void *) ((uint8_t *) (src) + ((_y) - orig_y) * src_stride + \
              (((_x) - orig_x) * (bpp / 8))))
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_cistatic ALWAYS_INLINE void
278bf215546Sopenharmony_cipanfrost_access_tiled_image(void *dst, void *src,
279bf215546Sopenharmony_ci                           unsigned x, unsigned y,
280bf215546Sopenharmony_ci                           unsigned w, unsigned h,
281bf215546Sopenharmony_ci                           uint32_t dst_stride,
282bf215546Sopenharmony_ci                           uint32_t src_stride,
283bf215546Sopenharmony_ci                           enum pipe_format format,
284bf215546Sopenharmony_ci                           bool is_store)
285bf215546Sopenharmony_ci{
286bf215546Sopenharmony_ci   const struct util_format_description *desc = util_format_description(format);
287bf215546Sopenharmony_ci   unsigned bpp = desc->block.bits;
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci   /* Our optimized routines cannot handle unaligned blocks (without depending
290bf215546Sopenharmony_ci    * on platform-specific behaviour), and there is no good reason to do so. If
291bf215546Sopenharmony_ci    * these assertions fail, there is either a driver bug or a non-portable unit
292bf215546Sopenharmony_ci    * test.
293bf215546Sopenharmony_ci    */
294bf215546Sopenharmony_ci   assert((dst_stride % (bpp / 8)) == 0 && "unaligned destination stride");
295bf215546Sopenharmony_ci   assert((src_stride % (bpp / 8)) == 0 && "unaligned source stride");
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci   if (desc->block.width > 1 || !util_is_power_of_two_nonzero(desc->block.bits)) {
298bf215546Sopenharmony_ci      panfrost_access_tiled_image_generic(dst, (void *) src,
299bf215546Sopenharmony_ci            x, y, w, h,
300bf215546Sopenharmony_ci            dst_stride, src_stride, desc, is_store);
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci      return;
303bf215546Sopenharmony_ci   }
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci   unsigned first_full_tile_x = DIV_ROUND_UP(x, TILE_WIDTH) * TILE_WIDTH;
306bf215546Sopenharmony_ci   unsigned first_full_tile_y = DIV_ROUND_UP(y, TILE_HEIGHT) * TILE_HEIGHT;
307bf215546Sopenharmony_ci   unsigned last_full_tile_x = ((x + w) / TILE_WIDTH) * TILE_WIDTH;
308bf215546Sopenharmony_ci   unsigned last_full_tile_y = ((y + h) / TILE_HEIGHT) * TILE_HEIGHT;
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci   /* First, tile the top portion */
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   unsigned orig_x = x, orig_y = y;
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   if (first_full_tile_y != y) {
315bf215546Sopenharmony_ci      unsigned dist = MIN2(first_full_tile_y - y, h);
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci      panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y),
318bf215546Sopenharmony_ci            x, y, w, dist,
319bf215546Sopenharmony_ci            dst_stride, src_stride, desc, is_store);
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci      if (dist == h)
322bf215546Sopenharmony_ci         return;
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci      y += dist;
325bf215546Sopenharmony_ci      h -= dist;
326bf215546Sopenharmony_ci   }
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   /* Next, the bottom portion */
329bf215546Sopenharmony_ci   if (last_full_tile_y != (y + h)) {
330bf215546Sopenharmony_ci      unsigned dist = (y + h) - last_full_tile_y;
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci      panfrost_access_tiled_image_generic(dst, OFFSET(src, x, last_full_tile_y),
333bf215546Sopenharmony_ci            x, last_full_tile_y, w, dist,
334bf215546Sopenharmony_ci            dst_stride, src_stride, desc, is_store);
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci      h -= dist;
337bf215546Sopenharmony_ci   }
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci   /* The left portion */
340bf215546Sopenharmony_ci   if (first_full_tile_x != x) {
341bf215546Sopenharmony_ci      unsigned dist = MIN2(first_full_tile_x - x, w);
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci      panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y),
344bf215546Sopenharmony_ci            x, y, dist, h,
345bf215546Sopenharmony_ci            dst_stride, src_stride, desc, is_store);
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci      if (dist == w)
348bf215546Sopenharmony_ci         return;
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci      x += dist;
351bf215546Sopenharmony_ci      w -= dist;
352bf215546Sopenharmony_ci   }
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci   /* Finally, the right portion */
355bf215546Sopenharmony_ci   if (last_full_tile_x != (x + w)) {
356bf215546Sopenharmony_ci      unsigned dist = (x + w) - last_full_tile_x;
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci      panfrost_access_tiled_image_generic(dst, OFFSET(src, last_full_tile_x, y),
359bf215546Sopenharmony_ci            last_full_tile_x, y, dist, h,
360bf215546Sopenharmony_ci            dst_stride, src_stride, desc, is_store);
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci      w -= dist;
363bf215546Sopenharmony_ci   }
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_ci   if (bpp == 8)
366bf215546Sopenharmony_ci      panfrost_access_tiled_image_uint8_t(dst,  OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
367bf215546Sopenharmony_ci   else if (bpp == 16)
368bf215546Sopenharmony_ci      panfrost_access_tiled_image_uint16_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
369bf215546Sopenharmony_ci   else if (bpp == 32)
370bf215546Sopenharmony_ci      panfrost_access_tiled_image_uint32_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
371bf215546Sopenharmony_ci   else if (bpp == 64)
372bf215546Sopenharmony_ci      panfrost_access_tiled_image_uint64_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
373bf215546Sopenharmony_ci   else if (bpp == 128)
374bf215546Sopenharmony_ci      panfrost_access_tiled_image_pan_uint128_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
375bf215546Sopenharmony_ci}
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci/**
378bf215546Sopenharmony_ci * Access a tiled image (load or store). Note: the region of interest (x, y, w,
379bf215546Sopenharmony_ci * h) is specified in pixels, not blocks. It is expected that these quantities
380bf215546Sopenharmony_ci * are aligned to the block size.
381bf215546Sopenharmony_ci */
382bf215546Sopenharmony_civoid
383bf215546Sopenharmony_cipanfrost_store_tiled_image(void *dst, const void *src,
384bf215546Sopenharmony_ci                           unsigned x, unsigned y,
385bf215546Sopenharmony_ci                           unsigned w, unsigned h,
386bf215546Sopenharmony_ci                           uint32_t dst_stride,
387bf215546Sopenharmony_ci                           uint32_t src_stride,
388bf215546Sopenharmony_ci                           enum pipe_format format)
389bf215546Sopenharmony_ci{
390bf215546Sopenharmony_ci    panfrost_access_tiled_image(dst, (void *) src,
391bf215546Sopenharmony_ci        x, y, w, h,
392bf215546Sopenharmony_ci        dst_stride, src_stride, format, true);
393bf215546Sopenharmony_ci}
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_civoid
396bf215546Sopenharmony_cipanfrost_load_tiled_image(void *dst, const void *src,
397bf215546Sopenharmony_ci                           unsigned x, unsigned y,
398bf215546Sopenharmony_ci                           unsigned w, unsigned h,
399bf215546Sopenharmony_ci                           uint32_t dst_stride,
400bf215546Sopenharmony_ci                           uint32_t src_stride,
401bf215546Sopenharmony_ci                           enum pipe_format format)
402bf215546Sopenharmony_ci{
403bf215546Sopenharmony_ci   panfrost_access_tiled_image((void *) src, dst,
404bf215546Sopenharmony_ci       x, y, w, h,
405bf215546Sopenharmony_ci       src_stride, dst_stride, format, false);
406bf215546Sopenharmony_ci}
407