1/*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** @file v3d_tiling.c
25 *
26 * Handles information about the V3D tiling formats, and loading and storing
27 * from them.
28 */
29
30#include <stdint.h>
31#include "v3d_tiling.h"
32#include "broadcom/common/v3d_cpu_tiling.h"
33
/**
 * Return the width in pixels of a 64-byte microtile.
 *
 * \p cpp is the pixel size in bytes.  Sizes 1, 2, 4, 8 and 16 are handled;
 * any other value reaches unreachable().
 */
uint32_t
v3d_utile_width(int cpp)
{
        if (cpp == 1 || cpp == 2)
                return 8;

        if (cpp == 4 || cpp == 8)
                return 4;

        if (cpp == 16)
                return 2;

        unreachable("unknown cpp");
}
51
/**
 * Return the height in pixels of a 64-byte microtile.
 *
 * \p cpp is the pixel size in bytes.  Sizes 1, 2, 4, 8 and 16 are handled;
 * any other value reaches unreachable().
 */
uint32_t
v3d_utile_height(int cpp)
{
        if (cpp == 1)
                return 8;

        if (cpp == 2 || cpp == 4)
                return 4;

        if (cpp == 8 || cpp == 16)
                return 2;

        unreachable("unknown cpp");
}
69
/**
 * Returns the byte address for a given pixel within a utile.
 *
 * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4
 * arrangement.  \p x and \p y are coordinates relative to the utile and are
 * asserted to be smaller than the utile's width and height for \p cpp.
 */
static inline uint32_t
v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
{
        uint32_t row_pixels = v3d_utile_width(cpp);

        assert(x < row_pixels && y < v3d_utile_height(cpp));

        /* Raster order: skip the full rows above, then the pixels to the
         * left, and scale the pixel index to bytes.
         */
        return (y * row_pixels + x) * cpp;
}
85
/**
 * Returns the byte offset for a given pixel in a LINEARTILE layout.
 *
 * LINEARTILE is a single line of utiles in either the X or Y direction, so
 * at most one of the pixel's utile indices may be non-zero (asserted below).
 * \p image_h is not used by this layout.
 */
static inline uint32_t
v3d_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
{
        const uint32_t tile_w = v3d_utile_width(cpp);
        const uint32_t tile_h = v3d_utile_height(cpp);
        const uint32_t tile_col = x / tile_w;
        const uint32_t tile_row = y / tile_h;

        assert(tile_col == 0 || tile_row == 0);

        /* One index is zero, so their sum is the utile's position along the
         * line; every utile occupies 64 bytes.
         */
        uint32_t tile_base = 64 * (tile_col + tile_row);

        /* Utile dimensions are powers of two, so the remainder is the
         * coordinate inside the utile.
         */
        uint32_t within_tile = v3d_get_utile_pixel_offset(cpp,
                                                          x % tile_w,
                                                          y % tile_h);

        return tile_base + within_tile;
}
106
/**
 * Returns the byte offset for a given pixel in a UBLINEAR layout.
 *
 * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2
 * utiles), and the UIF blocks are in 1 or 2 columns in raster order.
 *
 * \p ublinear_number is the number of UIF-block columns and is used as the
 * row pitch, in blocks, when locating the pixel's block.
 */
static inline uint32_t
v3d_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
                              int ublinear_number)
{
        const uint32_t tile_w = v3d_utile_width(cpp);
        const uint32_t tile_h = v3d_utile_height(cpp);

        /* Position of the enclosing UIF block (2 utiles wide and tall). */
        const uint32_t block_col = x / (tile_w * 2);
        const uint32_t block_row = y / (tile_h * 2);

        /* Each UIF block is 256 bytes; blocks are stored in raster order. */
        uint32_t offset = 256 * (block_row * ublinear_number + block_col);

        /* Select the utile inside the block: the right-hand utile is 64
         * bytes further on, the bottom row of utiles 128 bytes further on.
         */
        if (x & tile_w)
                offset += 64;
        if (y & tile_h)
                offset += 128;

        offset += v3d_get_utile_pixel_offset(cpp,
                                             x & (tile_w - 1),
                                             y & (tile_h - 1));

        return offset;
}
132
/**
 * UBLINEAR pixel offset for the two-column variant.
 *
 * \p image_h is accepted but not used; the offset only depends on \p cpp and
 * the pixel coordinates.
 */
static inline uint32_t
v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        const int columns = 2;

        return v3d_get_ublinear_pixel_offset(cpp, x, y, columns);
}
139
/**
 * UBLINEAR pixel offset for the single-column variant.
 *
 * \p image_h is accepted but not used; the offset only depends on \p cpp and
 * the pixel coordinates.
 */
static inline uint32_t
v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        const int columns = 1;

        return v3d_get_ublinear_pixel_offset(cpp, x, y, columns);
}
146
/**
 * Returns the byte offset for a given pixel in a UIF layout.
 *
 * UIF is the general V3D tiling layout shared across 3D, media, and scanout.
 * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in
 * 4x4 groups, and those 4x4 groups are then stored in raster order.
 *
 * \p image_h is the image height in pixels, used to derive the height of the
 * image in UIF blocks.  When \p do_xor is set, the block row is XORed with
 * 0x10 in every odd group of four block columns.
 */
static inline uint32_t
v3d_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y,
                         bool do_xor)
{
        const uint32_t tile_w = v3d_utile_width(cpp);
        const uint32_t tile_h = v3d_utile_height(cpp);

        /* A UIF block ("macroblock") is two utiles wide and two tall. */
        const uint32_t block_w = tile_w * 2;
        const uint32_t block_h = tile_h * 2;
        const uint32_t block_w_shift = ffs(block_w) - 1;
        const uint32_t block_h_shift = ffs(block_h) - 1;

        /* Block coordinates of the pixel... */
        uint32_t block_x = x >> block_w_shift;
        uint32_t block_y = y >> block_h_shift;
        /* ...and its position inside that block (taken before any XOR). */
        const uint32_t in_block_x = x - (block_x << block_w_shift);
        const uint32_t in_block_y = y - (block_y << block_h_shift);

        /* Index of the 4-block-wide column the pixel falls in. */
        const uint32_t column = block_x / 4;

        if (do_xor && ((column & 1) != 0))
                block_y ^= 0x10;

        /* Image height in blocks; align() presumably rounds image_h up to a
         * whole number of blocks -- confirm against its definition.
         */
        const uint32_t blocks_high =
                align(image_h, 1 << block_h_shift) >> block_h_shift;
        const uint32_t block_id =
                (column * ((blocks_high - 1) * 4)) + block_x + block_y * 4;

        /* Each block is 256 bytes. */
        uint32_t offset = block_id * 256;

        /* Docs have this in pixels, we do bytes here: the bottom pair of
         * utiles starts 128 bytes in, the right-hand utile 64 bytes in.
         */
        if (in_block_y >= tile_h)
                offset += 128;
        if (in_block_x >= tile_w)
                offset += 64;

        offset += v3d_get_utile_pixel_offset(cpp,
                                             in_block_x & (tile_w - 1),
                                             in_block_y & (tile_h - 1));

        return offset;
}
197
/** UIF pixel offset with the XOR addressing mode enabled. */
static inline uint32_t
v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                             uint32_t x, uint32_t y)
{
        const bool do_xor = true;

        return v3d_get_uif_pixel_offset(cpp, image_h, x, y, do_xor);
}
204
/** UIF pixel offset with the XOR addressing mode disabled. */
static inline uint32_t
v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                                uint32_t x, uint32_t y)
{
        const bool do_xor = false;

        return v3d_get_uif_pixel_offset(cpp, image_h, x, y, do_xor);
}
211
212/* Loads/stores non-utile-aligned boxes by walking over the destination
213 * rectangle, computing the address on the GPU, and storing/loading a pixel at
214 * a time.
215 */
216static inline void
217v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride,
218                          void *cpu, uint32_t cpu_stride,
219                          int cpp, uint32_t image_h,
220                          const struct pipe_box *box,
221                          uint32_t (*get_pixel_offset)(uint32_t cpp,
222                                                       uint32_t image_h,
223                                                       uint32_t x, uint32_t y),
224                          bool is_load)
225{
226        for (uint32_t y = 0; y < box->height; y++) {
227                void *cpu_row = cpu + y * cpu_stride;
228
229                for (int x = 0; x < box->width; x++) {
230                        uint32_t pixel_offset = get_pixel_offset(cpp, image_h,
231                                                                 box->x + x,
232                                                                 box->y + y);
233
234                        if (false) {
235                                fprintf(stderr, "%3d,%3d -> %d\n",
236                                        box->x + x, box->y + y,
237                                        pixel_offset);
238                        }
239
240                        if (is_load) {
241                                memcpy(cpu_row + x * cpp,
242                                       gpu + pixel_offset,
243                                       cpp);
244                        } else {
245                                memcpy(gpu + pixel_offset,
246                                       cpu_row + x * cpp,
247                                       cpp);
248                        }
249                }
250        }
251}
252
253/* Breaks the image down into utiles and calls either the fast whole-utile
254 * load/store functions, or the unaligned fallback case.
255 */
256static inline void
257v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
258                               void *cpu, uint32_t cpu_stride,
259                               int cpp, uint32_t image_h,
260                               const struct pipe_box *box,
261                               uint32_t (*get_pixel_offset)(uint32_t cpp,
262                                                            uint32_t image_h,
263                                                            uint32_t x, uint32_t y),
264                               bool is_load)
265{
266        uint32_t utile_w = v3d_utile_width(cpp);
267        uint32_t utile_h = v3d_utile_height(cpp);
268        uint32_t utile_gpu_stride = utile_w * cpp;
269        uint32_t x1 = box->x;
270        uint32_t y1 = box->y;
271        uint32_t x2 = box->x + box->width;
272        uint32_t y2 = box->y + box->height;
273        uint32_t align_x1 = align(x1, utile_w);
274        uint32_t align_y1 = align(y1, utile_h);
275        uint32_t align_x2 = x2 & ~(utile_w - 1);
276        uint32_t align_y2 = y2 & ~(utile_h - 1);
277
278        /* Load/store all the whole utiles first. */
279        for (uint32_t y = align_y1; y < align_y2; y += utile_h) {
280                void *cpu_row = cpu + (y - box->y) * cpu_stride;
281
282                for (uint32_t x = align_x1; x < align_x2; x += utile_w) {
283                        void *utile_gpu = (gpu +
284                                           get_pixel_offset(cpp, image_h, x, y));
285                        void *utile_cpu = cpu_row + (x - box->x) * cpp;
286
287                        if (is_load) {
288                                v3d_load_utile(utile_cpu, cpu_stride,
289                                               utile_gpu, utile_gpu_stride);
290                        } else {
291                                v3d_store_utile(utile_gpu, utile_gpu_stride,
292                                                utile_cpu, cpu_stride);
293                        }
294                }
295        }
296
297        /* If there were no aligned utiles in the middle, load/store the whole
298         * thing unaligned.
299         */
300        if (align_y2 <= align_y1 ||
301            align_x2 <= align_x1) {
302                v3d_move_pixels_unaligned(gpu, gpu_stride,
303                                          cpu, cpu_stride,
304                                          cpp, image_h,
305                                          box,
306                                          get_pixel_offset, is_load);
307                return;
308        }
309
310        /* Load/store the partial utiles. */
311        struct pipe_box partial_boxes[4] = {
312                /* Top */
313                {
314                        .x = x1,
315                        .width = x2 - x1,
316                        .y = y1,
317                        .height = align_y1 - y1,
318                },
319                /* Bottom */
320                {
321                        .x = x1,
322                        .width = x2 - x1,
323                        .y = align_y2,
324                        .height = y2 - align_y2,
325                },
326                /* Left */
327                {
328                        .x = x1,
329                        .width = align_x1 - x1,
330                        .y = align_y1,
331                        .height = align_y2 - align_y1,
332                },
333                /* Right */
334                {
335                        .x = align_x2,
336                        .width = x2 - align_x2,
337                        .y = align_y1,
338                        .height = align_y2 - align_y1,
339                },
340        };
341        for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) {
342                void *partial_cpu = (cpu +
343                                     (partial_boxes[i].y - y1) * cpu_stride +
344                                     (partial_boxes[i].x - x1) * cpp);
345
346                v3d_move_pixels_unaligned(gpu, gpu_stride,
347                                          partial_cpu, cpu_stride,
348                                          cpp, image_h,
349                                          &partial_boxes[i],
350                                          get_pixel_offset, is_load);
351        }
352}
353
/**
 * Dispatches a tiled <-> linear copy to the per-cpp worker.
 *
 * Every case forwards the same arguments to
 * v3d_move_pixels_general_percpp() but passes \p cpp as a literal constant
 * -- presumably so the static inline worker can be specialized for each
 * pixel size (NOTE(review): confirm intent).
 *
 * An unsupported \p cpp is flagged with unreachable(), matching
 * v3d_utile_width() and v3d_utile_height(), instead of silently skipping the
 * copy.
 */
static inline void
v3d_move_pixels_general(void *gpu, uint32_t gpu_stride,
                               void *cpu, uint32_t cpu_stride,
                               int cpp, uint32_t image_h,
                               const struct pipe_box *box,
                               uint32_t (*get_pixel_offset)(uint32_t cpp,
                                                            uint32_t image_h,
                                                            uint32_t x, uint32_t y),
                               bool is_load)
{
        switch (cpp) {
        case 1:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               1, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 2:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               2, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 4:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               4, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 8:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               8, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 16:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               16, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        default:
                /* No utile geometry is defined for other pixel sizes. */
                unreachable("unknown cpp");
        }
}
402
403static inline void
404v3d_move_tiled_image(void *gpu, uint32_t gpu_stride,
405                     void *cpu, uint32_t cpu_stride,
406                     enum v3d_tiling_mode tiling_format,
407                     int cpp,
408                     uint32_t image_h,
409                     const struct pipe_box *box,
410                     bool is_load)
411{
412        switch (tiling_format) {
413        case V3D_TILING_UIF_XOR:
414                v3d_move_pixels_general(gpu, gpu_stride,
415                                        cpu, cpu_stride,
416                                        cpp, image_h, box,
417                                        v3d_get_uif_xor_pixel_offset,
418                                        is_load);
419                break;
420        case V3D_TILING_UIF_NO_XOR:
421                v3d_move_pixels_general(gpu, gpu_stride,
422                                        cpu, cpu_stride,
423                                        cpp, image_h, box,
424                                        v3d_get_uif_no_xor_pixel_offset,
425                                        is_load);
426                break;
427        case V3D_TILING_UBLINEAR_2_COLUMN:
428                v3d_move_pixels_general(gpu, gpu_stride,
429                                        cpu, cpu_stride,
430                                        cpp, image_h, box,
431                                        v3d_get_ublinear_2_column_pixel_offset,
432                                        is_load);
433                break;
434        case V3D_TILING_UBLINEAR_1_COLUMN:
435                v3d_move_pixels_general(gpu, gpu_stride,
436                                        cpu, cpu_stride,
437                                        cpp, image_h, box,
438                                        v3d_get_ublinear_1_column_pixel_offset,
439                                        is_load);
440                break;
441        case V3D_TILING_LINEARTILE:
442                v3d_move_pixels_general(gpu, gpu_stride,
443                                        cpu, cpu_stride,
444                                        cpp, image_h, box,
445                                        v3d_get_lt_pixel_offset,
446                                        is_load);
447                break;
448        default:
449                unreachable("Unsupported tiling format");
450                break;
451        }
452}
453
454/**
455 * Loads pixel data from the start (microtile-aligned) box in \p src to the
456 * start of \p dst according to the given tiling format.
457 */
458void
459v3d_load_tiled_image(void *dst, uint32_t dst_stride,
460                     void *src, uint32_t src_stride,
461                     enum v3d_tiling_mode tiling_format, int cpp,
462                     uint32_t image_h,
463                     const struct pipe_box *box)
464{
465        v3d_move_tiled_image(src, src_stride,
466                             dst, dst_stride,
467                             tiling_format,
468                             cpp,
469                             image_h,
470                             box,
471                             true);
472}
473
474/**
475 * Stores pixel data from the start of \p src into a (microtile-aligned) box in
476 * \p dst according to the given tiling format.
477 */
478void
479v3d_store_tiled_image(void *dst, uint32_t dst_stride,
480                      void *src, uint32_t src_stride,
481                      enum v3d_tiling_mode tiling_format, int cpp,
482                      uint32_t image_h,
483                      const struct pipe_box *box)
484{
485        v3d_move_tiled_image(dst, dst_stride,
486                             src, src_stride,
487                             tiling_format,
488                             cpp,
489                             image_h,
490                             box,
491                             false);
492}
493