1/* 2 * Copyright © 2014-2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file v3d_tiling.c 25 * 26 * Handles information about the V3D tiling formats, and loading and storing 27 * from them. 28 */ 29 30#include <stdint.h> 31#include "v3d_tiling.h" 32#include "broadcom/common/v3d_cpu_tiling.h" 33 34/** Return the width in pixels of a 64-byte microtile. */ 35uint32_t 36v3d_utile_width(int cpp) 37{ 38 switch (cpp) { 39 case 1: 40 case 2: 41 return 8; 42 case 4: 43 case 8: 44 return 4; 45 case 16: 46 return 2; 47 default: 48 unreachable("unknown cpp"); 49 } 50} 51 52/** Return the height in pixels of a 64-byte microtile. */ 53uint32_t 54v3d_utile_height(int cpp) 55{ 56 switch (cpp) { 57 case 1: 58 return 8; 59 case 2: 60 case 4: 61 return 4; 62 case 8: 63 case 16: 64 return 2; 65 default: 66 unreachable("unknown cpp"); 67 } 68} 69 70/** 71 * Returns the byte address for a given pixel within a utile. 72 * 73 * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4 74 * arrangement. 75 */ 76static inline uint32_t 77v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) 78{ 79 uint32_t utile_w = v3d_utile_width(cpp); 80 81 assert(x < utile_w && y < v3d_utile_height(cpp)); 82 83 return x * cpp + y * utile_w * cpp; 84} 85 86/** 87 * Returns the byte offset for a given pixel in a LINEARTILE layout. 88 * 89 * LINEARTILE is a single line of utiles in either the X or Y direction. 90 */ 91static inline uint32_t 92v3d_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) 93{ 94 uint32_t utile_w = v3d_utile_width(cpp); 95 uint32_t utile_h = v3d_utile_height(cpp); 96 uint32_t utile_index_x = x / utile_w; 97 uint32_t utile_index_y = y / utile_h; 98 99 assert(utile_index_x == 0 || utile_index_y == 0); 100 101 return (64 * (utile_index_x + utile_index_y) + 102 v3d_get_utile_pixel_offset(cpp, 103 x & (utile_w - 1), 104 y & (utile_h - 1))); 105} 106 107/** 108 * Returns the byte offset for a given pixel in a UBLINEAR layout. 109 * 110 * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2 111 * utiles), and the UIF blocks are in 1 or 2 columns in raster order. 112 */ 113static inline uint32_t 114v3d_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y, 115 int ublinear_number) 116{ 117 uint32_t utile_w = v3d_utile_width(cpp); 118 uint32_t utile_h = v3d_utile_height(cpp); 119 uint32_t ub_w = utile_w * 2; 120 uint32_t ub_h = utile_h * 2; 121 uint32_t ub_x = x / ub_w; 122 uint32_t ub_y = y / ub_h; 123 124 return (256 * (ub_y * ublinear_number + 125 ub_x) + 126 ((x & utile_w) ? 64 : 0) + 127 ((y & utile_h) ? 128 : 0) + 128 + v3d_get_utile_pixel_offset(cpp, 129 x & (utile_w - 1), 130 y & (utile_h - 1))); 131} 132 133static inline uint32_t 134v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h, 135 uint32_t x, uint32_t y) 136{ 137 return v3d_get_ublinear_pixel_offset(cpp, x, y, 2); 138} 139 140static inline uint32_t 141v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, 142 uint32_t x, uint32_t y) 143{ 144 return v3d_get_ublinear_pixel_offset(cpp, x, y, 1); 145} 146 147/** 148 * Returns the byte offset for a given pixel in a UIF layout. 149 * 150 * UIF is the general V3D tiling layout shared across 3D, media, and scanout. 151 * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in 152 * 4x4 groups, and those 4x4 groups are then stored in raster order. 153 */ 154static inline uint32_t 155v3d_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y, 156 bool do_xor) 157{ 158 uint32_t utile_w = v3d_utile_width(cpp); 159 uint32_t utile_h = v3d_utile_height(cpp); 160 uint32_t mb_width = utile_w * 2; 161 uint32_t mb_height = utile_h * 2; 162 uint32_t log2_mb_width = ffs(mb_width) - 1; 163 uint32_t log2_mb_height = ffs(mb_height) - 1; 164 165 /* Macroblock X, y */ 166 uint32_t mb_x = x >> log2_mb_width; 167 uint32_t mb_y = y >> log2_mb_height; 168 /* X, y within the macroblock */ 169 uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); 170 uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); 171 172 if (do_xor && (mb_x / 4) & 1) 173 mb_y ^= 0x10; 174 175 uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; 176 uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; 177 178 uint32_t mb_base_addr = mb_id * 256; 179 180 bool top = mb_pixel_y < utile_h; 181 bool left = mb_pixel_x < utile_w; 182 183 /* Docs have this in pixels, we do bytes here. */ 184 uint32_t mb_tile_offset = (!top * 128 + !left * 64); 185 186 uint32_t utile_x = mb_pixel_x & (utile_w - 1); 187 uint32_t utile_y = mb_pixel_y & (utile_h - 1); 188 189 uint32_t mb_pixel_address = (mb_base_addr + 190 mb_tile_offset + 191 v3d_get_utile_pixel_offset(cpp, 192 utile_x, 193 utile_y)); 194 195 return mb_pixel_address; 196} 197 198static inline uint32_t 199v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h, 200 uint32_t x, uint32_t y) 201{ 202 return v3d_get_uif_pixel_offset(cpp, image_h, x, y, true); 203} 204 205static inline uint32_t 206v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h, 207 uint32_t x, uint32_t y) 208{ 209 return v3d_get_uif_pixel_offset(cpp, image_h, x, y, false); 210} 211 212/* Loads/stores non-utile-aligned boxes by walking over the destination 213 * rectangle, computing the address on the GPU, and storing/loading a pixel at 214 * a time. 215 */ 216static inline void 217v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride, 218 void *cpu, uint32_t cpu_stride, 219 int cpp, uint32_t image_h, 220 const struct pipe_box *box, 221 uint32_t (*get_pixel_offset)(uint32_t cpp, 222 uint32_t image_h, 223 uint32_t x, uint32_t y), 224 bool is_load) 225{ 226 for (uint32_t y = 0; y < box->height; y++) { 227 void *cpu_row = cpu + y * cpu_stride; 228 229 for (int x = 0; x < box->width; x++) { 230 uint32_t pixel_offset = get_pixel_offset(cpp, image_h, 231 box->x + x, 232 box->y + y); 233 234 if (false) { 235 fprintf(stderr, "%3d,%3d -> %d\n", 236 box->x + x, box->y + y, 237 pixel_offset); 238 } 239 240 if (is_load) { 241 memcpy(cpu_row + x * cpp, 242 gpu + pixel_offset, 243 cpp); 244 } else { 245 memcpy(gpu + pixel_offset, 246 cpu_row + x * cpp, 247 cpp); 248 } 249 } 250 } 251} 252 253/* Breaks the image down into utiles and calls either the fast whole-utile 254 * load/store functions, or the unaligned fallback case. 255 */ 256static inline void 257v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, 258 void *cpu, uint32_t cpu_stride, 259 int cpp, uint32_t image_h, 260 const struct pipe_box *box, 261 uint32_t (*get_pixel_offset)(uint32_t cpp, 262 uint32_t image_h, 263 uint32_t x, uint32_t y), 264 bool is_load) 265{ 266 uint32_t utile_w = v3d_utile_width(cpp); 267 uint32_t utile_h = v3d_utile_height(cpp); 268 uint32_t utile_gpu_stride = utile_w * cpp; 269 uint32_t x1 = box->x; 270 uint32_t y1 = box->y; 271 uint32_t x2 = box->x + box->width; 272 uint32_t y2 = box->y + box->height; 273 uint32_t align_x1 = align(x1, utile_w); 274 uint32_t align_y1 = align(y1, utile_h); 275 uint32_t align_x2 = x2 & ~(utile_w - 1); 276 uint32_t align_y2 = y2 & ~(utile_h - 1); 277 278 /* Load/store all the whole utiles first. */ 279 for (uint32_t y = align_y1; y < align_y2; y += utile_h) { 280 void *cpu_row = cpu + (y - box->y) * cpu_stride; 281 282 for (uint32_t x = align_x1; x < align_x2; x += utile_w) { 283 void *utile_gpu = (gpu + 284 get_pixel_offset(cpp, image_h, x, y)); 285 void *utile_cpu = cpu_row + (x - box->x) * cpp; 286 287 if (is_load) { 288 v3d_load_utile(utile_cpu, cpu_stride, 289 utile_gpu, utile_gpu_stride); 290 } else { 291 v3d_store_utile(utile_gpu, utile_gpu_stride, 292 utile_cpu, cpu_stride); 293 } 294 } 295 } 296 297 /* If there were no aligned utiles in the middle, load/store the whole 298 * thing unaligned. 299 */ 300 if (align_y2 <= align_y1 || 301 align_x2 <= align_x1) { 302 v3d_move_pixels_unaligned(gpu, gpu_stride, 303 cpu, cpu_stride, 304 cpp, image_h, 305 box, 306 get_pixel_offset, is_load); 307 return; 308 } 309 310 /* Load/store the partial utiles. */ 311 struct pipe_box partial_boxes[4] = { 312 /* Top */ 313 { 314 .x = x1, 315 .width = x2 - x1, 316 .y = y1, 317 .height = align_y1 - y1, 318 }, 319 /* Bottom */ 320 { 321 .x = x1, 322 .width = x2 - x1, 323 .y = align_y2, 324 .height = y2 - align_y2, 325 }, 326 /* Left */ 327 { 328 .x = x1, 329 .width = align_x1 - x1, 330 .y = align_y1, 331 .height = align_y2 - align_y1, 332 }, 333 /* Right */ 334 { 335 .x = align_x2, 336 .width = x2 - align_x2, 337 .y = align_y1, 338 .height = align_y2 - align_y1, 339 }, 340 }; 341 for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) { 342 void *partial_cpu = (cpu + 343 (partial_boxes[i].y - y1) * cpu_stride + 344 (partial_boxes[i].x - x1) * cpp); 345 346 v3d_move_pixels_unaligned(gpu, gpu_stride, 347 partial_cpu, cpu_stride, 348 cpp, image_h, 349 &partial_boxes[i], 350 get_pixel_offset, is_load); 351 } 352} 353 354static inline void 355v3d_move_pixels_general(void *gpu, uint32_t gpu_stride, 356 void *cpu, uint32_t cpu_stride, 357 int cpp, uint32_t image_h, 358 const struct pipe_box *box, 359 uint32_t (*get_pixel_offset)(uint32_t cpp, 360 uint32_t image_h, 361 uint32_t x, uint32_t y), 362 bool is_load) 363{ 364 switch (cpp) { 365 case 1: 366 v3d_move_pixels_general_percpp(gpu, gpu_stride, 367 cpu, cpu_stride, 368 1, image_h, box, 369 get_pixel_offset, 370 is_load); 371 break; 372 case 2: 373 v3d_move_pixels_general_percpp(gpu, gpu_stride, 374 cpu, cpu_stride, 375 2, image_h, box, 376 get_pixel_offset, 377 is_load); 378 break; 379 case 4: 380 v3d_move_pixels_general_percpp(gpu, gpu_stride, 381 cpu, cpu_stride, 382 4, image_h, box, 383 get_pixel_offset, 384 is_load); 385 break; 386 case 8: 387 v3d_move_pixels_general_percpp(gpu, gpu_stride, 388 cpu, cpu_stride, 389 8, image_h, box, 390 get_pixel_offset, 391 is_load); 392 break; 393 case 16: 394 v3d_move_pixels_general_percpp(gpu, gpu_stride, 395 cpu, cpu_stride, 396 16, image_h, box, 397 get_pixel_offset, 398 is_load); 399 break; 400 } 401} 402 403static inline void 404v3d_move_tiled_image(void *gpu, uint32_t gpu_stride, 405 void *cpu, uint32_t cpu_stride, 406 enum v3d_tiling_mode tiling_format, 407 int cpp, 408 uint32_t image_h, 409 const struct pipe_box *box, 410 bool is_load) 411{ 412 switch (tiling_format) { 413 case V3D_TILING_UIF_XOR: 414 v3d_move_pixels_general(gpu, gpu_stride, 415 cpu, cpu_stride, 416 cpp, image_h, box, 417 v3d_get_uif_xor_pixel_offset, 418 is_load); 419 break; 420 case V3D_TILING_UIF_NO_XOR: 421 v3d_move_pixels_general(gpu, gpu_stride, 422 cpu, cpu_stride, 423 cpp, image_h, box, 424 v3d_get_uif_no_xor_pixel_offset, 425 is_load); 426 break; 427 case V3D_TILING_UBLINEAR_2_COLUMN: 428 v3d_move_pixels_general(gpu, gpu_stride, 429 cpu, cpu_stride, 430 cpp, image_h, box, 431 v3d_get_ublinear_2_column_pixel_offset, 432 is_load); 433 break; 434 case V3D_TILING_UBLINEAR_1_COLUMN: 435 v3d_move_pixels_general(gpu, gpu_stride, 436 cpu, cpu_stride, 437 cpp, image_h, box, 438 v3d_get_ublinear_1_column_pixel_offset, 439 is_load); 440 break; 441 case V3D_TILING_LINEARTILE: 442 v3d_move_pixels_general(gpu, gpu_stride, 443 cpu, cpu_stride, 444 cpp, image_h, box, 445 v3d_get_lt_pixel_offset, 446 is_load); 447 break; 448 default: 449 unreachable("Unsupported tiling format"); 450 break; 451 } 452} 453 454/** 455 * Loads pixel data from the start (microtile-aligned) box in \p src to the 456 * start of \p dst according to the given tiling format. 457 */ 458void 459v3d_load_tiled_image(void *dst, uint32_t dst_stride, 460 void *src, uint32_t src_stride, 461 enum v3d_tiling_mode tiling_format, int cpp, 462 uint32_t image_h, 463 const struct pipe_box *box) 464{ 465 v3d_move_tiled_image(src, src_stride, 466 dst, dst_stride, 467 tiling_format, 468 cpp, 469 image_h, 470 box, 471 true); 472} 473 474/** 475 * Stores pixel data from the start of \p src into a (microtile-aligned) box in 476 * \p dst according to the given tiling format. 477 */ 478void 479v3d_store_tiled_image(void *dst, uint32_t dst_stride, 480 void *src, uint32_t src_stride, 481 enum v3d_tiling_mode tiling_format, int cpp, 482 uint32_t image_h, 483 const struct pipe_box *box) 484{ 485 v3d_move_tiled_image(dst, dst_stride, 486 src, src_stride, 487 tiling_format, 488 cpp, 489 image_h, 490 box, 491 false); 492} 493