1/* 2 * Copyright © 2022 Imagination Technologies Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include <stdbool.h> 25#include <stdint.h> 26 27#include "hwdef/rogue_hw_defs.h" 28#include "hwdef/rogue_hw_utils.h" 29#include "pvr_device_info.h" 30#include "pvr_job_common.h" 31#include "pvr_private.h" 32#include "util/macros.h" 33#include "util/u_math.h" 34#include "vk_alloc.h" 35#include "vk_format.h" 36#include "vk_object.h" 37 38/* clang-format off */ 39static enum PVRX(PBESTATE_SWIZ) 40pvr_get_pbe_hw_swizzle(VkComponentSwizzle comp, enum pipe_swizzle swz) 41/* clang-format on */ 42{ 43 switch (swz) { 44 case PIPE_SWIZZLE_0: 45 return ROGUE_PBESTATE_SWIZ_ZERO; 46 case PIPE_SWIZZLE_1: 47 return ROGUE_PBESTATE_SWIZ_ONE; 48 case PIPE_SWIZZLE_X: 49 return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0; 50 case PIPE_SWIZZLE_Y: 51 return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1; 52 case PIPE_SWIZZLE_Z: 53 return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2; 54 case PIPE_SWIZZLE_W: 55 return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3; 56 case PIPE_SWIZZLE_NONE: 57 if (comp == VK_COMPONENT_SWIZZLE_A) 58 return ROGUE_PBESTATE_SWIZ_ONE; 59 else 60 return ROGUE_PBESTATE_SWIZ_ZERO; 61 default: 62 unreachable("Unknown enum pipe_swizzle"); 63 }; 64} 65 66void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format, 67 enum pvr_pbe_gamma default_gamma, 68 bool with_packed_usc_channel, 69 uint32_t *const src_format_out, 70 enum pvr_pbe_gamma *const gamma_out) 71{ 72 uint32_t chan_0_width = vk_format_get_channel_width(vk_format, 0); 73 74 *gamma_out = default_gamma; 75 76 if (vk_format_has_32bit_component(vk_format) || 77 vk_format_is_int(vk_format)) { 78 *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); 79 } else if (vk_format_is_float(vk_format)) { 80 *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); 81 } else if (vk_format_is_srgb(vk_format)) { 82 *gamma_out = PVR_PBE_GAMMA_ENABLED; 83 84 /* F16 source for gamma'd formats. */ 85 *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); 86 } else if (vk_format_has_depth(vk_format) && 87 vk_format_get_component_bits(vk_format, 88 UTIL_FORMAT_COLORSPACE_ZS, 89 0) > 16) { 90 *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); 91 } else if (vk_format_has_stencil(vk_format) && 92 vk_format_get_component_bits(vk_format, 93 UTIL_FORMAT_COLORSPACE_ZS, 94 1) > 0) { 95 *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); 96 } else if (chan_0_width > 16) { 97 *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); 98 } else if (chan_0_width > 8) { 99 *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); 100 } else if (!with_packed_usc_channel) { 101 *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL); 102 } else { 103 *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL); 104 } 105} 106 107static void pvr_pbe_get_src_pos(const struct pvr_device_info *dev_info, 108 enum pvr_pbe_source_start_pos source_start, 109 uint32_t *const src_pos_out, 110 bool *const src_pos_offset_128_out) 111{ 112 *src_pos_offset_128_out = false; 113 114 switch (source_start) { 115 case PVR_PBE_STARTPOS_BIT32: 116 *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32); 117 break; 118 119 case PVR_PBE_STARTPOS_BIT64: 120 *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64); 121 break; 122 123 case PVR_PBE_STARTPOS_BIT96: 124 *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96); 125 break; 126 127 case PVR_PBE_STARTPOS_BIT0: 128 default: 129 if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) { 130 switch (source_start) { 131 case PVR_PBE_STARTPOS_BIT128: 132 *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0); 133 *src_pos_offset_128_out = true; 134 break; 135 136 case PVR_PBE_STARTPOS_BIT160: 137 *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32); 138 *src_pos_offset_128_out = true; 139 break; 140 141 case PVR_PBE_STARTPOS_BIT192: 142 *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64); 143 *src_pos_offset_128_out = true; 144 break; 145 146 case PVR_PBE_STARTPOS_BIT224: 147 *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96); 148 *src_pos_offset_128_out = true; 149 break; 150 151 default: 152 *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0); 153 break; 154 } 155 } else { 156 *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0); 157 } 158 break; 159 } 160} 161 162void pvr_pbe_pack_state( 163 const struct pvr_device_info *dev_info, 164 const struct pvr_pbe_surf_params *surface_params, 165 const struct pvr_pbe_render_params *render_params, 166 uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS], 167 uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS]) 168{ 169 /* This function needs updating if the value of 170 * ROGUE_NUM_PBESTATE_STATE_WORDS changes, so check that it's the expected 171 * value. 172 */ 173 STATIC_ASSERT(ROGUE_NUM_PBESTATE_STATE_WORDS == 2); 174 175 /* This function needs updating if the value of ROGUE_NUM_PBESTATE_REG_WORDS 176 * changes, so check that it's the expected value. 177 */ 178 STATIC_ASSERT(ROGUE_NUM_PBESTATE_REG_WORDS == 3); 179 180 pbe_reg_words[2] = 0; 181 182 if (surface_params->z_only_render) { 183 pbe_cs_words[0] = 0; 184 185 pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) { 186 state.emptytile = true; 187 } 188 189 pbe_reg_words[0] = 0; 190 pbe_reg_words[1] = 0; 191 192 return; 193 } 194 195 pvr_csb_pack (&pbe_cs_words[0], PBESTATE_STATE_WORD0, state) { 196 state.address_low = surface_params->addr; 197 } 198 199 pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) { 200 state.address_high = surface_params->addr; 201 202 state.source_format = surface_params->source_format; 203 204 pvr_pbe_get_src_pos(dev_info, 205 render_params->source_start, 206 &state.source_pos, 207 &state.source_pos_offset_128); 208 209 /* MRT index (Use 0 for a single render target)/ */ 210 state.mrt_index = render_params->mrt_index; 211 212 /* Normalization flag based on output format. */ 213 state.norm = surface_params->is_normalized; 214 215 state.packmode = surface_params->pbe_packmode; 216 } 217 218 pvr_csb_pack (&pbe_reg_words[0], PBESTATE_REG_WORD0, reg) { 219 reg.tilerelative = true; 220 221 switch (surface_params->mem_layout) { 222 case PVR_MEMLAYOUT_TWIDDLED: 223 reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_2D); 224 break; 225 226 case PVR_MEMLAYOUT_3DTWIDDLED: 227 reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_3D); 228 break; 229 230 case PVR_MEMLAYOUT_LINEAR: 231 default: 232 reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_LINEAR); 233 break; 234 } 235 236 /* FIXME: Remove rotation and y_flip hardcoding if needed. */ 237 reg.rotation = PVRX(PBESTATE_ROTATION_TYPE_0_DEG); 238 reg.y_flip = false; 239 240 /* Note: Due to gamma being overridden above, anything other than 241 * ENABLED/NONE is ignored. 242 */ 243 if (surface_params->gamma == PVR_PBE_GAMMA_ENABLED) { 244 reg.gamma = true; 245 246 if (surface_params->nr_components == 2) 247 reg.twocomp_gamma = 248 PVRX(PBESTATE_TWOCOMP_GAMMA_GAMMA_BOTH_CHANNELS); 249 } 250 251 reg.linestride = (surface_params->stride - 1) / 252 PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE); 253 reg.minclip_x = render_params->min_x_clip; 254 255 reg.swiz_chan0 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_R, 256 surface_params->swizzle[0]); 257 reg.swiz_chan1 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_G, 258 surface_params->swizzle[1]); 259 reg.swiz_chan2 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_B, 260 surface_params->swizzle[2]); 261 reg.swiz_chan3 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_A, 262 surface_params->swizzle[3]); 263 264 if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) 265 reg.size_z = util_logbase2_ceil(surface_params->depth); 266 267 reg.downscale = surface_params->down_scale; 268 } 269 270 pvr_csb_pack (&pbe_reg_words[1], PBESTATE_REG_WORD1, reg) { 271 if (surface_params->mem_layout == PVR_MEMLAYOUT_TWIDDLED || 272 surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) { 273 reg.size_x = util_logbase2_ceil(surface_params->width); 274 reg.size_y = util_logbase2_ceil(surface_params->height); 275 } 276 277 reg.minclip_y = render_params->min_y_clip; 278 reg.maxclip_x = render_params->max_x_clip; 279 reg.zslice = render_params->slice; 280 reg.maxclip_y = render_params->max_y_clip; 281 } 282} 283 284/* TODO: Split this into smaller functions to make it easier to follow. When 285 * doing this, it would be nice to have a function that returns 286 * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in 287 * pvr_render_job_ws_fragment_state_init(). 288 */ 289void pvr_setup_tiles_in_flight( 290 const struct pvr_device_info *dev_info, 291 const struct pvr_device_runtime_info *dev_runtime_info, 292 uint32_t msaa_mode, 293 uint32_t pixel_width, 294 bool paired_tiles, 295 uint32_t max_tiles_in_flight, 296 uint32_t *const isp_ctl_out, 297 uint32_t *const pixel_ctl_out) 298{ 299 uint32_t total_tiles_in_flight = 0; 300 uint32_t usable_partition_size; 301 uint32_t partitions_available; 302 uint32_t usc_min_output_regs; 303 uint32_t max_partitions; 304 uint32_t partition_size; 305 uint32_t max_phantoms; 306 uint32_t tile_size_x; 307 uint32_t tile_size_y; 308 uint32_t isp_samples; 309 310 /* Round up the pixel width to the next allocation granularity. */ 311 usc_min_output_regs = 312 PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 0); 313 pixel_width = MAX2(pixel_width, usc_min_output_regs); 314 pixel_width = util_next_power_of_two(pixel_width); 315 316 assert(pixel_width <= rogue_get_max_output_regs_per_pixel(dev_info)); 317 318 partition_size = pixel_width; 319 320 isp_samples = PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1); 321 if (isp_samples == 2) { 322 if (msaa_mode != PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE)) 323 partition_size *= 2U; 324 } else if (isp_samples == 4) { 325 if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_4X) || 326 msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_8X)) 327 partition_size *= 4U; 328 else if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_2X)) 329 partition_size *= 2U; 330 } 331 332 /* Cores with a tile size of 16x16 don't have quadrant affinity. Hence the 333 * partition size is the same as for a 32x32 tile quadrant (with no MSAA). 334 * When MSAA is enabled, the USC has to process half the tile (16x8 pixels). 335 */ 336 tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0); 337 tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0); 338 339 /* We only support square tiles. */ 340 assert(tile_size_x == tile_size_y); 341 342 if (tile_size_x == 16U) { 343 /* Cores with 16x16 tiles does not use tile quadrants. */ 344 partition_size *= tile_size_x * tile_size_y; 345 } else { 346 /* Size of a tile quadrant (in dwords). */ 347 partition_size *= (tile_size_x * tile_size_y / 4U); 348 } 349 350 /* Maximum available partition space for partitions of this size. */ 351 max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0); 352 usable_partition_size = MIN2(dev_runtime_info->total_reserved_partition_size, 353 partition_size * max_partitions); 354 355 if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) < 356 (1024 * 4 * 4)) { 357 /* Do not apply the limit for cores with 16x16 tile size (no quadrant 358 * affinity). */ 359 if (tile_size_x != 16) { 360 /* This is to counter the extremely limited CS size on some cores. 361 */ 362 /* Available partition space is limited to 8 tile quadrants. */ 363 usable_partition_size = 364 MIN2((tile_size_x * tile_size_y / 4U) * 8U, usable_partition_size); 365 } 366 } 367 368 /* Ensure that maximum number of partitions in use is not greater 369 * than the total number of partitions available. 370 */ 371 partitions_available = 372 MIN2(max_partitions, usable_partition_size / partition_size); 373 374 if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure)) 375 max_phantoms = dev_runtime_info->num_phantoms; 376 else if (PVR_HAS_FEATURE(dev_info, roguexe)) 377 max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0); 378 else 379 max_phantoms = 1; 380 381 for (uint32_t i = 0; i < max_phantoms; i++) { 382 uint32_t usc_tiles_in_flight = partitions_available; 383 uint32_t isp_tiles_in_flight; 384 385 /* Cores with tiles size other than 16x16 use tile quadrants. */ 386 if (tile_size_x != 16) { 387 uint32_t num_clusters = 388 PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0U); 389 usc_tiles_in_flight = 390 (usc_tiles_in_flight * MIN2(4U, num_clusters - (4U * i))) / 4U; 391 } 392 393 assert(usc_tiles_in_flight > 0); 394 395 isp_tiles_in_flight = 396 PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0); 397 /* Ensure that maximum number of ISP tiles in flight is not greater 398 * than the maximum number of USC tiles in flight. 399 */ 400 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) || 401 PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) != 402 2) { 403 isp_tiles_in_flight /= dev_runtime_info->num_phantoms; 404 } 405 406 isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight); 407 408 /* Limit the number of tiles in flight if the shaders have 409 * requested a large allocation of local memory. 410 */ 411 if (max_tiles_in_flight > 0U) { 412 isp_tiles_in_flight = MIN2(usc_tiles_in_flight, max_tiles_in_flight); 413 414 if (PVR_HAS_FEATURE(dev_info, roguexe)) { 415 if (tile_size_x == 16) { 416 /* The FW infers the tiles in flight value from the 417 * partitions setting. 418 */ 419 /* Partitions per tile. */ 420 partitions_available = isp_tiles_in_flight; 421 } else { 422 /* Partitions per tile quadrant. */ 423 partitions_available = isp_tiles_in_flight * 4U; 424 } 425 } 426 } 427 428 /* Due to limitations of ISP_CTL_PIPE there can only be a difference of 429 * 1 between Phantoms. 430 */ 431 if (total_tiles_in_flight > (isp_tiles_in_flight + 1U)) 432 total_tiles_in_flight = isp_tiles_in_flight + 1U; 433 434 total_tiles_in_flight += isp_tiles_in_flight; 435 } 436 437 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) && 438 PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) == 439 2) { 440 /* Limit the ISP tiles in flight to fit into the available USC partition 441 * store. 442 */ 443 total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available); 444 } 445 446 if (PVR_HAS_FEATURE(dev_info, paired_tiles) && paired_tiles) { 447 total_tiles_in_flight = 448 MIN2(total_tiles_in_flight, partitions_available / 2); 449 } 450 451 pvr_csb_pack (pixel_ctl_out, CR_USC_PIXEL_OUTPUT_CTRL, reg) { 452 if (pixel_width == 1 && usc_min_output_regs == 1) { 453 reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER); 454 } else if (pixel_width == 2) { 455 reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS); 456 } else if (pixel_width == 4) { 457 reg.width = PVRX(CR_PIXEL_WIDTH_4REGISTERS); 458 } else if (pixel_width == 8 && 459 PVR_HAS_FEATURE(dev_info, eight_output_registers)) { 460 reg.width = PVRX(CR_PIXEL_WIDTH_8REGISTERS); 461 } else if (usc_min_output_regs == 1) { 462 reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER); 463 } else { 464 reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS); 465 } 466 467 if (PVR_HAS_FEATURE(dev_info, usc_pixel_partition_mask)) { 468 /* Setup the partition mask based on the maximum number of 469 * partitions available. 470 */ 471 reg.partition_mask = (1 << max_partitions) - 1; 472 } else { 473 reg.enable_4th_partition = true; 474 475 /* Setup the partition mask based on the number of partitions 476 * available. 477 */ 478 reg.partition_mask = (1U << partitions_available) - 1U; 479 } 480 } 481 482 pvr_csb_pack (isp_ctl_out, CR_ISP_CTL, reg) { 483 if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure)) 484 reg.pipe_enable = (2 * total_tiles_in_flight) - 1; 485 else 486 reg.pipe_enable = total_tiles_in_flight - 1; 487 } 488} 489