1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2022 Imagination Technologies Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a copy 5bf215546Sopenharmony_ci * of this software and associated documentation files (the "Software"), to deal 6bf215546Sopenharmony_ci * in the Software without restriction, including without limitation the rights 7bf215546Sopenharmony_ci * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8bf215546Sopenharmony_ci * copies of the Software, and to permit persons to whom the Software is 9bf215546Sopenharmony_ci * furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18bf215546Sopenharmony_ci * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include <assert.h> 25bf215546Sopenharmony_ci#include <limits.h> 26bf215546Sopenharmony_ci#include <stdbool.h> 27bf215546Sopenharmony_ci#include <stddef.h> 28bf215546Sopenharmony_ci#include <stdint.h> 29bf215546Sopenharmony_ci#include <string.h> 30bf215546Sopenharmony_ci#include <vulkan/vulkan.h> 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#include "hwdef/rogue_hw_defs.h" 33bf215546Sopenharmony_ci#include "hwdef/rogue_hw_utils.h" 34bf215546Sopenharmony_ci#include "pvr_bo.h" 35bf215546Sopenharmony_ci#include "pvr_csb.h" 36bf215546Sopenharmony_ci#include "pvr_csb_enum_helpers.h" 37bf215546Sopenharmony_ci#include "pvr_device_info.h" 38bf215546Sopenharmony_ci#include "pvr_end_of_tile.h" 39bf215546Sopenharmony_ci#include "pvr_formats.h" 40bf215546Sopenharmony_ci#include "pvr_hw_pass.h" 41bf215546Sopenharmony_ci#include "pvr_job_common.h" 42bf215546Sopenharmony_ci#include "pvr_job_render.h" 43bf215546Sopenharmony_ci#include "pvr_limits.h" 44bf215546Sopenharmony_ci#include "pvr_pds.h" 45bf215546Sopenharmony_ci#include "pvr_private.h" 46bf215546Sopenharmony_ci#include "pvr_types.h" 47bf215546Sopenharmony_ci#include "pvr_winsys.h" 48bf215546Sopenharmony_ci#include "util/bitscan.h" 49bf215546Sopenharmony_ci#include "util/compiler.h" 50bf215546Sopenharmony_ci#include "util/list.h" 51bf215546Sopenharmony_ci#include "util/macros.h" 52bf215546Sopenharmony_ci#include "util/u_dynarray.h" 53bf215546Sopenharmony_ci#include "util/u_pack_color.h" 54bf215546Sopenharmony_ci#include "vk_alloc.h" 55bf215546Sopenharmony_ci#include "vk_command_buffer.h" 56bf215546Sopenharmony_ci#include "vk_command_pool.h" 57bf215546Sopenharmony_ci#include "vk_format.h" 58bf215546Sopenharmony_ci#include "vk_log.h" 59bf215546Sopenharmony_ci#include "vk_object.h" 60bf215546Sopenharmony_ci#include "vk_util.h" 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci/* Structure used to pass data into pvr_compute_generate_control_stream() 63bf215546Sopenharmony_ci * function. 64bf215546Sopenharmony_ci */ 65bf215546Sopenharmony_cistruct pvr_compute_kernel_info { 66bf215546Sopenharmony_ci pvr_dev_addr_t indirect_buffer_addr; 67bf215546Sopenharmony_ci bool global_offsets_present; 68bf215546Sopenharmony_ci uint32_t usc_common_size; 69bf215546Sopenharmony_ci uint32_t usc_unified_size; 70bf215546Sopenharmony_ci uint32_t pds_temp_size; 71bf215546Sopenharmony_ci uint32_t pds_data_size; 72bf215546Sopenharmony_ci enum PVRX(CDMCTRL_USC_TARGET) usc_target; 73bf215546Sopenharmony_ci bool is_fence; 74bf215546Sopenharmony_ci uint32_t pds_data_offset; 75bf215546Sopenharmony_ci uint32_t pds_code_offset; 76bf215546Sopenharmony_ci enum PVRX(CDMCTRL_SD_TYPE) sd_type; 77bf215546Sopenharmony_ci bool usc_common_shared; 78bf215546Sopenharmony_ci uint32_t local_size[PVR_WORKGROUP_DIMENSIONS]; 79bf215546Sopenharmony_ci uint32_t global_size[PVR_WORKGROUP_DIMENSIONS]; 80bf215546Sopenharmony_ci uint32_t max_instances; 81bf215546Sopenharmony_ci}; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, 84bf215546Sopenharmony_ci struct pvr_sub_cmd *sub_cmd) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci switch (sub_cmd->type) { 87bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_GRAPHICS: 88bf215546Sopenharmony_ci pvr_csb_finish(&sub_cmd->gfx.control_stream); 89bf215546Sopenharmony_ci pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo); 90bf215546Sopenharmony_ci pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo); 91bf215546Sopenharmony_ci break; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_COMPUTE: 94bf215546Sopenharmony_ci pvr_csb_finish(&sub_cmd->compute.control_stream); 95bf215546Sopenharmony_ci break; 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_TRANSFER: 98bf215546Sopenharmony_ci list_for_each_entry_safe (struct pvr_transfer_cmd, 99bf215546Sopenharmony_ci transfer_cmd, 100bf215546Sopenharmony_ci &sub_cmd->transfer.transfer_cmds, 101bf215546Sopenharmony_ci link) { 102bf215546Sopenharmony_ci list_del(&transfer_cmd->link); 103bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd); 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci break; 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci default: 108bf215546Sopenharmony_ci pvr_finishme("Unsupported sub-command type %d", sub_cmd->type); 109bf215546Sopenharmony_ci break; 110bf215546Sopenharmony_ci } 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci list_del(&sub_cmd->link); 113bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, sub_cmd); 114bf215546Sopenharmony_ci} 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_cistatic void pvr_cmd_buffer_free_sub_cmds(struct pvr_cmd_buffer *cmd_buffer) 117bf215546Sopenharmony_ci{ 118bf215546Sopenharmony_ci list_for_each_entry_safe (struct pvr_sub_cmd, 119bf215546Sopenharmony_ci sub_cmd, 120bf215546Sopenharmony_ci &cmd_buffer->sub_cmds, 121bf215546Sopenharmony_ci link) { 122bf215546Sopenharmony_ci pvr_cmd_buffer_free_sub_cmd(cmd_buffer, sub_cmd); 123bf215546Sopenharmony_ci } 124bf215546Sopenharmony_ci} 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_cistatic void pvr_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer) 127bf215546Sopenharmony_ci{ 128bf215546Sopenharmony_ci struct pvr_cmd_buffer *cmd_buffer = 129bf215546Sopenharmony_ci container_of(vk_cmd_buffer, struct pvr_cmd_buffer, vk); 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, 132bf215546Sopenharmony_ci cmd_buffer->state.render_pass_info.attachments); 133bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, 134bf215546Sopenharmony_ci cmd_buffer->state.render_pass_info.clear_values); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci pvr_cmd_buffer_free_sub_cmds(cmd_buffer); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) { 139bf215546Sopenharmony_ci list_del(&bo->link); 140bf215546Sopenharmony_ci pvr_bo_free(cmd_buffer->device, bo); 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci util_dynarray_fini(&cmd_buffer->scissor_array); 144bf215546Sopenharmony_ci util_dynarray_fini(&cmd_buffer->depth_bias_array); 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci vk_command_buffer_finish(&cmd_buffer->vk); 147bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer); 148bf215546Sopenharmony_ci} 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_cistatic VkResult pvr_cmd_buffer_create(struct pvr_device *device, 151bf215546Sopenharmony_ci struct vk_command_pool *pool, 152bf215546Sopenharmony_ci VkCommandBufferLevel level, 153bf215546Sopenharmony_ci VkCommandBuffer *pCommandBuffer) 154bf215546Sopenharmony_ci{ 155bf215546Sopenharmony_ci struct pvr_cmd_buffer *cmd_buffer; 156bf215546Sopenharmony_ci VkResult result; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci cmd_buffer = vk_zalloc(&pool->alloc, 159bf215546Sopenharmony_ci sizeof(*cmd_buffer), 160bf215546Sopenharmony_ci 8U, 161bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 162bf215546Sopenharmony_ci if (!cmd_buffer) 163bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci result = vk_command_buffer_init(&cmd_buffer->vk, pool, level); 166bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 167bf215546Sopenharmony_ci vk_free(&pool->alloc, cmd_buffer); 168bf215546Sopenharmony_ci return result; 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci cmd_buffer->vk.destroy = pvr_cmd_buffer_destroy; 172bf215546Sopenharmony_ci cmd_buffer->device = device; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci util_dynarray_init(&cmd_buffer->depth_bias_array, NULL); 175bf215546Sopenharmony_ci util_dynarray_init(&cmd_buffer->scissor_array, NULL); 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci cmd_buffer->state.status = VK_SUCCESS; 178bf215546Sopenharmony_ci cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL; 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci list_inithead(&cmd_buffer->sub_cmds); 181bf215546Sopenharmony_ci list_inithead(&cmd_buffer->bo_list); 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci *pCommandBuffer = pvr_cmd_buffer_to_handle(cmd_buffer); 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci return VK_SUCCESS; 186bf215546Sopenharmony_ci} 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ciVkResult 189bf215546Sopenharmony_cipvr_AllocateCommandBuffers(VkDevice _device, 190bf215546Sopenharmony_ci const VkCommandBufferAllocateInfo *pAllocateInfo, 191bf215546Sopenharmony_ci VkCommandBuffer *pCommandBuffers) 192bf215546Sopenharmony_ci{ 193bf215546Sopenharmony_ci VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool); 194bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_device, device, _device); 195bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 196bf215546Sopenharmony_ci uint32_t i; 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { 199bf215546Sopenharmony_ci result = pvr_cmd_buffer_create(device, 200bf215546Sopenharmony_ci pool, 201bf215546Sopenharmony_ci pAllocateInfo->level, 202bf215546Sopenharmony_ci &pCommandBuffers[i]); 203bf215546Sopenharmony_ci if (result != VK_SUCCESS) 204bf215546Sopenharmony_ci break; 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 208bf215546Sopenharmony_ci while (i--) { 209bf215546Sopenharmony_ci VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]); 210bf215546Sopenharmony_ci pvr_cmd_buffer_destroy(cmd_buffer); 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci for (i = 0; i < pAllocateInfo->commandBufferCount; i++) 214bf215546Sopenharmony_ci pCommandBuffers[i] = VK_NULL_HANDLE; 215bf215546Sopenharmony_ci } 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci return result; 218bf215546Sopenharmony_ci} 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_cistatic void pvr_cmd_buffer_update_barriers(struct pvr_cmd_buffer *cmd_buffer, 221bf215546Sopenharmony_ci enum pvr_sub_cmd_type type) 222bf215546Sopenharmony_ci{ 223bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 224bf215546Sopenharmony_ci uint32_t barriers; 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci switch (type) { 227bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_GRAPHICS: 228bf215546Sopenharmony_ci barriers = PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT; 229bf215546Sopenharmony_ci break; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_COMPUTE: 232bf215546Sopenharmony_ci barriers = PVR_PIPELINE_STAGE_COMPUTE_BIT; 233bf215546Sopenharmony_ci break; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_TRANSFER: 236bf215546Sopenharmony_ci barriers = PVR_PIPELINE_STAGE_TRANSFER_BIT; 237bf215546Sopenharmony_ci break; 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci default: 240bf215546Sopenharmony_ci barriers = 0; 241bf215546Sopenharmony_ci pvr_finishme("Unsupported sub-command type %d", type); 242bf215546Sopenharmony_ci break; 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci for (uint32_t i = 0; i < ARRAY_SIZE(state->barriers_needed); i++) 246bf215546Sopenharmony_ci state->barriers_needed[i] |= barriers; 247bf215546Sopenharmony_ci} 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_cistatic VkResult 250bf215546Sopenharmony_cipvr_cmd_buffer_upload_tables(struct pvr_device *device, 251bf215546Sopenharmony_ci struct pvr_cmd_buffer *cmd_buffer, 252bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const sub_cmd) 253bf215546Sopenharmony_ci{ 254bf215546Sopenharmony_ci const uint32_t cache_line_size = 255bf215546Sopenharmony_ci rogue_get_slc_cache_line_size(&device->pdevice->dev_info); 256bf215546Sopenharmony_ci VkResult result; 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci assert(!sub_cmd->depth_bias_bo && !sub_cmd->scissor_bo); 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci if (cmd_buffer->depth_bias_array.size > 0) { 261bf215546Sopenharmony_ci result = 262bf215546Sopenharmony_ci pvr_gpu_upload(device, 263bf215546Sopenharmony_ci device->heaps.general_heap, 264bf215546Sopenharmony_ci util_dynarray_begin(&cmd_buffer->depth_bias_array), 265bf215546Sopenharmony_ci cmd_buffer->depth_bias_array.size, 266bf215546Sopenharmony_ci cache_line_size, 267bf215546Sopenharmony_ci &sub_cmd->depth_bias_bo); 268bf215546Sopenharmony_ci if (result != VK_SUCCESS) 269bf215546Sopenharmony_ci return result; 270bf215546Sopenharmony_ci } 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci if (cmd_buffer->scissor_array.size > 0) { 273bf215546Sopenharmony_ci result = pvr_gpu_upload(device, 274bf215546Sopenharmony_ci device->heaps.general_heap, 275bf215546Sopenharmony_ci util_dynarray_begin(&cmd_buffer->scissor_array), 276bf215546Sopenharmony_ci cmd_buffer->scissor_array.size, 277bf215546Sopenharmony_ci cache_line_size, 278bf215546Sopenharmony_ci &sub_cmd->scissor_bo); 279bf215546Sopenharmony_ci if (result != VK_SUCCESS) 280bf215546Sopenharmony_ci goto err_free_depth_bias_bo; 281bf215546Sopenharmony_ci } 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci util_dynarray_clear(&cmd_buffer->depth_bias_array); 284bf215546Sopenharmony_ci util_dynarray_clear(&cmd_buffer->scissor_array); 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci return VK_SUCCESS; 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_cierr_free_depth_bias_bo: 289bf215546Sopenharmony_ci pvr_bo_free(device, sub_cmd->depth_bias_bo); 290bf215546Sopenharmony_ci sub_cmd->depth_bias_bo = NULL; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci return result; 293bf215546Sopenharmony_ci} 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_cistatic VkResult 296bf215546Sopenharmony_cipvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer, 297bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const sub_cmd) 298bf215546Sopenharmony_ci{ 299bf215546Sopenharmony_ci struct pvr_framebuffer *framebuffer = 300bf215546Sopenharmony_ci cmd_buffer->state.render_pass_info.framebuffer; 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci pvr_csb_emit (&sub_cmd->control_stream, VDMCTRL_PPP_STATE0, state0) { 303bf215546Sopenharmony_ci state0.addrmsb = framebuffer->ppp_state_bo->vma->dev_addr; 304bf215546Sopenharmony_ci state0.word_count = framebuffer->ppp_state_size; 305bf215546Sopenharmony_ci } 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci pvr_csb_emit (&sub_cmd->control_stream, VDMCTRL_PPP_STATE1, state1) { 308bf215546Sopenharmony_ci state1.addrlsb = framebuffer->ppp_state_bo->vma->dev_addr; 309bf215546Sopenharmony_ci } 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci return VK_SUCCESS; 312bf215546Sopenharmony_ci} 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_cistatic VkResult 315bf215546Sopenharmony_cipvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer, 316bf215546Sopenharmony_ci const void *const data, 317bf215546Sopenharmony_ci const size_t size, 318bf215546Sopenharmony_ci struct pvr_bo **const pvr_bo_out) 319bf215546Sopenharmony_ci{ 320bf215546Sopenharmony_ci struct pvr_device *const device = cmd_buffer->device; 321bf215546Sopenharmony_ci const uint32_t cache_line_size = 322bf215546Sopenharmony_ci rogue_get_slc_cache_line_size(&device->pdevice->dev_info); 323bf215546Sopenharmony_ci struct pvr_bo *pvr_bo; 324bf215546Sopenharmony_ci VkResult result; 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci result = pvr_gpu_upload(device, 327bf215546Sopenharmony_ci device->heaps.general_heap, 328bf215546Sopenharmony_ci data, 329bf215546Sopenharmony_ci size, 330bf215546Sopenharmony_ci cache_line_size, 331bf215546Sopenharmony_ci &pvr_bo); 332bf215546Sopenharmony_ci if (result != VK_SUCCESS) 333bf215546Sopenharmony_ci return result; 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci list_add(&pvr_bo->link, &cmd_buffer->bo_list); 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci *pvr_bo_out = pvr_bo; 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci return VK_SUCCESS; 340bf215546Sopenharmony_ci} 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_cistatic VkResult 343bf215546Sopenharmony_cipvr_cmd_buffer_upload_usc(struct pvr_cmd_buffer *const cmd_buffer, 344bf215546Sopenharmony_ci const void *const code, 345bf215546Sopenharmony_ci const size_t code_size, 346bf215546Sopenharmony_ci uint64_t code_alignment, 347bf215546Sopenharmony_ci struct pvr_bo **const pvr_bo_out) 348bf215546Sopenharmony_ci{ 349bf215546Sopenharmony_ci struct pvr_device *const device = cmd_buffer->device; 350bf215546Sopenharmony_ci const uint32_t cache_line_size = 351bf215546Sopenharmony_ci rogue_get_slc_cache_line_size(&device->pdevice->dev_info); 352bf215546Sopenharmony_ci struct pvr_bo *pvr_bo; 353bf215546Sopenharmony_ci VkResult result; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci code_alignment = MAX2(code_alignment, cache_line_size); 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci result = 358bf215546Sopenharmony_ci pvr_gpu_upload_usc(device, code, code_size, code_alignment, &pvr_bo); 359bf215546Sopenharmony_ci if (result != VK_SUCCESS) 360bf215546Sopenharmony_ci return result; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci list_add(&pvr_bo->link, &cmd_buffer->bo_list); 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci *pvr_bo_out = pvr_bo; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci return VK_SUCCESS; 367bf215546Sopenharmony_ci} 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_cistatic VkResult 370bf215546Sopenharmony_cipvr_cmd_buffer_upload_pds(struct pvr_cmd_buffer *const cmd_buffer, 371bf215546Sopenharmony_ci const uint32_t *data, 372bf215546Sopenharmony_ci uint32_t data_size_dwords, 373bf215546Sopenharmony_ci uint32_t data_alignment, 374bf215546Sopenharmony_ci const uint32_t *code, 375bf215546Sopenharmony_ci uint32_t code_size_dwords, 376bf215546Sopenharmony_ci uint32_t code_alignment, 377bf215546Sopenharmony_ci uint64_t min_alignment, 378bf215546Sopenharmony_ci struct pvr_pds_upload *const pds_upload_out) 379bf215546Sopenharmony_ci{ 380bf215546Sopenharmony_ci struct pvr_device *const device = cmd_buffer->device; 381bf215546Sopenharmony_ci VkResult result; 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci result = pvr_gpu_upload_pds(device, 384bf215546Sopenharmony_ci data, 385bf215546Sopenharmony_ci data_size_dwords, 386bf215546Sopenharmony_ci data_alignment, 387bf215546Sopenharmony_ci code, 388bf215546Sopenharmony_ci code_size_dwords, 389bf215546Sopenharmony_ci code_alignment, 390bf215546Sopenharmony_ci min_alignment, 391bf215546Sopenharmony_ci pds_upload_out); 392bf215546Sopenharmony_ci if (result != VK_SUCCESS) 393bf215546Sopenharmony_ci return result; 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci list_add(&pds_upload_out->pvr_bo->link, &cmd_buffer->bo_list); 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci return VK_SUCCESS; 398bf215546Sopenharmony_ci} 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_cistatic inline VkResult 401bf215546Sopenharmony_cipvr_cmd_buffer_upload_pds_data(struct pvr_cmd_buffer *const cmd_buffer, 402bf215546Sopenharmony_ci const uint32_t *data, 403bf215546Sopenharmony_ci uint32_t data_size_dwords, 404bf215546Sopenharmony_ci uint32_t data_alignment, 405bf215546Sopenharmony_ci struct pvr_pds_upload *const pds_upload_out) 406bf215546Sopenharmony_ci{ 407bf215546Sopenharmony_ci return pvr_cmd_buffer_upload_pds(cmd_buffer, 408bf215546Sopenharmony_ci data, 409bf215546Sopenharmony_ci data_size_dwords, 410bf215546Sopenharmony_ci data_alignment, 411bf215546Sopenharmony_ci NULL, 412bf215546Sopenharmony_ci 0, 413bf215546Sopenharmony_ci 0, 414bf215546Sopenharmony_ci data_alignment, 415bf215546Sopenharmony_ci pds_upload_out); 416bf215546Sopenharmony_ci} 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_cistatic VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( 419bf215546Sopenharmony_ci struct pvr_cmd_buffer *const cmd_buffer, 420bf215546Sopenharmony_ci const uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS], 421bf215546Sopenharmony_ci struct pvr_pds_upload *const pds_upload_out) 422bf215546Sopenharmony_ci{ 423bf215546Sopenharmony_ci struct pvr_pds_event_program pixel_event_program = { 424bf215546Sopenharmony_ci /* No data to DMA, just a DOUTU needed. */ 425bf215546Sopenharmony_ci .num_emit_word_pairs = 0, 426bf215546Sopenharmony_ci }; 427bf215546Sopenharmony_ci const uint32_t staging_buffer_size = 428bf215546Sopenharmony_ci cmd_buffer->device->pixel_event_data_size_in_dwords * sizeof(uint32_t); 429bf215546Sopenharmony_ci const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc; 430bf215546Sopenharmony_ci struct pvr_device *const device = cmd_buffer->device; 431bf215546Sopenharmony_ci /* FIXME: This should come from the compiler for the USC pixel program. */ 432bf215546Sopenharmony_ci const uint32_t usc_temp_count = 0; 433bf215546Sopenharmony_ci struct pvr_bo *usc_eot_program; 434bf215546Sopenharmony_ci uint8_t *usc_eot_program_ptr; 435bf215546Sopenharmony_ci uint32_t *staging_buffer; 436bf215546Sopenharmony_ci VkResult result; 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci result = pvr_cmd_buffer_upload_usc(cmd_buffer, 439bf215546Sopenharmony_ci pvr_end_of_tile_program, 440bf215546Sopenharmony_ci sizeof(pvr_end_of_tile_program), 441bf215546Sopenharmony_ci 4, 442bf215546Sopenharmony_ci &usc_eot_program); 443bf215546Sopenharmony_ci if (result != VK_SUCCESS) 444bf215546Sopenharmony_ci return result; 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci assert((pbe_cs_words[1] & 0x3F) == 0x20); 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ci /* FIXME: Stop patching the framebuffer address (this will require the 449bf215546Sopenharmony_ci * end-of-tile program to be generated at run-time). 450bf215546Sopenharmony_ci */ 451bf215546Sopenharmony_ci pvr_bo_cpu_map(device, usc_eot_program); 452bf215546Sopenharmony_ci usc_eot_program_ptr = usc_eot_program->bo->map; 453bf215546Sopenharmony_ci usc_eot_program_ptr[6] = (pbe_cs_words[0] >> 0) & 0xFF; 454bf215546Sopenharmony_ci usc_eot_program_ptr[7] = (pbe_cs_words[0] >> 8) & 0xFF; 455bf215546Sopenharmony_ci usc_eot_program_ptr[8] = (pbe_cs_words[0] >> 16) & 0xFF; 456bf215546Sopenharmony_ci usc_eot_program_ptr[9] = (pbe_cs_words[0] >> 24) & 0xFF; 457bf215546Sopenharmony_ci pvr_bo_cpu_unmap(device, usc_eot_program); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci pvr_pds_setup_doutu(&pixel_event_program.task_control, 460bf215546Sopenharmony_ci usc_eot_program->vma->dev_addr.addr, 461bf215546Sopenharmony_ci usc_temp_count, 462bf215546Sopenharmony_ci PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), 463bf215546Sopenharmony_ci false); 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci /* TODO: We could skip allocating this and generate directly into the device 466bf215546Sopenharmony_ci * buffer thus removing one allocation and memcpy() per job. Would this 467bf215546Sopenharmony_ci * speed up things in a noticeable way? 468bf215546Sopenharmony_ci */ 469bf215546Sopenharmony_ci staging_buffer = vk_alloc(allocator, 470bf215546Sopenharmony_ci staging_buffer_size, 471bf215546Sopenharmony_ci 8, 472bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 473bf215546Sopenharmony_ci if (!staging_buffer) { 474bf215546Sopenharmony_ci result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 475bf215546Sopenharmony_ci goto err_free_usc_pixel_program; 476bf215546Sopenharmony_ci } 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci /* Generate the data segment. The code segment was uploaded earlier when 479bf215546Sopenharmony_ci * setting up the PDS static heap data. 480bf215546Sopenharmony_ci */ 481bf215546Sopenharmony_ci pvr_pds_generate_pixel_event_data_segment(&pixel_event_program, 482bf215546Sopenharmony_ci staging_buffer, 483bf215546Sopenharmony_ci &device->pdevice->dev_info); 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci result = pvr_cmd_buffer_upload_pds_data( 486bf215546Sopenharmony_ci cmd_buffer, 487bf215546Sopenharmony_ci staging_buffer, 488bf215546Sopenharmony_ci cmd_buffer->device->pixel_event_data_size_in_dwords, 489bf215546Sopenharmony_ci 4, 490bf215546Sopenharmony_ci pds_upload_out); 491bf215546Sopenharmony_ci if (result != VK_SUCCESS) 492bf215546Sopenharmony_ci goto err_free_pixel_event_staging_buffer; 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci vk_free(allocator, staging_buffer); 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci return VK_SUCCESS; 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_cierr_free_pixel_event_staging_buffer: 499bf215546Sopenharmony_ci vk_free(allocator, staging_buffer); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_cierr_free_usc_pixel_program: 502bf215546Sopenharmony_ci list_del(&usc_eot_program->link); 503bf215546Sopenharmony_ci pvr_bo_free(device, usc_eot_program); 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci return result; 506bf215546Sopenharmony_ci} 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_cistatic uint32_t pvr_get_hw_clear_color(VkFormat vk_format, 509bf215546Sopenharmony_ci const VkClearValue *clear_value) 510bf215546Sopenharmony_ci{ 511bf215546Sopenharmony_ci union util_color uc = { .ui = 0 }; 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci switch (vk_format) { 514bf215546Sopenharmony_ci case VK_FORMAT_B8G8R8A8_UNORM: 515bf215546Sopenharmony_ci util_pack_color(clear_value->color.float32, 516bf215546Sopenharmony_ci PIPE_FORMAT_R8G8B8A8_UNORM, 517bf215546Sopenharmony_ci &uc); 518bf215546Sopenharmony_ci break; 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci default: 521bf215546Sopenharmony_ci assert(!"Unsupported format"); 522bf215546Sopenharmony_ci uc.ui[0] = 0; 523bf215546Sopenharmony_ci break; 524bf215546Sopenharmony_ci } 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci return uc.ui[0]; 527bf215546Sopenharmony_ci} 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_cistatic VkResult 530bf215546Sopenharmony_cipvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer, 531bf215546Sopenharmony_ci uint32_t idx, 532bf215546Sopenharmony_ci pvr_dev_addr_t *const addr_out) 533bf215546Sopenharmony_ci{ 534bf215546Sopenharmony_ci const struct pvr_render_pass_info *render_pass_info = 535bf215546Sopenharmony_ci &cmd_buffer->state.render_pass_info; 536bf215546Sopenharmony_ci const struct pvr_render_pass *pass = render_pass_info->pass; 537bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_render *hw_render = 538bf215546Sopenharmony_ci &pass->hw_setup->renders[idx]; 539bf215546Sopenharmony_ci ASSERTED const struct pvr_load_op *load_op = hw_render->client_data; 540bf215546Sopenharmony_ci const struct pvr_renderpass_colorinit *color_init = 541bf215546Sopenharmony_ci &hw_render->color_init[0]; 542bf215546Sopenharmony_ci const struct pvr_render_pass_attachment *attachment = 543bf215546Sopenharmony_ci &pass->attachments[color_init->driver_id]; 544bf215546Sopenharmony_ci const VkClearValue *clear_value = 545bf215546Sopenharmony_ci &render_pass_info->clear_values[color_init->driver_id]; 546bf215546Sopenharmony_ci uint32_t hw_clear_value; 547bf215546Sopenharmony_ci struct pvr_bo *clear_bo; 548bf215546Sopenharmony_ci VkResult result; 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci pvr_finishme("Add missing load op data support"); 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci assert(load_op->is_hw_object); 553bf215546Sopenharmony_ci assert(hw_render->color_init_count == 1); 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci /* FIXME: add support for RENDERPASS_SURFACE_INITOP_LOAD. */ 556bf215546Sopenharmony_ci assert(color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR); 557bf215546Sopenharmony_ci 558bf215546Sopenharmony_ci /* FIXME: do this at the point we store the clear values? */ 559bf215546Sopenharmony_ci hw_clear_value = pvr_get_hw_clear_color(attachment->vk_format, clear_value); 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci result = pvr_cmd_buffer_upload_general(cmd_buffer, 562bf215546Sopenharmony_ci &hw_clear_value, 563bf215546Sopenharmony_ci sizeof(hw_clear_value), 564bf215546Sopenharmony_ci &clear_bo); 565bf215546Sopenharmony_ci if (result != VK_SUCCESS) 566bf215546Sopenharmony_ci return result; 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci *addr_out = clear_bo->vma->dev_addr; 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci return VK_SUCCESS; 571bf215546Sopenharmony_ci} 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_cistatic VkResult pvr_load_op_pds_data_create_and_upload( 574bf215546Sopenharmony_ci struct pvr_cmd_buffer *cmd_buffer, 575bf215546Sopenharmony_ci uint32_t idx, 576bf215546Sopenharmony_ci pvr_dev_addr_t constants_addr, 577bf215546Sopenharmony_ci struct pvr_pds_upload *const pds_upload_out) 578bf215546Sopenharmony_ci{ 579bf215546Sopenharmony_ci const struct pvr_render_pass_info *render_pass_info = 580bf215546Sopenharmony_ci &cmd_buffer->state.render_pass_info; 581bf215546Sopenharmony_ci const struct pvr_load_op *load_op = 582bf215546Sopenharmony_ci render_pass_info->pass->hw_setup->renders[idx].client_data; 583bf215546Sopenharmony_ci struct pvr_device *device = cmd_buffer->device; 584bf215546Sopenharmony_ci const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 585bf215546Sopenharmony_ci struct pvr_pds_pixel_shader_sa_program program = { 0 }; 586bf215546Sopenharmony_ci uint32_t staging_buffer_size; 587bf215546Sopenharmony_ci uint32_t *staging_buffer; 588bf215546Sopenharmony_ci VkResult result; 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci program.num_texture_dma_kicks = 1; 591bf215546Sopenharmony_ci 592bf215546Sopenharmony_ci pvr_csb_pack (&program.texture_dma_address[0], 593bf215546Sopenharmony_ci PDSINST_DOUT_FIELDS_DOUTD_SRC0, 594bf215546Sopenharmony_ci value) { 595bf215546Sopenharmony_ci value.sbase = constants_addr; 596bf215546Sopenharmony_ci } 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci pvr_csb_pack (&program.texture_dma_control[0], 599bf215546Sopenharmony_ci PDSINST_DOUT_FIELDS_DOUTD_SRC1, 600bf215546Sopenharmony_ci value) { 601bf215546Sopenharmony_ci value.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE); 602bf215546Sopenharmony_ci value.a0 = load_op->shareds_dest_offset; 603bf215546Sopenharmony_ci value.bsize = load_op->shareds_count; 604bf215546Sopenharmony_ci } 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci pvr_pds_set_sizes_pixel_shader_sa_texture_data(&program, dev_info); 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci staging_buffer_size = program.data_size * sizeof(*staging_buffer); 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci staging_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc, 611bf215546Sopenharmony_ci staging_buffer_size, 612bf215546Sopenharmony_ci 8, 613bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 614bf215546Sopenharmony_ci if (!staging_buffer) 615bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci pvr_pds_generate_pixel_shader_sa_texture_state_data(&program, 618bf215546Sopenharmony_ci staging_buffer, 619bf215546Sopenharmony_ci dev_info); 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci result = pvr_cmd_buffer_upload_pds_data(cmd_buffer, 622bf215546Sopenharmony_ci staging_buffer, 623bf215546Sopenharmony_ci program.data_size, 624bf215546Sopenharmony_ci 1, 625bf215546Sopenharmony_ci pds_upload_out); 626bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 627bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer); 628bf215546Sopenharmony_ci return result; 629bf215546Sopenharmony_ci } 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer); 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci return VK_SUCCESS; 634bf215546Sopenharmony_ci} 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci/* FIXME: Should this function be specific to the HW background object, in 637bf215546Sopenharmony_ci * which case its name should be changed, or should it have the load op 638bf215546Sopenharmony_ci * structure passed in? 639bf215546Sopenharmony_ci */ 640bf215546Sopenharmony_cistatic VkResult 641bf215546Sopenharmony_cipvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer, 642bf215546Sopenharmony_ci uint32_t idx, 643bf215546Sopenharmony_ci struct pvr_pds_upload *const pds_upload_out) 644bf215546Sopenharmony_ci{ 645bf215546Sopenharmony_ci pvr_dev_addr_t constants_addr; 646bf215546Sopenharmony_ci VkResult result; 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci result = 649bf215546Sopenharmony_ci pvr_load_op_constants_create_and_upload(cmd_buffer, idx, &constants_addr); 650bf215546Sopenharmony_ci if (result != VK_SUCCESS) 651bf215546Sopenharmony_ci return result; 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci return pvr_load_op_pds_data_create_and_upload(cmd_buffer, 654bf215546Sopenharmony_ci idx, 655bf215546Sopenharmony_ci constants_addr, 656bf215546Sopenharmony_ci pds_upload_out); 657bf215546Sopenharmony_ci} 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_cistatic void pvr_pds_bgnd_pack_state( 660bf215546Sopenharmony_ci const struct pvr_load_op *load_op, 661bf215546Sopenharmony_ci const struct pvr_pds_upload *load_op_program, 662bf215546Sopenharmony_ci uint64_t pds_reg_values[static const ROGUE_NUM_CR_PDS_BGRND_WORDS]) 663bf215546Sopenharmony_ci{ 664bf215546Sopenharmony_ci pvr_csb_pack (&pds_reg_values[0], CR_PDS_BGRND0_BASE, value) { 665bf215546Sopenharmony_ci value.shader_addr = PVR_DEV_ADDR(load_op->pds_frag_prog.data_offset); 666bf215546Sopenharmony_ci value.texunicode_addr = 667bf215546Sopenharmony_ci PVR_DEV_ADDR(load_op->pds_tex_state_prog.code_offset); 668bf215546Sopenharmony_ci } 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_ci pvr_csb_pack (&pds_reg_values[1], CR_PDS_BGRND1_BASE, value) { 671bf215546Sopenharmony_ci value.texturedata_addr = PVR_DEV_ADDR(load_op_program->data_offset); 672bf215546Sopenharmony_ci } 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci pvr_csb_pack (&pds_reg_values[2], CR_PDS_BGRND3_SIZEINFO, value) { 675bf215546Sopenharmony_ci value.usc_sharedsize = 676bf215546Sopenharmony_ci DIV_ROUND_UP(load_op->const_shareds_count, 677bf215546Sopenharmony_ci PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE)); 678bf215546Sopenharmony_ci value.pds_texturestatesize = DIV_ROUND_UP( 679bf215546Sopenharmony_ci load_op_program->data_size, 680bf215546Sopenharmony_ci PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE)); 681bf215546Sopenharmony_ci value.pds_tempsize = 682bf215546Sopenharmony_ci DIV_ROUND_UP(load_op->temps_count, 683bf215546Sopenharmony_ci PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE)); 684bf215546Sopenharmony_ci } 685bf215546Sopenharmony_ci} 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci/** 688bf215546Sopenharmony_ci * \brief Calculates the stride in pixels based on the pitch in bytes and pixel 689bf215546Sopenharmony_ci * format. 690bf215546Sopenharmony_ci * 691bf215546Sopenharmony_ci * \param[in] pitch Width pitch in bytes. 692bf215546Sopenharmony_ci * \param[in] vk_format Vulkan image format. 693bf215546Sopenharmony_ci * \return Stride in pixels. 694bf215546Sopenharmony_ci */ 695bf215546Sopenharmony_cistatic inline uint32_t pvr_stride_from_pitch(uint32_t pitch, VkFormat vk_format) 696bf215546Sopenharmony_ci{ 697bf215546Sopenharmony_ci const unsigned int cpp = vk_format_get_blocksize(vk_format); 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci assert(pitch % cpp == 0); 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci return pitch / cpp; 702bf215546Sopenharmony_ci} 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_cistatic void pvr_setup_pbe_state( 705bf215546Sopenharmony_ci const struct pvr_device_info *dev_info, 706bf215546Sopenharmony_ci struct pvr_framebuffer *framebuffer, 707bf215546Sopenharmony_ci uint32_t mrt_index, 708bf215546Sopenharmony_ci const struct usc_mrt_resource *mrt_resource, 709bf215546Sopenharmony_ci const struct pvr_image_view *const iview, 710bf215546Sopenharmony_ci const VkRect2D *render_area, 711bf215546Sopenharmony_ci const bool down_scale, 712bf215546Sopenharmony_ci const uint32_t samples, 713bf215546Sopenharmony_ci uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS], 714bf215546Sopenharmony_ci uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS]) 715bf215546Sopenharmony_ci{ 716bf215546Sopenharmony_ci const struct pvr_image *image = iview->image; 717bf215546Sopenharmony_ci uint32_t level_pitch = image->mip_levels[iview->vk.base_mip_level].pitch; 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci struct pvr_pbe_surf_params surface_params; 720bf215546Sopenharmony_ci struct pvr_pbe_render_params render_params; 721bf215546Sopenharmony_ci bool with_packed_usc_channel; 722bf215546Sopenharmony_ci const uint8_t *swizzle; 723bf215546Sopenharmony_ci uint32_t position; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci /* down_scale should be true when performing a resolve, in which case there 726bf215546Sopenharmony_ci * should be more than one sample. 727bf215546Sopenharmony_ci */ 728bf215546Sopenharmony_ci assert((down_scale && samples > 1U) || (!down_scale && samples == 1U)); 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci /* Setup surface parameters. */ 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci if (PVR_HAS_FEATURE(dev_info, usc_f16sop_u8)) { 733bf215546Sopenharmony_ci switch (iview->vk.format) { 734bf215546Sopenharmony_ci case VK_FORMAT_B8G8R8A8_UNORM: 735bf215546Sopenharmony_ci with_packed_usc_channel = true; 736bf215546Sopenharmony_ci break; 737bf215546Sopenharmony_ci case VK_FORMAT_D32_SFLOAT: 738bf215546Sopenharmony_ci with_packed_usc_channel = false; 739bf215546Sopenharmony_ci break; 740bf215546Sopenharmony_ci default: 741bf215546Sopenharmony_ci unreachable("Unsupported Vulkan image format"); 742bf215546Sopenharmony_ci } 743bf215546Sopenharmony_ci } else { 744bf215546Sopenharmony_ci with_packed_usc_channel = false; 745bf215546Sopenharmony_ci } 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci swizzle = pvr_get_format_swizzle(iview->vk.format); 748bf215546Sopenharmony_ci memcpy(surface_params.swizzle, swizzle, sizeof(surface_params.swizzle)); 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci pvr_pbe_get_src_format_and_gamma(iview->vk.format, 751bf215546Sopenharmony_ci PVR_PBE_GAMMA_NONE, 752bf215546Sopenharmony_ci with_packed_usc_channel, 753bf215546Sopenharmony_ci &surface_params.source_format, 754bf215546Sopenharmony_ci &surface_params.gamma); 755bf215546Sopenharmony_ci 756bf215546Sopenharmony_ci surface_params.is_normalized = vk_format_is_normalized(iview->vk.format); 757bf215546Sopenharmony_ci surface_params.pbe_packmode = pvr_get_pbe_packmode(iview->vk.format); 758bf215546Sopenharmony_ci surface_params.nr_components = vk_format_get_nr_components(iview->vk.format); 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci /* FIXME: Should we have an inline function to return the address of a mip 761bf215546Sopenharmony_ci * level? 762bf215546Sopenharmony_ci */ 763bf215546Sopenharmony_ci surface_params.addr = 764bf215546Sopenharmony_ci PVR_DEV_ADDR_OFFSET(image->vma->dev_addr, 765bf215546Sopenharmony_ci image->mip_levels[iview->vk.base_mip_level].offset); 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci surface_params.mem_layout = image->memlayout; 768bf215546Sopenharmony_ci surface_params.stride = pvr_stride_from_pitch(level_pitch, iview->vk.format); 769bf215546Sopenharmony_ci surface_params.depth = iview->vk.extent.depth; 770bf215546Sopenharmony_ci surface_params.width = iview->vk.extent.width; 771bf215546Sopenharmony_ci surface_params.height = iview->vk.extent.height; 772bf215546Sopenharmony_ci surface_params.z_only_render = false; 773bf215546Sopenharmony_ci surface_params.down_scale = down_scale; 774bf215546Sopenharmony_ci surface_params.msaa_mode = samples; 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci /* Setup render parameters. */ 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_MEMORY) { 779bf215546Sopenharmony_ci position = mrt_resource->u.mem.offset_in_dwords; 780bf215546Sopenharmony_ci } else { 781bf215546Sopenharmony_ci assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER); 782bf215546Sopenharmony_ci assert(mrt_resource->u.reg.offset == 0); 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci position = mrt_resource->u.reg.out_reg; 785bf215546Sopenharmony_ci } 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_ci assert(position <= 3 || PVR_HAS_FEATURE(dev_info, eight_output_registers)); 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci switch (position) { 790bf215546Sopenharmony_ci case 0: 791bf215546Sopenharmony_ci case 4: 792bf215546Sopenharmony_ci render_params.source_start = PVR_PBE_STARTPOS_BIT0; 793bf215546Sopenharmony_ci break; 794bf215546Sopenharmony_ci case 1: 795bf215546Sopenharmony_ci case 5: 796bf215546Sopenharmony_ci render_params.source_start = PVR_PBE_STARTPOS_BIT32; 797bf215546Sopenharmony_ci break; 798bf215546Sopenharmony_ci case 2: 799bf215546Sopenharmony_ci case 6: 800bf215546Sopenharmony_ci render_params.source_start = PVR_PBE_STARTPOS_BIT64; 801bf215546Sopenharmony_ci break; 802bf215546Sopenharmony_ci case 3: 803bf215546Sopenharmony_ci case 7: 804bf215546Sopenharmony_ci render_params.source_start = PVR_PBE_STARTPOS_BIT96; 805bf215546Sopenharmony_ci break; 806bf215546Sopenharmony_ci default: 807bf215546Sopenharmony_ci assert(!"Invalid output register"); 808bf215546Sopenharmony_ci break; 809bf215546Sopenharmony_ci } 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci render_params.min_x_clip = MAX2(0, render_area->offset.x); 812bf215546Sopenharmony_ci render_params.min_y_clip = MAX2(0, render_area->offset.y); 813bf215546Sopenharmony_ci render_params.max_x_clip = 814bf215546Sopenharmony_ci MIN2(framebuffer->width, 815bf215546Sopenharmony_ci render_area->offset.x + render_area->extent.width) - 816bf215546Sopenharmony_ci 1; 817bf215546Sopenharmony_ci render_params.max_y_clip = 818bf215546Sopenharmony_ci MIN2(framebuffer->height, 819bf215546Sopenharmony_ci render_area->offset.y + render_area->extent.height) - 820bf215546Sopenharmony_ci 1; 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci render_params.slice = 0; 823bf215546Sopenharmony_ci render_params.mrt_index = mrt_index; 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci pvr_pbe_pack_state(dev_info, 826bf215546Sopenharmony_ci &surface_params, 827bf215546Sopenharmony_ci &render_params, 828bf215546Sopenharmony_ci pbe_cs_words, 829bf215546Sopenharmony_ci pbe_reg_words); 830bf215546Sopenharmony_ci} 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_cistatic struct pvr_render_target * 833bf215546Sopenharmony_cipvr_get_render_target(const struct pvr_render_pass *pass, 834bf215546Sopenharmony_ci const struct pvr_framebuffer *framebuffer, 835bf215546Sopenharmony_ci uint32_t idx) 836bf215546Sopenharmony_ci{ 837bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_render *hw_render = 838bf215546Sopenharmony_ci &pass->hw_setup->renders[idx]; 839bf215546Sopenharmony_ci uint32_t rt_idx = 0; 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci switch (hw_render->sample_count) { 842bf215546Sopenharmony_ci case 1: 843bf215546Sopenharmony_ci case 2: 844bf215546Sopenharmony_ci case 4: 845bf215546Sopenharmony_ci case 8: 846bf215546Sopenharmony_ci rt_idx = util_logbase2(hw_render->sample_count); 847bf215546Sopenharmony_ci break; 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci default: 850bf215546Sopenharmony_ci unreachable("Unsupported sample count"); 851bf215546Sopenharmony_ci break; 852bf215546Sopenharmony_ci } 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci return &framebuffer->render_targets[rt_idx]; 855bf215546Sopenharmony_ci} 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_cistatic uint32_t 858bf215546Sopenharmony_cipvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass, 859bf215546Sopenharmony_ci uint32_t idx, 860bf215546Sopenharmony_ci const struct pvr_device_info *dev_info) 861bf215546Sopenharmony_ci{ 862bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_render *hw_render = 863bf215546Sopenharmony_ci &pass->hw_setup->renders[idx]; 864bf215546Sopenharmony_ci /* Default value based on the maximum value found in all existing cores. The 865bf215546Sopenharmony_ci * maximum is used as this is being treated as a lower bound, making it a 866bf215546Sopenharmony_ci * "safer" choice than the minimum value found in all existing cores. 867bf215546Sopenharmony_ci */ 868bf215546Sopenharmony_ci const uint32_t min_output_regs = 869bf215546Sopenharmony_ci PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 2U); 870bf215546Sopenharmony_ci const uint32_t width = MAX2(hw_render->output_regs_count, min_output_regs); 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci return util_next_power_of_two(width); 873bf215546Sopenharmony_ci} 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_cistatic VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, 876bf215546Sopenharmony_ci struct pvr_cmd_buffer *cmd_buffer, 877bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *sub_cmd) 878bf215546Sopenharmony_ci{ 879bf215546Sopenharmony_ci struct pvr_render_pass_info *render_pass_info = 880bf215546Sopenharmony_ci &cmd_buffer->state.render_pass_info; 881bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_render *hw_render = 882bf215546Sopenharmony_ci &render_pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx]; 883bf215546Sopenharmony_ci struct pvr_render_job *job = &sub_cmd->job; 884bf215546Sopenharmony_ci struct pvr_pds_upload pds_pixel_event_program; 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS] 887bf215546Sopenharmony_ci [ROGUE_NUM_PBESTATE_STATE_WORDS]; 888bf215546Sopenharmony_ci struct pvr_render_target *render_target; 889bf215546Sopenharmony_ci VkResult result; 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_ci assert(hw_render->eot_surface_count < ARRAY_SIZE(pbe_cs_words)); 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci for (uint32_t i = 0; i < hw_render->eot_surface_count; i++) { 894bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_eot_surface *surface = 895bf215546Sopenharmony_ci &hw_render->eot_surfaces[i]; 896bf215546Sopenharmony_ci const struct pvr_image_view *iview = 897bf215546Sopenharmony_ci render_pass_info->attachments[surface->attachment_index]; 898bf215546Sopenharmony_ci const struct usc_mrt_resource *mrt_resource = 899bf215546Sopenharmony_ci &hw_render->eot_setup.mrt_resources[surface->mrt_index]; 900bf215546Sopenharmony_ci uint32_t samples = 1; 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_ci if (surface->need_resolve) 903bf215546Sopenharmony_ci pvr_finishme("Set up job resolve information."); 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_ci pvr_setup_pbe_state(dev_info, 906bf215546Sopenharmony_ci render_pass_info->framebuffer, 907bf215546Sopenharmony_ci surface->mrt_index, 908bf215546Sopenharmony_ci mrt_resource, 909bf215546Sopenharmony_ci iview, 910bf215546Sopenharmony_ci &render_pass_info->render_area, 911bf215546Sopenharmony_ci surface->need_resolve, 912bf215546Sopenharmony_ci samples, 913bf215546Sopenharmony_ci pbe_cs_words[i], 914bf215546Sopenharmony_ci job->pbe_reg_words[i]); 915bf215546Sopenharmony_ci } 916bf215546Sopenharmony_ci 917bf215546Sopenharmony_ci /* FIXME: The fragment program only supports a single surface at present. */ 918bf215546Sopenharmony_ci assert(hw_render->eot_surface_count == 1); 919bf215546Sopenharmony_ci result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( 920bf215546Sopenharmony_ci cmd_buffer, 921bf215546Sopenharmony_ci pbe_cs_words[0], 922bf215546Sopenharmony_ci &pds_pixel_event_program); 923bf215546Sopenharmony_ci if (result != VK_SUCCESS) 924bf215546Sopenharmony_ci return result; 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset; 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_ci /* FIXME: Don't do this if there is a barrier load. */ 929bf215546Sopenharmony_ci if (render_pass_info->enable_bg_tag) { 930bf215546Sopenharmony_ci const struct pvr_load_op *load_op = hw_render->client_data; 931bf215546Sopenharmony_ci struct pvr_pds_upload load_op_program; 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci /* FIXME: Should we free the PDS pixel event data or let it be freed 934bf215546Sopenharmony_ci * when the pool gets emptied? 935bf215546Sopenharmony_ci */ 936bf215546Sopenharmony_ci result = pvr_load_op_data_create_and_upload(cmd_buffer, 937bf215546Sopenharmony_ci sub_cmd->hw_render_idx, 938bf215546Sopenharmony_ci &load_op_program); 939bf215546Sopenharmony_ci if (result != VK_SUCCESS) 940bf215546Sopenharmony_ci return result; 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci pvr_pds_bgnd_pack_state(load_op, 943bf215546Sopenharmony_ci &load_op_program, 944bf215546Sopenharmony_ci job->pds_bgnd_reg_values); 945bf215546Sopenharmony_ci } 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci job->enable_bg_tag = render_pass_info->enable_bg_tag; 948bf215546Sopenharmony_ci job->process_empty_tiles = render_pass_info->process_empty_tiles; 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_ci render_target = pvr_get_render_target(render_pass_info->pass, 951bf215546Sopenharmony_ci render_pass_info->framebuffer, 952bf215546Sopenharmony_ci sub_cmd->hw_render_idx); 953bf215546Sopenharmony_ci job->rt_dataset = render_target->rt_dataset; 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci job->ctrl_stream_addr = pvr_csb_get_start_address(&sub_cmd->control_stream); 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_ci /* FIXME: Need to set up the border color table at device creation 958bf215546Sopenharmony_ci * time. Set to invalid for the time being. 959bf215546Sopenharmony_ci */ 960bf215546Sopenharmony_ci job->border_colour_table_addr = PVR_DEV_ADDR_INVALID; 961bf215546Sopenharmony_ci 962bf215546Sopenharmony_ci if (sub_cmd->depth_bias_bo) 963bf215546Sopenharmony_ci job->depth_bias_table_addr = sub_cmd->depth_bias_bo->vma->dev_addr; 964bf215546Sopenharmony_ci else 965bf215546Sopenharmony_ci job->depth_bias_table_addr = PVR_DEV_ADDR_INVALID; 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_ci if (sub_cmd->scissor_bo) 968bf215546Sopenharmony_ci job->scissor_table_addr = sub_cmd->scissor_bo->vma->dev_addr; 969bf215546Sopenharmony_ci else 970bf215546Sopenharmony_ci job->scissor_table_addr = PVR_DEV_ADDR_INVALID; 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci job->pixel_output_width = 973bf215546Sopenharmony_ci pvr_pass_get_pixel_output_width(render_pass_info->pass, 974bf215546Sopenharmony_ci sub_cmd->hw_render_idx, 975bf215546Sopenharmony_ci dev_info); 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci if (hw_render->ds_surface_id != -1) { 978bf215546Sopenharmony_ci struct pvr_image_view *iview = 979bf215546Sopenharmony_ci render_pass_info->attachments[hw_render->ds_surface_id]; 980bf215546Sopenharmony_ci const struct pvr_image *image = iview->image; 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci if (vk_format_has_depth(image->vk.format)) { 983bf215546Sopenharmony_ci uint32_t level_pitch = 984bf215546Sopenharmony_ci image->mip_levels[iview->vk.base_mip_level].pitch; 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_ci /* FIXME: Is this sufficient for depth buffers? */ 987bf215546Sopenharmony_ci job->depth_addr = image->dev_addr; 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci job->depth_stride = 990bf215546Sopenharmony_ci pvr_stride_from_pitch(level_pitch, iview->vk.format); 991bf215546Sopenharmony_ci job->depth_height = iview->vk.extent.height; 992bf215546Sopenharmony_ci job->depth_physical_width = 993bf215546Sopenharmony_ci u_minify(image->physical_extent.width, iview->vk.base_mip_level); 994bf215546Sopenharmony_ci job->depth_physical_height = 995bf215546Sopenharmony_ci u_minify(image->physical_extent.height, iview->vk.base_mip_level); 996bf215546Sopenharmony_ci job->depth_layer_size = image->layer_size; 997bf215546Sopenharmony_ci 998bf215546Sopenharmony_ci if (hw_render->ds_surface_id < render_pass_info->clear_value_count) { 999bf215546Sopenharmony_ci VkClearValue *clear_values = 1000bf215546Sopenharmony_ci &render_pass_info->clear_values[hw_render->ds_surface_id]; 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci job->depth_clear_value = clear_values->depthStencil.depth; 1003bf215546Sopenharmony_ci } else { 1004bf215546Sopenharmony_ci job->depth_clear_value = 1.0f; 1005bf215546Sopenharmony_ci } 1006bf215546Sopenharmony_ci 1007bf215546Sopenharmony_ci job->depth_vk_format = iview->vk.format; 1008bf215546Sopenharmony_ci 1009bf215546Sopenharmony_ci job->depth_memlayout = image->memlayout; 1010bf215546Sopenharmony_ci } else { 1011bf215546Sopenharmony_ci job->depth_addr = PVR_DEV_ADDR_INVALID; 1012bf215546Sopenharmony_ci job->depth_stride = 0; 1013bf215546Sopenharmony_ci job->depth_height = 0; 1014bf215546Sopenharmony_ci job->depth_physical_width = 0; 1015bf215546Sopenharmony_ci job->depth_physical_height = 0; 1016bf215546Sopenharmony_ci job->depth_layer_size = 0; 1017bf215546Sopenharmony_ci job->depth_clear_value = 1.0f; 1018bf215546Sopenharmony_ci job->depth_vk_format = VK_FORMAT_UNDEFINED; 1019bf215546Sopenharmony_ci job->depth_memlayout = PVR_MEMLAYOUT_LINEAR; 1020bf215546Sopenharmony_ci } 1021bf215546Sopenharmony_ci 1022bf215546Sopenharmony_ci if (vk_format_has_stencil(image->vk.format)) { 1023bf215546Sopenharmony_ci /* FIXME: Is this sufficient for stencil buffers? */ 1024bf215546Sopenharmony_ci job->stencil_addr = image->dev_addr; 1025bf215546Sopenharmony_ci } else { 1026bf215546Sopenharmony_ci job->stencil_addr = PVR_DEV_ADDR_INVALID; 1027bf215546Sopenharmony_ci } 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci job->samples = image->vk.samples; 1030bf215546Sopenharmony_ci } else { 1031bf215546Sopenharmony_ci pvr_finishme("Set up correct number of samples for render job"); 1032bf215546Sopenharmony_ci 1033bf215546Sopenharmony_ci job->depth_addr = PVR_DEV_ADDR_INVALID; 1034bf215546Sopenharmony_ci job->depth_stride = 0; 1035bf215546Sopenharmony_ci job->depth_height = 0; 1036bf215546Sopenharmony_ci job->depth_physical_width = 0; 1037bf215546Sopenharmony_ci job->depth_physical_height = 0; 1038bf215546Sopenharmony_ci job->depth_layer_size = 0; 1039bf215546Sopenharmony_ci job->depth_clear_value = 1.0f; 1040bf215546Sopenharmony_ci job->depth_vk_format = VK_FORMAT_UNDEFINED; 1041bf215546Sopenharmony_ci job->depth_memlayout = PVR_MEMLAYOUT_LINEAR; 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci job->stencil_addr = PVR_DEV_ADDR_INVALID; 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci job->samples = 1; 1046bf215546Sopenharmony_ci } 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci if (sub_cmd->max_tiles_in_flight == 1049bf215546Sopenharmony_ci PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U)) { 1050bf215546Sopenharmony_ci /* Use the default limit based on the partition store. */ 1051bf215546Sopenharmony_ci job->max_tiles_in_flight = 0U; 1052bf215546Sopenharmony_ci } else { 1053bf215546Sopenharmony_ci job->max_tiles_in_flight = sub_cmd->max_tiles_in_flight; 1054bf215546Sopenharmony_ci } 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_ci job->frag_uses_atomic_ops = sub_cmd->frag_uses_atomic_ops; 1057bf215546Sopenharmony_ci job->disable_compute_overlap = false; 1058bf215546Sopenharmony_ci job->max_shared_registers = cmd_buffer->state.max_shared_regs; 1059bf215546Sopenharmony_ci job->run_frag = true; 1060bf215546Sopenharmony_ci job->geometry_terminate = true; 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci return VK_SUCCESS; 1063bf215546Sopenharmony_ci} 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci/* Number of shareds used in the Issue Data Fence(IDF)/Wait Data Fence(WDF) 1066bf215546Sopenharmony_ci * kernel. 1067bf215546Sopenharmony_ci */ 1068bf215546Sopenharmony_ci#define PVR_IDF_WDF_IN_REGISTER_CONST_COUNT 12U 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_cistatic void 1071bf215546Sopenharmony_cipvr_sub_cmd_compute_job_init(const struct pvr_physical_device *pdevice, 1072bf215546Sopenharmony_ci struct pvr_cmd_buffer *cmd_buffer, 1073bf215546Sopenharmony_ci struct pvr_sub_cmd_compute *sub_cmd) 1074bf215546Sopenharmony_ci{ 1075bf215546Sopenharmony_ci const struct pvr_device_runtime_info *dev_runtime_info = 1076bf215546Sopenharmony_ci &pdevice->dev_runtime_info; 1077bf215546Sopenharmony_ci const struct pvr_device_info *dev_info = &pdevice->dev_info; 1078bf215546Sopenharmony_ci 1079bf215546Sopenharmony_ci if (sub_cmd->uses_barrier) 1080bf215546Sopenharmony_ci sub_cmd->submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP; 1081bf215546Sopenharmony_ci 1082bf215546Sopenharmony_ci pvr_csb_pack (&sub_cmd->submit_info.regs.cdm_ctrl_stream_base, 1083bf215546Sopenharmony_ci CR_CDM_CTRL_STREAM_BASE, 1084bf215546Sopenharmony_ci value) { 1085bf215546Sopenharmony_ci value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream); 1086bf215546Sopenharmony_ci } 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci /* FIXME: Need to set up the border color table at device creation 1089bf215546Sopenharmony_ci * time. Set to invalid for the time being. 1090bf215546Sopenharmony_ci */ 1091bf215546Sopenharmony_ci pvr_csb_pack (&sub_cmd->submit_info.regs.tpu_border_colour_table, 1092bf215546Sopenharmony_ci CR_TPU_BORDER_COLOUR_TABLE_CDM, 1093bf215546Sopenharmony_ci value) { 1094bf215546Sopenharmony_ci value.border_colour_table_address = PVR_DEV_ADDR_INVALID; 1095bf215546Sopenharmony_ci } 1096bf215546Sopenharmony_ci 1097bf215546Sopenharmony_ci sub_cmd->num_shared_regs = MAX2(cmd_buffer->device->idfwdf_state.usc_shareds, 1098bf215546Sopenharmony_ci cmd_buffer->state.max_shared_regs); 1099bf215546Sopenharmony_ci 1100bf215546Sopenharmony_ci cmd_buffer->state.max_shared_regs = 0U; 1101bf215546Sopenharmony_ci 1102bf215546Sopenharmony_ci if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) 1103bf215546Sopenharmony_ci sub_cmd->submit_info.regs.cdm_item = 0; 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci pvr_csb_pack (&sub_cmd->submit_info.regs.tpu, CR_TPU, value) { 1106bf215546Sopenharmony_ci value.tag_cem_4k_face_packing = true; 1107bf215546Sopenharmony_ci } 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci if (PVR_HAS_FEATURE(dev_info, cluster_grouping) && 1110bf215546Sopenharmony_ci PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && 1111bf215546Sopenharmony_ci dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) { 1112bf215546Sopenharmony_ci /* Each phantom has its own MCU, so atomicity can only be guaranteed 1113bf215546Sopenharmony_ci * when all work items are processed on the same phantom. This means we 1114bf215546Sopenharmony_ci * need to disable all USCs other than those of the first phantom, which 1115bf215546Sopenharmony_ci * has 4 clusters. 1116bf215546Sopenharmony_ci */ 1117bf215546Sopenharmony_ci pvr_csb_pack (&sub_cmd->submit_info.regs.compute_cluster, 1118bf215546Sopenharmony_ci CR_COMPUTE_CLUSTER, 1119bf215546Sopenharmony_ci value) { 1120bf215546Sopenharmony_ci value.mask = 0xFU; 1121bf215546Sopenharmony_ci } 1122bf215546Sopenharmony_ci } else { 1123bf215546Sopenharmony_ci pvr_csb_pack (&sub_cmd->submit_info.regs.compute_cluster, 1124bf215546Sopenharmony_ci CR_COMPUTE_CLUSTER, 1125bf215546Sopenharmony_ci value) { 1126bf215546Sopenharmony_ci value.mask = 0U; 1127bf215546Sopenharmony_ci } 1128bf215546Sopenharmony_ci } 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) && 1131bf215546Sopenharmony_ci sub_cmd->uses_atomic_ops) { 1132bf215546Sopenharmony_ci sub_cmd->submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE; 1133bf215546Sopenharmony_ci } 1134bf215546Sopenharmony_ci} 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_ci#define PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS \ 1137bf215546Sopenharmony_ci (1024 / PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)) 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_cistatic uint32_t 1140bf215546Sopenharmony_cipvr_compute_flat_slot_size(const struct pvr_physical_device *pdevice, 1141bf215546Sopenharmony_ci uint32_t coeff_regs_count, 1142bf215546Sopenharmony_ci bool use_barrier, 1143bf215546Sopenharmony_ci uint32_t total_workitems) 1144bf215546Sopenharmony_ci{ 1145bf215546Sopenharmony_ci const struct pvr_device_runtime_info *dev_runtime_info = 1146bf215546Sopenharmony_ci &pdevice->dev_runtime_info; 1147bf215546Sopenharmony_ci const struct pvr_device_info *dev_info = &pdevice->dev_info; 1148bf215546Sopenharmony_ci uint32_t max_workgroups_per_task = ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK; 1149bf215546Sopenharmony_ci uint32_t max_avail_coeff_regs = 1150bf215546Sopenharmony_ci dev_runtime_info->cdm_max_local_mem_size_regs; 1151bf215546Sopenharmony_ci uint32_t localstore_chunks_count = 1152bf215546Sopenharmony_ci DIV_ROUND_UP(coeff_regs_count << 2, 1153bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)); 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci /* Ensure that we cannot have more workgroups in a slot than the available 1156bf215546Sopenharmony_ci * number of coefficients allow us to have. 1157bf215546Sopenharmony_ci */ 1158bf215546Sopenharmony_ci if (coeff_regs_count > 0U) { 1159bf215546Sopenharmony_ci /* If TA or 3D can overlap with CDM, or if the TA is running a geometry 1160bf215546Sopenharmony_ci * shader then we need to consider this in calculating max allowed 1161bf215546Sopenharmony_ci * work-groups. 1162bf215546Sopenharmony_ci */ 1163bf215546Sopenharmony_ci if (PVR_HAS_QUIRK(dev_info, 52354) && 1164bf215546Sopenharmony_ci (PVR_HAS_FEATURE(dev_info, compute_overlap) || 1165bf215546Sopenharmony_ci PVR_HAS_FEATURE(dev_info, gs_rta_support))) { 1166bf215546Sopenharmony_ci /* Solve for n (number of work-groups per task). All values are in 1167bf215546Sopenharmony_ci * size of common store alloc blocks: 1168bf215546Sopenharmony_ci * 1169bf215546Sopenharmony_ci * n + (2n + 7) * (local_memory_size_max - 1) = 1170bf215546Sopenharmony_ci * (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max) 1171bf215546Sopenharmony_ci * ==> 1172bf215546Sopenharmony_ci * n + 2n * (local_memory_size_max - 1) = 1173bf215546Sopenharmony_ci * (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max) 1174bf215546Sopenharmony_ci * - (7 * (local_memory_size_max - 1)) 1175bf215546Sopenharmony_ci * ==> 1176bf215546Sopenharmony_ci * n * (1 + 2 * (local_memory_size_max - 1)) = 1177bf215546Sopenharmony_ci * (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max) 1178bf215546Sopenharmony_ci * - (7 * (local_memory_size_max - 1)) 1179bf215546Sopenharmony_ci * ==> 1180bf215546Sopenharmony_ci * n = ((coefficient_memory_pool_size) - 1181bf215546Sopenharmony_ci * (7 * pixel_allocation_size_max) - 1182bf215546Sopenharmony_ci * (7 * (local_memory_size_max - 1)) / (1 + 1183bf215546Sopenharmony_ci * 2 * (local_memory_size_max - 1))) 1184bf215546Sopenharmony_ci */ 1185bf215546Sopenharmony_ci uint32_t max_common_store_blocks = 1186bf215546Sopenharmony_ci DIV_ROUND_UP(max_avail_coeff_regs * 4U, 1187bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)); 1188bf215546Sopenharmony_ci 1189bf215546Sopenharmony_ci /* (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max) 1190bf215546Sopenharmony_ci */ 1191bf215546Sopenharmony_ci max_common_store_blocks -= ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES * 1192bf215546Sopenharmony_ci PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS; 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_ci /* - (7 * (local_memory_size_max - 1)) */ 1195bf215546Sopenharmony_ci max_common_store_blocks -= (ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES * 1196bf215546Sopenharmony_ci (localstore_chunks_count - 1U)); 1197bf215546Sopenharmony_ci 1198bf215546Sopenharmony_ci /* Divide by (1 + 2 * (local_memory_size_max - 1)) */ 1199bf215546Sopenharmony_ci max_workgroups_per_task = max_common_store_blocks / 1200bf215546Sopenharmony_ci (1U + 2U * (localstore_chunks_count - 1U)); 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_ci max_workgroups_per_task = 1203bf215546Sopenharmony_ci MIN2(max_workgroups_per_task, 1204bf215546Sopenharmony_ci ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK); 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci } else { 1207bf215546Sopenharmony_ci max_workgroups_per_task = 1208bf215546Sopenharmony_ci MIN2((max_avail_coeff_regs / coeff_regs_count), 1209bf215546Sopenharmony_ci max_workgroups_per_task); 1210bf215546Sopenharmony_ci } 1211bf215546Sopenharmony_ci } 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_ci /* max_workgroups_per_task should at least be one. */ 1214bf215546Sopenharmony_ci assert(max_workgroups_per_task >= 1U); 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_ci if (total_workitems >= ROGUE_MAX_INSTANCES_PER_TASK) { 1217bf215546Sopenharmony_ci /* In this case, the work group size will have been padded up to the 1218bf215546Sopenharmony_ci * next ROGUE_MAX_INSTANCES_PER_TASK so we just set max instances to be 1219bf215546Sopenharmony_ci * ROGUE_MAX_INSTANCES_PER_TASK. 1220bf215546Sopenharmony_ci */ 1221bf215546Sopenharmony_ci return ROGUE_MAX_INSTANCES_PER_TASK; 1222bf215546Sopenharmony_ci } 1223bf215546Sopenharmony_ci 1224bf215546Sopenharmony_ci /* In this case, the number of instances in the slot must be clamped to 1225bf215546Sopenharmony_ci * accommodate whole work-groups only. 1226bf215546Sopenharmony_ci */ 1227bf215546Sopenharmony_ci if (PVR_HAS_QUIRK(dev_info, 49032) || use_barrier) { 1228bf215546Sopenharmony_ci max_workgroups_per_task = 1229bf215546Sopenharmony_ci MIN2(max_workgroups_per_task, 1230bf215546Sopenharmony_ci ROGUE_MAX_INSTANCES_PER_TASK / total_workitems); 1231bf215546Sopenharmony_ci return total_workitems * max_workgroups_per_task; 1232bf215546Sopenharmony_ci } 1233bf215546Sopenharmony_ci 1234bf215546Sopenharmony_ci return MIN2(total_workitems * max_workgroups_per_task, 1235bf215546Sopenharmony_ci ROGUE_MAX_INSTANCES_PER_TASK); 1236bf215546Sopenharmony_ci} 1237bf215546Sopenharmony_ci 1238bf215546Sopenharmony_cistatic void 1239bf215546Sopenharmony_cipvr_compute_generate_control_stream(struct pvr_csb *csb, 1240bf215546Sopenharmony_ci struct pvr_sub_cmd_compute *sub_cmd, 1241bf215546Sopenharmony_ci const struct pvr_compute_kernel_info *info) 1242bf215546Sopenharmony_ci{ 1243bf215546Sopenharmony_ci /* Compute kernel 0. */ 1244bf215546Sopenharmony_ci pvr_csb_emit (csb, CDMCTRL_KERNEL0, kernel0) { 1245bf215546Sopenharmony_ci kernel0.indirect_present = !!info->indirect_buffer_addr.addr; 1246bf215546Sopenharmony_ci kernel0.global_offsets_present = info->global_offsets_present; 1247bf215546Sopenharmony_ci kernel0.usc_common_size = info->usc_common_size; 1248bf215546Sopenharmony_ci kernel0.usc_unified_size = info->usc_unified_size; 1249bf215546Sopenharmony_ci kernel0.pds_temp_size = info->pds_temp_size; 1250bf215546Sopenharmony_ci kernel0.pds_data_size = info->pds_data_size; 1251bf215546Sopenharmony_ci kernel0.usc_target = info->usc_target; 1252bf215546Sopenharmony_ci kernel0.fence = info->is_fence; 1253bf215546Sopenharmony_ci } 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_ci /* Compute kernel 1. */ 1256bf215546Sopenharmony_ci pvr_csb_emit (csb, CDMCTRL_KERNEL1, kernel1) { 1257bf215546Sopenharmony_ci kernel1.data_addr = PVR_DEV_ADDR(info->pds_data_offset); 1258bf215546Sopenharmony_ci kernel1.sd_type = info->sd_type; 1259bf215546Sopenharmony_ci kernel1.usc_common_shared = info->usc_common_shared; 1260bf215546Sopenharmony_ci } 1261bf215546Sopenharmony_ci 1262bf215546Sopenharmony_ci /* Compute kernel 2. */ 1263bf215546Sopenharmony_ci pvr_csb_emit (csb, CDMCTRL_KERNEL2, kernel2) { 1264bf215546Sopenharmony_ci kernel2.code_addr = PVR_DEV_ADDR(info->pds_code_offset); 1265bf215546Sopenharmony_ci } 1266bf215546Sopenharmony_ci 1267bf215546Sopenharmony_ci if (info->indirect_buffer_addr.addr) { 1268bf215546Sopenharmony_ci /* Compute kernel 6. */ 1269bf215546Sopenharmony_ci pvr_csb_emit (csb, CDMCTRL_KERNEL6, kernel6) { 1270bf215546Sopenharmony_ci kernel6.indirect_addrmsb = info->indirect_buffer_addr; 1271bf215546Sopenharmony_ci } 1272bf215546Sopenharmony_ci 1273bf215546Sopenharmony_ci /* Compute kernel 7. */ 1274bf215546Sopenharmony_ci pvr_csb_emit (csb, CDMCTRL_KERNEL7, kernel7) { 1275bf215546Sopenharmony_ci kernel7.indirect_addrlsb = info->indirect_buffer_addr; 1276bf215546Sopenharmony_ci } 1277bf215546Sopenharmony_ci } else { 1278bf215546Sopenharmony_ci /* Compute kernel 3. */ 1279bf215546Sopenharmony_ci pvr_csb_emit (csb, CDMCTRL_KERNEL3, kernel3) { 1280bf215546Sopenharmony_ci assert(info->global_size[0U] > 0U); 1281bf215546Sopenharmony_ci kernel3.workgroup_x = info->global_size[0U] - 1U; 1282bf215546Sopenharmony_ci } 1283bf215546Sopenharmony_ci 1284bf215546Sopenharmony_ci /* Compute kernel 4. */ 1285bf215546Sopenharmony_ci pvr_csb_emit (csb, CDMCTRL_KERNEL4, kernel4) { 1286bf215546Sopenharmony_ci assert(info->global_size[1U] > 0U); 1287bf215546Sopenharmony_ci kernel4.workgroup_y = info->global_size[1U] - 1U; 1288bf215546Sopenharmony_ci } 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_ci /* Compute kernel 5. */ 1291bf215546Sopenharmony_ci pvr_csb_emit (csb, CDMCTRL_KERNEL5, kernel5) { 1292bf215546Sopenharmony_ci assert(info->global_size[2U] > 0U); 1293bf215546Sopenharmony_ci kernel5.workgroup_z = info->global_size[2U] - 1U; 1294bf215546Sopenharmony_ci } 1295bf215546Sopenharmony_ci } 1296bf215546Sopenharmony_ci 1297bf215546Sopenharmony_ci /* Compute kernel 8. */ 1298bf215546Sopenharmony_ci pvr_csb_emit (csb, CDMCTRL_KERNEL8, kernel8) { 1299bf215546Sopenharmony_ci if (info->max_instances == ROGUE_MAX_INSTANCES_PER_TASK) 1300bf215546Sopenharmony_ci kernel8.max_instances = 0U; 1301bf215546Sopenharmony_ci else 1302bf215546Sopenharmony_ci kernel8.max_instances = info->max_instances; 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci assert(info->local_size[0U] > 0U); 1305bf215546Sopenharmony_ci kernel8.workgroup_size_x = info->local_size[0U] - 1U; 1306bf215546Sopenharmony_ci assert(info->local_size[1U] > 0U); 1307bf215546Sopenharmony_ci kernel8.workgroup_size_y = info->local_size[1U] - 1U; 1308bf215546Sopenharmony_ci assert(info->local_size[2U] > 0U); 1309bf215546Sopenharmony_ci kernel8.workgroup_size_z = info->local_size[2U] - 1U; 1310bf215546Sopenharmony_ci } 1311bf215546Sopenharmony_ci 1312bf215546Sopenharmony_ci /* Track the highest amount of shared registers usage in this dispatch. 1313bf215546Sopenharmony_ci * This is used by the FW for context switching, so must be large enough 1314bf215546Sopenharmony_ci * to contain all the shared registers that might be in use for this compute 1315bf215546Sopenharmony_ci * job. Coefficients don't need to be included as the context switch will not 1316bf215546Sopenharmony_ci * happen within the execution of a single workgroup, thus nothing needs to 1317bf215546Sopenharmony_ci * be preserved. 1318bf215546Sopenharmony_ci */ 1319bf215546Sopenharmony_ci if (info->usc_common_shared) { 1320bf215546Sopenharmony_ci sub_cmd->num_shared_regs = 1321bf215546Sopenharmony_ci MAX2(sub_cmd->num_shared_regs, info->usc_common_size); 1322bf215546Sopenharmony_ci } 1323bf215546Sopenharmony_ci} 1324bf215546Sopenharmony_ci 1325bf215546Sopenharmony_ci/* TODO: This can be pre-packed and uploaded directly. Would that provide any 1326bf215546Sopenharmony_ci * speed up? 1327bf215546Sopenharmony_ci */ 1328bf215546Sopenharmony_cistatic void 1329bf215546Sopenharmony_cipvr_compute_generate_idfwdf(struct pvr_cmd_buffer *cmd_buffer, 1330bf215546Sopenharmony_ci struct pvr_sub_cmd_compute *const sub_cmd) 1331bf215546Sopenharmony_ci{ 1332bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 1333bf215546Sopenharmony_ci bool *const is_sw_barier_required = 1334bf215546Sopenharmony_ci &state->current_sub_cmd->compute.pds_sw_barrier_requires_clearing; 1335bf215546Sopenharmony_ci const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; 1336bf215546Sopenharmony_ci struct pvr_csb *csb = &sub_cmd->control_stream; 1337bf215546Sopenharmony_ci const struct pvr_pds_upload *program; 1338bf215546Sopenharmony_ci 1339bf215546Sopenharmony_ci if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(&pdevice->dev_info) && 1340bf215546Sopenharmony_ci *is_sw_barier_required) { 1341bf215546Sopenharmony_ci *is_sw_barier_required = false; 1342bf215546Sopenharmony_ci program = &cmd_buffer->device->idfwdf_state.sw_compute_barrier_pds; 1343bf215546Sopenharmony_ci } else { 1344bf215546Sopenharmony_ci program = &cmd_buffer->device->idfwdf_state.pds; 1345bf215546Sopenharmony_ci } 1346bf215546Sopenharmony_ci 1347bf215546Sopenharmony_ci struct pvr_compute_kernel_info info = { 1348bf215546Sopenharmony_ci .indirect_buffer_addr = PVR_DEV_ADDR_INVALID, 1349bf215546Sopenharmony_ci .global_offsets_present = false, 1350bf215546Sopenharmony_ci .usc_common_size = 1351bf215546Sopenharmony_ci DIV_ROUND_UP(cmd_buffer->device->idfwdf_state.usc_shareds << 2, 1352bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)), 1353bf215546Sopenharmony_ci .usc_unified_size = 0U, 1354bf215546Sopenharmony_ci .pds_temp_size = 0U, 1355bf215546Sopenharmony_ci .pds_data_size = 1356bf215546Sopenharmony_ci DIV_ROUND_UP(program->data_size << 2, 1357bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)), 1358bf215546Sopenharmony_ci .usc_target = PVRX(CDMCTRL_USC_TARGET_ALL), 1359bf215546Sopenharmony_ci .is_fence = false, 1360bf215546Sopenharmony_ci .pds_data_offset = program->data_offset, 1361bf215546Sopenharmony_ci .sd_type = PVRX(CDMCTRL_SD_TYPE_USC), 1362bf215546Sopenharmony_ci .usc_common_shared = true, 1363bf215546Sopenharmony_ci .pds_code_offset = program->code_offset, 1364bf215546Sopenharmony_ci .global_size = { 1U, 1U, 1U }, 1365bf215546Sopenharmony_ci .local_size = { 1U, 1U, 1U }, 1366bf215546Sopenharmony_ci }; 1367bf215546Sopenharmony_ci 1368bf215546Sopenharmony_ci /* We don't need to pad work-group size for this case. */ 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci info.max_instances = 1371bf215546Sopenharmony_ci pvr_compute_flat_slot_size(pdevice, 1372bf215546Sopenharmony_ci cmd_buffer->device->idfwdf_state.usc_shareds, 1373bf215546Sopenharmony_ci false, 1374bf215546Sopenharmony_ci 1U); 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci pvr_compute_generate_control_stream(csb, sub_cmd, &info); 1377bf215546Sopenharmony_ci} 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_cistatic void 1380bf215546Sopenharmony_cipvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer, 1381bf215546Sopenharmony_ci struct pvr_sub_cmd_compute *const sub_cmd, 1382bf215546Sopenharmony_ci bool deallocate_shareds) 1383bf215546Sopenharmony_ci{ 1384bf215546Sopenharmony_ci const struct pvr_pds_upload *program = 1385bf215546Sopenharmony_ci &cmd_buffer->device->pds_compute_fence_program; 1386bf215546Sopenharmony_ci const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; 1387bf215546Sopenharmony_ci struct pvr_csb *csb = &sub_cmd->control_stream; 1388bf215546Sopenharmony_ci 1389bf215546Sopenharmony_ci struct pvr_compute_kernel_info info = { 1390bf215546Sopenharmony_ci .indirect_buffer_addr = PVR_DEV_ADDR_INVALID, 1391bf215546Sopenharmony_ci .global_offsets_present = false, 1392bf215546Sopenharmony_ci .usc_common_size = 0U, 1393bf215546Sopenharmony_ci .usc_unified_size = 0U, 1394bf215546Sopenharmony_ci .pds_temp_size = 0U, 1395bf215546Sopenharmony_ci .pds_data_size = 1396bf215546Sopenharmony_ci DIV_ROUND_UP(program->data_size << 2, 1397bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)), 1398bf215546Sopenharmony_ci .usc_target = PVRX(CDMCTRL_USC_TARGET_ANY), 1399bf215546Sopenharmony_ci .is_fence = true, 1400bf215546Sopenharmony_ci .pds_data_offset = program->data_offset, 1401bf215546Sopenharmony_ci .sd_type = PVRX(CDMCTRL_SD_TYPE_PDS), 1402bf215546Sopenharmony_ci .usc_common_shared = deallocate_shareds, 1403bf215546Sopenharmony_ci .pds_code_offset = program->code_offset, 1404bf215546Sopenharmony_ci .global_size = { 1U, 1U, 1U }, 1405bf215546Sopenharmony_ci .local_size = { 1U, 1U, 1U }, 1406bf215546Sopenharmony_ci }; 1407bf215546Sopenharmony_ci 1408bf215546Sopenharmony_ci /* We don't need to pad work-group size for this case. */ 1409bf215546Sopenharmony_ci /* Here we calculate the slot size. This can depend on the use of barriers, 1410bf215546Sopenharmony_ci * local memory, BRN's or other factors. 1411bf215546Sopenharmony_ci */ 1412bf215546Sopenharmony_ci info.max_instances = pvr_compute_flat_slot_size(pdevice, 0U, false, 1U); 1413bf215546Sopenharmony_ci 1414bf215546Sopenharmony_ci pvr_compute_generate_control_stream(csb, sub_cmd, &info); 1415bf215546Sopenharmony_ci} 1416bf215546Sopenharmony_ci 1417bf215546Sopenharmony_cistatic VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer) 1418bf215546Sopenharmony_ci{ 1419bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 1420bf215546Sopenharmony_ci struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd; 1421bf215546Sopenharmony_ci struct pvr_device *device = cmd_buffer->device; 1422bf215546Sopenharmony_ci VkResult result; 1423bf215546Sopenharmony_ci 1424bf215546Sopenharmony_ci /* FIXME: Is this NULL check required because this function is called from 1425bf215546Sopenharmony_ci * pvr_resolve_unemitted_resolve_attachments()? See comment about this 1426bf215546Sopenharmony_ci * function being called twice in a row in pvr_CmdEndRenderPass(). 1427bf215546Sopenharmony_ci */ 1428bf215546Sopenharmony_ci if (!sub_cmd) 1429bf215546Sopenharmony_ci return VK_SUCCESS; 1430bf215546Sopenharmony_ci 1431bf215546Sopenharmony_ci switch (sub_cmd->type) { 1432bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_GRAPHICS: { 1433bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const gfx_sub_cmd = &sub_cmd->gfx; 1434bf215546Sopenharmony_ci 1435bf215546Sopenharmony_ci if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { 1436bf215546Sopenharmony_ci result = pvr_csb_emit_return(&gfx_sub_cmd->control_stream); 1437bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1438bf215546Sopenharmony_ci state->status = result; 1439bf215546Sopenharmony_ci return result; 1440bf215546Sopenharmony_ci } 1441bf215546Sopenharmony_ci 1442bf215546Sopenharmony_ci break; 1443bf215546Sopenharmony_ci } 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_ci /* TODO: Check if the sub_cmd can be skipped based on 1446bf215546Sopenharmony_ci * sub_cmd->gfx.empty_cmd flag. 1447bf215546Sopenharmony_ci */ 1448bf215546Sopenharmony_ci 1449bf215546Sopenharmony_ci result = pvr_cmd_buffer_upload_tables(device, cmd_buffer, gfx_sub_cmd); 1450bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1451bf215546Sopenharmony_ci state->status = result; 1452bf215546Sopenharmony_ci return result; 1453bf215546Sopenharmony_ci } 1454bf215546Sopenharmony_ci 1455bf215546Sopenharmony_ci result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer, gfx_sub_cmd); 1456bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1457bf215546Sopenharmony_ci state->status = result; 1458bf215546Sopenharmony_ci return result; 1459bf215546Sopenharmony_ci } 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_ci result = pvr_csb_emit_terminate(&gfx_sub_cmd->control_stream); 1462bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1463bf215546Sopenharmony_ci state->status = result; 1464bf215546Sopenharmony_ci return result; 1465bf215546Sopenharmony_ci } 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_ci result = pvr_sub_cmd_gfx_job_init(&device->pdevice->dev_info, 1468bf215546Sopenharmony_ci cmd_buffer, 1469bf215546Sopenharmony_ci gfx_sub_cmd); 1470bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1471bf215546Sopenharmony_ci state->status = result; 1472bf215546Sopenharmony_ci return result; 1473bf215546Sopenharmony_ci } 1474bf215546Sopenharmony_ci 1475bf215546Sopenharmony_ci break; 1476bf215546Sopenharmony_ci } 1477bf215546Sopenharmony_ci 1478bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_COMPUTE: { 1479bf215546Sopenharmony_ci struct pvr_sub_cmd_compute *const compute_sub_cmd = &sub_cmd->compute; 1480bf215546Sopenharmony_ci 1481bf215546Sopenharmony_ci pvr_compute_generate_fence(cmd_buffer, compute_sub_cmd, true); 1482bf215546Sopenharmony_ci 1483bf215546Sopenharmony_ci result = pvr_csb_emit_terminate(&compute_sub_cmd->control_stream); 1484bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1485bf215546Sopenharmony_ci state->status = result; 1486bf215546Sopenharmony_ci return result; 1487bf215546Sopenharmony_ci } 1488bf215546Sopenharmony_ci 1489bf215546Sopenharmony_ci pvr_sub_cmd_compute_job_init(device->pdevice, 1490bf215546Sopenharmony_ci cmd_buffer, 1491bf215546Sopenharmony_ci compute_sub_cmd); 1492bf215546Sopenharmony_ci break; 1493bf215546Sopenharmony_ci } 1494bf215546Sopenharmony_ci 1495bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_TRANSFER: 1496bf215546Sopenharmony_ci break; 1497bf215546Sopenharmony_ci 1498bf215546Sopenharmony_ci default: 1499bf215546Sopenharmony_ci pvr_finishme("Unsupported sub-command type %d", sub_cmd->type); 1500bf215546Sopenharmony_ci break; 1501bf215546Sopenharmony_ci } 1502bf215546Sopenharmony_ci 1503bf215546Sopenharmony_ci state->current_sub_cmd = NULL; 1504bf215546Sopenharmony_ci 1505bf215546Sopenharmony_ci return VK_SUCCESS; 1506bf215546Sopenharmony_ci} 1507bf215546Sopenharmony_ci 1508bf215546Sopenharmony_cistatic void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer_state *state, 1509bf215546Sopenharmony_ci bool start_geom) 1510bf215546Sopenharmony_ci{ 1511bf215546Sopenharmony_ci if (start_geom) { 1512bf215546Sopenharmony_ci /* 1513bf215546Sopenharmony_ci * Initial geometry phase State. 1514bf215546Sopenharmony_ci * It's the driver's responsibility to ensure that the state of the 1515bf215546Sopenharmony_ci * hardware is correctly initialized at the start of every geometry 1516bf215546Sopenharmony_ci * phase. This is required to prevent stale state from a previous 1517bf215546Sopenharmony_ci * geometry phase erroneously affecting the next geometry phase. The 1518bf215546Sopenharmony_ci * following fields in PPP State Header, and their corresponding state 1519bf215546Sopenharmony_ci * words, must be supplied in the first PPP State Update of a geometry 1520bf215546Sopenharmony_ci * phase that contains any geometry (draw calls). Any field not listed 1521bf215546Sopenharmony_ci * below is safe to ignore. 1522bf215546Sopenharmony_ci * 1523bf215546Sopenharmony_ci * TA_PRES_STREAM_OUT_SIZE 1524bf215546Sopenharmony_ci * TA_PRES_PPPCTRL 1525bf215546Sopenharmony_ci * TA_PRES_VARYING_WORD2 1526bf215546Sopenharmony_ci * TA_PRES_VARYING_WORD1 1527bf215546Sopenharmony_ci * TA_PRES_VARYING_WORD0 1528bf215546Sopenharmony_ci * TA_PRES_OUTSELECTS 1529bf215546Sopenharmony_ci * TA_PRES_WCLAMP 1530bf215546Sopenharmony_ci * TA_VIEWPORT_COUNT 1531bf215546Sopenharmony_ci * TA_PRES_VIEWPORT 1532bf215546Sopenharmony_ci * TA_PRES_REGION_CLIP 1533bf215546Sopenharmony_ci * TA_PRES_PDSSTATEPTR0 1534bf215546Sopenharmony_ci * TA_PRES_ISPCTLFB 1535bf215546Sopenharmony_ci * TA_PRES_ISPCTLFA 1536bf215546Sopenharmony_ci * TA_PRES_ISPCTL 1537bf215546Sopenharmony_ci * 1538bf215546Sopenharmony_ci * If a geometry phase does not contain any geometry, this restriction 1539bf215546Sopenharmony_ci * can be ignored. If the first draw call in a geometry phase will only 1540bf215546Sopenharmony_ci * update the depth or stencil buffers i.e. ISP_TAGWRITEDISABLE is set 1541bf215546Sopenharmony_ci * in the ISP State Control Word, the PDS State Pointers 1542bf215546Sopenharmony_ci * (TA_PRES_PDSSTATEPTR*) in the first PPP State Update do not need to 1543bf215546Sopenharmony_ci * be supplied, since they will never reach the PDS in the fragment 1544bf215546Sopenharmony_ci * phase. 1545bf215546Sopenharmony_ci */ 1546bf215546Sopenharmony_ci 1547bf215546Sopenharmony_ci state->emit_state_bits = 0; 1548bf215546Sopenharmony_ci 1549bf215546Sopenharmony_ci state->emit_state.stream_out = true; 1550bf215546Sopenharmony_ci state->emit_state.ppp_control = true; 1551bf215546Sopenharmony_ci state->emit_state.varying_word2 = true; 1552bf215546Sopenharmony_ci state->emit_state.varying_word1 = true; 1553bf215546Sopenharmony_ci state->emit_state.varying_word0 = true; 1554bf215546Sopenharmony_ci state->emit_state.output_selects = true; 1555bf215546Sopenharmony_ci state->emit_state.wclamp = true; 1556bf215546Sopenharmony_ci state->emit_state.viewport = true; 1557bf215546Sopenharmony_ci state->emit_state.region_clip = true; 1558bf215546Sopenharmony_ci state->emit_state.pds_fragment_stateptr0 = true; 1559bf215546Sopenharmony_ci state->emit_state.isp_fb = true; 1560bf215546Sopenharmony_ci state->emit_state.isp = true; 1561bf215546Sopenharmony_ci } else { 1562bf215546Sopenharmony_ci state->emit_state.ppp_control = true; 1563bf215546Sopenharmony_ci state->emit_state.varying_word1 = true; 1564bf215546Sopenharmony_ci state->emit_state.varying_word0 = true; 1565bf215546Sopenharmony_ci state->emit_state.output_selects = true; 1566bf215546Sopenharmony_ci state->emit_state.viewport = true; 1567bf215546Sopenharmony_ci state->emit_state.region_clip = true; 1568bf215546Sopenharmony_ci state->emit_state.pds_fragment_stateptr0 = true; 1569bf215546Sopenharmony_ci state->emit_state.isp_fb = true; 1570bf215546Sopenharmony_ci state->emit_state.isp = true; 1571bf215546Sopenharmony_ci } 1572bf215546Sopenharmony_ci 1573bf215546Sopenharmony_ci memset(&state->ppp_state, 0U, sizeof(state->ppp_state)); 1574bf215546Sopenharmony_ci 1575bf215546Sopenharmony_ci state->dirty.vertex_bindings = true; 1576bf215546Sopenharmony_ci state->dirty.gfx_pipeline_binding = true; 1577bf215546Sopenharmony_ci state->dirty.viewport = true; 1578bf215546Sopenharmony_ci} 1579bf215546Sopenharmony_ci 1580bf215546Sopenharmony_cistatic VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, 1581bf215546Sopenharmony_ci enum pvr_sub_cmd_type type) 1582bf215546Sopenharmony_ci{ 1583bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 1584bf215546Sopenharmony_ci struct pvr_device *device = cmd_buffer->device; 1585bf215546Sopenharmony_ci struct pvr_sub_cmd *sub_cmd; 1586bf215546Sopenharmony_ci VkResult result; 1587bf215546Sopenharmony_ci 1588bf215546Sopenharmony_ci /* Check the current status of the buffer. */ 1589bf215546Sopenharmony_ci if (state->status != VK_SUCCESS) 1590bf215546Sopenharmony_ci return state->status; 1591bf215546Sopenharmony_ci 1592bf215546Sopenharmony_ci pvr_cmd_buffer_update_barriers(cmd_buffer, type); 1593bf215546Sopenharmony_ci 1594bf215546Sopenharmony_ci if (state->current_sub_cmd) { 1595bf215546Sopenharmony_ci if (state->current_sub_cmd->type == type) { 1596bf215546Sopenharmony_ci /* Continue adding to the current sub command. */ 1597bf215546Sopenharmony_ci return VK_SUCCESS; 1598bf215546Sopenharmony_ci } 1599bf215546Sopenharmony_ci 1600bf215546Sopenharmony_ci /* End the current sub command. */ 1601bf215546Sopenharmony_ci result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer); 1602bf215546Sopenharmony_ci if (result != VK_SUCCESS) 1603bf215546Sopenharmony_ci return result; 1604bf215546Sopenharmony_ci } 1605bf215546Sopenharmony_ci 1606bf215546Sopenharmony_ci sub_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc, 1607bf215546Sopenharmony_ci sizeof(*sub_cmd), 1608bf215546Sopenharmony_ci 8, 1609bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1610bf215546Sopenharmony_ci if (!sub_cmd) { 1611bf215546Sopenharmony_ci state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); 1612bf215546Sopenharmony_ci return state->status; 1613bf215546Sopenharmony_ci } 1614bf215546Sopenharmony_ci 1615bf215546Sopenharmony_ci sub_cmd->type = type; 1616bf215546Sopenharmony_ci 1617bf215546Sopenharmony_ci switch (type) { 1618bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_GRAPHICS: 1619bf215546Sopenharmony_ci 1620bf215546Sopenharmony_ci sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED; 1621bf215546Sopenharmony_ci sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED; 1622bf215546Sopenharmony_ci sub_cmd->gfx.modifies_depth = false; 1623bf215546Sopenharmony_ci sub_cmd->gfx.modifies_stencil = false; 1624bf215546Sopenharmony_ci sub_cmd->gfx.max_tiles_in_flight = 1625bf215546Sopenharmony_ci PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info, 1626bf215546Sopenharmony_ci isp_max_tiles_in_flight, 1627bf215546Sopenharmony_ci 1); 1628bf215546Sopenharmony_ci sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass; 1629bf215546Sopenharmony_ci sub_cmd->gfx.framebuffer = state->render_pass_info.framebuffer; 1630bf215546Sopenharmony_ci sub_cmd->gfx.empty_cmd = true; 1631bf215546Sopenharmony_ci 1632bf215546Sopenharmony_ci pvr_reset_graphics_dirty_state(state, true); 1633bf215546Sopenharmony_ci pvr_csb_init(device, 1634bf215546Sopenharmony_ci PVR_CMD_STREAM_TYPE_GRAPHICS, 1635bf215546Sopenharmony_ci &sub_cmd->gfx.control_stream); 1636bf215546Sopenharmony_ci break; 1637bf215546Sopenharmony_ci 1638bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_COMPUTE: 1639bf215546Sopenharmony_ci pvr_csb_init(device, 1640bf215546Sopenharmony_ci PVR_CMD_STREAM_TYPE_COMPUTE, 1641bf215546Sopenharmony_ci &sub_cmd->compute.control_stream); 1642bf215546Sopenharmony_ci break; 1643bf215546Sopenharmony_ci 1644bf215546Sopenharmony_ci case PVR_SUB_CMD_TYPE_TRANSFER: 1645bf215546Sopenharmony_ci list_inithead(&sub_cmd->transfer.transfer_cmds); 1646bf215546Sopenharmony_ci break; 1647bf215546Sopenharmony_ci 1648bf215546Sopenharmony_ci default: 1649bf215546Sopenharmony_ci pvr_finishme("Unsupported sub-command type %d", type); 1650bf215546Sopenharmony_ci break; 1651bf215546Sopenharmony_ci } 1652bf215546Sopenharmony_ci 1653bf215546Sopenharmony_ci list_addtail(&sub_cmd->link, &cmd_buffer->sub_cmds); 1654bf215546Sopenharmony_ci state->current_sub_cmd = sub_cmd; 1655bf215546Sopenharmony_ci 1656bf215546Sopenharmony_ci return VK_SUCCESS; 1657bf215546Sopenharmony_ci} 1658bf215546Sopenharmony_ci 1659bf215546Sopenharmony_ciVkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer, 1660bf215546Sopenharmony_ci struct pvr_winsys_heap *heap, 1661bf215546Sopenharmony_ci uint64_t size, 1662bf215546Sopenharmony_ci uint32_t flags, 1663bf215546Sopenharmony_ci struct pvr_bo **const pvr_bo_out) 1664bf215546Sopenharmony_ci{ 1665bf215546Sopenharmony_ci const uint32_t cache_line_size = 1666bf215546Sopenharmony_ci rogue_get_slc_cache_line_size(&cmd_buffer->device->pdevice->dev_info); 1667bf215546Sopenharmony_ci struct pvr_bo *pvr_bo; 1668bf215546Sopenharmony_ci VkResult result; 1669bf215546Sopenharmony_ci 1670bf215546Sopenharmony_ci result = pvr_bo_alloc(cmd_buffer->device, 1671bf215546Sopenharmony_ci heap, 1672bf215546Sopenharmony_ci size, 1673bf215546Sopenharmony_ci cache_line_size, 1674bf215546Sopenharmony_ci flags, 1675bf215546Sopenharmony_ci &pvr_bo); 1676bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1677bf215546Sopenharmony_ci cmd_buffer->state.status = result; 1678bf215546Sopenharmony_ci return result; 1679bf215546Sopenharmony_ci } 1680bf215546Sopenharmony_ci 1681bf215546Sopenharmony_ci list_add(&pvr_bo->link, &cmd_buffer->bo_list); 1682bf215546Sopenharmony_ci 1683bf215546Sopenharmony_ci *pvr_bo_out = pvr_bo; 1684bf215546Sopenharmony_ci 1685bf215546Sopenharmony_ci return VK_SUCCESS; 1686bf215546Sopenharmony_ci} 1687bf215546Sopenharmony_ci 1688bf215546Sopenharmony_ciVkResult pvr_ResetCommandBuffer(VkCommandBuffer commandBuffer, 1689bf215546Sopenharmony_ci VkCommandBufferResetFlags flags) 1690bf215546Sopenharmony_ci{ 1691bf215546Sopenharmony_ci assert(!"Unimplemented"); 1692bf215546Sopenharmony_ci return VK_SUCCESS; 1693bf215546Sopenharmony_ci} 1694bf215546Sopenharmony_ci 1695bf215546Sopenharmony_cistatic void pvr_cmd_bind_compute_pipeline( 1696bf215546Sopenharmony_ci const struct pvr_compute_pipeline *const compute_pipeline, 1697bf215546Sopenharmony_ci struct pvr_cmd_buffer *const cmd_buffer) 1698bf215546Sopenharmony_ci{ 1699bf215546Sopenharmony_ci cmd_buffer->state.compute_pipeline = compute_pipeline; 1700bf215546Sopenharmony_ci cmd_buffer->state.dirty.compute_pipeline_binding = true; 1701bf215546Sopenharmony_ci} 1702bf215546Sopenharmony_ci 1703bf215546Sopenharmony_cistatic void pvr_cmd_bind_graphics_pipeline( 1704bf215546Sopenharmony_ci const struct pvr_graphics_pipeline *const gfx_pipeline, 1705bf215546Sopenharmony_ci struct pvr_cmd_buffer *const cmd_buffer) 1706bf215546Sopenharmony_ci{ 1707bf215546Sopenharmony_ci struct pvr_dynamic_state *const dest_state = 1708bf215546Sopenharmony_ci &cmd_buffer->state.dynamic.common; 1709bf215546Sopenharmony_ci const struct pvr_dynamic_state *const src_state = 1710bf215546Sopenharmony_ci &gfx_pipeline->dynamic_state; 1711bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const cmd_buffer_state = &cmd_buffer->state; 1712bf215546Sopenharmony_ci const uint32_t state_mask = src_state->mask; 1713bf215546Sopenharmony_ci 1714bf215546Sopenharmony_ci cmd_buffer_state->gfx_pipeline = gfx_pipeline; 1715bf215546Sopenharmony_ci cmd_buffer_state->dirty.gfx_pipeline_binding = true; 1716bf215546Sopenharmony_ci 1717bf215546Sopenharmony_ci /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_VIEWPORT. */ 1718bf215546Sopenharmony_ci if (!(state_mask & PVR_DYNAMIC_STATE_BIT_VIEWPORT)) { 1719bf215546Sopenharmony_ci assert(!"Unimplemented"); 1720bf215546Sopenharmony_ci } 1721bf215546Sopenharmony_ci 1722bf215546Sopenharmony_ci /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_SCISSOR. */ 1723bf215546Sopenharmony_ci if (!(state_mask & PVR_DYNAMIC_STATE_BIT_SCISSOR)) { 1724bf215546Sopenharmony_ci assert(!"Unimplemented"); 1725bf215546Sopenharmony_ci } 1726bf215546Sopenharmony_ci 1727bf215546Sopenharmony_ci if (!(state_mask & PVR_DYNAMIC_STATE_BIT_LINE_WIDTH)) { 1728bf215546Sopenharmony_ci dest_state->line_width = src_state->line_width; 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci cmd_buffer_state->dirty.line_width = true; 1731bf215546Sopenharmony_ci } 1732bf215546Sopenharmony_ci 1733bf215546Sopenharmony_ci if (!(state_mask & PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS)) { 1734bf215546Sopenharmony_ci memcpy(&dest_state->depth_bias, 1735bf215546Sopenharmony_ci &src_state->depth_bias, 1736bf215546Sopenharmony_ci sizeof(src_state->depth_bias)); 1737bf215546Sopenharmony_ci 1738bf215546Sopenharmony_ci cmd_buffer_state->dirty.depth_bias = true; 1739bf215546Sopenharmony_ci } 1740bf215546Sopenharmony_ci 1741bf215546Sopenharmony_ci if (!(state_mask & PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS)) { 1742bf215546Sopenharmony_ci STATIC_ASSERT( 1743bf215546Sopenharmony_ci __same_type(dest_state->blend_constants, src_state->blend_constants)); 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_ci typed_memcpy(dest_state->blend_constants, 1746bf215546Sopenharmony_ci src_state->blend_constants, 1747bf215546Sopenharmony_ci ARRAY_SIZE(dest_state->blend_constants)); 1748bf215546Sopenharmony_ci 1749bf215546Sopenharmony_ci cmd_buffer_state->dirty.blend_constants = true; 1750bf215546Sopenharmony_ci } 1751bf215546Sopenharmony_ci 1752bf215546Sopenharmony_ci if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK)) { 1753bf215546Sopenharmony_ci dest_state->compare_mask.front = src_state->compare_mask.front; 1754bf215546Sopenharmony_ci dest_state->compare_mask.back = src_state->compare_mask.back; 1755bf215546Sopenharmony_ci 1756bf215546Sopenharmony_ci cmd_buffer_state->dirty.compare_mask = true; 1757bf215546Sopenharmony_ci } 1758bf215546Sopenharmony_ci 1759bf215546Sopenharmony_ci if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK)) { 1760bf215546Sopenharmony_ci dest_state->write_mask.front = src_state->write_mask.front; 1761bf215546Sopenharmony_ci dest_state->write_mask.back = src_state->write_mask.back; 1762bf215546Sopenharmony_ci 1763bf215546Sopenharmony_ci cmd_buffer_state->dirty.write_mask = true; 1764bf215546Sopenharmony_ci } 1765bf215546Sopenharmony_ci 1766bf215546Sopenharmony_ci if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE)) { 1767bf215546Sopenharmony_ci dest_state->reference.front = src_state->reference.front; 1768bf215546Sopenharmony_ci dest_state->reference.back = src_state->reference.back; 1769bf215546Sopenharmony_ci 1770bf215546Sopenharmony_ci cmd_buffer_state->dirty.reference = true; 1771bf215546Sopenharmony_ci } 1772bf215546Sopenharmony_ci} 1773bf215546Sopenharmony_ci 1774bf215546Sopenharmony_civoid pvr_CmdBindPipeline(VkCommandBuffer commandBuffer, 1775bf215546Sopenharmony_ci VkPipelineBindPoint pipelineBindPoint, 1776bf215546Sopenharmony_ci VkPipeline _pipeline) 1777bf215546Sopenharmony_ci{ 1778bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 1779bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_pipeline, pipeline, _pipeline); 1780bf215546Sopenharmony_ci 1781bf215546Sopenharmony_ci switch (pipelineBindPoint) { 1782bf215546Sopenharmony_ci case VK_PIPELINE_BIND_POINT_COMPUTE: 1783bf215546Sopenharmony_ci pvr_cmd_bind_compute_pipeline(to_pvr_compute_pipeline(pipeline), 1784bf215546Sopenharmony_ci cmd_buffer); 1785bf215546Sopenharmony_ci break; 1786bf215546Sopenharmony_ci 1787bf215546Sopenharmony_ci case VK_PIPELINE_BIND_POINT_GRAPHICS: 1788bf215546Sopenharmony_ci pvr_cmd_bind_graphics_pipeline(to_pvr_graphics_pipeline(pipeline), 1789bf215546Sopenharmony_ci cmd_buffer); 1790bf215546Sopenharmony_ci break; 1791bf215546Sopenharmony_ci 1792bf215546Sopenharmony_ci default: 1793bf215546Sopenharmony_ci unreachable("Invalid bind point."); 1794bf215546Sopenharmony_ci break; 1795bf215546Sopenharmony_ci } 1796bf215546Sopenharmony_ci} 1797bf215546Sopenharmony_ci 1798bf215546Sopenharmony_ci#if defined(DEBUG) 1799bf215546Sopenharmony_cistatic void check_viewport_quirk_70165(const struct pvr_device *device, 1800bf215546Sopenharmony_ci const VkViewport *pViewport) 1801bf215546Sopenharmony_ci{ 1802bf215546Sopenharmony_ci const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 1803bf215546Sopenharmony_ci float min_vertex_x, max_vertex_x, min_vertex_y, max_vertex_y; 1804bf215546Sopenharmony_ci float min_screen_space_value, max_screen_space_value; 1805bf215546Sopenharmony_ci float sign_to_unsigned_offset, fixed_point_max; 1806bf215546Sopenharmony_ci float guardband_width, guardband_height; 1807bf215546Sopenharmony_ci 1808bf215546Sopenharmony_ci if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) { 1809bf215546Sopenharmony_ci /* Max representable value in 13.4 fixed point format. 1810bf215546Sopenharmony_ci * Round-down to avoid precision issues. 1811bf215546Sopenharmony_ci * Calculated as (2 ** 13) - 2*(2 ** -4) 1812bf215546Sopenharmony_ci */ 1813bf215546Sopenharmony_ci fixed_point_max = 8192.0f - 2.0f / 16.0f; 1814bf215546Sopenharmony_ci 1815bf215546Sopenharmony_ci if (PVR_HAS_FEATURE(dev_info, screen_size8K)) { 1816bf215546Sopenharmony_ci if (pViewport->width <= 4096 && pViewport->height <= 4096) { 1817bf215546Sopenharmony_ci guardband_width = pViewport->width / 4.0f; 1818bf215546Sopenharmony_ci guardband_height = pViewport->height / 4.0f; 1819bf215546Sopenharmony_ci 1820bf215546Sopenharmony_ci /* 2k of the range is negative */ 1821bf215546Sopenharmony_ci sign_to_unsigned_offset = 2048.0f; 1822bf215546Sopenharmony_ci } else { 1823bf215546Sopenharmony_ci guardband_width = 0.0f; 1824bf215546Sopenharmony_ci guardband_height = 0.0f; 1825bf215546Sopenharmony_ci 1826bf215546Sopenharmony_ci /* For > 4k renders, the entire range is positive */ 1827bf215546Sopenharmony_ci sign_to_unsigned_offset = 0.0f; 1828bf215546Sopenharmony_ci } 1829bf215546Sopenharmony_ci } else { 1830bf215546Sopenharmony_ci guardband_width = pViewport->width / 4.0f; 1831bf215546Sopenharmony_ci guardband_height = pViewport->height / 4.0f; 1832bf215546Sopenharmony_ci 1833bf215546Sopenharmony_ci /* 2k of the range is negative */ 1834bf215546Sopenharmony_ci sign_to_unsigned_offset = 2048.0f; 1835bf215546Sopenharmony_ci } 1836bf215546Sopenharmony_ci } else { 1837bf215546Sopenharmony_ci /* Max representable value in 16.8 fixed point format 1838bf215546Sopenharmony_ci * Calculated as (2 ** 16) - (2 ** -8) 1839bf215546Sopenharmony_ci */ 1840bf215546Sopenharmony_ci fixed_point_max = 65535.99609375f; 1841bf215546Sopenharmony_ci guardband_width = pViewport->width / 4.0f; 1842bf215546Sopenharmony_ci guardband_height = pViewport->height / 4.0f; 1843bf215546Sopenharmony_ci 1844bf215546Sopenharmony_ci /* 4k/20k of the range is negative */ 1845bf215546Sopenharmony_ci sign_to_unsigned_offset = (float)PVR_MAX_NEG_OFFSCREEN_OFFSET; 1846bf215546Sopenharmony_ci } 1847bf215546Sopenharmony_ci 1848bf215546Sopenharmony_ci min_screen_space_value = -sign_to_unsigned_offset; 1849bf215546Sopenharmony_ci max_screen_space_value = fixed_point_max - sign_to_unsigned_offset; 1850bf215546Sopenharmony_ci 1851bf215546Sopenharmony_ci min_vertex_x = pViewport->x - guardband_width; 1852bf215546Sopenharmony_ci max_vertex_x = pViewport->x + pViewport->width + guardband_width; 1853bf215546Sopenharmony_ci min_vertex_y = pViewport->y - guardband_height; 1854bf215546Sopenharmony_ci max_vertex_y = pViewport->y + pViewport->height + guardband_height; 1855bf215546Sopenharmony_ci if (min_vertex_x < min_screen_space_value || 1856bf215546Sopenharmony_ci max_vertex_x > max_screen_space_value || 1857bf215546Sopenharmony_ci min_vertex_y < min_screen_space_value || 1858bf215546Sopenharmony_ci max_vertex_y > max_screen_space_value) { 1859bf215546Sopenharmony_ci mesa_logw("Viewport is affected by BRN70165, geometry outside " 1860bf215546Sopenharmony_ci "the viewport could be corrupted"); 1861bf215546Sopenharmony_ci } 1862bf215546Sopenharmony_ci} 1863bf215546Sopenharmony_ci#endif 1864bf215546Sopenharmony_ci 1865bf215546Sopenharmony_civoid pvr_CmdSetViewport(VkCommandBuffer commandBuffer, 1866bf215546Sopenharmony_ci uint32_t firstViewport, 1867bf215546Sopenharmony_ci uint32_t viewportCount, 1868bf215546Sopenharmony_ci const VkViewport *pViewports) 1869bf215546Sopenharmony_ci{ 1870bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 1871bf215546Sopenharmony_ci const uint32_t total_count = firstViewport + viewportCount; 1872bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 1873bf215546Sopenharmony_ci 1874bf215546Sopenharmony_ci assert(firstViewport < PVR_MAX_VIEWPORTS && viewportCount > 0); 1875bf215546Sopenharmony_ci assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS); 1876bf215546Sopenharmony_ci 1877bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 1878bf215546Sopenharmony_ci 1879bf215546Sopenharmony_ci#if defined(DEBUG) 1880bf215546Sopenharmony_ci if (PVR_HAS_QUIRK(&cmd_buffer->device->pdevice->dev_info, 70165)) { 1881bf215546Sopenharmony_ci for (uint32_t viewport = 0; viewport < viewportCount; viewport++) { 1882bf215546Sopenharmony_ci check_viewport_quirk_70165(cmd_buffer->device, &pViewports[viewport]); 1883bf215546Sopenharmony_ci } 1884bf215546Sopenharmony_ci } 1885bf215546Sopenharmony_ci#endif 1886bf215546Sopenharmony_ci 1887bf215546Sopenharmony_ci if (state->dynamic.common.viewport.count < total_count) 1888bf215546Sopenharmony_ci state->dynamic.common.viewport.count = total_count; 1889bf215546Sopenharmony_ci 1890bf215546Sopenharmony_ci memcpy(&state->dynamic.common.viewport.viewports[firstViewport], 1891bf215546Sopenharmony_ci pViewports, 1892bf215546Sopenharmony_ci viewportCount * sizeof(*pViewports)); 1893bf215546Sopenharmony_ci 1894bf215546Sopenharmony_ci state->dirty.viewport = true; 1895bf215546Sopenharmony_ci} 1896bf215546Sopenharmony_ci 1897bf215546Sopenharmony_civoid pvr_CmdSetScissor(VkCommandBuffer commandBuffer, 1898bf215546Sopenharmony_ci uint32_t firstScissor, 1899bf215546Sopenharmony_ci uint32_t scissorCount, 1900bf215546Sopenharmony_ci const VkRect2D *pScissors) 1901bf215546Sopenharmony_ci{ 1902bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 1903bf215546Sopenharmony_ci const uint32_t total_count = firstScissor + scissorCount; 1904bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 1905bf215546Sopenharmony_ci 1906bf215546Sopenharmony_ci assert(firstScissor < PVR_MAX_VIEWPORTS && scissorCount > 0); 1907bf215546Sopenharmony_ci assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS); 1908bf215546Sopenharmony_ci 1909bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 1910bf215546Sopenharmony_ci 1911bf215546Sopenharmony_ci if (state->dynamic.common.scissor.count < total_count) 1912bf215546Sopenharmony_ci state->dynamic.common.scissor.count = total_count; 1913bf215546Sopenharmony_ci 1914bf215546Sopenharmony_ci memcpy(&state->dynamic.common.scissor.scissors[firstScissor], 1915bf215546Sopenharmony_ci pScissors, 1916bf215546Sopenharmony_ci scissorCount * sizeof(*pScissors)); 1917bf215546Sopenharmony_ci 1918bf215546Sopenharmony_ci state->dirty.scissor = true; 1919bf215546Sopenharmony_ci} 1920bf215546Sopenharmony_ci 1921bf215546Sopenharmony_civoid pvr_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) 1922bf215546Sopenharmony_ci{ 1923bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 1924bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 1925bf215546Sopenharmony_ci 1926bf215546Sopenharmony_ci state->dynamic.common.line_width = lineWidth; 1927bf215546Sopenharmony_ci state->dirty.line_width = true; 1928bf215546Sopenharmony_ci} 1929bf215546Sopenharmony_ci 1930bf215546Sopenharmony_civoid pvr_CmdSetDepthBias(VkCommandBuffer commandBuffer, 1931bf215546Sopenharmony_ci float depthBiasConstantFactor, 1932bf215546Sopenharmony_ci float depthBiasClamp, 1933bf215546Sopenharmony_ci float depthBiasSlopeFactor) 1934bf215546Sopenharmony_ci{ 1935bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 1936bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 1937bf215546Sopenharmony_ci 1938bf215546Sopenharmony_ci state->dynamic.common.depth_bias.constant_factor = depthBiasConstantFactor; 1939bf215546Sopenharmony_ci state->dynamic.common.depth_bias.clamp = depthBiasClamp; 1940bf215546Sopenharmony_ci state->dynamic.common.depth_bias.slope_factor = depthBiasSlopeFactor; 1941bf215546Sopenharmony_ci state->dirty.depth_bias = true; 1942bf215546Sopenharmony_ci} 1943bf215546Sopenharmony_ci 1944bf215546Sopenharmony_civoid pvr_CmdSetBlendConstants(VkCommandBuffer commandBuffer, 1945bf215546Sopenharmony_ci const float blendConstants[4]) 1946bf215546Sopenharmony_ci{ 1947bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 1948bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 1949bf215546Sopenharmony_ci 1950bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(state->dynamic.common.blend_constants) == 4); 1951bf215546Sopenharmony_ci memcpy(state->dynamic.common.blend_constants, 1952bf215546Sopenharmony_ci blendConstants, 1953bf215546Sopenharmony_ci sizeof(state->dynamic.common.blend_constants)); 1954bf215546Sopenharmony_ci 1955bf215546Sopenharmony_ci state->dirty.blend_constants = true; 1956bf215546Sopenharmony_ci} 1957bf215546Sopenharmony_ci 1958bf215546Sopenharmony_civoid pvr_CmdSetDepthBounds(VkCommandBuffer commandBuffer, 1959bf215546Sopenharmony_ci float minDepthBounds, 1960bf215546Sopenharmony_ci float maxDepthBounds) 1961bf215546Sopenharmony_ci{ 1962bf215546Sopenharmony_ci mesa_logd("No support for depth bounds testing."); 1963bf215546Sopenharmony_ci} 1964bf215546Sopenharmony_ci 1965bf215546Sopenharmony_civoid pvr_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, 1966bf215546Sopenharmony_ci VkStencilFaceFlags faceMask, 1967bf215546Sopenharmony_ci uint32_t compareMask) 1968bf215546Sopenharmony_ci{ 1969bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 1970bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 1971bf215546Sopenharmony_ci 1972bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 1973bf215546Sopenharmony_ci state->dynamic.common.compare_mask.front = compareMask; 1974bf215546Sopenharmony_ci 1975bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_BACK_BIT) 1976bf215546Sopenharmony_ci state->dynamic.common.compare_mask.back = compareMask; 1977bf215546Sopenharmony_ci 1978bf215546Sopenharmony_ci state->dirty.compare_mask = true; 1979bf215546Sopenharmony_ci} 1980bf215546Sopenharmony_ci 1981bf215546Sopenharmony_civoid pvr_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, 1982bf215546Sopenharmony_ci VkStencilFaceFlags faceMask, 1983bf215546Sopenharmony_ci uint32_t writeMask) 1984bf215546Sopenharmony_ci{ 1985bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 1986bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 1987bf215546Sopenharmony_ci 1988bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 1989bf215546Sopenharmony_ci state->dynamic.common.write_mask.front = writeMask; 1990bf215546Sopenharmony_ci 1991bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_BACK_BIT) 1992bf215546Sopenharmony_ci state->dynamic.common.write_mask.back = writeMask; 1993bf215546Sopenharmony_ci 1994bf215546Sopenharmony_ci state->dirty.write_mask = true; 1995bf215546Sopenharmony_ci} 1996bf215546Sopenharmony_ci 1997bf215546Sopenharmony_civoid pvr_CmdSetStencilReference(VkCommandBuffer commandBuffer, 1998bf215546Sopenharmony_ci VkStencilFaceFlags faceMask, 1999bf215546Sopenharmony_ci uint32_t reference) 2000bf215546Sopenharmony_ci{ 2001bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 2002bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 2003bf215546Sopenharmony_ci 2004bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 2005bf215546Sopenharmony_ci state->dynamic.common.reference.front = reference; 2006bf215546Sopenharmony_ci 2007bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_BACK_BIT) 2008bf215546Sopenharmony_ci state->dynamic.common.reference.back = reference; 2009bf215546Sopenharmony_ci 2010bf215546Sopenharmony_ci state->dirty.reference = true; 2011bf215546Sopenharmony_ci} 2012bf215546Sopenharmony_ci 2013bf215546Sopenharmony_civoid pvr_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, 2014bf215546Sopenharmony_ci VkPipelineBindPoint pipelineBindPoint, 2015bf215546Sopenharmony_ci VkPipelineLayout _layout, 2016bf215546Sopenharmony_ci uint32_t firstSet, 2017bf215546Sopenharmony_ci uint32_t descriptorSetCount, 2018bf215546Sopenharmony_ci const VkDescriptorSet *pDescriptorSets, 2019bf215546Sopenharmony_ci uint32_t dynamicOffsetCount, 2020bf215546Sopenharmony_ci const uint32_t *pDynamicOffsets) 2021bf215546Sopenharmony_ci{ 2022bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 2023bf215546Sopenharmony_ci struct pvr_descriptor_state *descriptor_state; 2024bf215546Sopenharmony_ci 2025bf215546Sopenharmony_ci assert(firstSet + descriptorSetCount <= PVR_MAX_DESCRIPTOR_SETS); 2026bf215546Sopenharmony_ci 2027bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 2028bf215546Sopenharmony_ci 2029bf215546Sopenharmony_ci switch (pipelineBindPoint) { 2030bf215546Sopenharmony_ci case VK_PIPELINE_BIND_POINT_GRAPHICS: 2031bf215546Sopenharmony_ci case VK_PIPELINE_BIND_POINT_COMPUTE: 2032bf215546Sopenharmony_ci break; 2033bf215546Sopenharmony_ci 2034bf215546Sopenharmony_ci default: 2035bf215546Sopenharmony_ci unreachable("Unsupported bind point."); 2036bf215546Sopenharmony_ci break; 2037bf215546Sopenharmony_ci } 2038bf215546Sopenharmony_ci 2039bf215546Sopenharmony_ci if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { 2040bf215546Sopenharmony_ci descriptor_state = &cmd_buffer->state.gfx_desc_state; 2041bf215546Sopenharmony_ci cmd_buffer->state.dirty.gfx_desc_dirty = true; 2042bf215546Sopenharmony_ci } else { 2043bf215546Sopenharmony_ci descriptor_state = &cmd_buffer->state.compute_desc_state; 2044bf215546Sopenharmony_ci cmd_buffer->state.dirty.compute_desc_dirty = true; 2045bf215546Sopenharmony_ci } 2046bf215546Sopenharmony_ci 2047bf215546Sopenharmony_ci for (uint32_t i = 0; i < descriptorSetCount; i++) { 2048bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_descriptor_set, set, pDescriptorSets[i]); 2049bf215546Sopenharmony_ci uint32_t index = firstSet + i; 2050bf215546Sopenharmony_ci 2051bf215546Sopenharmony_ci if (descriptor_state->descriptor_sets[index] != set) { 2052bf215546Sopenharmony_ci descriptor_state->descriptor_sets[index] = set; 2053bf215546Sopenharmony_ci descriptor_state->valid_mask |= (1u << index); 2054bf215546Sopenharmony_ci } 2055bf215546Sopenharmony_ci } 2056bf215546Sopenharmony_ci} 2057bf215546Sopenharmony_ci 2058bf215546Sopenharmony_civoid pvr_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, 2059bf215546Sopenharmony_ci uint32_t firstBinding, 2060bf215546Sopenharmony_ci uint32_t bindingCount, 2061bf215546Sopenharmony_ci const VkBuffer *pBuffers, 2062bf215546Sopenharmony_ci const VkDeviceSize *pOffsets) 2063bf215546Sopenharmony_ci{ 2064bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 2065bf215546Sopenharmony_ci struct pvr_vertex_binding *const vb = cmd_buffer->state.vertex_bindings; 2066bf215546Sopenharmony_ci 2067bf215546Sopenharmony_ci /* We have to defer setting up vertex buffer since we need the buffer 2068bf215546Sopenharmony_ci * stride from the pipeline. 2069bf215546Sopenharmony_ci */ 2070bf215546Sopenharmony_ci 2071bf215546Sopenharmony_ci assert(firstBinding < PVR_MAX_VERTEX_INPUT_BINDINGS && 2072bf215546Sopenharmony_ci bindingCount <= PVR_MAX_VERTEX_INPUT_BINDINGS); 2073bf215546Sopenharmony_ci 2074bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 2075bf215546Sopenharmony_ci 2076bf215546Sopenharmony_ci for (uint32_t i = 0; i < bindingCount; i++) { 2077bf215546Sopenharmony_ci vb[firstBinding + i].buffer = pvr_buffer_from_handle(pBuffers[i]); 2078bf215546Sopenharmony_ci vb[firstBinding + i].offset = pOffsets[i]; 2079bf215546Sopenharmony_ci } 2080bf215546Sopenharmony_ci 2081bf215546Sopenharmony_ci cmd_buffer->state.dirty.vertex_bindings = true; 2082bf215546Sopenharmony_ci} 2083bf215546Sopenharmony_ci 2084bf215546Sopenharmony_civoid pvr_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, 2085bf215546Sopenharmony_ci VkBuffer buffer, 2086bf215546Sopenharmony_ci VkDeviceSize offset, 2087bf215546Sopenharmony_ci VkIndexType indexType) 2088bf215546Sopenharmony_ci{ 2089bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 2090bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_buffer, index_buffer, buffer); 2091bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 2092bf215546Sopenharmony_ci 2093bf215546Sopenharmony_ci assert(offset < index_buffer->vk.size); 2094bf215546Sopenharmony_ci assert(indexType == VK_INDEX_TYPE_UINT32 || 2095bf215546Sopenharmony_ci indexType == VK_INDEX_TYPE_UINT16); 2096bf215546Sopenharmony_ci 2097bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 2098bf215546Sopenharmony_ci 2099bf215546Sopenharmony_ci state->index_buffer_binding.buffer = index_buffer; 2100bf215546Sopenharmony_ci state->index_buffer_binding.offset = offset; 2101bf215546Sopenharmony_ci state->index_buffer_binding.type = indexType; 2102bf215546Sopenharmony_ci state->dirty.index_buffer_binding = true; 2103bf215546Sopenharmony_ci} 2104bf215546Sopenharmony_ci 2105bf215546Sopenharmony_civoid pvr_CmdPushConstants(VkCommandBuffer commandBuffer, 2106bf215546Sopenharmony_ci VkPipelineLayout layout, 2107bf215546Sopenharmony_ci VkShaderStageFlags stageFlags, 2108bf215546Sopenharmony_ci uint32_t offset, 2109bf215546Sopenharmony_ci uint32_t size, 2110bf215546Sopenharmony_ci const void *pValues) 2111bf215546Sopenharmony_ci{ 2112bf215546Sopenharmony_ci#if defined(DEBUG) 2113bf215546Sopenharmony_ci const uint64_t ending = (uint64_t)offset + (uint64_t)size; 2114bf215546Sopenharmony_ci#endif 2115bf215546Sopenharmony_ci 2116bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 2117bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 2118bf215546Sopenharmony_ci 2119bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 2120bf215546Sopenharmony_ci 2121bf215546Sopenharmony_ci pvr_assert(ending <= PVR_MAX_PUSH_CONSTANTS_SIZE); 2122bf215546Sopenharmony_ci 2123bf215546Sopenharmony_ci memcpy(&state->push_constants.data[offset], pValues, size); 2124bf215546Sopenharmony_ci 2125bf215546Sopenharmony_ci state->push_constants.dirty_stages |= stageFlags; 2126bf215546Sopenharmony_ci} 2127bf215546Sopenharmony_ci 2128bf215546Sopenharmony_cistatic VkResult 2129bf215546Sopenharmony_cipvr_cmd_buffer_setup_attachments(struct pvr_cmd_buffer *cmd_buffer, 2130bf215546Sopenharmony_ci const struct pvr_render_pass *pass, 2131bf215546Sopenharmony_ci const struct pvr_framebuffer *framebuffer) 2132bf215546Sopenharmony_ci{ 2133bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 2134bf215546Sopenharmony_ci struct pvr_render_pass_info *info = &state->render_pass_info; 2135bf215546Sopenharmony_ci 2136bf215546Sopenharmony_ci assert(pass->attachment_count == framebuffer->attachment_count); 2137bf215546Sopenharmony_ci 2138bf215546Sopenharmony_ci /* Free any previously allocated attachments. */ 2139bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.attachments); 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_ci if (pass->attachment_count == 0) { 2142bf215546Sopenharmony_ci info->attachments = NULL; 2143bf215546Sopenharmony_ci return VK_SUCCESS; 2144bf215546Sopenharmony_ci } 2145bf215546Sopenharmony_ci 2146bf215546Sopenharmony_ci info->attachments = 2147bf215546Sopenharmony_ci vk_zalloc(&cmd_buffer->vk.pool->alloc, 2148bf215546Sopenharmony_ci pass->attachment_count * sizeof(*info->attachments), 2149bf215546Sopenharmony_ci 8, 2150bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2151bf215546Sopenharmony_ci if (!info->attachments) { 2152bf215546Sopenharmony_ci /* Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */ 2153bf215546Sopenharmony_ci state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); 2154bf215546Sopenharmony_ci return state->status; 2155bf215546Sopenharmony_ci } 2156bf215546Sopenharmony_ci 2157bf215546Sopenharmony_ci if (framebuffer) { 2158bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->attachment_count; i++) 2159bf215546Sopenharmony_ci info->attachments[i] = framebuffer->attachments[i]; 2160bf215546Sopenharmony_ci } 2161bf215546Sopenharmony_ci 2162bf215546Sopenharmony_ci return VK_SUCCESS; 2163bf215546Sopenharmony_ci} 2164bf215546Sopenharmony_ci 2165bf215546Sopenharmony_cistatic VkResult pvr_init_render_targets(struct pvr_device *device, 2166bf215546Sopenharmony_ci struct pvr_render_pass *pass, 2167bf215546Sopenharmony_ci struct pvr_framebuffer *framebuffer) 2168bf215546Sopenharmony_ci{ 2169bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { 2170bf215546Sopenharmony_ci struct pvr_render_target *render_target = 2171bf215546Sopenharmony_ci pvr_get_render_target(pass, framebuffer, i); 2172bf215546Sopenharmony_ci 2173bf215546Sopenharmony_ci pthread_mutex_lock(&render_target->mutex); 2174bf215546Sopenharmony_ci 2175bf215546Sopenharmony_ci if (!render_target->valid) { 2176bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_render *hw_render = 2177bf215546Sopenharmony_ci &pass->hw_setup->renders[i]; 2178bf215546Sopenharmony_ci VkResult result; 2179bf215546Sopenharmony_ci 2180bf215546Sopenharmony_ci result = pvr_render_target_dataset_create(device, 2181bf215546Sopenharmony_ci framebuffer->width, 2182bf215546Sopenharmony_ci framebuffer->height, 2183bf215546Sopenharmony_ci hw_render->sample_count, 2184bf215546Sopenharmony_ci framebuffer->layers, 2185bf215546Sopenharmony_ci &render_target->rt_dataset); 2186bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 2187bf215546Sopenharmony_ci pthread_mutex_unlock(&render_target->mutex); 2188bf215546Sopenharmony_ci return result; 2189bf215546Sopenharmony_ci } 2190bf215546Sopenharmony_ci 2191bf215546Sopenharmony_ci render_target->valid = true; 2192bf215546Sopenharmony_ci } 2193bf215546Sopenharmony_ci 2194bf215546Sopenharmony_ci pthread_mutex_unlock(&render_target->mutex); 2195bf215546Sopenharmony_ci } 2196bf215546Sopenharmony_ci 2197bf215546Sopenharmony_ci return VK_SUCCESS; 2198bf215546Sopenharmony_ci} 2199bf215546Sopenharmony_ci 2200bf215546Sopenharmony_cistatic const struct pvr_renderpass_hwsetup_subpass * 2201bf215546Sopenharmony_cipvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass) 2202bf215546Sopenharmony_ci{ 2203bf215546Sopenharmony_ci const struct pvr_renderpass_hw_map *map = 2204bf215546Sopenharmony_ci &pass->hw_setup->subpass_map[subpass]; 2205bf215546Sopenharmony_ci 2206bf215546Sopenharmony_ci return &pass->hw_setup->renders[map->render].subpasses[map->subpass]; 2207bf215546Sopenharmony_ci} 2208bf215546Sopenharmony_ci 2209bf215546Sopenharmony_cistatic void pvr_perform_start_of_render_attachment_clear( 2210bf215546Sopenharmony_ci struct pvr_cmd_buffer *cmd_buffer, 2211bf215546Sopenharmony_ci const struct pvr_framebuffer *framebuffer, 2212bf215546Sopenharmony_ci uint32_t index, 2213bf215546Sopenharmony_ci bool is_depth_stencil, 2214bf215546Sopenharmony_ci uint32_t *index_list_clear_mask) 2215bf215546Sopenharmony_ci{ 2216bf215546Sopenharmony_ci struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info; 2217bf215546Sopenharmony_ci const struct pvr_render_pass *pass = info->pass; 2218bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_render *hw_render; 2219bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup *hw_setup; 2220bf215546Sopenharmony_ci struct pvr_image_view *iview; 2221bf215546Sopenharmony_ci uint32_t view_idx; 2222bf215546Sopenharmony_ci uint32_t height; 2223bf215546Sopenharmony_ci uint32_t width; 2224bf215546Sopenharmony_ci 2225bf215546Sopenharmony_ci hw_setup = pass->hw_setup; 2226bf215546Sopenharmony_ci hw_render = 2227bf215546Sopenharmony_ci &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render]; 2228bf215546Sopenharmony_ci 2229bf215546Sopenharmony_ci if (is_depth_stencil) { 2230bf215546Sopenharmony_ci bool stencil_clear; 2231bf215546Sopenharmony_ci bool depth_clear; 2232bf215546Sopenharmony_ci bool is_stencil; 2233bf215546Sopenharmony_ci bool is_depth; 2234bf215546Sopenharmony_ci 2235bf215546Sopenharmony_ci assert(hw_render->ds_surface_id != -1); 2236bf215546Sopenharmony_ci assert(index == 0); 2237bf215546Sopenharmony_ci 2238bf215546Sopenharmony_ci view_idx = hw_render->ds_surface_id; 2239bf215546Sopenharmony_ci 2240bf215546Sopenharmony_ci is_depth = vk_format_has_depth(pass->attachments[view_idx].vk_format); 2241bf215546Sopenharmony_ci is_stencil = vk_format_has_stencil(pass->attachments[view_idx].vk_format); 2242bf215546Sopenharmony_ci depth_clear = hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR; 2243bf215546Sopenharmony_ci stencil_clear = hw_render->stencil_init == 2244bf215546Sopenharmony_ci RENDERPASS_SURFACE_INITOP_CLEAR; 2245bf215546Sopenharmony_ci 2246bf215546Sopenharmony_ci /* Attempt to clear the ds attachment. Do not erroneously discard an 2247bf215546Sopenharmony_ci * attachment that has no depth clear but has a stencil attachment. 2248bf215546Sopenharmony_ci */ 2249bf215546Sopenharmony_ci /* if not (a ∧ c) ∨ (b ∧ d) */ 2250bf215546Sopenharmony_ci if (!((is_depth && depth_clear) || (is_stencil && stencil_clear))) 2251bf215546Sopenharmony_ci return; 2252bf215546Sopenharmony_ci } else if (hw_render->color_init[index].op != 2253bf215546Sopenharmony_ci RENDERPASS_SURFACE_INITOP_CLEAR) { 2254bf215546Sopenharmony_ci return; 2255bf215546Sopenharmony_ci } else { 2256bf215546Sopenharmony_ci view_idx = hw_render->color_init[index].driver_id; 2257bf215546Sopenharmony_ci } 2258bf215546Sopenharmony_ci 2259bf215546Sopenharmony_ci iview = info->attachments[view_idx]; 2260bf215546Sopenharmony_ci width = iview->vk.extent.width; 2261bf215546Sopenharmony_ci height = iview->vk.extent.height; 2262bf215546Sopenharmony_ci 2263bf215546Sopenharmony_ci /* FIXME: It would be nice if this function and pvr_sub_cmd_gfx_job_init() 2264bf215546Sopenharmony_ci * were doing the same check (even if it's just an assert) to determine if a 2265bf215546Sopenharmony_ci * clear is needed. 2266bf215546Sopenharmony_ci */ 2267bf215546Sopenharmony_ci /* If this is single-layer fullscreen, we already do the clears in 2268bf215546Sopenharmony_ci * pvr_sub_cmd_gfx_job_init(). 2269bf215546Sopenharmony_ci */ 2270bf215546Sopenharmony_ci if (info->render_area.offset.x == 0 && info->render_area.offset.y == 0 && 2271bf215546Sopenharmony_ci info->render_area.extent.width == width && 2272bf215546Sopenharmony_ci info->render_area.extent.height == height && framebuffer->layers == 1) { 2273bf215546Sopenharmony_ci return; 2274bf215546Sopenharmony_ci } 2275bf215546Sopenharmony_ci 2276bf215546Sopenharmony_ci pvr_finishme("Unimplemented path!"); 2277bf215546Sopenharmony_ci} 2278bf215546Sopenharmony_ci 2279bf215546Sopenharmony_cistatic void 2280bf215546Sopenharmony_cipvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer) 2281bf215546Sopenharmony_ci{ 2282bf215546Sopenharmony_ci struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info; 2283bf215546Sopenharmony_ci const struct pvr_framebuffer *framebuffer = info->framebuffer; 2284bf215546Sopenharmony_ci const struct pvr_render_pass *pass = info->pass; 2285bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup *hw_setup = pass->hw_setup; 2286bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_render *hw_render; 2287bf215546Sopenharmony_ci 2288bf215546Sopenharmony_ci /* Mask of attachment clears using index lists instead of background object 2289bf215546Sopenharmony_ci * to clear. 2290bf215546Sopenharmony_ci */ 2291bf215546Sopenharmony_ci uint32_t index_list_clear_mask = 0; 2292bf215546Sopenharmony_ci 2293bf215546Sopenharmony_ci hw_render = 2294bf215546Sopenharmony_ci &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render]; 2295bf215546Sopenharmony_ci if (!hw_render) { 2296bf215546Sopenharmony_ci info->process_empty_tiles = false; 2297bf215546Sopenharmony_ci info->enable_bg_tag = false; 2298bf215546Sopenharmony_ci return; 2299bf215546Sopenharmony_ci } 2300bf215546Sopenharmony_ci 2301bf215546Sopenharmony_ci for (uint32_t i = 0; i < hw_render->color_init_count; i++) { 2302bf215546Sopenharmony_ci pvr_perform_start_of_render_attachment_clear(cmd_buffer, 2303bf215546Sopenharmony_ci framebuffer, 2304bf215546Sopenharmony_ci i, 2305bf215546Sopenharmony_ci false, 2306bf215546Sopenharmony_ci &index_list_clear_mask); 2307bf215546Sopenharmony_ci } 2308bf215546Sopenharmony_ci 2309bf215546Sopenharmony_ci info->enable_bg_tag = !!hw_render->color_init_count; 2310bf215546Sopenharmony_ci 2311bf215546Sopenharmony_ci /* If we're not using index list for all clears/loads then we need to run 2312bf215546Sopenharmony_ci * the background object on empty tiles. 2313bf215546Sopenharmony_ci */ 2314bf215546Sopenharmony_ci if (hw_render->color_init_count && 2315bf215546Sopenharmony_ci index_list_clear_mask != ((1u << hw_render->color_init_count) - 1u)) { 2316bf215546Sopenharmony_ci info->process_empty_tiles = true; 2317bf215546Sopenharmony_ci } else { 2318bf215546Sopenharmony_ci info->process_empty_tiles = false; 2319bf215546Sopenharmony_ci } 2320bf215546Sopenharmony_ci 2321bf215546Sopenharmony_ci if (hw_render->ds_surface_id != -1) { 2322bf215546Sopenharmony_ci uint32_t ds_index_list = 0; 2323bf215546Sopenharmony_ci 2324bf215546Sopenharmony_ci pvr_perform_start_of_render_attachment_clear(cmd_buffer, 2325bf215546Sopenharmony_ci framebuffer, 2326bf215546Sopenharmony_ci 0, 2327bf215546Sopenharmony_ci true, 2328bf215546Sopenharmony_ci &ds_index_list); 2329bf215546Sopenharmony_ci } 2330bf215546Sopenharmony_ci 2331bf215546Sopenharmony_ci if (index_list_clear_mask) 2332bf215546Sopenharmony_ci pvr_finishme("Add support for generating loadops shaders!"); 2333bf215546Sopenharmony_ci} 2334bf215546Sopenharmony_ci 2335bf215546Sopenharmony_cistatic void pvr_stash_depth_format(struct pvr_cmd_buffer_state *state, 2336bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const sub_cmd) 2337bf215546Sopenharmony_ci{ 2338bf215546Sopenharmony_ci const struct pvr_render_pass *pass = state->render_pass_info.pass; 2339bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_render *hw_render = 2340bf215546Sopenharmony_ci &pass->hw_setup->renders[sub_cmd->hw_render_idx]; 2341bf215546Sopenharmony_ci 2342bf215546Sopenharmony_ci if (hw_render->ds_surface_id != -1) { 2343bf215546Sopenharmony_ci struct pvr_image_view **iviews = state->render_pass_info.attachments; 2344bf215546Sopenharmony_ci 2345bf215546Sopenharmony_ci state->depth_format = iviews[hw_render->ds_surface_id]->vk.format; 2346bf215546Sopenharmony_ci } 2347bf215546Sopenharmony_ci} 2348bf215546Sopenharmony_ci 2349bf215546Sopenharmony_cistatic bool pvr_loadops_contain_clear(struct pvr_renderpass_hwsetup *hw_setup) 2350bf215546Sopenharmony_ci{ 2351bf215546Sopenharmony_ci for (uint32_t i = 0; i < hw_setup->render_count; i++) { 2352bf215546Sopenharmony_ci struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i]; 2353bf215546Sopenharmony_ci uint32_t render_targets_count = 2354bf215546Sopenharmony_ci hw_render->init_setup.render_targets_count; 2355bf215546Sopenharmony_ci 2356bf215546Sopenharmony_ci for (uint32_t j = 0; 2357bf215546Sopenharmony_ci j < (hw_render->color_init_count * render_targets_count); 2358bf215546Sopenharmony_ci j += render_targets_count) { 2359bf215546Sopenharmony_ci for (uint32_t k = 0; k < hw_render->init_setup.render_targets_count; 2360bf215546Sopenharmony_ci k++) { 2361bf215546Sopenharmony_ci if (hw_render->color_init[j + k].op == 2362bf215546Sopenharmony_ci RENDERPASS_SURFACE_INITOP_CLEAR) { 2363bf215546Sopenharmony_ci return true; 2364bf215546Sopenharmony_ci } 2365bf215546Sopenharmony_ci } 2366bf215546Sopenharmony_ci } 2367bf215546Sopenharmony_ci if (hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR || 2368bf215546Sopenharmony_ci hw_render->stencil_init == RENDERPASS_SURFACE_INITOP_CLEAR) { 2369bf215546Sopenharmony_ci return true; 2370bf215546Sopenharmony_ci } 2371bf215546Sopenharmony_ci } 2372bf215546Sopenharmony_ci 2373bf215546Sopenharmony_ci return false; 2374bf215546Sopenharmony_ci} 2375bf215546Sopenharmony_ci 2376bf215546Sopenharmony_cistatic VkResult 2377bf215546Sopenharmony_cipvr_cmd_buffer_set_clear_values(struct pvr_cmd_buffer *cmd_buffer, 2378bf215546Sopenharmony_ci const VkRenderPassBeginInfo *pRenderPassBegin) 2379bf215546Sopenharmony_ci{ 2380bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 2381bf215546Sopenharmony_ci 2382bf215546Sopenharmony_ci /* Free any previously allocated clear values. */ 2383bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.clear_values); 2384bf215546Sopenharmony_ci 2385bf215546Sopenharmony_ci if (pRenderPassBegin->clearValueCount) { 2386bf215546Sopenharmony_ci const size_t size = pRenderPassBegin->clearValueCount * 2387bf215546Sopenharmony_ci sizeof(*state->render_pass_info.clear_values); 2388bf215546Sopenharmony_ci 2389bf215546Sopenharmony_ci state->render_pass_info.clear_values = 2390bf215546Sopenharmony_ci vk_zalloc(&cmd_buffer->vk.pool->alloc, 2391bf215546Sopenharmony_ci size, 2392bf215546Sopenharmony_ci 8, 2393bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 2394bf215546Sopenharmony_ci if (!state->render_pass_info.clear_values) { 2395bf215546Sopenharmony_ci state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); 2396bf215546Sopenharmony_ci return state->status; 2397bf215546Sopenharmony_ci } 2398bf215546Sopenharmony_ci 2399bf215546Sopenharmony_ci memcpy(state->render_pass_info.clear_values, 2400bf215546Sopenharmony_ci pRenderPassBegin->pClearValues, 2401bf215546Sopenharmony_ci size); 2402bf215546Sopenharmony_ci } else { 2403bf215546Sopenharmony_ci state->render_pass_info.clear_values = NULL; 2404bf215546Sopenharmony_ci } 2405bf215546Sopenharmony_ci 2406bf215546Sopenharmony_ci state->render_pass_info.clear_value_count = 2407bf215546Sopenharmony_ci pRenderPassBegin->clearValueCount; 2408bf215546Sopenharmony_ci 2409bf215546Sopenharmony_ci return VK_SUCCESS; 2410bf215546Sopenharmony_ci} 2411bf215546Sopenharmony_ci 2412bf215546Sopenharmony_civoid pvr_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, 2413bf215546Sopenharmony_ci const VkRenderPassBeginInfo *pRenderPassBeginInfo, 2414bf215546Sopenharmony_ci const VkSubpassBeginInfo *pSubpassBeginInfo) 2415bf215546Sopenharmony_ci{ 2416bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_framebuffer, 2417bf215546Sopenharmony_ci framebuffer, 2418bf215546Sopenharmony_ci pRenderPassBeginInfo->framebuffer); 2419bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_render_pass, pass, pRenderPassBeginInfo->renderPass); 2420bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 2421bf215546Sopenharmony_ci const struct pvr_renderpass_hwsetup_subpass *hw_subpass; 2422bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 2423bf215546Sopenharmony_ci VkResult result; 2424bf215546Sopenharmony_ci 2425bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 2426bf215546Sopenharmony_ci 2427bf215546Sopenharmony_ci assert(!state->render_pass_info.pass); 2428bf215546Sopenharmony_ci assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); 2429bf215546Sopenharmony_ci 2430bf215546Sopenharmony_ci /* FIXME: Create a separate function for everything using pass->subpasses, 2431bf215546Sopenharmony_ci * look at cmd_buffer_begin_subpass() for example. */ 2432bf215546Sopenharmony_ci state->render_pass_info.pass = pass; 2433bf215546Sopenharmony_ci state->render_pass_info.framebuffer = framebuffer; 2434bf215546Sopenharmony_ci state->render_pass_info.subpass_idx = 0; 2435bf215546Sopenharmony_ci state->render_pass_info.render_area = pRenderPassBeginInfo->renderArea; 2436bf215546Sopenharmony_ci state->render_pass_info.current_hw_subpass = 0; 2437bf215546Sopenharmony_ci state->render_pass_info.pipeline_bind_point = 2438bf215546Sopenharmony_ci pass->subpasses[0].pipeline_bind_point; 2439bf215546Sopenharmony_ci state->render_pass_info.userpass_spawn = pass->subpasses[0].userpass_spawn; 2440bf215546Sopenharmony_ci state->dirty.userpass_spawn = true; 2441bf215546Sopenharmony_ci 2442bf215546Sopenharmony_ci result = pvr_cmd_buffer_setup_attachments(cmd_buffer, pass, framebuffer); 2443bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2444bf215546Sopenharmony_ci return; 2445bf215546Sopenharmony_ci 2446bf215546Sopenharmony_ci state->status = 2447bf215546Sopenharmony_ci pvr_init_render_targets(cmd_buffer->device, pass, framebuffer); 2448bf215546Sopenharmony_ci if (state->status != VK_SUCCESS) 2449bf215546Sopenharmony_ci return; 2450bf215546Sopenharmony_ci 2451bf215546Sopenharmony_ci result = pvr_cmd_buffer_set_clear_values(cmd_buffer, pRenderPassBeginInfo); 2452bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2453bf215546Sopenharmony_ci return; 2454bf215546Sopenharmony_ci 2455bf215546Sopenharmony_ci assert(pass->subpasses[0].pipeline_bind_point == 2456bf215546Sopenharmony_ci VK_PIPELINE_BIND_POINT_GRAPHICS); 2457bf215546Sopenharmony_ci 2458bf215546Sopenharmony_ci result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS); 2459bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2460bf215546Sopenharmony_ci return; 2461bf215546Sopenharmony_ci 2462bf215546Sopenharmony_ci /* Run subpass 0 "soft" background object after the actual background 2463bf215546Sopenharmony_ci * object. 2464bf215546Sopenharmony_ci */ 2465bf215546Sopenharmony_ci hw_subpass = pvr_get_hw_subpass(pass, 0); 2466bf215546Sopenharmony_ci if (hw_subpass->client_data) 2467bf215546Sopenharmony_ci pvr_finishme("Unimplemented path!"); 2468bf215546Sopenharmony_ci 2469bf215546Sopenharmony_ci pvr_perform_start_of_render_clears(cmd_buffer); 2470bf215546Sopenharmony_ci pvr_stash_depth_format(&cmd_buffer->state, 2471bf215546Sopenharmony_ci &cmd_buffer->state.current_sub_cmd->gfx); 2472bf215546Sopenharmony_ci 2473bf215546Sopenharmony_ci if (!pvr_loadops_contain_clear(pass->hw_setup)) { 2474bf215546Sopenharmony_ci state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR; 2475bf215546Sopenharmony_ci state->dynamic.scissor_accum_bounds.offset.x = 0; 2476bf215546Sopenharmony_ci state->dynamic.scissor_accum_bounds.offset.y = 0; 2477bf215546Sopenharmony_ci state->dynamic.scissor_accum_bounds.extent.width = 0; 2478bf215546Sopenharmony_ci state->dynamic.scissor_accum_bounds.extent.height = 0; 2479bf215546Sopenharmony_ci } else { 2480bf215546Sopenharmony_ci state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_DISABLED; 2481bf215546Sopenharmony_ci } 2482bf215546Sopenharmony_ci} 2483bf215546Sopenharmony_ci 2484bf215546Sopenharmony_cistatic void pvr_cmd_buffer_reset(struct pvr_cmd_buffer *cmd_buffer) 2485bf215546Sopenharmony_ci{ 2486bf215546Sopenharmony_ci if (cmd_buffer->status != PVR_CMD_BUFFER_STATUS_INITIAL) { 2487bf215546Sopenharmony_ci /* FIXME: For now we always free all resources as if 2488bf215546Sopenharmony_ci * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT was set. 2489bf215546Sopenharmony_ci */ 2490bf215546Sopenharmony_ci pvr_cmd_buffer_free_sub_cmds(cmd_buffer); 2491bf215546Sopenharmony_ci 2492bf215546Sopenharmony_ci list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) { 2493bf215546Sopenharmony_ci list_del(&bo->link); 2494bf215546Sopenharmony_ci pvr_bo_free(cmd_buffer->device, bo); 2495bf215546Sopenharmony_ci } 2496bf215546Sopenharmony_ci 2497bf215546Sopenharmony_ci util_dynarray_clear(&cmd_buffer->scissor_array); 2498bf215546Sopenharmony_ci util_dynarray_clear(&cmd_buffer->depth_bias_array); 2499bf215546Sopenharmony_ci 2500bf215546Sopenharmony_ci cmd_buffer->state.status = VK_SUCCESS; 2501bf215546Sopenharmony_ci cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL; 2502bf215546Sopenharmony_ci } 2503bf215546Sopenharmony_ci} 2504bf215546Sopenharmony_ci 2505bf215546Sopenharmony_ciVkResult pvr_BeginCommandBuffer(VkCommandBuffer commandBuffer, 2506bf215546Sopenharmony_ci const VkCommandBufferBeginInfo *pBeginInfo) 2507bf215546Sopenharmony_ci{ 2508bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 2509bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state; 2510bf215546Sopenharmony_ci VkResult result; 2511bf215546Sopenharmony_ci 2512bf215546Sopenharmony_ci pvr_cmd_buffer_reset(cmd_buffer); 2513bf215546Sopenharmony_ci 2514bf215546Sopenharmony_ci cmd_buffer->usage_flags = pBeginInfo->flags; 2515bf215546Sopenharmony_ci state = &cmd_buffer->state; 2516bf215546Sopenharmony_ci 2517bf215546Sopenharmony_ci /* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for 2518bf215546Sopenharmony_ci * primary level command buffers. 2519bf215546Sopenharmony_ci * 2520bf215546Sopenharmony_ci * From the Vulkan 1.0 spec: 2521bf215546Sopenharmony_ci * 2522bf215546Sopenharmony_ci * VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a 2523bf215546Sopenharmony_ci * secondary command buffer is considered to be entirely inside a render 2524bf215546Sopenharmony_ci * pass. If this is a primary command buffer, then this bit is ignored. 2525bf215546Sopenharmony_ci */ 2526bf215546Sopenharmony_ci if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { 2527bf215546Sopenharmony_ci cmd_buffer->usage_flags &= 2528bf215546Sopenharmony_ci ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; 2529bf215546Sopenharmony_ci } 2530bf215546Sopenharmony_ci 2531bf215546Sopenharmony_ci if (cmd_buffer->usage_flags & 2532bf215546Sopenharmony_ci VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { 2533bf215546Sopenharmony_ci const VkCommandBufferInheritanceInfo *inheritance_info = 2534bf215546Sopenharmony_ci pBeginInfo->pInheritanceInfo; 2535bf215546Sopenharmony_ci struct pvr_render_pass *pass; 2536bf215546Sopenharmony_ci 2537bf215546Sopenharmony_ci pass = pvr_render_pass_from_handle(inheritance_info->renderPass); 2538bf215546Sopenharmony_ci state->render_pass_info.pass = pass; 2539bf215546Sopenharmony_ci state->render_pass_info.framebuffer = 2540bf215546Sopenharmony_ci pvr_framebuffer_from_handle(inheritance_info->framebuffer); 2541bf215546Sopenharmony_ci state->render_pass_info.subpass_idx = inheritance_info->subpass; 2542bf215546Sopenharmony_ci state->render_pass_info.userpass_spawn = 2543bf215546Sopenharmony_ci pass->subpasses[inheritance_info->subpass].userpass_spawn; 2544bf215546Sopenharmony_ci 2545bf215546Sopenharmony_ci result = 2546bf215546Sopenharmony_ci pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS); 2547bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2548bf215546Sopenharmony_ci return result; 2549bf215546Sopenharmony_ci } 2550bf215546Sopenharmony_ci 2551bf215546Sopenharmony_ci memset(state->barriers_needed, 2552bf215546Sopenharmony_ci 0xFF, 2553bf215546Sopenharmony_ci sizeof(*state->barriers_needed) * ARRAY_SIZE(state->barriers_needed)); 2554bf215546Sopenharmony_ci 2555bf215546Sopenharmony_ci cmd_buffer->status = PVR_CMD_BUFFER_STATUS_RECORDING; 2556bf215546Sopenharmony_ci 2557bf215546Sopenharmony_ci return VK_SUCCESS; 2558bf215546Sopenharmony_ci} 2559bf215546Sopenharmony_ci 2560bf215546Sopenharmony_ciVkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer, 2561bf215546Sopenharmony_ci struct pvr_transfer_cmd *transfer_cmd) 2562bf215546Sopenharmony_ci{ 2563bf215546Sopenharmony_ci struct pvr_sub_cmd_transfer *sub_cmd; 2564bf215546Sopenharmony_ci VkResult result; 2565bf215546Sopenharmony_ci 2566bf215546Sopenharmony_ci result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER); 2567bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2568bf215546Sopenharmony_ci return result; 2569bf215546Sopenharmony_ci 2570bf215546Sopenharmony_ci sub_cmd = &cmd_buffer->state.current_sub_cmd->transfer; 2571bf215546Sopenharmony_ci 2572bf215546Sopenharmony_ci list_addtail(&transfer_cmd->link, &sub_cmd->transfer_cmds); 2573bf215546Sopenharmony_ci 2574bf215546Sopenharmony_ci return VK_SUCCESS; 2575bf215546Sopenharmony_ci} 2576bf215546Sopenharmony_ci 2577bf215546Sopenharmony_cistatic void 2578bf215546Sopenharmony_cipvr_validate_push_descriptors(struct pvr_cmd_buffer *cmd_buffer, 2579bf215546Sopenharmony_ci bool *const push_descriptors_dirty_out) 2580bf215546Sopenharmony_ci{ 2581bf215546Sopenharmony_ci /* TODO: Implement this function, based on ValidatePushDescriptors. */ 2582bf215546Sopenharmony_ci pvr_finishme("Add support for push descriptors!"); 2583bf215546Sopenharmony_ci *push_descriptors_dirty_out = false; 2584bf215546Sopenharmony_ci} 2585bf215546Sopenharmony_ci 2586bf215546Sopenharmony_ci#define PVR_WRITE(_buffer, _value, _offset, _max) \ 2587bf215546Sopenharmony_ci do { \ 2588bf215546Sopenharmony_ci __typeof__(_value) __value = _value; \ 2589bf215546Sopenharmony_ci uint64_t __offset = _offset; \ 2590bf215546Sopenharmony_ci uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \ 2591bf215546Sopenharmony_ci static_assert(__same_type(*_buffer, __value), \ 2592bf215546Sopenharmony_ci "Buffer and value type mismatch"); \ 2593bf215546Sopenharmony_ci assert((__offset + __nr_dwords) <= (_max)); \ 2594bf215546Sopenharmony_ci assert((__offset % __nr_dwords) == 0U); \ 2595bf215546Sopenharmony_ci _buffer[__offset / __nr_dwords] = __value; \ 2596bf215546Sopenharmony_ci } while (0) 2597bf215546Sopenharmony_ci 2598bf215546Sopenharmony_cistatic VkResult 2599bf215546Sopenharmony_cipvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer, 2600bf215546Sopenharmony_ci const struct pvr_graphics_pipeline *const gfx_pipeline) 2601bf215546Sopenharmony_ci{ 2602bf215546Sopenharmony_ci const struct pvr_vertex_shader_state *const vertex_state = 2603bf215546Sopenharmony_ci &gfx_pipeline->vertex_shader_state; 2604bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 2605bf215546Sopenharmony_ci const struct pvr_pds_info *const pds_info = state->pds_shader.info; 2606bf215546Sopenharmony_ci const uint8_t *entries; 2607bf215546Sopenharmony_ci uint32_t *dword_buffer; 2608bf215546Sopenharmony_ci uint64_t *qword_buffer; 2609bf215546Sopenharmony_ci struct pvr_bo *pvr_bo; 2610bf215546Sopenharmony_ci VkResult result; 2611bf215546Sopenharmony_ci 2612bf215546Sopenharmony_ci result = pvr_cmd_buffer_alloc_mem(cmd_buffer, 2613bf215546Sopenharmony_ci cmd_buffer->device->heaps.pds_heap, 2614bf215546Sopenharmony_ci pds_info->data_size_in_dwords, 2615bf215546Sopenharmony_ci PVR_BO_ALLOC_FLAG_CPU_MAPPED, 2616bf215546Sopenharmony_ci &pvr_bo); 2617bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2618bf215546Sopenharmony_ci return result; 2619bf215546Sopenharmony_ci 2620bf215546Sopenharmony_ci dword_buffer = (uint32_t *)pvr_bo->bo->map; 2621bf215546Sopenharmony_ci qword_buffer = (uint64_t *)pvr_bo->bo->map; 2622bf215546Sopenharmony_ci 2623bf215546Sopenharmony_ci entries = (uint8_t *)pds_info->entries; 2624bf215546Sopenharmony_ci 2625bf215546Sopenharmony_ci for (uint32_t i = 0; i < pds_info->entry_count; i++) { 2626bf215546Sopenharmony_ci const struct pvr_const_map_entry *const entry_header = 2627bf215546Sopenharmony_ci (struct pvr_const_map_entry *)entries; 2628bf215546Sopenharmony_ci 2629bf215546Sopenharmony_ci switch (entry_header->type) { 2630bf215546Sopenharmony_ci case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: { 2631bf215546Sopenharmony_ci const struct pvr_const_map_entry_literal32 *const literal = 2632bf215546Sopenharmony_ci (struct pvr_const_map_entry_literal32 *)entries; 2633bf215546Sopenharmony_ci 2634bf215546Sopenharmony_ci PVR_WRITE(dword_buffer, 2635bf215546Sopenharmony_ci literal->literal_value, 2636bf215546Sopenharmony_ci literal->const_offset, 2637bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2638bf215546Sopenharmony_ci 2639bf215546Sopenharmony_ci entries += sizeof(*literal); 2640bf215546Sopenharmony_ci break; 2641bf215546Sopenharmony_ci } 2642bf215546Sopenharmony_ci 2643bf215546Sopenharmony_ci case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: { 2644bf215546Sopenharmony_ci const struct pvr_const_map_entry_doutu_address *const doutu_addr = 2645bf215546Sopenharmony_ci (struct pvr_const_map_entry_doutu_address *)entries; 2646bf215546Sopenharmony_ci const pvr_dev_addr_t exec_addr = 2647bf215546Sopenharmony_ci PVR_DEV_ADDR_OFFSET(vertex_state->bo->vma->dev_addr, 2648bf215546Sopenharmony_ci vertex_state->entry_offset); 2649bf215546Sopenharmony_ci uint64_t addr = 0ULL; 2650bf215546Sopenharmony_ci 2651bf215546Sopenharmony_ci pvr_set_usc_execution_address64(&addr, exec_addr.addr); 2652bf215546Sopenharmony_ci 2653bf215546Sopenharmony_ci PVR_WRITE(qword_buffer, 2654bf215546Sopenharmony_ci addr | doutu_addr->doutu_control, 2655bf215546Sopenharmony_ci doutu_addr->const_offset, 2656bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2657bf215546Sopenharmony_ci 2658bf215546Sopenharmony_ci entries += sizeof(*doutu_addr); 2659bf215546Sopenharmony_ci break; 2660bf215546Sopenharmony_ci } 2661bf215546Sopenharmony_ci 2662bf215546Sopenharmony_ci case PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE: { 2663bf215546Sopenharmony_ci const struct pvr_const_map_entry_base_instance *const base_instance = 2664bf215546Sopenharmony_ci (struct pvr_const_map_entry_base_instance *)entries; 2665bf215546Sopenharmony_ci 2666bf215546Sopenharmony_ci PVR_WRITE(dword_buffer, 2667bf215546Sopenharmony_ci state->draw_state.base_instance, 2668bf215546Sopenharmony_ci base_instance->const_offset, 2669bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2670bf215546Sopenharmony_ci 2671bf215546Sopenharmony_ci entries += sizeof(*base_instance); 2672bf215546Sopenharmony_ci break; 2673bf215546Sopenharmony_ci } 2674bf215546Sopenharmony_ci 2675bf215546Sopenharmony_ci case PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS: { 2676bf215546Sopenharmony_ci const struct pvr_const_map_entry_vertex_attribute_address 2677bf215546Sopenharmony_ci *const attribute = 2678bf215546Sopenharmony_ci (struct pvr_const_map_entry_vertex_attribute_address *)entries; 2679bf215546Sopenharmony_ci const struct pvr_vertex_binding *const binding = 2680bf215546Sopenharmony_ci &state->vertex_bindings[attribute->binding_index]; 2681bf215546Sopenharmony_ci const pvr_dev_addr_t addr = 2682bf215546Sopenharmony_ci PVR_DEV_ADDR_OFFSET(binding->buffer->dev_addr, 2683bf215546Sopenharmony_ci binding->offset + attribute->offset); 2684bf215546Sopenharmony_ci 2685bf215546Sopenharmony_ci PVR_WRITE(qword_buffer, 2686bf215546Sopenharmony_ci addr.addr, 2687bf215546Sopenharmony_ci attribute->const_offset, 2688bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2689bf215546Sopenharmony_ci 2690bf215546Sopenharmony_ci entries += sizeof(*attribute); 2691bf215546Sopenharmony_ci break; 2692bf215546Sopenharmony_ci } 2693bf215546Sopenharmony_ci 2694bf215546Sopenharmony_ci default: 2695bf215546Sopenharmony_ci unreachable("Unsupported data section map"); 2696bf215546Sopenharmony_ci break; 2697bf215546Sopenharmony_ci } 2698bf215546Sopenharmony_ci } 2699bf215546Sopenharmony_ci 2700bf215546Sopenharmony_ci state->pds_vertex_attrib_offset = 2701bf215546Sopenharmony_ci pvr_bo->vma->dev_addr.addr - 2702bf215546Sopenharmony_ci cmd_buffer->device->heaps.pds_heap->base_addr.addr; 2703bf215546Sopenharmony_ci 2704bf215546Sopenharmony_ci pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo); 2705bf215546Sopenharmony_ci 2706bf215546Sopenharmony_ci return VK_SUCCESS; 2707bf215546Sopenharmony_ci} 2708bf215546Sopenharmony_ci 2709bf215546Sopenharmony_cistatic VkResult pvr_setup_descriptor_mappings( 2710bf215546Sopenharmony_ci struct pvr_cmd_buffer *const cmd_buffer, 2711bf215546Sopenharmony_ci enum pvr_stage_allocation stage, 2712bf215546Sopenharmony_ci const struct pvr_stage_allocation_descriptor_state *descriptor_state, 2713bf215546Sopenharmony_ci UNUSED const pvr_dev_addr_t *const num_worgroups_buff_addr, 2714bf215546Sopenharmony_ci uint32_t *const descriptor_data_offset_out) 2715bf215546Sopenharmony_ci{ 2716bf215546Sopenharmony_ci const struct pvr_pds_info *const pds_info = &descriptor_state->pds_info; 2717bf215546Sopenharmony_ci const struct pvr_descriptor_state *desc_state; 2718bf215546Sopenharmony_ci const uint8_t *entries; 2719bf215546Sopenharmony_ci uint32_t *dword_buffer; 2720bf215546Sopenharmony_ci uint64_t *qword_buffer; 2721bf215546Sopenharmony_ci struct pvr_bo *pvr_bo; 2722bf215546Sopenharmony_ci VkResult result; 2723bf215546Sopenharmony_ci 2724bf215546Sopenharmony_ci pvr_finishme("Handle num_worgroups_buff_addr"); 2725bf215546Sopenharmony_ci 2726bf215546Sopenharmony_ci if (!pds_info->data_size_in_dwords) 2727bf215546Sopenharmony_ci return VK_SUCCESS; 2728bf215546Sopenharmony_ci 2729bf215546Sopenharmony_ci result = pvr_cmd_buffer_alloc_mem(cmd_buffer, 2730bf215546Sopenharmony_ci cmd_buffer->device->heaps.pds_heap, 2731bf215546Sopenharmony_ci pds_info->data_size_in_dwords, 2732bf215546Sopenharmony_ci PVR_BO_ALLOC_FLAG_CPU_MAPPED, 2733bf215546Sopenharmony_ci &pvr_bo); 2734bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2735bf215546Sopenharmony_ci return result; 2736bf215546Sopenharmony_ci 2737bf215546Sopenharmony_ci dword_buffer = (uint32_t *)pvr_bo->bo->map; 2738bf215546Sopenharmony_ci qword_buffer = (uint64_t *)pvr_bo->bo->map; 2739bf215546Sopenharmony_ci 2740bf215546Sopenharmony_ci entries = (uint8_t *)pds_info->entries; 2741bf215546Sopenharmony_ci 2742bf215546Sopenharmony_ci switch (stage) { 2743bf215546Sopenharmony_ci case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY: 2744bf215546Sopenharmony_ci case PVR_STAGE_ALLOCATION_FRAGMENT: 2745bf215546Sopenharmony_ci desc_state = &cmd_buffer->state.gfx_desc_state; 2746bf215546Sopenharmony_ci break; 2747bf215546Sopenharmony_ci 2748bf215546Sopenharmony_ci case PVR_STAGE_ALLOCATION_COMPUTE: 2749bf215546Sopenharmony_ci desc_state = &cmd_buffer->state.compute_desc_state; 2750bf215546Sopenharmony_ci break; 2751bf215546Sopenharmony_ci 2752bf215546Sopenharmony_ci default: 2753bf215546Sopenharmony_ci unreachable("Unsupported stage."); 2754bf215546Sopenharmony_ci break; 2755bf215546Sopenharmony_ci } 2756bf215546Sopenharmony_ci 2757bf215546Sopenharmony_ci for (uint32_t i = 0; i < pds_info->entry_count; i++) { 2758bf215546Sopenharmony_ci const struct pvr_const_map_entry *const entry_header = 2759bf215546Sopenharmony_ci (struct pvr_const_map_entry *)entries; 2760bf215546Sopenharmony_ci 2761bf215546Sopenharmony_ci /* TODO: See if instead of reusing the blend constant buffer type entry, 2762bf215546Sopenharmony_ci * we can setup a new buffer type specifically for num_workgroups or other 2763bf215546Sopenharmony_ci * built-in variables. The mappings are setup at pipeline creation when 2764bf215546Sopenharmony_ci * creating the descriptor program. 2765bf215546Sopenharmony_ci */ 2766bf215546Sopenharmony_ci pvr_finishme("Handle blend constant reuse for compute."); 2767bf215546Sopenharmony_ci 2768bf215546Sopenharmony_ci switch (entry_header->type) { 2769bf215546Sopenharmony_ci case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: { 2770bf215546Sopenharmony_ci const struct pvr_const_map_entry_literal32 *const literal = 2771bf215546Sopenharmony_ci (struct pvr_const_map_entry_literal32 *)entries; 2772bf215546Sopenharmony_ci 2773bf215546Sopenharmony_ci PVR_WRITE(dword_buffer, 2774bf215546Sopenharmony_ci literal->literal_value, 2775bf215546Sopenharmony_ci literal->const_offset, 2776bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2777bf215546Sopenharmony_ci 2778bf215546Sopenharmony_ci entries += sizeof(*literal); 2779bf215546Sopenharmony_ci break; 2780bf215546Sopenharmony_ci } 2781bf215546Sopenharmony_ci 2782bf215546Sopenharmony_ci case PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER: { 2783bf215546Sopenharmony_ci const struct pvr_const_map_entry_constant_buffer *const_buffer_entry = 2784bf215546Sopenharmony_ci (struct pvr_const_map_entry_constant_buffer *)entries; 2785bf215546Sopenharmony_ci const uint32_t desc_set = const_buffer_entry->desc_set; 2786bf215546Sopenharmony_ci const uint32_t binding = const_buffer_entry->binding; 2787bf215546Sopenharmony_ci const struct pvr_descriptor_set *descriptor_set; 2788bf215546Sopenharmony_ci const struct pvr_descriptor *descriptor; 2789bf215546Sopenharmony_ci pvr_dev_addr_t buffer_addr; 2790bf215546Sopenharmony_ci 2791bf215546Sopenharmony_ci /* TODO: Handle push descriptors. */ 2792bf215546Sopenharmony_ci 2793bf215546Sopenharmony_ci assert(desc_set < PVR_MAX_DESCRIPTOR_SETS); 2794bf215546Sopenharmony_ci descriptor_set = desc_state->descriptor_sets[desc_set]; 2795bf215546Sopenharmony_ci 2796bf215546Sopenharmony_ci /* TODO: Handle dynamic buffers. */ 2797bf215546Sopenharmony_ci descriptor = &descriptor_set->descriptors[binding]; 2798bf215546Sopenharmony_ci assert(descriptor->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); 2799bf215546Sopenharmony_ci 2800bf215546Sopenharmony_ci assert(descriptor->buffer_desc_range == 2801bf215546Sopenharmony_ci const_buffer_entry->size_in_dwords * sizeof(uint32_t)); 2802bf215546Sopenharmony_ci assert(descriptor->buffer_create_info_size == 2803bf215546Sopenharmony_ci const_buffer_entry->size_in_dwords * sizeof(uint32_t)); 2804bf215546Sopenharmony_ci 2805bf215546Sopenharmony_ci buffer_addr = 2806bf215546Sopenharmony_ci PVR_DEV_ADDR_OFFSET(descriptor->buffer_dev_addr, 2807bf215546Sopenharmony_ci const_buffer_entry->offset * sizeof(uint32_t)); 2808bf215546Sopenharmony_ci 2809bf215546Sopenharmony_ci PVR_WRITE(qword_buffer, 2810bf215546Sopenharmony_ci buffer_addr.addr, 2811bf215546Sopenharmony_ci const_buffer_entry->const_offset, 2812bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2813bf215546Sopenharmony_ci 2814bf215546Sopenharmony_ci entries += sizeof(*const_buffer_entry); 2815bf215546Sopenharmony_ci break; 2816bf215546Sopenharmony_ci } 2817bf215546Sopenharmony_ci 2818bf215546Sopenharmony_ci case PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET: { 2819bf215546Sopenharmony_ci const struct pvr_const_map_entry_descriptor_set *desc_set_entry = 2820bf215546Sopenharmony_ci (struct pvr_const_map_entry_descriptor_set *)entries; 2821bf215546Sopenharmony_ci const uint32_t desc_set_num = desc_set_entry->descriptor_set; 2822bf215546Sopenharmony_ci const struct pvr_descriptor_set *descriptor_set; 2823bf215546Sopenharmony_ci pvr_dev_addr_t desc_set_addr; 2824bf215546Sopenharmony_ci 2825bf215546Sopenharmony_ci assert(desc_set_num < PVR_MAX_DESCRIPTOR_SETS); 2826bf215546Sopenharmony_ci 2827bf215546Sopenharmony_ci /* TODO: Remove this when the compiler provides us with usage info? 2828bf215546Sopenharmony_ci */ 2829bf215546Sopenharmony_ci /* We skip DMAing unbound descriptor sets. */ 2830bf215546Sopenharmony_ci if (!(desc_state->valid_mask & BITFIELD_BIT(desc_set_num))) { 2831bf215546Sopenharmony_ci const struct pvr_const_map_entry_literal32 *literal; 2832bf215546Sopenharmony_ci uint32_t zero_literal_value; 2833bf215546Sopenharmony_ci 2834bf215546Sopenharmony_ci entries += sizeof(*desc_set_entry); 2835bf215546Sopenharmony_ci literal = (struct pvr_const_map_entry_literal32 *)entries; 2836bf215546Sopenharmony_ci 2837bf215546Sopenharmony_ci /* TODO: Is there any guarantee that a literal will follow the 2838bf215546Sopenharmony_ci * descriptor set entry? 2839bf215546Sopenharmony_ci */ 2840bf215546Sopenharmony_ci assert(literal->type == PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32); 2841bf215546Sopenharmony_ci 2842bf215546Sopenharmony_ci /* We zero out the DMA size so the DMA isn't performed. */ 2843bf215546Sopenharmony_ci zero_literal_value = 2844bf215546Sopenharmony_ci literal->literal_value & 2845bf215546Sopenharmony_ci PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_CLRMSK; 2846bf215546Sopenharmony_ci 2847bf215546Sopenharmony_ci PVR_WRITE(qword_buffer, 2848bf215546Sopenharmony_ci UINT64_C(0), 2849bf215546Sopenharmony_ci desc_set_entry->const_offset, 2850bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2851bf215546Sopenharmony_ci 2852bf215546Sopenharmony_ci PVR_WRITE(dword_buffer, 2853bf215546Sopenharmony_ci zero_literal_value, 2854bf215546Sopenharmony_ci desc_set_entry->const_offset, 2855bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2856bf215546Sopenharmony_ci 2857bf215546Sopenharmony_ci entries += sizeof(*literal); 2858bf215546Sopenharmony_ci i++; 2859bf215546Sopenharmony_ci continue; 2860bf215546Sopenharmony_ci } 2861bf215546Sopenharmony_ci 2862bf215546Sopenharmony_ci descriptor_set = desc_state->descriptor_sets[desc_set_num]; 2863bf215546Sopenharmony_ci 2864bf215546Sopenharmony_ci pvr_finishme("Handle push descriptor entry."); 2865bf215546Sopenharmony_ci 2866bf215546Sopenharmony_ci desc_set_addr = descriptor_set->pvr_bo->vma->dev_addr; 2867bf215546Sopenharmony_ci 2868bf215546Sopenharmony_ci if (desc_set_entry->primary) { 2869bf215546Sopenharmony_ci desc_set_addr = PVR_DEV_ADDR_OFFSET( 2870bf215546Sopenharmony_ci desc_set_addr, 2871bf215546Sopenharmony_ci descriptor_set->layout->memory_layout_in_dwords_per_stage[stage] 2872bf215546Sopenharmony_ci .primary_offset 2873bf215546Sopenharmony_ci << 2U); 2874bf215546Sopenharmony_ci } else { 2875bf215546Sopenharmony_ci desc_set_addr = PVR_DEV_ADDR_OFFSET( 2876bf215546Sopenharmony_ci desc_set_addr, 2877bf215546Sopenharmony_ci descriptor_set->layout->memory_layout_in_dwords_per_stage[stage] 2878bf215546Sopenharmony_ci .secondary_offset 2879bf215546Sopenharmony_ci << 2U); 2880bf215546Sopenharmony_ci } 2881bf215546Sopenharmony_ci 2882bf215546Sopenharmony_ci desc_set_addr = PVR_DEV_ADDR_OFFSET( 2883bf215546Sopenharmony_ci desc_set_addr, 2884bf215546Sopenharmony_ci (uint64_t)desc_set_entry->offset_in_dwords << 2U); 2885bf215546Sopenharmony_ci 2886bf215546Sopenharmony_ci PVR_WRITE(qword_buffer, 2887bf215546Sopenharmony_ci desc_set_addr.addr, 2888bf215546Sopenharmony_ci desc_set_entry->const_offset, 2889bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2890bf215546Sopenharmony_ci 2891bf215546Sopenharmony_ci entries += sizeof(*desc_set_entry); 2892bf215546Sopenharmony_ci break; 2893bf215546Sopenharmony_ci } 2894bf215546Sopenharmony_ci 2895bf215546Sopenharmony_ci case PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER: { 2896bf215546Sopenharmony_ci const struct pvr_const_map_entry_special_buffer *special_buff_entry = 2897bf215546Sopenharmony_ci (struct pvr_const_map_entry_special_buffer *)entries; 2898bf215546Sopenharmony_ci 2899bf215546Sopenharmony_ci switch (special_buff_entry->buffer_type) { 2900bf215546Sopenharmony_ci case PVR_BUFFER_TYPES_COMPILE_TIME: { 2901bf215546Sopenharmony_ci uint64_t addr = descriptor_state->static_consts->vma->dev_addr.addr; 2902bf215546Sopenharmony_ci 2903bf215546Sopenharmony_ci PVR_WRITE(qword_buffer, 2904bf215546Sopenharmony_ci addr, 2905bf215546Sopenharmony_ci special_buff_entry->const_offset, 2906bf215546Sopenharmony_ci pds_info->data_size_in_dwords); 2907bf215546Sopenharmony_ci break; 2908bf215546Sopenharmony_ci } 2909bf215546Sopenharmony_ci 2910bf215546Sopenharmony_ci default: 2911bf215546Sopenharmony_ci unreachable("Unsupported special buffer type."); 2912bf215546Sopenharmony_ci } 2913bf215546Sopenharmony_ci 2914bf215546Sopenharmony_ci entries += sizeof(*special_buff_entry); 2915bf215546Sopenharmony_ci break; 2916bf215546Sopenharmony_ci } 2917bf215546Sopenharmony_ci 2918bf215546Sopenharmony_ci default: 2919bf215546Sopenharmony_ci unreachable("Unsupported map entry type."); 2920bf215546Sopenharmony_ci } 2921bf215546Sopenharmony_ci } 2922bf215546Sopenharmony_ci 2923bf215546Sopenharmony_ci pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo); 2924bf215546Sopenharmony_ci 2925bf215546Sopenharmony_ci *descriptor_data_offset_out = 2926bf215546Sopenharmony_ci pvr_bo->vma->dev_addr.addr - 2927bf215546Sopenharmony_ci cmd_buffer->device->heaps.pds_heap->base_addr.addr; 2928bf215546Sopenharmony_ci 2929bf215546Sopenharmony_ci return VK_SUCCESS; 2930bf215546Sopenharmony_ci} 2931bf215546Sopenharmony_ci 2932bf215546Sopenharmony_ci#undef PVR_WRITE 2933bf215546Sopenharmony_ci 2934bf215546Sopenharmony_cistatic void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer, 2935bf215546Sopenharmony_ci struct pvr_sub_cmd_compute *const sub_cmd) 2936bf215546Sopenharmony_ci{ 2937bf215546Sopenharmony_ci const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; 2938bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 2939bf215546Sopenharmony_ci struct pvr_csb *csb = &sub_cmd->control_stream; 2940bf215546Sopenharmony_ci const struct pvr_compute_pipeline *pipeline = state->compute_pipeline; 2941bf215546Sopenharmony_ci const uint32_t const_shared_reg_count = 2942bf215546Sopenharmony_ci pipeline->state.shader.const_shared_reg_count; 2943bf215546Sopenharmony_ci struct pvr_compute_kernel_info info; 2944bf215546Sopenharmony_ci 2945bf215546Sopenharmony_ci /* No shared regs, no need to use an allocation kernel. */ 2946bf215546Sopenharmony_ci if (!const_shared_reg_count) 2947bf215546Sopenharmony_ci return; 2948bf215546Sopenharmony_ci 2949bf215546Sopenharmony_ci info = (struct pvr_compute_kernel_info){ 2950bf215546Sopenharmony_ci .indirect_buffer_addr = PVR_DEV_ADDR_INVALID, 2951bf215546Sopenharmony_ci .sd_type = PVRX(CDMCTRL_SD_TYPE_NONE), 2952bf215546Sopenharmony_ci 2953bf215546Sopenharmony_ci .usc_target = PVRX(CDMCTRL_USC_TARGET_ALL), 2954bf215546Sopenharmony_ci .usc_common_shared = true, 2955bf215546Sopenharmony_ci .usc_common_size = 2956bf215546Sopenharmony_ci DIV_ROUND_UP(const_shared_reg_count, 2957bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)), 2958bf215546Sopenharmony_ci 2959bf215546Sopenharmony_ci .local_size = { 1, 1, 1 }, 2960bf215546Sopenharmony_ci .global_size = { 1, 1, 1 }, 2961bf215546Sopenharmony_ci }; 2962bf215546Sopenharmony_ci 2963bf215546Sopenharmony_ci /* Sometimes we don't have a secondary program if there were no constants to 2964bf215546Sopenharmony_ci * write, but we still need to run a PDS program to accomplish the 2965bf215546Sopenharmony_ci * allocation of the local/common store shared registers so we repurpose the 2966bf215546Sopenharmony_ci * deallocation PDS program. 2967bf215546Sopenharmony_ci */ 2968bf215546Sopenharmony_ci if (pipeline->state.descriptor.pds_info.code_size_in_dwords) { 2969bf215546Sopenharmony_ci uint32_t pds_data_size_in_dwords = 2970bf215546Sopenharmony_ci pipeline->state.descriptor.pds_info.data_size_in_dwords; 2971bf215546Sopenharmony_ci 2972bf215546Sopenharmony_ci info.pds_data_offset = state->pds_compute_descriptor_data_offset; 2973bf215546Sopenharmony_ci info.pds_data_size = 2974bf215546Sopenharmony_ci DIV_ROUND_UP(pds_data_size_in_dwords << 2U, 2975bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)); 2976bf215546Sopenharmony_ci 2977bf215546Sopenharmony_ci /* Check that we have upload the code section. */ 2978bf215546Sopenharmony_ci assert(pipeline->state.descriptor.pds_code.code_size); 2979bf215546Sopenharmony_ci info.pds_code_offset = pipeline->state.descriptor.pds_code.code_offset; 2980bf215546Sopenharmony_ci } else { 2981bf215546Sopenharmony_ci /* FIXME: There should be a deallocation pds program already uploaded 2982bf215546Sopenharmony_ci * that we use at this point. 2983bf215546Sopenharmony_ci */ 2984bf215546Sopenharmony_ci assert(!"Unimplemented"); 2985bf215546Sopenharmony_ci } 2986bf215546Sopenharmony_ci 2987bf215546Sopenharmony_ci /* We don't need to pad the workgroup size. */ 2988bf215546Sopenharmony_ci 2989bf215546Sopenharmony_ci info.max_instances = 2990bf215546Sopenharmony_ci pvr_compute_flat_slot_size(pdevice, const_shared_reg_count, false, 1U); 2991bf215546Sopenharmony_ci 2992bf215546Sopenharmony_ci pvr_compute_generate_control_stream(csb, sub_cmd, &info); 2993bf215546Sopenharmony_ci} 2994bf215546Sopenharmony_ci 2995bf215546Sopenharmony_cistatic uint32_t 2996bf215546Sopenharmony_cipvr_compute_flat_pad_workgroup_size(const struct pvr_physical_device *pdevice, 2997bf215546Sopenharmony_ci uint32_t workgroup_size, 2998bf215546Sopenharmony_ci uint32_t coeff_regs_count) 2999bf215546Sopenharmony_ci{ 3000bf215546Sopenharmony_ci const struct pvr_device_runtime_info *dev_runtime_info = 3001bf215546Sopenharmony_ci &pdevice->dev_runtime_info; 3002bf215546Sopenharmony_ci const struct pvr_device_info *dev_info = &pdevice->dev_info; 3003bf215546Sopenharmony_ci uint32_t max_avail_coeff_regs = 3004bf215546Sopenharmony_ci dev_runtime_info->cdm_max_local_mem_size_regs; 3005bf215546Sopenharmony_ci uint32_t coeff_regs_count_aligned = 3006bf215546Sopenharmony_ci ALIGN_POT(coeff_regs_count, 3007bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE) >> 2U); 3008bf215546Sopenharmony_ci 3009bf215546Sopenharmony_ci /* If the work group size is > ROGUE_MAX_INSTANCES_PER_TASK. We now *always* 3010bf215546Sopenharmony_ci * pad the work group size to the next multiple of 3011bf215546Sopenharmony_ci * ROGUE_MAX_INSTANCES_PER_TASK. 3012bf215546Sopenharmony_ci * 3013bf215546Sopenharmony_ci * If we use more than 1/8th of the max coefficient registers then we round 3014bf215546Sopenharmony_ci * work group size up to the next multiple of ROGUE_MAX_INSTANCES_PER_TASK 3015bf215546Sopenharmony_ci */ 3016bf215546Sopenharmony_ci /* TODO: See if this can be optimized. */ 3017bf215546Sopenharmony_ci if (workgroup_size > ROGUE_MAX_INSTANCES_PER_TASK || 3018bf215546Sopenharmony_ci coeff_regs_count_aligned > (max_avail_coeff_regs / 8)) { 3019bf215546Sopenharmony_ci assert(workgroup_size < rogue_get_compute_max_work_group_size(dev_info)); 3020bf215546Sopenharmony_ci 3021bf215546Sopenharmony_ci return ALIGN_POT(workgroup_size, ROGUE_MAX_INSTANCES_PER_TASK); 3022bf215546Sopenharmony_ci } 3023bf215546Sopenharmony_ci 3024bf215546Sopenharmony_ci return workgroup_size; 3025bf215546Sopenharmony_ci} 3026bf215546Sopenharmony_ci 3027bf215546Sopenharmony_ci/* TODO: Wire up the base_workgroup variant program when implementing 3028bf215546Sopenharmony_ci * VK_KHR_device_group. The values will also need patching into the program. 3029bf215546Sopenharmony_ci */ 3030bf215546Sopenharmony_cistatic void pvr_compute_update_kernel( 3031bf215546Sopenharmony_ci struct pvr_cmd_buffer *cmd_buffer, 3032bf215546Sopenharmony_ci struct pvr_sub_cmd_compute *const sub_cmd, 3033bf215546Sopenharmony_ci const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS]) 3034bf215546Sopenharmony_ci{ 3035bf215546Sopenharmony_ci const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; 3036bf215546Sopenharmony_ci const struct pvr_device_runtime_info *dev_runtime_info = 3037bf215546Sopenharmony_ci &pdevice->dev_runtime_info; 3038bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 3039bf215546Sopenharmony_ci struct pvr_csb *csb = &sub_cmd->control_stream; 3040bf215546Sopenharmony_ci const struct pvr_compute_pipeline *pipeline = state->compute_pipeline; 3041bf215546Sopenharmony_ci const struct pvr_pds_info *program_info = 3042bf215546Sopenharmony_ci &pipeline->state.primary_program_info; 3043bf215546Sopenharmony_ci 3044bf215546Sopenharmony_ci struct pvr_compute_kernel_info info = { 3045bf215546Sopenharmony_ci .indirect_buffer_addr = PVR_DEV_ADDR_INVALID, 3046bf215546Sopenharmony_ci .usc_target = PVRX(CDMCTRL_USC_TARGET_ANY), 3047bf215546Sopenharmony_ci .pds_temp_size = 3048bf215546Sopenharmony_ci DIV_ROUND_UP(program_info->temps_required << 2U, 3049bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_PDS_TEMP_SIZE_UNIT_SIZE)), 3050bf215546Sopenharmony_ci 3051bf215546Sopenharmony_ci .pds_data_size = 3052bf215546Sopenharmony_ci DIV_ROUND_UP(program_info->data_size_in_dwords << 2U, 3053bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)), 3054bf215546Sopenharmony_ci .pds_data_offset = pipeline->state.primary_program.data_offset, 3055bf215546Sopenharmony_ci .pds_code_offset = pipeline->state.primary_program.code_offset, 3056bf215546Sopenharmony_ci 3057bf215546Sopenharmony_ci .sd_type = PVRX(CDMCTRL_SD_TYPE_USC), 3058bf215546Sopenharmony_ci 3059bf215546Sopenharmony_ci .usc_unified_size = 3060bf215546Sopenharmony_ci DIV_ROUND_UP(pipeline->state.shader.input_register_count << 2U, 3061bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_USC_UNIFIED_SIZE_UNIT_SIZE)), 3062bf215546Sopenharmony_ci 3063bf215546Sopenharmony_ci /* clang-format off */ 3064bf215546Sopenharmony_ci .global_size = { 3065bf215546Sopenharmony_ci global_workgroup_size[0], 3066bf215546Sopenharmony_ci global_workgroup_size[1], 3067bf215546Sopenharmony_ci global_workgroup_size[2] 3068bf215546Sopenharmony_ci }, 3069bf215546Sopenharmony_ci /* clang-format on */ 3070bf215546Sopenharmony_ci }; 3071bf215546Sopenharmony_ci 3072bf215546Sopenharmony_ci uint32_t work_size = pipeline->state.shader.work_size; 3073bf215546Sopenharmony_ci uint32_t coeff_regs; 3074bf215546Sopenharmony_ci 3075bf215546Sopenharmony_ci if (work_size > ROGUE_MAX_INSTANCES_PER_TASK) { 3076bf215546Sopenharmony_ci /* Enforce a single workgroup per cluster through allocation starvation. 3077bf215546Sopenharmony_ci */ 3078bf215546Sopenharmony_ci coeff_regs = dev_runtime_info->cdm_max_local_mem_size_regs; 3079bf215546Sopenharmony_ci } else { 3080bf215546Sopenharmony_ci coeff_regs = pipeline->state.shader.coefficient_register_count; 3081bf215546Sopenharmony_ci } 3082bf215546Sopenharmony_ci 3083bf215546Sopenharmony_ci info.usc_common_size = 3084bf215546Sopenharmony_ci DIV_ROUND_UP(coeff_regs << 2U, 3085bf215546Sopenharmony_ci PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)); 3086bf215546Sopenharmony_ci 3087bf215546Sopenharmony_ci /* Use a whole slot per workgroup. */ 3088bf215546Sopenharmony_ci work_size = MAX2(work_size, ROGUE_MAX_INSTANCES_PER_TASK); 3089bf215546Sopenharmony_ci 3090bf215546Sopenharmony_ci coeff_regs += pipeline->state.shader.const_shared_reg_count; 3091bf215546Sopenharmony_ci 3092bf215546Sopenharmony_ci work_size = 3093bf215546Sopenharmony_ci pvr_compute_flat_pad_workgroup_size(pdevice, work_size, coeff_regs); 3094bf215546Sopenharmony_ci 3095bf215546Sopenharmony_ci info.local_size[0] = work_size; 3096bf215546Sopenharmony_ci info.local_size[1] = 1U; 3097bf215546Sopenharmony_ci info.local_size[2] = 1U; 3098bf215546Sopenharmony_ci 3099bf215546Sopenharmony_ci info.max_instances = 3100bf215546Sopenharmony_ci pvr_compute_flat_slot_size(pdevice, coeff_regs, false, work_size); 3101bf215546Sopenharmony_ci 3102bf215546Sopenharmony_ci pvr_compute_generate_control_stream(csb, sub_cmd, &info); 3103bf215546Sopenharmony_ci} 3104bf215546Sopenharmony_ci 3105bf215546Sopenharmony_civoid pvr_CmdDispatch(VkCommandBuffer commandBuffer, 3106bf215546Sopenharmony_ci uint32_t groupCountX, 3107bf215546Sopenharmony_ci uint32_t groupCountY, 3108bf215546Sopenharmony_ci uint32_t groupCountZ) 3109bf215546Sopenharmony_ci{ 3110bf215546Sopenharmony_ci const uint32_t workgroup_size[] = { groupCountX, groupCountY, groupCountZ }; 3111bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 3112bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 3113bf215546Sopenharmony_ci const struct pvr_compute_pipeline *compute_pipeline = 3114bf215546Sopenharmony_ci state->compute_pipeline; 3115bf215546Sopenharmony_ci const VkShaderStageFlags push_consts_stage_mask = 3116bf215546Sopenharmony_ci compute_pipeline->base.layout->push_constants_shader_stages; 3117bf215546Sopenharmony_ci bool push_descriptors_dirty; 3118bf215546Sopenharmony_ci struct pvr_sub_cmd_compute *sub_cmd; 3119bf215546Sopenharmony_ci VkResult result; 3120bf215546Sopenharmony_ci 3121bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 3122bf215546Sopenharmony_ci assert(compute_pipeline); 3123bf215546Sopenharmony_ci 3124bf215546Sopenharmony_ci if (!groupCountX || !groupCountY || !groupCountZ) 3125bf215546Sopenharmony_ci return; 3126bf215546Sopenharmony_ci 3127bf215546Sopenharmony_ci pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_COMPUTE); 3128bf215546Sopenharmony_ci 3129bf215546Sopenharmony_ci sub_cmd = &state->current_sub_cmd->compute; 3130bf215546Sopenharmony_ci 3131bf215546Sopenharmony_ci sub_cmd->uses_atomic_ops |= compute_pipeline->state.shader.uses_atomic_ops; 3132bf215546Sopenharmony_ci sub_cmd->uses_barrier |= compute_pipeline->state.shader.uses_barrier; 3133bf215546Sopenharmony_ci 3134bf215546Sopenharmony_ci if (push_consts_stage_mask & VK_SHADER_STAGE_COMPUTE_BIT) { 3135bf215546Sopenharmony_ci /* TODO: Add a dirty push constants mask in the cmd_buffer state and 3136bf215546Sopenharmony_ci * check for dirty compute stage. 3137bf215546Sopenharmony_ci */ 3138bf215546Sopenharmony_ci pvr_finishme("Add support for push constants."); 3139bf215546Sopenharmony_ci } 3140bf215546Sopenharmony_ci 3141bf215546Sopenharmony_ci pvr_validate_push_descriptors(cmd_buffer, &push_descriptors_dirty); 3142bf215546Sopenharmony_ci 3143bf215546Sopenharmony_ci if (compute_pipeline->state.shader.uses_num_workgroups) { 3144bf215546Sopenharmony_ci struct pvr_bo *num_workgroups_bo; 3145bf215546Sopenharmony_ci 3146bf215546Sopenharmony_ci result = pvr_cmd_buffer_upload_general(cmd_buffer, 3147bf215546Sopenharmony_ci workgroup_size, 3148bf215546Sopenharmony_ci sizeof(workgroup_size), 3149bf215546Sopenharmony_ci &num_workgroups_bo); 3150bf215546Sopenharmony_ci if (result != VK_SUCCESS) 3151bf215546Sopenharmony_ci return; 3152bf215546Sopenharmony_ci 3153bf215546Sopenharmony_ci result = pvr_setup_descriptor_mappings( 3154bf215546Sopenharmony_ci cmd_buffer, 3155bf215546Sopenharmony_ci PVR_STAGE_ALLOCATION_COMPUTE, 3156bf215546Sopenharmony_ci &compute_pipeline->state.descriptor, 3157bf215546Sopenharmony_ci &num_workgroups_bo->vma->dev_addr, 3158bf215546Sopenharmony_ci &state->pds_compute_descriptor_data_offset); 3159bf215546Sopenharmony_ci if (result != VK_SUCCESS) 3160bf215546Sopenharmony_ci return; 3161bf215546Sopenharmony_ci } else if ((compute_pipeline->base.layout 3162bf215546Sopenharmony_ci ->per_stage_descriptor_masks[PVR_STAGE_ALLOCATION_COMPUTE] && 3163bf215546Sopenharmony_ci state->dirty.compute_desc_dirty) || 3164bf215546Sopenharmony_ci state->dirty.compute_pipeline_binding || push_descriptors_dirty) { 3165bf215546Sopenharmony_ci result = pvr_setup_descriptor_mappings( 3166bf215546Sopenharmony_ci cmd_buffer, 3167bf215546Sopenharmony_ci PVR_STAGE_ALLOCATION_COMPUTE, 3168bf215546Sopenharmony_ci &compute_pipeline->state.descriptor, 3169bf215546Sopenharmony_ci NULL, 3170bf215546Sopenharmony_ci &state->pds_compute_descriptor_data_offset); 3171bf215546Sopenharmony_ci if (result != VK_SUCCESS) 3172bf215546Sopenharmony_ci return; 3173bf215546Sopenharmony_ci } 3174bf215546Sopenharmony_ci 3175bf215546Sopenharmony_ci pvr_compute_update_shared(cmd_buffer, sub_cmd); 3176bf215546Sopenharmony_ci 3177bf215546Sopenharmony_ci pvr_compute_update_kernel(cmd_buffer, sub_cmd, workgroup_size); 3178bf215546Sopenharmony_ci} 3179bf215546Sopenharmony_ci 3180bf215546Sopenharmony_civoid pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer, 3181bf215546Sopenharmony_ci VkBuffer _buffer, 3182bf215546Sopenharmony_ci VkDeviceSize offset) 3183bf215546Sopenharmony_ci{ 3184bf215546Sopenharmony_ci assert(!"Unimplemented"); 3185bf215546Sopenharmony_ci} 3186bf215546Sopenharmony_ci 3187bf215546Sopenharmony_cistatic void 3188bf215546Sopenharmony_cipvr_update_draw_state(struct pvr_cmd_buffer_state *const state, 3189bf215546Sopenharmony_ci const struct pvr_cmd_buffer_draw_state *const draw_state) 3190bf215546Sopenharmony_ci{ 3191bf215546Sopenharmony_ci /* We don't have a state to tell us that base_instance is being used so it 3192bf215546Sopenharmony_ci * gets used as a boolean - 0 means we'll use a pds program that skips the 3193bf215546Sopenharmony_ci * base instance addition. If the base_instance gets used (and the last 3194bf215546Sopenharmony_ci * draw's base_instance was 0) then we switch to the BASE_INSTANCE attrib 3195bf215546Sopenharmony_ci * program. 3196bf215546Sopenharmony_ci * 3197bf215546Sopenharmony_ci * If base_instance changes then we only need to update the data section. 3198bf215546Sopenharmony_ci * 3199bf215546Sopenharmony_ci * The only draw call state that doesn't really matter is the start vertex 3200bf215546Sopenharmony_ci * as that is handled properly in the VDM state in all cases. 3201bf215546Sopenharmony_ci */ 3202bf215546Sopenharmony_ci if ((state->draw_state.draw_indexed != draw_state->draw_indexed) || 3203bf215546Sopenharmony_ci (state->draw_state.draw_indirect != draw_state->draw_indirect) || 3204bf215546Sopenharmony_ci (state->draw_state.base_instance == 0 && 3205bf215546Sopenharmony_ci draw_state->base_instance != 0)) { 3206bf215546Sopenharmony_ci state->dirty.draw_variant = true; 3207bf215546Sopenharmony_ci } else if (state->draw_state.base_instance != draw_state->base_instance) { 3208bf215546Sopenharmony_ci state->dirty.draw_base_instance = true; 3209bf215546Sopenharmony_ci } 3210bf215546Sopenharmony_ci 3211bf215546Sopenharmony_ci state->draw_state = *draw_state; 3212bf215546Sopenharmony_ci} 3213bf215546Sopenharmony_ci 3214bf215546Sopenharmony_cistatic uint32_t pvr_calc_shared_regs_count( 3215bf215546Sopenharmony_ci const struct pvr_graphics_pipeline *const gfx_pipeline) 3216bf215546Sopenharmony_ci{ 3217bf215546Sopenharmony_ci const struct pvr_pipeline_stage_state *const vertex_state = 3218bf215546Sopenharmony_ci &gfx_pipeline->vertex_shader_state.stage_state; 3219bf215546Sopenharmony_ci uint32_t shared_regs = vertex_state->const_shared_reg_count + 3220bf215546Sopenharmony_ci vertex_state->const_shared_reg_offset; 3221bf215546Sopenharmony_ci 3222bf215546Sopenharmony_ci if (gfx_pipeline->fragment_shader_state.bo) { 3223bf215546Sopenharmony_ci const struct pvr_pipeline_stage_state *const fragment_state = 3224bf215546Sopenharmony_ci &gfx_pipeline->fragment_shader_state.stage_state; 3225bf215546Sopenharmony_ci uint32_t fragment_regs = fragment_state->const_shared_reg_count + 3226bf215546Sopenharmony_ci fragment_state->const_shared_reg_offset; 3227bf215546Sopenharmony_ci 3228bf215546Sopenharmony_ci shared_regs = MAX2(shared_regs, fragment_regs); 3229bf215546Sopenharmony_ci } 3230bf215546Sopenharmony_ci 3231bf215546Sopenharmony_ci return shared_regs; 3232bf215546Sopenharmony_ci} 3233bf215546Sopenharmony_ci 3234bf215546Sopenharmony_cistatic void 3235bf215546Sopenharmony_cipvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer, 3236bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const sub_cmd, 3237bf215546Sopenharmony_ci const uint32_t pds_vertex_descriptor_data_offset) 3238bf215546Sopenharmony_ci{ 3239bf215546Sopenharmony_ci const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 3240bf215546Sopenharmony_ci const struct pvr_stage_allocation_descriptor_state 3241bf215546Sopenharmony_ci *const vertex_descriptor_state = 3242bf215546Sopenharmony_ci &state->gfx_pipeline->vertex_shader_state.descriptor_state; 3243bf215546Sopenharmony_ci const struct pvr_pipeline_stage_state *const vertex_stage_state = 3244bf215546Sopenharmony_ci &state->gfx_pipeline->vertex_shader_state.stage_state; 3245bf215546Sopenharmony_ci struct pvr_csb *const csb = &sub_cmd->control_stream; 3246bf215546Sopenharmony_ci 3247bf215546Sopenharmony_ci if (!vertex_descriptor_state->pds_info.code_size_in_dwords) 3248bf215546Sopenharmony_ci return; 3249bf215546Sopenharmony_ci 3250bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) { 3251bf215546Sopenharmony_ci state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ALL); 3252bf215546Sopenharmony_ci 3253bf215546Sopenharmony_ci state0.usc_common_size = 3254bf215546Sopenharmony_ci DIV_ROUND_UP(vertex_stage_state->const_shared_reg_count << 2, 3255bf215546Sopenharmony_ci PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE)); 3256bf215546Sopenharmony_ci 3257bf215546Sopenharmony_ci state0.pds_data_size = DIV_ROUND_UP( 3258bf215546Sopenharmony_ci vertex_descriptor_state->pds_info.data_size_in_dwords << 2, 3259bf215546Sopenharmony_ci PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE)); 3260bf215546Sopenharmony_ci } 3261bf215546Sopenharmony_ci 3262bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_PDS_STATE1, state1) { 3263bf215546Sopenharmony_ci state1.pds_data_addr = PVR_DEV_ADDR(pds_vertex_descriptor_data_offset); 3264bf215546Sopenharmony_ci state1.sd_type = PVRX(VDMCTRL_SD_TYPE_NONE); 3265bf215546Sopenharmony_ci } 3266bf215546Sopenharmony_ci 3267bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_PDS_STATE2, state2) { 3268bf215546Sopenharmony_ci state2.pds_code_addr = 3269bf215546Sopenharmony_ci PVR_DEV_ADDR(vertex_descriptor_state->pds_code.code_offset); 3270bf215546Sopenharmony_ci } 3271bf215546Sopenharmony_ci} 3272bf215546Sopenharmony_ci 3273bf215546Sopenharmony_cistatic void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer) 3274bf215546Sopenharmony_ci{ 3275bf215546Sopenharmony_ci struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state; 3276bf215546Sopenharmony_ci const struct pvr_graphics_pipeline *const gfx_pipeline = 3277bf215546Sopenharmony_ci cmd_buffer->state.gfx_pipeline; 3278bf215546Sopenharmony_ci struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state; 3279bf215546Sopenharmony_ci const struct pvr_vertex_shader_state *const vertex_state = 3280bf215546Sopenharmony_ci &gfx_pipeline->vertex_shader_state; 3281bf215546Sopenharmony_ci uint32_t output_selects; 3282bf215546Sopenharmony_ci 3283bf215546Sopenharmony_ci /* TODO: Handle vertex and fragment shader state flags. */ 3284bf215546Sopenharmony_ci 3285bf215546Sopenharmony_ci pvr_csb_pack (&output_selects, TA_OUTPUT_SEL, state) { 3286bf215546Sopenharmony_ci const VkPrimitiveTopology topology = 3287bf215546Sopenharmony_ci gfx_pipeline->input_asm_state.topology; 3288bf215546Sopenharmony_ci 3289bf215546Sopenharmony_ci state.rhw_pres = true; 3290bf215546Sopenharmony_ci state.vtxsize = DIV_ROUND_UP(vertex_state->vertex_output_size, 4U); 3291bf215546Sopenharmony_ci state.psprite_size_pres = (topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST); 3292bf215546Sopenharmony_ci } 3293bf215546Sopenharmony_ci 3294bf215546Sopenharmony_ci if (ppp_state->output_selects != output_selects) { 3295bf215546Sopenharmony_ci ppp_state->output_selects = output_selects; 3296bf215546Sopenharmony_ci emit_state->output_selects = true; 3297bf215546Sopenharmony_ci } 3298bf215546Sopenharmony_ci 3299bf215546Sopenharmony_ci if (ppp_state->varying_word[0] != vertex_state->varying[0]) { 3300bf215546Sopenharmony_ci ppp_state->varying_word[0] = vertex_state->varying[0]; 3301bf215546Sopenharmony_ci emit_state->varying_word0 = true; 3302bf215546Sopenharmony_ci } 3303bf215546Sopenharmony_ci 3304bf215546Sopenharmony_ci if (ppp_state->varying_word[1] != vertex_state->varying[1]) { 3305bf215546Sopenharmony_ci ppp_state->varying_word[1] = vertex_state->varying[1]; 3306bf215546Sopenharmony_ci emit_state->varying_word1 = true; 3307bf215546Sopenharmony_ci } 3308bf215546Sopenharmony_ci} 3309bf215546Sopenharmony_ci 3310bf215546Sopenharmony_cistatic void 3311bf215546Sopenharmony_cipvr_setup_isp_faces_and_control(struct pvr_cmd_buffer *const cmd_buffer, 3312bf215546Sopenharmony_ci struct PVRX(TA_STATE_ISPA) *const ispa_out) 3313bf215546Sopenharmony_ci{ 3314bf215546Sopenharmony_ci struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state; 3315bf215546Sopenharmony_ci const struct pvr_graphics_pipeline *const gfx_pipeline = 3316bf215546Sopenharmony_ci cmd_buffer->state.gfx_pipeline; 3317bf215546Sopenharmony_ci struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state; 3318bf215546Sopenharmony_ci const struct pvr_dynamic_state *const dynamic_state = 3319bf215546Sopenharmony_ci &cmd_buffer->state.dynamic.common; 3320bf215546Sopenharmony_ci const struct pvr_render_pass_info *const pass_info = 3321bf215546Sopenharmony_ci &cmd_buffer->state.render_pass_info; 3322bf215546Sopenharmony_ci const uint32_t subpass_idx = pass_info->subpass_idx; 3323bf215546Sopenharmony_ci const uint32_t *depth_stencil_attachment_idx = 3324bf215546Sopenharmony_ci pass_info->pass->subpasses[subpass_idx].depth_stencil_attachment; 3325bf215546Sopenharmony_ci const struct pvr_image_view *const attachment = 3326bf215546Sopenharmony_ci (!depth_stencil_attachment_idx) 3327bf215546Sopenharmony_ci ? NULL 3328bf215546Sopenharmony_ci : pass_info->attachments[*depth_stencil_attachment_idx]; 3329bf215546Sopenharmony_ci 3330bf215546Sopenharmony_ci const VkCullModeFlags cull_mode = gfx_pipeline->raster_state.cull_mode; 3331bf215546Sopenharmony_ci const bool raster_discard_enabled = 3332bf215546Sopenharmony_ci gfx_pipeline->raster_state.discard_enable; 3333bf215546Sopenharmony_ci const bool disable_all = raster_discard_enabled || !attachment; 3334bf215546Sopenharmony_ci 3335bf215546Sopenharmony_ci const VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology; 3336bf215546Sopenharmony_ci const enum PVRX(TA_OBJTYPE) obj_type = pvr_ta_objtype(topology); 3337bf215546Sopenharmony_ci 3338bf215546Sopenharmony_ci const bool disable_stencil_write = disable_all; 3339bf215546Sopenharmony_ci const bool disable_stencil_test = 3340bf215546Sopenharmony_ci disable_all || !vk_format_has_stencil(attachment->vk.format); 3341bf215546Sopenharmony_ci 3342bf215546Sopenharmony_ci const bool disable_depth_write = disable_all; 3343bf215546Sopenharmony_ci const bool disable_depth_test = disable_all || 3344bf215546Sopenharmony_ci !vk_format_has_depth(attachment->vk.format); 3345bf215546Sopenharmony_ci 3346bf215546Sopenharmony_ci uint32_t ispb_stencil_off; 3347bf215546Sopenharmony_ci bool is_two_sided = false; 3348bf215546Sopenharmony_ci uint32_t isp_control; 3349bf215546Sopenharmony_ci 3350bf215546Sopenharmony_ci uint32_t line_width; 3351bf215546Sopenharmony_ci uint32_t common_a; 3352bf215546Sopenharmony_ci uint32_t front_a; 3353bf215546Sopenharmony_ci uint32_t front_b; 3354bf215546Sopenharmony_ci uint32_t back_a; 3355bf215546Sopenharmony_ci uint32_t back_b; 3356bf215546Sopenharmony_ci 3357bf215546Sopenharmony_ci /* Convert to 4.4 fixed point format. */ 3358bf215546Sopenharmony_ci line_width = util_unsigned_fixed(dynamic_state->line_width, 4); 3359bf215546Sopenharmony_ci 3360bf215546Sopenharmony_ci /* Subtract 1 to shift values from range [0=0,256=16] to [0=1/16,255=16]. 3361bf215546Sopenharmony_ci * If 0 it stays at 0, otherwise we subtract 1. 3362bf215546Sopenharmony_ci */ 3363bf215546Sopenharmony_ci line_width = (!!line_width) * (line_width - 1); 3364bf215546Sopenharmony_ci 3365bf215546Sopenharmony_ci line_width = MIN2(line_width, PVRX(TA_STATE_ISPA_POINTLINEWIDTH_SIZE_MAX)); 3366bf215546Sopenharmony_ci 3367bf215546Sopenharmony_ci /* TODO: Part of the logic in this function is duplicated in another part 3368bf215546Sopenharmony_ci * of the code. E.g. the dcmpmode, and sop1/2/3. Could we do this earlier? 3369bf215546Sopenharmony_ci */ 3370bf215546Sopenharmony_ci 3371bf215546Sopenharmony_ci pvr_csb_pack (&common_a, TA_STATE_ISPA, ispa) { 3372bf215546Sopenharmony_ci ispa.pointlinewidth = line_width; 3373bf215546Sopenharmony_ci 3374bf215546Sopenharmony_ci if (disable_depth_test) 3375bf215546Sopenharmony_ci ispa.dcmpmode = PVRX(TA_CMPMODE_ALWAYS); 3376bf215546Sopenharmony_ci else 3377bf215546Sopenharmony_ci ispa.dcmpmode = pvr_ta_cmpmode(gfx_pipeline->depth_compare_op); 3378bf215546Sopenharmony_ci 3379bf215546Sopenharmony_ci /* FIXME: Can we just have this and remove the assignment above? 3380bf215546Sopenharmony_ci * The user provides a depthTestEnable at vkCreateGraphicsPipelines() 3381bf215546Sopenharmony_ci * should we be using that? 3382bf215546Sopenharmony_ci */ 3383bf215546Sopenharmony_ci ispa.dcmpmode |= gfx_pipeline->depth_compare_op; 3384bf215546Sopenharmony_ci 3385bf215546Sopenharmony_ci ispa.dwritedisable = disable_depth_test || disable_depth_write; 3386bf215546Sopenharmony_ci /* FIXME: Can we just have this and remove the assignment above? */ 3387bf215546Sopenharmony_ci ispa.dwritedisable = ispa.dwritedisable || 3388bf215546Sopenharmony_ci gfx_pipeline->depth_write_disable; 3389bf215546Sopenharmony_ci 3390bf215546Sopenharmony_ci ispa.passtype = gfx_pipeline->fragment_shader_state.pass_type; 3391bf215546Sopenharmony_ci 3392bf215546Sopenharmony_ci ispa.objtype = obj_type; 3393bf215546Sopenharmony_ci 3394bf215546Sopenharmony_ci /* Return unpacked ispa structure. dcmpmode, dwritedisable, passtype and 3395bf215546Sopenharmony_ci * objtype are needed by pvr_setup_triangle_merging_flag. 3396bf215546Sopenharmony_ci */ 3397bf215546Sopenharmony_ci if (ispa_out) 3398bf215546Sopenharmony_ci *ispa_out = ispa; 3399bf215546Sopenharmony_ci } 3400bf215546Sopenharmony_ci 3401bf215546Sopenharmony_ci /* FIXME: This logic should be redone and improved. Can we also get rid of 3402bf215546Sopenharmony_ci * the front and back variants? 3403bf215546Sopenharmony_ci */ 3404bf215546Sopenharmony_ci 3405bf215546Sopenharmony_ci pvr_csb_pack (&front_a, TA_STATE_ISPA, ispa) { 3406bf215546Sopenharmony_ci ispa.sref = (!disable_stencil_test) * dynamic_state->reference.front; 3407bf215546Sopenharmony_ci } 3408bf215546Sopenharmony_ci front_a |= common_a; 3409bf215546Sopenharmony_ci 3410bf215546Sopenharmony_ci pvr_csb_pack (&back_a, TA_STATE_ISPA, ispa) { 3411bf215546Sopenharmony_ci ispa.sref = (!disable_stencil_test) * dynamic_state->compare_mask.back; 3412bf215546Sopenharmony_ci } 3413bf215546Sopenharmony_ci back_a |= common_a; 3414bf215546Sopenharmony_ci 3415bf215546Sopenharmony_ci /* TODO: Does this actually represent the ispb control word on stencil off? 3416bf215546Sopenharmony_ci * If not, rename the variable. 3417bf215546Sopenharmony_ci */ 3418bf215546Sopenharmony_ci pvr_csb_pack (&ispb_stencil_off, TA_STATE_ISPB, ispb) { 3419bf215546Sopenharmony_ci ispb.sop3 = PVRX(TA_ISPB_STENCILOP_KEEP); 3420bf215546Sopenharmony_ci ispb.sop2 = PVRX(TA_ISPB_STENCILOP_KEEP); 3421bf215546Sopenharmony_ci ispb.sop1 = PVRX(TA_ISPB_STENCILOP_KEEP); 3422bf215546Sopenharmony_ci ispb.scmpmode = PVRX(TA_CMPMODE_ALWAYS); 3423bf215546Sopenharmony_ci } 3424bf215546Sopenharmony_ci 3425bf215546Sopenharmony_ci if (disable_stencil_test) { 3426bf215546Sopenharmony_ci back_b = front_b = ispb_stencil_off; 3427bf215546Sopenharmony_ci } else { 3428bf215546Sopenharmony_ci pvr_csb_pack (&front_b, TA_STATE_ISPB, ispb) { 3429bf215546Sopenharmony_ci ispb.swmask = 3430bf215546Sopenharmony_ci (!disable_stencil_write) * dynamic_state->write_mask.front; 3431bf215546Sopenharmony_ci ispb.scmpmask = dynamic_state->compare_mask.front; 3432bf215546Sopenharmony_ci 3433bf215546Sopenharmony_ci ispb.sop3 = pvr_ta_stencilop(gfx_pipeline->stencil_front.pass_op); 3434bf215546Sopenharmony_ci ispb.sop2 = 3435bf215546Sopenharmony_ci pvr_ta_stencilop(gfx_pipeline->stencil_front.depth_fail_op); 3436bf215546Sopenharmony_ci ispb.sop1 = pvr_ta_stencilop(gfx_pipeline->stencil_front.fail_op); 3437bf215546Sopenharmony_ci 3438bf215546Sopenharmony_ci ispb.scmpmode = pvr_ta_cmpmode(gfx_pipeline->stencil_front.compare_op); 3439bf215546Sopenharmony_ci } 3440bf215546Sopenharmony_ci 3441bf215546Sopenharmony_ci pvr_csb_pack (&back_b, TA_STATE_ISPB, ispb) { 3442bf215546Sopenharmony_ci ispb.swmask = 3443bf215546Sopenharmony_ci (!disable_stencil_write) * dynamic_state->write_mask.back; 3444bf215546Sopenharmony_ci ispb.scmpmask = dynamic_state->compare_mask.back; 3445bf215546Sopenharmony_ci 3446bf215546Sopenharmony_ci ispb.sop3 = pvr_ta_stencilop(gfx_pipeline->stencil_back.pass_op); 3447bf215546Sopenharmony_ci ispb.sop2 = pvr_ta_stencilop(gfx_pipeline->stencil_back.depth_fail_op); 3448bf215546Sopenharmony_ci ispb.sop1 = pvr_ta_stencilop(gfx_pipeline->stencil_back.fail_op); 3449bf215546Sopenharmony_ci 3450bf215546Sopenharmony_ci ispb.scmpmode = pvr_ta_cmpmode(gfx_pipeline->stencil_back.compare_op); 3451bf215546Sopenharmony_ci } 3452bf215546Sopenharmony_ci } 3453bf215546Sopenharmony_ci 3454bf215546Sopenharmony_ci if (front_a != back_a || front_b != back_b) { 3455bf215546Sopenharmony_ci if (cull_mode & VK_CULL_MODE_BACK_BIT) { 3456bf215546Sopenharmony_ci /* Single face, using front state. */ 3457bf215546Sopenharmony_ci } else if (cull_mode & VK_CULL_MODE_FRONT_BIT) { 3458bf215546Sopenharmony_ci /* Single face, using back state. */ 3459bf215546Sopenharmony_ci 3460bf215546Sopenharmony_ci front_a = back_a; 3461bf215546Sopenharmony_ci front_b = back_b; 3462bf215546Sopenharmony_ci } else { 3463bf215546Sopenharmony_ci /* Both faces. */ 3464bf215546Sopenharmony_ci 3465bf215546Sopenharmony_ci emit_state->isp_ba = is_two_sided = true; 3466bf215546Sopenharmony_ci 3467bf215546Sopenharmony_ci if (gfx_pipeline->raster_state.front_face == 3468bf215546Sopenharmony_ci VK_FRONT_FACE_COUNTER_CLOCKWISE) { 3469bf215546Sopenharmony_ci uint32_t tmp = front_a; 3470bf215546Sopenharmony_ci 3471bf215546Sopenharmony_ci front_a = back_a; 3472bf215546Sopenharmony_ci back_a = tmp; 3473bf215546Sopenharmony_ci 3474bf215546Sopenharmony_ci tmp = front_b; 3475bf215546Sopenharmony_ci front_b = back_b; 3476bf215546Sopenharmony_ci back_b = tmp; 3477bf215546Sopenharmony_ci } 3478bf215546Sopenharmony_ci 3479bf215546Sopenharmony_ci /* HW defaults to stencil off. */ 3480bf215546Sopenharmony_ci if (back_b != ispb_stencil_off) 3481bf215546Sopenharmony_ci emit_state->isp_fb = emit_state->isp_bb = true; 3482bf215546Sopenharmony_ci } 3483bf215546Sopenharmony_ci } 3484bf215546Sopenharmony_ci 3485bf215546Sopenharmony_ci if (!disable_stencil_test && front_b != ispb_stencil_off) 3486bf215546Sopenharmony_ci emit_state->isp_fb = true; 3487bf215546Sopenharmony_ci 3488bf215546Sopenharmony_ci pvr_csb_pack (&isp_control, TA_STATE_ISPCTL, ispctl) { 3489bf215546Sopenharmony_ci ispctl.upass = pass_info->userpass_spawn; 3490bf215546Sopenharmony_ci 3491bf215546Sopenharmony_ci /* TODO: is bo ever NULL? Figure out what to do. */ 3492bf215546Sopenharmony_ci ispctl.tagwritedisable = raster_discard_enabled || 3493bf215546Sopenharmony_ci !gfx_pipeline->fragment_shader_state.bo; 3494bf215546Sopenharmony_ci 3495bf215546Sopenharmony_ci ispctl.two_sided = is_two_sided; 3496bf215546Sopenharmony_ci ispctl.bpres = emit_state->isp_fb || emit_state->isp_bb; 3497bf215546Sopenharmony_ci 3498bf215546Sopenharmony_ci ispctl.dbenable = !raster_discard_enabled && 3499bf215546Sopenharmony_ci gfx_pipeline->raster_state.depth_bias_enable && 3500bf215546Sopenharmony_ci obj_type == PVRX(TA_OBJTYPE_TRIANGLE); 3501bf215546Sopenharmony_ci ispctl.scenable = !raster_discard_enabled; 3502bf215546Sopenharmony_ci 3503bf215546Sopenharmony_ci ppp_state->isp.control_struct = ispctl; 3504bf215546Sopenharmony_ci } 3505bf215546Sopenharmony_ci 3506bf215546Sopenharmony_ci emit_state->isp = true; 3507bf215546Sopenharmony_ci 3508bf215546Sopenharmony_ci ppp_state->isp.control = isp_control; 3509bf215546Sopenharmony_ci ppp_state->isp.front_a = front_a; 3510bf215546Sopenharmony_ci ppp_state->isp.front_b = front_b; 3511bf215546Sopenharmony_ci ppp_state->isp.back_a = back_a; 3512bf215546Sopenharmony_ci ppp_state->isp.back_b = back_b; 3513bf215546Sopenharmony_ci} 3514bf215546Sopenharmony_ci 3515bf215546Sopenharmony_cistatic void pvr_get_viewport_scissor_overlap(const VkViewport *const viewport, 3516bf215546Sopenharmony_ci const VkRect2D *const scissor, 3517bf215546Sopenharmony_ci VkRect2D *const rect_out) 3518bf215546Sopenharmony_ci{ 3519bf215546Sopenharmony_ci /* TODO: See if we can remove this struct. */ 3520bf215546Sopenharmony_ci struct pvr_rect { 3521bf215546Sopenharmony_ci int32_t x0, y0; 3522bf215546Sopenharmony_ci int32_t x1, y1; 3523bf215546Sopenharmony_ci }; 3524bf215546Sopenharmony_ci 3525bf215546Sopenharmony_ci /* TODO: Worry about overflow? */ 3526bf215546Sopenharmony_ci const struct pvr_rect scissor_rect = { 3527bf215546Sopenharmony_ci .x0 = scissor->offset.x, 3528bf215546Sopenharmony_ci .y0 = scissor->offset.y, 3529bf215546Sopenharmony_ci .x1 = scissor->offset.x + scissor->extent.width, 3530bf215546Sopenharmony_ci .y1 = scissor->offset.y + scissor->extent.height 3531bf215546Sopenharmony_ci }; 3532bf215546Sopenharmony_ci struct pvr_rect viewport_rect = { 0 }; 3533bf215546Sopenharmony_ci 3534bf215546Sopenharmony_ci assert(viewport->width >= 0.0f); 3535bf215546Sopenharmony_ci assert(scissor_rect.x0 >= 0); 3536bf215546Sopenharmony_ci assert(scissor_rect.y0 >= 0); 3537bf215546Sopenharmony_ci 3538bf215546Sopenharmony_ci if (scissor->extent.width == 0 || scissor->extent.height == 0) { 3539bf215546Sopenharmony_ci *rect_out = (VkRect2D){ 0 }; 3540bf215546Sopenharmony_ci return; 3541bf215546Sopenharmony_ci } 3542bf215546Sopenharmony_ci 3543bf215546Sopenharmony_ci viewport_rect.x0 = (int32_t)viewport->x; 3544bf215546Sopenharmony_ci viewport_rect.x1 = (int32_t)viewport->x + (int32_t)viewport->width; 3545bf215546Sopenharmony_ci 3546bf215546Sopenharmony_ci /* TODO: Is there a mathematical way of doing all this and then clamp at 3547bf215546Sopenharmony_ci * the end? 3548bf215546Sopenharmony_ci */ 3549bf215546Sopenharmony_ci /* We flip the y0 and y1 when height is negative. */ 3550bf215546Sopenharmony_ci viewport_rect.y0 = (int32_t)viewport->y + MIN2(0, (int32_t)viewport->height); 3551bf215546Sopenharmony_ci viewport_rect.y1 = (int32_t)viewport->y + MAX2(0, (int32_t)viewport->height); 3552bf215546Sopenharmony_ci 3553bf215546Sopenharmony_ci if (scissor_rect.x1 <= viewport_rect.x0 || 3554bf215546Sopenharmony_ci scissor_rect.y1 <= viewport_rect.y0 || 3555bf215546Sopenharmony_ci scissor_rect.x0 >= viewport_rect.x1 || 3556bf215546Sopenharmony_ci scissor_rect.y0 >= viewport_rect.y1) { 3557bf215546Sopenharmony_ci *rect_out = (VkRect2D){ 0 }; 3558bf215546Sopenharmony_ci return; 3559bf215546Sopenharmony_ci } 3560bf215546Sopenharmony_ci 3561bf215546Sopenharmony_ci /* Determine the overlapping rectangle. */ 3562bf215546Sopenharmony_ci viewport_rect.x0 = MAX2(viewport_rect.x0, scissor_rect.x0); 3563bf215546Sopenharmony_ci viewport_rect.y0 = MAX2(viewport_rect.y0, scissor_rect.y0); 3564bf215546Sopenharmony_ci viewport_rect.x1 = MIN2(viewport_rect.x1, scissor_rect.x1); 3565bf215546Sopenharmony_ci viewport_rect.y1 = MIN2(viewport_rect.y1, scissor_rect.y1); 3566bf215546Sopenharmony_ci 3567bf215546Sopenharmony_ci /* TODO: Is this conversion safe? Is this logic right? */ 3568bf215546Sopenharmony_ci rect_out->offset.x = (uint32_t)viewport_rect.x0; 3569bf215546Sopenharmony_ci rect_out->offset.y = (uint32_t)viewport_rect.y0; 3570bf215546Sopenharmony_ci rect_out->extent.height = (uint32_t)(viewport_rect.y1 - viewport_rect.y0); 3571bf215546Sopenharmony_ci rect_out->extent.width = (uint32_t)(viewport_rect.x1 - viewport_rect.x0); 3572bf215546Sopenharmony_ci} 3573bf215546Sopenharmony_ci 3574bf215546Sopenharmony_cistatic inline uint32_t 3575bf215546Sopenharmony_cipvr_get_geom_region_clip_align_size(struct pvr_device_info *const dev_info) 3576bf215546Sopenharmony_ci{ 3577bf215546Sopenharmony_ci /* TODO: This should come from rogue_ppp.xml. */ 3578bf215546Sopenharmony_ci return 16U + 16U * (!PVR_HAS_FEATURE(dev_info, tile_size_16x16)); 3579bf215546Sopenharmony_ci} 3580bf215546Sopenharmony_ci 3581bf215546Sopenharmony_ci/* FIXME: Remove device param when PVR_HAS_FEATURE() accepts const dev_info */ 3582bf215546Sopenharmony_cistatic void 3583bf215546Sopenharmony_cipvr_setup_isp_depth_bias_scissor_state(struct pvr_cmd_buffer *const cmd_buffer) 3584bf215546Sopenharmony_ci{ 3585bf215546Sopenharmony_ci struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state; 3586bf215546Sopenharmony_ci struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state; 3587bf215546Sopenharmony_ci const struct pvr_dynamic_state *const dynamic_state = 3588bf215546Sopenharmony_ci &cmd_buffer->state.dynamic.common; 3589bf215546Sopenharmony_ci const struct PVRX(TA_STATE_ISPCTL) *const ispctl = 3590bf215546Sopenharmony_ci &ppp_state->isp.control_struct; 3591bf215546Sopenharmony_ci struct pvr_device_info *const dev_info = 3592bf215546Sopenharmony_ci &cmd_buffer->device->pdevice->dev_info; 3593bf215546Sopenharmony_ci 3594bf215546Sopenharmony_ci if (ispctl->dbenable) 3595bf215546Sopenharmony_ci assert(!"Unimplemented"); 3596bf215546Sopenharmony_ci 3597bf215546Sopenharmony_ci if (ispctl->scenable) { 3598bf215546Sopenharmony_ci const uint32_t region_clip_align_size = 3599bf215546Sopenharmony_ci pvr_get_geom_region_clip_align_size(dev_info); 3600bf215546Sopenharmony_ci const VkViewport *const viewport = &dynamic_state->viewport.viewports[0]; 3601bf215546Sopenharmony_ci const VkRect2D *const scissor = &dynamic_state->scissor.scissors[0]; 3602bf215546Sopenharmony_ci VkRect2D overlap_rect; 3603bf215546Sopenharmony_ci uint32_t scissor_words[2]; 3604bf215546Sopenharmony_ci uint32_t height; 3605bf215546Sopenharmony_ci uint32_t width; 3606bf215546Sopenharmony_ci uint32_t x; 3607bf215546Sopenharmony_ci uint32_t y; 3608bf215546Sopenharmony_ci 3609bf215546Sopenharmony_ci /* For region clip. */ 3610bf215546Sopenharmony_ci uint32_t bottom; 3611bf215546Sopenharmony_ci uint32_t right; 3612bf215546Sopenharmony_ci uint32_t left; 3613bf215546Sopenharmony_ci uint32_t top; 3614bf215546Sopenharmony_ci 3615bf215546Sopenharmony_ci /* We don't support multiple viewport calculations. */ 3616bf215546Sopenharmony_ci assert(dynamic_state->viewport.count == 1); 3617bf215546Sopenharmony_ci /* We don't support multiple scissor calculations. */ 3618bf215546Sopenharmony_ci assert(dynamic_state->scissor.count == 1); 3619bf215546Sopenharmony_ci 3620bf215546Sopenharmony_ci pvr_get_viewport_scissor_overlap(viewport, scissor, &overlap_rect); 3621bf215546Sopenharmony_ci 3622bf215546Sopenharmony_ci x = overlap_rect.offset.x; 3623bf215546Sopenharmony_ci y = overlap_rect.offset.y; 3624bf215546Sopenharmony_ci width = overlap_rect.extent.width; 3625bf215546Sopenharmony_ci height = overlap_rect.extent.height; 3626bf215546Sopenharmony_ci 3627bf215546Sopenharmony_ci pvr_csb_pack (&scissor_words[0], IPF_SCISSOR_WORD_0, word0) { 3628bf215546Sopenharmony_ci word0.scw0_xmax = x + width; 3629bf215546Sopenharmony_ci word0.scw0_xmin = x; 3630bf215546Sopenharmony_ci } 3631bf215546Sopenharmony_ci 3632bf215546Sopenharmony_ci pvr_csb_pack (&scissor_words[1], IPF_SCISSOR_WORD_1, word1) { 3633bf215546Sopenharmony_ci word1.scw1_ymax = y + height; 3634bf215546Sopenharmony_ci word1.scw1_ymin = y; 3635bf215546Sopenharmony_ci } 3636bf215546Sopenharmony_ci 3637bf215546Sopenharmony_ci if (cmd_buffer->scissor_array.size && 3638bf215546Sopenharmony_ci cmd_buffer->scissor_words[0] == scissor_words[0] && 3639bf215546Sopenharmony_ci cmd_buffer->scissor_words[1] == scissor_words[1]) { 3640bf215546Sopenharmony_ci return; 3641bf215546Sopenharmony_ci } 3642bf215546Sopenharmony_ci 3643bf215546Sopenharmony_ci cmd_buffer->scissor_words[0] = scissor_words[0]; 3644bf215546Sopenharmony_ci cmd_buffer->scissor_words[1] = scissor_words[1]; 3645bf215546Sopenharmony_ci 3646bf215546Sopenharmony_ci /* Calculate region clip. */ 3647bf215546Sopenharmony_ci 3648bf215546Sopenharmony_ci left = x / region_clip_align_size; 3649bf215546Sopenharmony_ci top = y / region_clip_align_size; 3650bf215546Sopenharmony_ci 3651bf215546Sopenharmony_ci /* We prevent right=-1 with the multiplication. */ 3652bf215546Sopenharmony_ci /* TODO: Is there a better way of doing this? */ 3653bf215546Sopenharmony_ci if ((x + width) != 0U) 3654bf215546Sopenharmony_ci right = DIV_ROUND_UP(x + width, region_clip_align_size) - 1; 3655bf215546Sopenharmony_ci else 3656bf215546Sopenharmony_ci right = 0; 3657bf215546Sopenharmony_ci 3658bf215546Sopenharmony_ci if ((y + height) != 0U) 3659bf215546Sopenharmony_ci bottom = DIV_ROUND_UP(y + height, region_clip_align_size) - 1; 3660bf215546Sopenharmony_ci else 3661bf215546Sopenharmony_ci bottom = 0U; 3662bf215546Sopenharmony_ci 3663bf215546Sopenharmony_ci /* Setup region clip to clip everything outside what was calculated. */ 3664bf215546Sopenharmony_ci 3665bf215546Sopenharmony_ci /* FIXME: Should we mask to prevent writing over other words? */ 3666bf215546Sopenharmony_ci pvr_csb_pack (&ppp_state->region_clipping.word0, TA_REGION_CLIP0, word0) { 3667bf215546Sopenharmony_ci word0.right = right; 3668bf215546Sopenharmony_ci word0.left = left; 3669bf215546Sopenharmony_ci word0.mode = PVRX(TA_REGION_CLIP_MODE_OUTSIDE); 3670bf215546Sopenharmony_ci } 3671bf215546Sopenharmony_ci 3672bf215546Sopenharmony_ci pvr_csb_pack (&ppp_state->region_clipping.word1, TA_REGION_CLIP1, word1) { 3673bf215546Sopenharmony_ci word1.bottom = bottom; 3674bf215546Sopenharmony_ci word1.top = top; 3675bf215546Sopenharmony_ci } 3676bf215546Sopenharmony_ci 3677bf215546Sopenharmony_ci ppp_state->depthbias_scissor_indices.scissor_index = 3678bf215546Sopenharmony_ci util_dynarray_num_elements(&cmd_buffer->scissor_array, 3679bf215546Sopenharmony_ci __typeof__(cmd_buffer->scissor_words)); 3680bf215546Sopenharmony_ci 3681bf215546Sopenharmony_ci memcpy(util_dynarray_grow_bytes(&cmd_buffer->scissor_array, 3682bf215546Sopenharmony_ci 1, 3683bf215546Sopenharmony_ci sizeof(cmd_buffer->scissor_words)), 3684bf215546Sopenharmony_ci cmd_buffer->scissor_words, 3685bf215546Sopenharmony_ci sizeof(cmd_buffer->scissor_words)); 3686bf215546Sopenharmony_ci 3687bf215546Sopenharmony_ci emit_state->isp_dbsc = true; 3688bf215546Sopenharmony_ci emit_state->region_clip = true; 3689bf215546Sopenharmony_ci } 3690bf215546Sopenharmony_ci} 3691bf215546Sopenharmony_ci 3692bf215546Sopenharmony_cistatic void 3693bf215546Sopenharmony_cipvr_setup_triangle_merging_flag(struct pvr_cmd_buffer *const cmd_buffer, 3694bf215546Sopenharmony_ci struct PVRX(TA_STATE_ISPA) * ispa) 3695bf215546Sopenharmony_ci{ 3696bf215546Sopenharmony_ci struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state; 3697bf215546Sopenharmony_ci struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state; 3698bf215546Sopenharmony_ci uint32_t merge_word; 3699bf215546Sopenharmony_ci uint32_t mask; 3700bf215546Sopenharmony_ci 3701bf215546Sopenharmony_ci pvr_csb_pack (&merge_word, TA_STATE_PDS_SIZEINFO2, size_info) { 3702bf215546Sopenharmony_ci /* Disable for lines or punch-through or for DWD and depth compare 3703bf215546Sopenharmony_ci * always. 3704bf215546Sopenharmony_ci */ 3705bf215546Sopenharmony_ci if (ispa->objtype == PVRX(TA_OBJTYPE_LINE) || 3706bf215546Sopenharmony_ci ispa->passtype == PVRX(TA_PASSTYPE_PUNCH_THROUGH) || 3707bf215546Sopenharmony_ci (ispa->dwritedisable && ispa->dcmpmode == PVRX(TA_CMPMODE_ALWAYS))) { 3708bf215546Sopenharmony_ci size_info.pds_tri_merge_disable = true; 3709bf215546Sopenharmony_ci } 3710bf215546Sopenharmony_ci } 3711bf215546Sopenharmony_ci 3712bf215546Sopenharmony_ci pvr_csb_pack (&mask, TA_STATE_PDS_SIZEINFO2, size_info) { 3713bf215546Sopenharmony_ci size_info.pds_tri_merge_disable = true; 3714bf215546Sopenharmony_ci } 3715bf215546Sopenharmony_ci 3716bf215546Sopenharmony_ci merge_word |= ppp_state->pds.size_info2 & ~mask; 3717bf215546Sopenharmony_ci 3718bf215546Sopenharmony_ci if (merge_word != ppp_state->pds.size_info2) { 3719bf215546Sopenharmony_ci ppp_state->pds.size_info2 = merge_word; 3720bf215546Sopenharmony_ci emit_state->pds_fragment_stateptr0 = true; 3721bf215546Sopenharmony_ci } 3722bf215546Sopenharmony_ci} 3723bf215546Sopenharmony_ci 3724bf215546Sopenharmony_cistatic void 3725bf215546Sopenharmony_cipvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, 3726bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const sub_cmd) 3727bf215546Sopenharmony_ci{ 3728bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 3729bf215546Sopenharmony_ci const struct pvr_stage_allocation_descriptor_state *descriptor_shader_state = 3730bf215546Sopenharmony_ci &state->gfx_pipeline->fragment_shader_state.descriptor_state; 3731bf215546Sopenharmony_ci const struct pvr_pds_upload *pds_coeff_program = 3732bf215546Sopenharmony_ci &state->gfx_pipeline->fragment_shader_state.pds_coeff_program; 3733bf215546Sopenharmony_ci const struct pvr_pipeline_stage_state *fragment_state = 3734bf215546Sopenharmony_ci &state->gfx_pipeline->fragment_shader_state.stage_state; 3735bf215546Sopenharmony_ci const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; 3736bf215546Sopenharmony_ci struct pvr_emit_state *const emit_state = &state->emit_state; 3737bf215546Sopenharmony_ci struct pvr_ppp_state *const ppp_state = &state->ppp_state; 3738bf215546Sopenharmony_ci 3739bf215546Sopenharmony_ci const uint32_t pds_uniform_size = 3740bf215546Sopenharmony_ci DIV_ROUND_UP(descriptor_shader_state->pds_info.data_size_in_dwords, 3741bf215546Sopenharmony_ci PVRX(TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE)); 3742bf215546Sopenharmony_ci 3743bf215546Sopenharmony_ci const uint32_t pds_varying_state_size = 3744bf215546Sopenharmony_ci DIV_ROUND_UP(pds_coeff_program->data_size, 3745bf215546Sopenharmony_ci PVRX(TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE)); 3746bf215546Sopenharmony_ci 3747bf215546Sopenharmony_ci const uint32_t usc_varying_size = 3748bf215546Sopenharmony_ci DIV_ROUND_UP(fragment_state->coefficient_size, 3749bf215546Sopenharmony_ci PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE)); 3750bf215546Sopenharmony_ci 3751bf215546Sopenharmony_ci const uint32_t pds_temp_size = 3752bf215546Sopenharmony_ci DIV_ROUND_UP(fragment_state->temps_count, 3753bf215546Sopenharmony_ci PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE)); 3754bf215546Sopenharmony_ci 3755bf215546Sopenharmony_ci const uint32_t usc_shared_size = 3756bf215546Sopenharmony_ci DIV_ROUND_UP(fragment_state->const_shared_reg_count, 3757bf215546Sopenharmony_ci PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE)); 3758bf215546Sopenharmony_ci 3759bf215546Sopenharmony_ci const uint32_t max_tiles_in_flight = 3760bf215546Sopenharmony_ci pvr_calc_fscommon_size_and_tiles_in_flight( 3761bf215546Sopenharmony_ci pdevice, 3762bf215546Sopenharmony_ci usc_shared_size * 3763bf215546Sopenharmony_ci PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE), 3764bf215546Sopenharmony_ci 1); 3765bf215546Sopenharmony_ci uint32_t size_info_mask; 3766bf215546Sopenharmony_ci uint32_t size_info2; 3767bf215546Sopenharmony_ci 3768bf215546Sopenharmony_ci if (max_tiles_in_flight < sub_cmd->max_tiles_in_flight) 3769bf215546Sopenharmony_ci sub_cmd->max_tiles_in_flight = max_tiles_in_flight; 3770bf215546Sopenharmony_ci 3771bf215546Sopenharmony_ci pvr_csb_pack (&ppp_state->pds.pixel_shader_base, 3772bf215546Sopenharmony_ci TA_STATE_PDS_SHADERBASE, 3773bf215546Sopenharmony_ci shader_base) { 3774bf215546Sopenharmony_ci const struct pvr_pds_upload *const pds_upload = 3775bf215546Sopenharmony_ci &state->gfx_pipeline->fragment_shader_state.pds_fragment_program; 3776bf215546Sopenharmony_ci 3777bf215546Sopenharmony_ci shader_base.addr = PVR_DEV_ADDR(pds_upload->data_offset); 3778bf215546Sopenharmony_ci } 3779bf215546Sopenharmony_ci 3780bf215546Sopenharmony_ci if (descriptor_shader_state->pds_code.pvr_bo) { 3781bf215546Sopenharmony_ci pvr_csb_pack (&ppp_state->pds.texture_uniform_code_base, 3782bf215546Sopenharmony_ci TA_STATE_PDS_TEXUNICODEBASE, 3783bf215546Sopenharmony_ci tex_base) { 3784bf215546Sopenharmony_ci tex_base.addr = 3785bf215546Sopenharmony_ci PVR_DEV_ADDR(descriptor_shader_state->pds_code.code_offset); 3786bf215546Sopenharmony_ci } 3787bf215546Sopenharmony_ci } else { 3788bf215546Sopenharmony_ci ppp_state->pds.texture_uniform_code_base = 0U; 3789bf215546Sopenharmony_ci } 3790bf215546Sopenharmony_ci 3791bf215546Sopenharmony_ci pvr_csb_pack (&ppp_state->pds.size_info1, TA_STATE_PDS_SIZEINFO1, info1) { 3792bf215546Sopenharmony_ci info1.pds_uniformsize = pds_uniform_size; 3793bf215546Sopenharmony_ci info1.pds_texturestatesize = 0U; 3794bf215546Sopenharmony_ci info1.pds_varyingsize = pds_varying_state_size; 3795bf215546Sopenharmony_ci info1.usc_varyingsize = usc_varying_size; 3796bf215546Sopenharmony_ci info1.pds_tempsize = pds_temp_size; 3797bf215546Sopenharmony_ci } 3798bf215546Sopenharmony_ci 3799bf215546Sopenharmony_ci pvr_csb_pack (&size_info_mask, TA_STATE_PDS_SIZEINFO2, mask) { 3800bf215546Sopenharmony_ci mask.pds_tri_merge_disable = true; 3801bf215546Sopenharmony_ci } 3802bf215546Sopenharmony_ci 3803bf215546Sopenharmony_ci ppp_state->pds.size_info2 &= size_info_mask; 3804bf215546Sopenharmony_ci 3805bf215546Sopenharmony_ci pvr_csb_pack (&size_info2, TA_STATE_PDS_SIZEINFO2, info2) { 3806bf215546Sopenharmony_ci info2.usc_sharedsize = usc_shared_size; 3807bf215546Sopenharmony_ci } 3808bf215546Sopenharmony_ci 3809bf215546Sopenharmony_ci ppp_state->pds.size_info2 |= size_info2; 3810bf215546Sopenharmony_ci 3811bf215546Sopenharmony_ci if (pds_coeff_program->pvr_bo) { 3812bf215546Sopenharmony_ci state->emit_state.pds_fragment_stateptr1 = true; 3813bf215546Sopenharmony_ci 3814bf215546Sopenharmony_ci pvr_csb_pack (&ppp_state->pds.varying_base, 3815bf215546Sopenharmony_ci TA_STATE_PDS_VARYINGBASE, 3816bf215546Sopenharmony_ci base) { 3817bf215546Sopenharmony_ci base.addr = PVR_DEV_ADDR(pds_coeff_program->data_offset); 3818bf215546Sopenharmony_ci } 3819bf215546Sopenharmony_ci } else { 3820bf215546Sopenharmony_ci ppp_state->pds.varying_base = 0U; 3821bf215546Sopenharmony_ci } 3822bf215546Sopenharmony_ci 3823bf215546Sopenharmony_ci pvr_csb_pack (&ppp_state->pds.uniform_state_data_base, 3824bf215546Sopenharmony_ci TA_STATE_PDS_UNIFORMDATABASE, 3825bf215546Sopenharmony_ci base) { 3826bf215546Sopenharmony_ci base.addr = PVR_DEV_ADDR(state->pds_fragment_descriptor_data_offset); 3827bf215546Sopenharmony_ci } 3828bf215546Sopenharmony_ci 3829bf215546Sopenharmony_ci emit_state->pds_fragment_stateptr0 = true; 3830bf215546Sopenharmony_ci emit_state->pds_fragment_stateptr3 = true; 3831bf215546Sopenharmony_ci} 3832bf215546Sopenharmony_ci 3833bf215546Sopenharmony_cistatic void pvr_setup_viewport(struct pvr_cmd_buffer *const cmd_buffer) 3834bf215546Sopenharmony_ci{ 3835bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 3836bf215546Sopenharmony_ci struct pvr_emit_state *const emit_state = &state->emit_state; 3837bf215546Sopenharmony_ci struct pvr_ppp_state *const ppp_state = &state->ppp_state; 3838bf215546Sopenharmony_ci 3839bf215546Sopenharmony_ci if (ppp_state->viewport_count != state->dynamic.common.viewport.count) { 3840bf215546Sopenharmony_ci ppp_state->viewport_count = state->dynamic.common.viewport.count; 3841bf215546Sopenharmony_ci emit_state->viewport = true; 3842bf215546Sopenharmony_ci } 3843bf215546Sopenharmony_ci 3844bf215546Sopenharmony_ci if (state->gfx_pipeline->raster_state.discard_enable) { 3845bf215546Sopenharmony_ci /* We don't want to emit any viewport data as it'll just get thrown 3846bf215546Sopenharmony_ci * away. It's after the previous condition because we still want to 3847bf215546Sopenharmony_ci * stash the viewport_count as it's our trigger for when 3848bf215546Sopenharmony_ci * rasterizer discard gets disabled. 3849bf215546Sopenharmony_ci */ 3850bf215546Sopenharmony_ci emit_state->viewport = false; 3851bf215546Sopenharmony_ci return; 3852bf215546Sopenharmony_ci } 3853bf215546Sopenharmony_ci 3854bf215546Sopenharmony_ci for (uint32_t i = 0; i < ppp_state->viewport_count; i++) { 3855bf215546Sopenharmony_ci VkViewport *viewport = &state->dynamic.common.viewport.viewports[i]; 3856bf215546Sopenharmony_ci uint32_t x_scale = fui(viewport->width * 0.5f); 3857bf215546Sopenharmony_ci uint32_t y_scale = fui(viewport->height * 0.5f); 3858bf215546Sopenharmony_ci uint32_t z_scale = fui(viewport->maxDepth - viewport->minDepth); 3859bf215546Sopenharmony_ci uint32_t x_center = fui(viewport->x + viewport->width * 0.5f); 3860bf215546Sopenharmony_ci uint32_t y_center = fui(viewport->y + viewport->height * 0.5f); 3861bf215546Sopenharmony_ci uint32_t z_center = fui(viewport->minDepth); 3862bf215546Sopenharmony_ci 3863bf215546Sopenharmony_ci if (ppp_state->viewports[i].a0 != x_center || 3864bf215546Sopenharmony_ci ppp_state->viewports[i].m0 != x_scale || 3865bf215546Sopenharmony_ci ppp_state->viewports[i].a1 != y_center || 3866bf215546Sopenharmony_ci ppp_state->viewports[i].m1 != y_scale || 3867bf215546Sopenharmony_ci ppp_state->viewports[i].a2 != z_center || 3868bf215546Sopenharmony_ci ppp_state->viewports[i].m2 != z_scale) { 3869bf215546Sopenharmony_ci ppp_state->viewports[i].a0 = x_center; 3870bf215546Sopenharmony_ci ppp_state->viewports[i].m0 = x_scale; 3871bf215546Sopenharmony_ci ppp_state->viewports[i].a1 = y_center; 3872bf215546Sopenharmony_ci ppp_state->viewports[i].m1 = y_scale; 3873bf215546Sopenharmony_ci ppp_state->viewports[i].a2 = z_center; 3874bf215546Sopenharmony_ci ppp_state->viewports[i].m2 = z_scale; 3875bf215546Sopenharmony_ci 3876bf215546Sopenharmony_ci emit_state->viewport = true; 3877bf215546Sopenharmony_ci } 3878bf215546Sopenharmony_ci } 3879bf215546Sopenharmony_ci} 3880bf215546Sopenharmony_ci 3881bf215546Sopenharmony_cistatic void pvr_setup_ppp_control(struct pvr_cmd_buffer *const cmd_buffer) 3882bf215546Sopenharmony_ci{ 3883bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 3884bf215546Sopenharmony_ci const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline; 3885bf215546Sopenharmony_ci struct pvr_emit_state *const emit_state = &state->emit_state; 3886bf215546Sopenharmony_ci struct pvr_ppp_state *const ppp_state = &state->ppp_state; 3887bf215546Sopenharmony_ci uint32_t ppp_control; 3888bf215546Sopenharmony_ci 3889bf215546Sopenharmony_ci pvr_csb_pack (&ppp_control, TA_STATE_PPP_CTRL, control) { 3890bf215546Sopenharmony_ci const struct pvr_raster_state *raster_state = &gfx_pipeline->raster_state; 3891bf215546Sopenharmony_ci VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology; 3892bf215546Sopenharmony_ci control.drawclippededges = true; 3893bf215546Sopenharmony_ci control.wclampen = true; 3894bf215546Sopenharmony_ci 3895bf215546Sopenharmony_ci if (topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) 3896bf215546Sopenharmony_ci control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_1); 3897bf215546Sopenharmony_ci else 3898bf215546Sopenharmony_ci control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_0); 3899bf215546Sopenharmony_ci 3900bf215546Sopenharmony_ci if (raster_state->depth_clamp_enable) 3901bf215546Sopenharmony_ci control.clip_mode = PVRX(TA_CLIP_MODE_NO_FRONT_OR_REAR); 3902bf215546Sopenharmony_ci else 3903bf215546Sopenharmony_ci control.clip_mode = PVRX(TA_CLIP_MODE_FRONT_REAR); 3904bf215546Sopenharmony_ci 3905bf215546Sopenharmony_ci /* +--- FrontIsCCW? 3906bf215546Sopenharmony_ci * | +--- Cull Front? 3907bf215546Sopenharmony_ci * v v 3908bf215546Sopenharmony_ci * 0|0 CULLMODE_CULL_CCW, 3909bf215546Sopenharmony_ci * 0|1 CULLMODE_CULL_CW, 3910bf215546Sopenharmony_ci * 1|0 CULLMODE_CULL_CW, 3911bf215546Sopenharmony_ci * 1|1 CULLMODE_CULL_CCW, 3912bf215546Sopenharmony_ci */ 3913bf215546Sopenharmony_ci switch (raster_state->cull_mode) { 3914bf215546Sopenharmony_ci case VK_CULL_MODE_BACK_BIT: 3915bf215546Sopenharmony_ci case VK_CULL_MODE_FRONT_BIT: 3916bf215546Sopenharmony_ci if ((raster_state->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE) ^ 3917bf215546Sopenharmony_ci (raster_state->cull_mode == VK_CULL_MODE_FRONT_BIT)) { 3918bf215546Sopenharmony_ci control.cullmode = PVRX(TA_CULLMODE_CULL_CW); 3919bf215546Sopenharmony_ci } else { 3920bf215546Sopenharmony_ci control.cullmode = PVRX(TA_CULLMODE_CULL_CCW); 3921bf215546Sopenharmony_ci } 3922bf215546Sopenharmony_ci 3923bf215546Sopenharmony_ci break; 3924bf215546Sopenharmony_ci 3925bf215546Sopenharmony_ci case VK_CULL_MODE_NONE: 3926bf215546Sopenharmony_ci control.cullmode = PVRX(TA_CULLMODE_NO_CULLING); 3927bf215546Sopenharmony_ci break; 3928bf215546Sopenharmony_ci 3929bf215546Sopenharmony_ci default: 3930bf215546Sopenharmony_ci unreachable("Unsupported cull mode!"); 3931bf215546Sopenharmony_ci } 3932bf215546Sopenharmony_ci } 3933bf215546Sopenharmony_ci 3934bf215546Sopenharmony_ci if (ppp_control != ppp_state->ppp_control) { 3935bf215546Sopenharmony_ci ppp_state->ppp_control = ppp_control; 3936bf215546Sopenharmony_ci emit_state->ppp_control = true; 3937bf215546Sopenharmony_ci } 3938bf215546Sopenharmony_ci} 3939bf215546Sopenharmony_ci 3940bf215546Sopenharmony_ci/* Largest valid PPP State update in words = 31 3941bf215546Sopenharmony_ci * 1 - Header 3942bf215546Sopenharmony_ci * 3 - Stream Out Config words 0, 1 and 2 3943bf215546Sopenharmony_ci * 1 - PPP Control word 3944bf215546Sopenharmony_ci * 3 - Varying Config words 0, 1 and 2 3945bf215546Sopenharmony_ci * 1 - Output Select 3946bf215546Sopenharmony_ci * 1 - WClamp 3947bf215546Sopenharmony_ci * 6 - Viewport Transform words 3948bf215546Sopenharmony_ci * 2 - Region Clip words 3949bf215546Sopenharmony_ci * 3 - PDS State for fragment phase (PDSSTATEPTR 1-3) 3950bf215546Sopenharmony_ci * 4 - PDS State for fragment phase (PDSSTATEPTR0) 3951bf215546Sopenharmony_ci * 6 - ISP Control Words 3952bf215546Sopenharmony_ci */ 3953bf215546Sopenharmony_ci#define PVR_MAX_PPP_STATE_DWORDS 31 3954bf215546Sopenharmony_ci 3955bf215546Sopenharmony_cistatic VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer, 3956bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const sub_cmd) 3957bf215546Sopenharmony_ci{ 3958bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 3959bf215546Sopenharmony_ci struct pvr_emit_state *const emit_state = &state->emit_state; 3960bf215546Sopenharmony_ci struct pvr_ppp_state *const ppp_state = &state->ppp_state; 3961bf215546Sopenharmony_ci struct pvr_csb *const control_stream = &sub_cmd->control_stream; 3962bf215546Sopenharmony_ci uint32_t ppp_state_words[PVR_MAX_PPP_STATE_DWORDS]; 3963bf215546Sopenharmony_ci uint32_t ppp_state_words_count; 3964bf215546Sopenharmony_ci uint32_t ppp_state_header; 3965bf215546Sopenharmony_ci bool deferred_secondary; 3966bf215546Sopenharmony_ci struct pvr_bo *pvr_bo; 3967bf215546Sopenharmony_ci uint32_t *buffer_ptr; 3968bf215546Sopenharmony_ci VkResult result; 3969bf215546Sopenharmony_ci 3970bf215546Sopenharmony_ci buffer_ptr = ppp_state_words; 3971bf215546Sopenharmony_ci 3972bf215546Sopenharmony_ci pvr_csb_pack (&ppp_state_header, TA_STATE_HEADER, header) { 3973bf215546Sopenharmony_ci header.view_port_count = (ppp_state->viewport_count == 0) 3974bf215546Sopenharmony_ci ? 0U 3975bf215546Sopenharmony_ci : (ppp_state->viewport_count - 1); 3976bf215546Sopenharmony_ci 3977bf215546Sopenharmony_ci /* Skip over header. */ 3978bf215546Sopenharmony_ci buffer_ptr++; 3979bf215546Sopenharmony_ci 3980bf215546Sopenharmony_ci /* Set ISP state. */ 3981bf215546Sopenharmony_ci if (emit_state->isp) { 3982bf215546Sopenharmony_ci header.pres_ispctl = true; 3983bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->isp.control; 3984bf215546Sopenharmony_ci header.pres_ispctl_fa = true; 3985bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->isp.front_a; 3986bf215546Sopenharmony_ci 3987bf215546Sopenharmony_ci if (emit_state->isp_fb) { 3988bf215546Sopenharmony_ci header.pres_ispctl_fb = true; 3989bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->isp.front_b; 3990bf215546Sopenharmony_ci } 3991bf215546Sopenharmony_ci 3992bf215546Sopenharmony_ci if (emit_state->isp_ba) { 3993bf215546Sopenharmony_ci header.pres_ispctl_ba = true; 3994bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->isp.back_a; 3995bf215546Sopenharmony_ci } 3996bf215546Sopenharmony_ci 3997bf215546Sopenharmony_ci if (emit_state->isp_bb) { 3998bf215546Sopenharmony_ci header.pres_ispctl_bb = true; 3999bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->isp.back_b; 4000bf215546Sopenharmony_ci } 4001bf215546Sopenharmony_ci } 4002bf215546Sopenharmony_ci 4003bf215546Sopenharmony_ci /* Depth bias / scissor 4004bf215546Sopenharmony_ci * If deferred_secondary is true then we do a separate state update 4005bf215546Sopenharmony_ci * which gets patched in ExecuteDeferredCommandBuffer. 4006bf215546Sopenharmony_ci */ 4007bf215546Sopenharmony_ci /* TODO: Update above comment when we port ExecuteDeferredCommandBuffer. 4008bf215546Sopenharmony_ci */ 4009bf215546Sopenharmony_ci deferred_secondary = 4010bf215546Sopenharmony_ci cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && 4011bf215546Sopenharmony_ci cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; 4012bf215546Sopenharmony_ci 4013bf215546Sopenharmony_ci if (emit_state->isp_dbsc && !deferred_secondary) { 4014bf215546Sopenharmony_ci header.pres_ispctl_dbsc = true; 4015bf215546Sopenharmony_ci 4016bf215546Sopenharmony_ci pvr_csb_pack (buffer_ptr++, TA_STATE_ISPDBSC, ispdbsc) { 4017bf215546Sopenharmony_ci ispdbsc.dbindex = 4018bf215546Sopenharmony_ci ppp_state->depthbias_scissor_indices.depthbias_index; 4019bf215546Sopenharmony_ci ispdbsc.scindex = 4020bf215546Sopenharmony_ci ppp_state->depthbias_scissor_indices.scissor_index; 4021bf215546Sopenharmony_ci } 4022bf215546Sopenharmony_ci } 4023bf215546Sopenharmony_ci 4024bf215546Sopenharmony_ci /* PDS state. */ 4025bf215546Sopenharmony_ci if (emit_state->pds_fragment_stateptr0) { 4026bf215546Sopenharmony_ci header.pres_pds_state_ptr0 = true; 4027bf215546Sopenharmony_ci 4028bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->pds.pixel_shader_base; 4029bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->pds.texture_uniform_code_base; 4030bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->pds.size_info1; 4031bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->pds.size_info2; 4032bf215546Sopenharmony_ci } 4033bf215546Sopenharmony_ci 4034bf215546Sopenharmony_ci if (emit_state->pds_fragment_stateptr1) { 4035bf215546Sopenharmony_ci header.pres_pds_state_ptr1 = true; 4036bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->pds.varying_base; 4037bf215546Sopenharmony_ci } 4038bf215546Sopenharmony_ci 4039bf215546Sopenharmony_ci /* We don't use the pds_fragment_stateptr2 (texture state programs) 4040bf215546Sopenharmony_ci * control word, but this doesn't mean we need to set it to 0. This is 4041bf215546Sopenharmony_ci * because the hardware runs the texture state program only when the 4042bf215546Sopenharmony_ci * pds_texture state field of PDS_SIZEINFO1 is non-zero. 4043bf215546Sopenharmony_ci */ 4044bf215546Sopenharmony_ci 4045bf215546Sopenharmony_ci if (emit_state->pds_fragment_stateptr3) { 4046bf215546Sopenharmony_ci header.pres_pds_state_ptr3 = true; 4047bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->pds.uniform_state_data_base; 4048bf215546Sopenharmony_ci } 4049bf215546Sopenharmony_ci 4050bf215546Sopenharmony_ci /* Region clip. */ 4051bf215546Sopenharmony_ci if (emit_state->region_clip) { 4052bf215546Sopenharmony_ci header.pres_region_clip = true; 4053bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->region_clipping.word0; 4054bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->region_clipping.word1; 4055bf215546Sopenharmony_ci } 4056bf215546Sopenharmony_ci 4057bf215546Sopenharmony_ci /* Viewport. */ 4058bf215546Sopenharmony_ci if (emit_state->viewport) { 4059bf215546Sopenharmony_ci const uint32_t viewports = MAX2(1, ppp_state->viewport_count); 4060bf215546Sopenharmony_ci 4061bf215546Sopenharmony_ci header.pres_viewport = true; 4062bf215546Sopenharmony_ci for (uint32_t i = 0; i < viewports; i++) { 4063bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->viewports[i].a0; 4064bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->viewports[i].m0; 4065bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->viewports[i].a1; 4066bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->viewports[i].m1; 4067bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->viewports[i].a2; 4068bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->viewports[i].m2; 4069bf215546Sopenharmony_ci } 4070bf215546Sopenharmony_ci } 4071bf215546Sopenharmony_ci 4072bf215546Sopenharmony_ci /* W clamp. */ 4073bf215546Sopenharmony_ci if (emit_state->wclamp) { 4074bf215546Sopenharmony_ci const float wclamp = 0.00001f; 4075bf215546Sopenharmony_ci 4076bf215546Sopenharmony_ci header.pres_wclamp = true; 4077bf215546Sopenharmony_ci *buffer_ptr++ = fui(wclamp); 4078bf215546Sopenharmony_ci } 4079bf215546Sopenharmony_ci 4080bf215546Sopenharmony_ci /* Output selects. */ 4081bf215546Sopenharmony_ci if (emit_state->output_selects) { 4082bf215546Sopenharmony_ci header.pres_outselects = true; 4083bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->output_selects; 4084bf215546Sopenharmony_ci } 4085bf215546Sopenharmony_ci 4086bf215546Sopenharmony_ci /* Varying words. */ 4087bf215546Sopenharmony_ci if (emit_state->varying_word0) { 4088bf215546Sopenharmony_ci header.pres_varying_word0 = true; 4089bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->varying_word[0]; 4090bf215546Sopenharmony_ci } 4091bf215546Sopenharmony_ci 4092bf215546Sopenharmony_ci if (emit_state->varying_word1) { 4093bf215546Sopenharmony_ci header.pres_varying_word1 = true; 4094bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->varying_word[1]; 4095bf215546Sopenharmony_ci } 4096bf215546Sopenharmony_ci 4097bf215546Sopenharmony_ci if (emit_state->varying_word2) { 4098bf215546Sopenharmony_ci /* We only emit this on the first draw of a render job to prevent us 4099bf215546Sopenharmony_ci * from inheriting a non-zero value set elsewhere. 4100bf215546Sopenharmony_ci */ 4101bf215546Sopenharmony_ci header.pres_varying_word2 = true; 4102bf215546Sopenharmony_ci *buffer_ptr++ = 0; 4103bf215546Sopenharmony_ci } 4104bf215546Sopenharmony_ci 4105bf215546Sopenharmony_ci /* PPP control. */ 4106bf215546Sopenharmony_ci if (emit_state->ppp_control) { 4107bf215546Sopenharmony_ci header.pres_ppp_ctrl = true; 4108bf215546Sopenharmony_ci *buffer_ptr++ = ppp_state->ppp_control; 4109bf215546Sopenharmony_ci } 4110bf215546Sopenharmony_ci 4111bf215546Sopenharmony_ci if (emit_state->stream_out) { 4112bf215546Sopenharmony_ci /* We only emit this on the first draw of a render job to prevent us 4113bf215546Sopenharmony_ci * from inheriting a non-zero value set elsewhere. 4114bf215546Sopenharmony_ci */ 4115bf215546Sopenharmony_ci header.pres_stream_out_size = true; 4116bf215546Sopenharmony_ci *buffer_ptr++ = 0; 4117bf215546Sopenharmony_ci } 4118bf215546Sopenharmony_ci } 4119bf215546Sopenharmony_ci 4120bf215546Sopenharmony_ci if (!ppp_state_header) 4121bf215546Sopenharmony_ci return VK_SUCCESS; 4122bf215546Sopenharmony_ci 4123bf215546Sopenharmony_ci ppp_state_words_count = buffer_ptr - ppp_state_words; 4124bf215546Sopenharmony_ci ppp_state_words[0] = ppp_state_header; 4125bf215546Sopenharmony_ci 4126bf215546Sopenharmony_ci result = pvr_cmd_buffer_alloc_mem(cmd_buffer, 4127bf215546Sopenharmony_ci cmd_buffer->device->heaps.general_heap, 4128bf215546Sopenharmony_ci ppp_state_words_count * sizeof(uint32_t), 4129bf215546Sopenharmony_ci PVR_BO_ALLOC_FLAG_CPU_MAPPED, 4130bf215546Sopenharmony_ci &pvr_bo); 4131bf215546Sopenharmony_ci if (result != VK_SUCCESS) 4132bf215546Sopenharmony_ci return result; 4133bf215546Sopenharmony_ci 4134bf215546Sopenharmony_ci memcpy(pvr_bo->bo->map, 4135bf215546Sopenharmony_ci ppp_state_words, 4136bf215546Sopenharmony_ci ppp_state_words_count * sizeof(uint32_t)); 4137bf215546Sopenharmony_ci 4138bf215546Sopenharmony_ci /* Write the VDM state update into the VDM control stream. */ 4139bf215546Sopenharmony_ci pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE0, state0) { 4140bf215546Sopenharmony_ci state0.word_count = ppp_state_words_count; 4141bf215546Sopenharmony_ci state0.addrmsb = pvr_bo->vma->dev_addr; 4142bf215546Sopenharmony_ci } 4143bf215546Sopenharmony_ci 4144bf215546Sopenharmony_ci pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE1, state1) { 4145bf215546Sopenharmony_ci state1.addrlsb = pvr_bo->vma->dev_addr; 4146bf215546Sopenharmony_ci } 4147bf215546Sopenharmony_ci 4148bf215546Sopenharmony_ci if (emit_state->isp_dbsc && 4149bf215546Sopenharmony_ci cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { 4150bf215546Sopenharmony_ci pvr_finishme("Unimplemented path!!"); 4151bf215546Sopenharmony_ci } 4152bf215546Sopenharmony_ci 4153bf215546Sopenharmony_ci state->emit_state_bits = 0; 4154bf215546Sopenharmony_ci 4155bf215546Sopenharmony_ci return VK_SUCCESS; 4156bf215546Sopenharmony_ci} 4157bf215546Sopenharmony_ci 4158bf215546Sopenharmony_cistatic VkResult 4159bf215546Sopenharmony_cipvr_emit_dirty_ppp_state(struct pvr_cmd_buffer *const cmd_buffer, 4160bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const sub_cmd) 4161bf215546Sopenharmony_ci{ 4162bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 4163bf215546Sopenharmony_ci const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline; 4164bf215546Sopenharmony_ci const bool dirty_stencil = state->dirty.compare_mask || 4165bf215546Sopenharmony_ci state->dirty.write_mask || state->dirty.reference; 4166bf215546Sopenharmony_ci VkResult result; 4167bf215546Sopenharmony_ci 4168bf215546Sopenharmony_ci if (!(dirty_stencil || state->dirty.depth_bias || 4169bf215546Sopenharmony_ci state->dirty.fragment_descriptors || state->dirty.line_width || 4170bf215546Sopenharmony_ci state->dirty.gfx_pipeline_binding || state->dirty.scissor || 4171bf215546Sopenharmony_ci state->dirty.userpass_spawn || state->dirty.viewport || 4172bf215546Sopenharmony_ci state->emit_state_bits)) { 4173bf215546Sopenharmony_ci return VK_SUCCESS; 4174bf215546Sopenharmony_ci } 4175bf215546Sopenharmony_ci 4176bf215546Sopenharmony_ci if (state->dirty.gfx_pipeline_binding) { 4177bf215546Sopenharmony_ci struct PVRX(TA_STATE_ISPA) ispa; 4178bf215546Sopenharmony_ci 4179bf215546Sopenharmony_ci pvr_setup_output_select(cmd_buffer); 4180bf215546Sopenharmony_ci pvr_setup_isp_faces_and_control(cmd_buffer, &ispa); 4181bf215546Sopenharmony_ci pvr_setup_triangle_merging_flag(cmd_buffer, &ispa); 4182bf215546Sopenharmony_ci } else if (dirty_stencil || state->dirty.line_width || 4183bf215546Sopenharmony_ci state->dirty.userpass_spawn) { 4184bf215546Sopenharmony_ci pvr_setup_isp_faces_and_control(cmd_buffer, NULL); 4185bf215546Sopenharmony_ci } 4186bf215546Sopenharmony_ci 4187bf215546Sopenharmony_ci if (!gfx_pipeline->raster_state.discard_enable && 4188bf215546Sopenharmony_ci state->dirty.fragment_descriptors && 4189bf215546Sopenharmony_ci gfx_pipeline->fragment_shader_state.bo) { 4190bf215546Sopenharmony_ci pvr_setup_fragment_state_pointers(cmd_buffer, sub_cmd); 4191bf215546Sopenharmony_ci } 4192bf215546Sopenharmony_ci 4193bf215546Sopenharmony_ci pvr_setup_isp_depth_bias_scissor_state(cmd_buffer); 4194bf215546Sopenharmony_ci 4195bf215546Sopenharmony_ci if (state->dirty.viewport) 4196bf215546Sopenharmony_ci pvr_setup_viewport(cmd_buffer); 4197bf215546Sopenharmony_ci 4198bf215546Sopenharmony_ci pvr_setup_ppp_control(cmd_buffer); 4199bf215546Sopenharmony_ci 4200bf215546Sopenharmony_ci if (gfx_pipeline->raster_state.cull_mode == VK_CULL_MODE_FRONT_AND_BACK) { 4201bf215546Sopenharmony_ci /* FIXME: Port SetNegativeViewport(). */ 4202bf215546Sopenharmony_ci } 4203bf215546Sopenharmony_ci 4204bf215546Sopenharmony_ci result = pvr_emit_ppp_state(cmd_buffer, sub_cmd); 4205bf215546Sopenharmony_ci if (result != VK_SUCCESS) 4206bf215546Sopenharmony_ci return result; 4207bf215546Sopenharmony_ci 4208bf215546Sopenharmony_ci return VK_SUCCESS; 4209bf215546Sopenharmony_ci} 4210bf215546Sopenharmony_ci 4211bf215546Sopenharmony_cistatic void 4212bf215546Sopenharmony_cipvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info, 4213bf215546Sopenharmony_ci const uint32_t vs_output_size, 4214bf215546Sopenharmony_ci const bool raster_enable, 4215bf215546Sopenharmony_ci uint32_t *const cam_size_out, 4216bf215546Sopenharmony_ci uint32_t *const vs_max_instances_out) 4217bf215546Sopenharmony_ci{ 4218bf215546Sopenharmony_ci /* First work out the size of a vertex in the UVS and multiply by 4 for 4219bf215546Sopenharmony_ci * column ordering. 4220bf215546Sopenharmony_ci */ 4221bf215546Sopenharmony_ci const uint32_t uvs_vertex_vector_size_in_dwords = 4222bf215546Sopenharmony_ci (vs_output_size + 1U + raster_enable * 4U) * 4U; 4223bf215546Sopenharmony_ci const uint32_t vdm_cam_size = 4224bf215546Sopenharmony_ci PVR_GET_FEATURE_VALUE(dev_info, vdm_cam_size, 32U); 4225bf215546Sopenharmony_ci 4226bf215546Sopenharmony_ci /* This is a proxy for 8XE. */ 4227bf215546Sopenharmony_ci if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) && 4228bf215546Sopenharmony_ci vdm_cam_size < 96U) { 4229bf215546Sopenharmony_ci /* Comparisons are based on size including scratch per vertex vector. */ 4230bf215546Sopenharmony_ci if (uvs_vertex_vector_size_in_dwords < (14U * 4U)) { 4231bf215546Sopenharmony_ci *cam_size_out = MIN2(31U, vdm_cam_size - 1U); 4232bf215546Sopenharmony_ci *vs_max_instances_out = 16U; 4233bf215546Sopenharmony_ci } else if (uvs_vertex_vector_size_in_dwords < (20U * 4U)) { 4234bf215546Sopenharmony_ci *cam_size_out = 15U; 4235bf215546Sopenharmony_ci *vs_max_instances_out = 16U; 4236bf215546Sopenharmony_ci } else if (uvs_vertex_vector_size_in_dwords < (28U * 4U)) { 4237bf215546Sopenharmony_ci *cam_size_out = 11U; 4238bf215546Sopenharmony_ci *vs_max_instances_out = 12U; 4239bf215546Sopenharmony_ci } else if (uvs_vertex_vector_size_in_dwords < (44U * 4U)) { 4240bf215546Sopenharmony_ci *cam_size_out = 7U; 4241bf215546Sopenharmony_ci *vs_max_instances_out = 8U; 4242bf215546Sopenharmony_ci } else if (PVR_HAS_FEATURE(dev_info, 4243bf215546Sopenharmony_ci simple_internal_parameter_format_v2) || 4244bf215546Sopenharmony_ci uvs_vertex_vector_size_in_dwords < (64U * 4U)) { 4245bf215546Sopenharmony_ci *cam_size_out = 7U; 4246bf215546Sopenharmony_ci *vs_max_instances_out = 4U; 4247bf215546Sopenharmony_ci } else { 4248bf215546Sopenharmony_ci *cam_size_out = 3U; 4249bf215546Sopenharmony_ci *vs_max_instances_out = 2U; 4250bf215546Sopenharmony_ci } 4251bf215546Sopenharmony_ci } else { 4252bf215546Sopenharmony_ci /* Comparisons are based on size including scratch per vertex vector. */ 4253bf215546Sopenharmony_ci if (uvs_vertex_vector_size_in_dwords <= (32U * 4U)) { 4254bf215546Sopenharmony_ci /* output size <= 27 + 5 scratch. */ 4255bf215546Sopenharmony_ci *cam_size_out = MIN2(95U, vdm_cam_size - 1U); 4256bf215546Sopenharmony_ci *vs_max_instances_out = 0U; 4257bf215546Sopenharmony_ci } else if (uvs_vertex_vector_size_in_dwords <= 48U * 4U) { 4258bf215546Sopenharmony_ci /* output size <= 43 + 5 scratch */ 4259bf215546Sopenharmony_ci *cam_size_out = 63U; 4260bf215546Sopenharmony_ci if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U) 4261bf215546Sopenharmony_ci *vs_max_instances_out = 16U; 4262bf215546Sopenharmony_ci else 4263bf215546Sopenharmony_ci *vs_max_instances_out = 0U; 4264bf215546Sopenharmony_ci } else if (uvs_vertex_vector_size_in_dwords <= 64U * 4U) { 4265bf215546Sopenharmony_ci /* output size <= 59 + 5 scratch. */ 4266bf215546Sopenharmony_ci *cam_size_out = 31U; 4267bf215546Sopenharmony_ci if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U) 4268bf215546Sopenharmony_ci *vs_max_instances_out = 16U; 4269bf215546Sopenharmony_ci else 4270bf215546Sopenharmony_ci *vs_max_instances_out = 0U; 4271bf215546Sopenharmony_ci } else { 4272bf215546Sopenharmony_ci *cam_size_out = 15U; 4273bf215546Sopenharmony_ci *vs_max_instances_out = 16U; 4274bf215546Sopenharmony_ci } 4275bf215546Sopenharmony_ci } 4276bf215546Sopenharmony_ci} 4277bf215546Sopenharmony_ci 4278bf215546Sopenharmony_cistatic void 4279bf215546Sopenharmony_cipvr_emit_dirty_vdm_state(const struct pvr_cmd_buffer *const cmd_buffer, 4280bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const sub_cmd) 4281bf215546Sopenharmony_ci{ 4282bf215546Sopenharmony_ci /* FIXME: Assume all state is dirty for the moment. */ 4283bf215546Sopenharmony_ci struct pvr_device_info *const dev_info = 4284bf215546Sopenharmony_ci &cmd_buffer->device->pdevice->dev_info; 4285bf215546Sopenharmony_ci ASSERTED const uint32_t max_user_vertex_output_components = 4286bf215546Sopenharmony_ci pvr_get_max_user_vertex_output_components(dev_info); 4287bf215546Sopenharmony_ci struct PVRX(VDMCTRL_VDM_STATE0) 4288bf215546Sopenharmony_ci header = { pvr_cmd_header(VDMCTRL_VDM_STATE0) }; 4289bf215546Sopenharmony_ci const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 4290bf215546Sopenharmony_ci const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline; 4291bf215546Sopenharmony_ci struct pvr_csb *const csb = &sub_cmd->control_stream; 4292bf215546Sopenharmony_ci uint32_t vs_output_size; 4293bf215546Sopenharmony_ci uint32_t max_instances; 4294bf215546Sopenharmony_ci uint32_t cam_size; 4295bf215546Sopenharmony_ci 4296bf215546Sopenharmony_ci assert(gfx_pipeline); 4297bf215546Sopenharmony_ci 4298bf215546Sopenharmony_ci /* CAM Calculations and HW state take vertex size aligned to DWORDS. */ 4299bf215546Sopenharmony_ci vs_output_size = 4300bf215546Sopenharmony_ci DIV_ROUND_UP(gfx_pipeline->vertex_shader_state.vertex_output_size, 4301bf215546Sopenharmony_ci PVRX(VDMCTRL_VDM_STATE4_VS_OUTPUT_SIZE_UNIT_SIZE)); 4302bf215546Sopenharmony_ci 4303bf215546Sopenharmony_ci assert(vs_output_size <= max_user_vertex_output_components); 4304bf215546Sopenharmony_ci 4305bf215546Sopenharmony_ci pvr_calculate_vertex_cam_size(dev_info, 4306bf215546Sopenharmony_ci vs_output_size, 4307bf215546Sopenharmony_ci true, 4308bf215546Sopenharmony_ci &cam_size, 4309bf215546Sopenharmony_ci &max_instances); 4310bf215546Sopenharmony_ci 4311bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_VDM_STATE0, state0) { 4312bf215546Sopenharmony_ci state0.cam_size = cam_size; 4313bf215546Sopenharmony_ci 4314bf215546Sopenharmony_ci if (gfx_pipeline->input_asm_state.primitive_restart) { 4315bf215546Sopenharmony_ci state0.cut_index_enable = true; 4316bf215546Sopenharmony_ci state0.cut_index_present = true; 4317bf215546Sopenharmony_ci } 4318bf215546Sopenharmony_ci 4319bf215546Sopenharmony_ci switch (gfx_pipeline->input_asm_state.topology) { 4320bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: 4321bf215546Sopenharmony_ci state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_1); 4322bf215546Sopenharmony_ci break; 4323bf215546Sopenharmony_ci 4324bf215546Sopenharmony_ci default: 4325bf215546Sopenharmony_ci state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_0); 4326bf215546Sopenharmony_ci break; 4327bf215546Sopenharmony_ci } 4328bf215546Sopenharmony_ci 4329bf215546Sopenharmony_ci /* If we've bound a different vertex buffer, or this draw-call requires 4330bf215546Sopenharmony_ci * a different PDS attrib data-section from the last draw call (changed 4331bf215546Sopenharmony_ci * base_instance) then we need to specify a new data section. This is 4332bf215546Sopenharmony_ci * also the case if we've switched pipeline or attrib program as the 4333bf215546Sopenharmony_ci * data-section layout will be different. 4334bf215546Sopenharmony_ci */ 4335bf215546Sopenharmony_ci state0.vs_data_addr_present = 4336bf215546Sopenharmony_ci state->dirty.gfx_pipeline_binding || state->dirty.vertex_bindings || 4337bf215546Sopenharmony_ci state->dirty.draw_base_instance || state->dirty.draw_variant; 4338bf215546Sopenharmony_ci 4339bf215546Sopenharmony_ci /* Need to specify new PDS Attrib program if we've bound a different 4340bf215546Sopenharmony_ci * pipeline or we needed a different PDS Attrib variant for this 4341bf215546Sopenharmony_ci * draw-call. 4342bf215546Sopenharmony_ci */ 4343bf215546Sopenharmony_ci state0.vs_other_present = state->dirty.gfx_pipeline_binding || 4344bf215546Sopenharmony_ci state->dirty.draw_variant; 4345bf215546Sopenharmony_ci 4346bf215546Sopenharmony_ci /* UVB_SCRATCH_SELECT_ONE with no rasterization is only valid when 4347bf215546Sopenharmony_ci * stream output is enabled. We use UVB_SCRATCH_SELECT_FIVE because 4348bf215546Sopenharmony_ci * Vulkan doesn't support stream output and the vertex position is 4349bf215546Sopenharmony_ci * always emitted to the UVB. 4350bf215546Sopenharmony_ci */ 4351bf215546Sopenharmony_ci state0.uvs_scratch_size_select = 4352bf215546Sopenharmony_ci PVRX(VDMCTRL_UVS_SCRATCH_SIZE_SELECT_FIVE); 4353bf215546Sopenharmony_ci 4354bf215546Sopenharmony_ci header = state0; 4355bf215546Sopenharmony_ci } 4356bf215546Sopenharmony_ci 4357bf215546Sopenharmony_ci if (header.cut_index_present) { 4358bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_VDM_STATE1, state1) { 4359bf215546Sopenharmony_ci switch (state->index_buffer_binding.type) { 4360bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT32: 4361bf215546Sopenharmony_ci /* FIXME: Defines for these? These seem to come from the Vulkan 4362bf215546Sopenharmony_ci * spec. for VkPipelineInputAssemblyStateCreateInfo 4363bf215546Sopenharmony_ci * primitiveRestartEnable. 4364bf215546Sopenharmony_ci */ 4365bf215546Sopenharmony_ci state1.cut_index = 0xFFFFFFFF; 4366bf215546Sopenharmony_ci break; 4367bf215546Sopenharmony_ci 4368bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT16: 4369bf215546Sopenharmony_ci state1.cut_index = 0xFFFF; 4370bf215546Sopenharmony_ci break; 4371bf215546Sopenharmony_ci 4372bf215546Sopenharmony_ci default: 4373bf215546Sopenharmony_ci unreachable(!"Invalid index type"); 4374bf215546Sopenharmony_ci } 4375bf215546Sopenharmony_ci } 4376bf215546Sopenharmony_ci } 4377bf215546Sopenharmony_ci 4378bf215546Sopenharmony_ci if (header.vs_data_addr_present) { 4379bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_VDM_STATE2, state2) { 4380bf215546Sopenharmony_ci state2.vs_pds_data_base_addr = 4381bf215546Sopenharmony_ci PVR_DEV_ADDR(state->pds_vertex_attrib_offset); 4382bf215546Sopenharmony_ci } 4383bf215546Sopenharmony_ci } 4384bf215546Sopenharmony_ci 4385bf215546Sopenharmony_ci if (header.vs_other_present) { 4386bf215546Sopenharmony_ci const uint32_t usc_unified_store_size_in_bytes = 4387bf215546Sopenharmony_ci gfx_pipeline->vertex_shader_state.vertex_input_size << 2; 4388bf215546Sopenharmony_ci 4389bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_VDM_STATE3, state3) { 4390bf215546Sopenharmony_ci state3.vs_pds_code_base_addr = 4391bf215546Sopenharmony_ci PVR_DEV_ADDR(state->pds_shader.code_offset); 4392bf215546Sopenharmony_ci } 4393bf215546Sopenharmony_ci 4394bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_VDM_STATE4, state4) { 4395bf215546Sopenharmony_ci state4.vs_output_size = vs_output_size; 4396bf215546Sopenharmony_ci } 4397bf215546Sopenharmony_ci 4398bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_VDM_STATE5, state5) { 4399bf215546Sopenharmony_ci state5.vs_max_instances = max_instances; 4400bf215546Sopenharmony_ci state5.vs_usc_common_size = 0U; 4401bf215546Sopenharmony_ci state5.vs_usc_unified_size = DIV_ROUND_UP( 4402bf215546Sopenharmony_ci usc_unified_store_size_in_bytes, 4403bf215546Sopenharmony_ci PVRX(VDMCTRL_VDM_STATE5_VS_USC_UNIFIED_SIZE_UNIT_SIZE)); 4404bf215546Sopenharmony_ci state5.vs_pds_temp_size = 4405bf215546Sopenharmony_ci DIV_ROUND_UP(state->pds_shader.info->temps_required << 2, 4406bf215546Sopenharmony_ci PVRX(VDMCTRL_VDM_STATE5_VS_PDS_TEMP_SIZE_UNIT_SIZE)); 4407bf215546Sopenharmony_ci state5.vs_pds_data_size = 4408bf215546Sopenharmony_ci DIV_ROUND_UP(state->pds_shader.info->data_size_in_dwords << 2, 4409bf215546Sopenharmony_ci PVRX(VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE)); 4410bf215546Sopenharmony_ci } 4411bf215546Sopenharmony_ci } 4412bf215546Sopenharmony_ci} 4413bf215546Sopenharmony_ci 4414bf215546Sopenharmony_cistatic VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer) 4415bf215546Sopenharmony_ci{ 4416bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 4417bf215546Sopenharmony_ci const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline; 4418bf215546Sopenharmony_ci const struct pvr_pipeline_stage_state *const fragment_state = 4419bf215546Sopenharmony_ci &gfx_pipeline->fragment_shader_state.stage_state; 4420bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *sub_cmd; 4421bf215546Sopenharmony_ci bool fstencil_writemask_zero; 4422bf215546Sopenharmony_ci bool bstencil_writemask_zero; 4423bf215546Sopenharmony_ci bool push_descriptors_dirty; 4424bf215546Sopenharmony_ci bool fstencil_keep; 4425bf215546Sopenharmony_ci bool bstencil_keep; 4426bf215546Sopenharmony_ci VkResult result; 4427bf215546Sopenharmony_ci 4428bf215546Sopenharmony_ci pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS); 4429bf215546Sopenharmony_ci 4430bf215546Sopenharmony_ci sub_cmd = &state->current_sub_cmd->gfx; 4431bf215546Sopenharmony_ci sub_cmd->empty_cmd = false; 4432bf215546Sopenharmony_ci 4433bf215546Sopenharmony_ci /* Determine pipeline depth/stencil usage. If a pipeline uses depth or 4434bf215546Sopenharmony_ci * stencil testing, those attachments are using their loaded values, and 4435bf215546Sopenharmony_ci * the loadOps cannot be optimized out. 4436bf215546Sopenharmony_ci */ 4437bf215546Sopenharmony_ci /* Pipeline uses depth testing. */ 4438bf215546Sopenharmony_ci if (sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED && 4439bf215546Sopenharmony_ci gfx_pipeline->depth_compare_op != VK_COMPARE_OP_ALWAYS) { 4440bf215546Sopenharmony_ci sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED; 4441bf215546Sopenharmony_ci } 4442bf215546Sopenharmony_ci 4443bf215546Sopenharmony_ci /* Pipeline uses stencil testing. */ 4444bf215546Sopenharmony_ci if (sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED && 4445bf215546Sopenharmony_ci (gfx_pipeline->stencil_front.compare_op != VK_COMPARE_OP_ALWAYS || 4446bf215546Sopenharmony_ci gfx_pipeline->stencil_back.compare_op != VK_COMPARE_OP_ALWAYS)) { 4447bf215546Sopenharmony_ci sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED; 4448bf215546Sopenharmony_ci } 4449bf215546Sopenharmony_ci 4450bf215546Sopenharmony_ci if (PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info, 4451bf215546Sopenharmony_ci compute_overlap)) { 4452bf215546Sopenharmony_ci uint32_t coefficient_size = 4453bf215546Sopenharmony_ci DIV_ROUND_UP(fragment_state->coefficient_size, 4454bf215546Sopenharmony_ci PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE)); 4455bf215546Sopenharmony_ci 4456bf215546Sopenharmony_ci if (coefficient_size > 4457bf215546Sopenharmony_ci PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_MAX_SIZE)) 4458bf215546Sopenharmony_ci sub_cmd->disable_compute_overlap = true; 4459bf215546Sopenharmony_ci } 4460bf215546Sopenharmony_ci 4461bf215546Sopenharmony_ci sub_cmd->frag_uses_atomic_ops |= fragment_state->uses_atomic_ops; 4462bf215546Sopenharmony_ci sub_cmd->frag_has_side_effects |= fragment_state->has_side_effects; 4463bf215546Sopenharmony_ci sub_cmd->frag_uses_texture_rw |= fragment_state->uses_texture_rw; 4464bf215546Sopenharmony_ci sub_cmd->vertex_uses_texture_rw |= 4465bf215546Sopenharmony_ci gfx_pipeline->vertex_shader_state.stage_state.uses_texture_rw; 4466bf215546Sopenharmony_ci 4467bf215546Sopenharmony_ci fstencil_keep = 4468bf215546Sopenharmony_ci (gfx_pipeline->stencil_front.fail_op == VK_STENCIL_OP_KEEP) && 4469bf215546Sopenharmony_ci (gfx_pipeline->stencil_front.pass_op == VK_STENCIL_OP_KEEP); 4470bf215546Sopenharmony_ci bstencil_keep = (gfx_pipeline->stencil_back.fail_op == VK_STENCIL_OP_KEEP) && 4471bf215546Sopenharmony_ci (gfx_pipeline->stencil_back.pass_op == VK_STENCIL_OP_KEEP); 4472bf215546Sopenharmony_ci fstencil_writemask_zero = (state->dynamic.common.write_mask.front == 0); 4473bf215546Sopenharmony_ci bstencil_writemask_zero = (state->dynamic.common.write_mask.back == 0); 4474bf215546Sopenharmony_ci 4475bf215546Sopenharmony_ci /* Set stencil modified flag if: 4476bf215546Sopenharmony_ci * - Neither front nor back-facing stencil has a fail_op/pass_op of KEEP. 4477bf215546Sopenharmony_ci * - Neither front nor back-facing stencil has a write_mask of zero. 4478bf215546Sopenharmony_ci */ 4479bf215546Sopenharmony_ci if (!(fstencil_keep && bstencil_keep) && 4480bf215546Sopenharmony_ci !(fstencil_writemask_zero && bstencil_writemask_zero)) { 4481bf215546Sopenharmony_ci sub_cmd->modifies_stencil = true; 4482bf215546Sopenharmony_ci } 4483bf215546Sopenharmony_ci 4484bf215546Sopenharmony_ci /* Set depth modified flag if depth write is enabled. */ 4485bf215546Sopenharmony_ci if (!gfx_pipeline->depth_write_disable) 4486bf215546Sopenharmony_ci sub_cmd->modifies_depth = true; 4487bf215546Sopenharmony_ci 4488bf215546Sopenharmony_ci /* If either the data or code changes for pds vertex attribs, regenerate the 4489bf215546Sopenharmony_ci * data segment. 4490bf215546Sopenharmony_ci */ 4491bf215546Sopenharmony_ci if (state->dirty.vertex_bindings || state->dirty.gfx_pipeline_binding || 4492bf215546Sopenharmony_ci state->dirty.draw_variant || state->dirty.draw_base_instance) { 4493bf215546Sopenharmony_ci enum pvr_pds_vertex_attrib_program_type prog_type; 4494bf215546Sopenharmony_ci const struct pvr_pds_attrib_program *program; 4495bf215546Sopenharmony_ci 4496bf215546Sopenharmony_ci if (state->draw_state.draw_indirect) 4497bf215546Sopenharmony_ci prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT; 4498bf215546Sopenharmony_ci else if (state->draw_state.base_instance) 4499bf215546Sopenharmony_ci prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE; 4500bf215546Sopenharmony_ci else 4501bf215546Sopenharmony_ci prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC; 4502bf215546Sopenharmony_ci 4503bf215546Sopenharmony_ci program = 4504bf215546Sopenharmony_ci &gfx_pipeline->vertex_shader_state.pds_attrib_programs[prog_type]; 4505bf215546Sopenharmony_ci state->pds_shader.info = &program->info; 4506bf215546Sopenharmony_ci state->pds_shader.code_offset = program->program.code_offset; 4507bf215546Sopenharmony_ci 4508bf215546Sopenharmony_ci state->max_shared_regs = 4509bf215546Sopenharmony_ci MAX2(state->max_shared_regs, pvr_calc_shared_regs_count(gfx_pipeline)); 4510bf215546Sopenharmony_ci 4511bf215546Sopenharmony_ci pvr_setup_vertex_buffers(cmd_buffer, gfx_pipeline); 4512bf215546Sopenharmony_ci } 4513bf215546Sopenharmony_ci 4514bf215546Sopenharmony_ci /* TODO: Check for dirty push constants */ 4515bf215546Sopenharmony_ci 4516bf215546Sopenharmony_ci pvr_validate_push_descriptors(cmd_buffer, &push_descriptors_dirty); 4517bf215546Sopenharmony_ci 4518bf215546Sopenharmony_ci state->dirty.vertex_descriptors = push_descriptors_dirty || 4519bf215546Sopenharmony_ci state->dirty.gfx_pipeline_binding; 4520bf215546Sopenharmony_ci state->dirty.fragment_descriptors = state->dirty.vertex_descriptors; 4521bf215546Sopenharmony_ci 4522bf215546Sopenharmony_ci if (state->dirty.fragment_descriptors) { 4523bf215546Sopenharmony_ci result = pvr_setup_descriptor_mappings( 4524bf215546Sopenharmony_ci cmd_buffer, 4525bf215546Sopenharmony_ci PVR_STAGE_ALLOCATION_FRAGMENT, 4526bf215546Sopenharmony_ci &state->gfx_pipeline->fragment_shader_state.descriptor_state, 4527bf215546Sopenharmony_ci NULL, 4528bf215546Sopenharmony_ci &state->pds_fragment_descriptor_data_offset); 4529bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 4530bf215546Sopenharmony_ci mesa_loge("Could not setup fragment descriptor mappings."); 4531bf215546Sopenharmony_ci return result; 4532bf215546Sopenharmony_ci } 4533bf215546Sopenharmony_ci } 4534bf215546Sopenharmony_ci 4535bf215546Sopenharmony_ci if (state->dirty.vertex_descriptors) { 4536bf215546Sopenharmony_ci uint32_t pds_vertex_descriptor_data_offset; 4537bf215546Sopenharmony_ci 4538bf215546Sopenharmony_ci result = pvr_setup_descriptor_mappings( 4539bf215546Sopenharmony_ci cmd_buffer, 4540bf215546Sopenharmony_ci PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY, 4541bf215546Sopenharmony_ci &state->gfx_pipeline->vertex_shader_state.descriptor_state, 4542bf215546Sopenharmony_ci NULL, 4543bf215546Sopenharmony_ci &pds_vertex_descriptor_data_offset); 4544bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 4545bf215546Sopenharmony_ci mesa_loge("Could not setup vertex descriptor mappings."); 4546bf215546Sopenharmony_ci return result; 4547bf215546Sopenharmony_ci } 4548bf215546Sopenharmony_ci 4549bf215546Sopenharmony_ci pvr_emit_dirty_pds_state(cmd_buffer, 4550bf215546Sopenharmony_ci sub_cmd, 4551bf215546Sopenharmony_ci pds_vertex_descriptor_data_offset); 4552bf215546Sopenharmony_ci } 4553bf215546Sopenharmony_ci 4554bf215546Sopenharmony_ci pvr_emit_dirty_ppp_state(cmd_buffer, sub_cmd); 4555bf215546Sopenharmony_ci pvr_emit_dirty_vdm_state(cmd_buffer, sub_cmd); 4556bf215546Sopenharmony_ci 4557bf215546Sopenharmony_ci state->dirty.gfx_desc_dirty = false; 4558bf215546Sopenharmony_ci state->dirty.blend_constants = false; 4559bf215546Sopenharmony_ci state->dirty.compare_mask = false; 4560bf215546Sopenharmony_ci state->dirty.depth_bias = false; 4561bf215546Sopenharmony_ci state->dirty.draw_base_instance = false; 4562bf215546Sopenharmony_ci state->dirty.draw_variant = false; 4563bf215546Sopenharmony_ci state->dirty.fragment_descriptors = false; 4564bf215546Sopenharmony_ci state->dirty.line_width = false; 4565bf215546Sopenharmony_ci state->dirty.gfx_pipeline_binding = false; 4566bf215546Sopenharmony_ci state->dirty.reference = false; 4567bf215546Sopenharmony_ci state->dirty.scissor = false; 4568bf215546Sopenharmony_ci state->dirty.userpass_spawn = false; 4569bf215546Sopenharmony_ci state->dirty.vertex_bindings = false; 4570bf215546Sopenharmony_ci state->dirty.viewport = false; 4571bf215546Sopenharmony_ci state->dirty.write_mask = false; 4572bf215546Sopenharmony_ci 4573bf215546Sopenharmony_ci return VK_SUCCESS; 4574bf215546Sopenharmony_ci} 4575bf215546Sopenharmony_ci 4576bf215546Sopenharmony_cistatic uint32_t pvr_get_hw_primitive_topology(VkPrimitiveTopology topology) 4577bf215546Sopenharmony_ci{ 4578bf215546Sopenharmony_ci switch (topology) { 4579bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: 4580bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_POINT_LIST); 4581bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: 4582bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST); 4583bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: 4584bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP); 4585bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: 4586bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST); 4587bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: 4588bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP); 4589bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: 4590bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_FAN); 4591bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: 4592bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ); 4593bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: 4594bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ); 4595bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: 4596bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST_ADJ); 4597bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: 4598bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP_ADJ); 4599bf215546Sopenharmony_ci case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: 4600bf215546Sopenharmony_ci return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_PATCH_LIST); 4601bf215546Sopenharmony_ci default: 4602bf215546Sopenharmony_ci unreachable("Undefined primitive topology"); 4603bf215546Sopenharmony_ci } 4604bf215546Sopenharmony_ci} 4605bf215546Sopenharmony_ci 4606bf215546Sopenharmony_cistatic void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer, 4607bf215546Sopenharmony_ci struct pvr_sub_cmd_gfx *const sub_cmd, 4608bf215546Sopenharmony_ci VkPrimitiveTopology topology, 4609bf215546Sopenharmony_ci uint32_t first_vertex, 4610bf215546Sopenharmony_ci uint32_t vertex_count, 4611bf215546Sopenharmony_ci uint32_t first_index, 4612bf215546Sopenharmony_ci uint32_t index_count, 4613bf215546Sopenharmony_ci uint32_t instance_count) 4614bf215546Sopenharmony_ci{ 4615bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 4616bf215546Sopenharmony_ci struct pvr_csb *const csb = &sub_cmd->control_stream; 4617bf215546Sopenharmony_ci struct PVRX(VDMCTRL_INDEX_LIST0) 4618bf215546Sopenharmony_ci list_hdr = { pvr_cmd_header(VDMCTRL_INDEX_LIST0) }; 4619bf215546Sopenharmony_ci pvr_dev_addr_t index_buffer_addr = PVR_DEV_ADDR_INVALID; 4620bf215546Sopenharmony_ci unsigned int index_stride = 0; 4621bf215546Sopenharmony_ci 4622bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) { 4623bf215546Sopenharmony_ci const bool vertex_shader_has_side_effects = 4624bf215546Sopenharmony_ci cmd_buffer->state.gfx_pipeline->vertex_shader_state.stage_state 4625bf215546Sopenharmony_ci .has_side_effects; 4626bf215546Sopenharmony_ci 4627bf215546Sopenharmony_ci list0.primitive_topology = pvr_get_hw_primitive_topology(topology); 4628bf215546Sopenharmony_ci 4629bf215546Sopenharmony_ci /* First instance is not handled in the VDM state, it's implemented as 4630bf215546Sopenharmony_ci * an addition in the PDS vertex fetch. 4631bf215546Sopenharmony_ci */ 4632bf215546Sopenharmony_ci list0.index_count_present = true; 4633bf215546Sopenharmony_ci 4634bf215546Sopenharmony_ci if (instance_count > 1) 4635bf215546Sopenharmony_ci list0.index_instance_count_present = true; 4636bf215546Sopenharmony_ci 4637bf215546Sopenharmony_ci if (first_vertex != 0) 4638bf215546Sopenharmony_ci list0.index_offset_present = true; 4639bf215546Sopenharmony_ci 4640bf215546Sopenharmony_ci if (state->draw_state.draw_indexed) { 4641bf215546Sopenharmony_ci struct pvr_buffer *buffer = state->index_buffer_binding.buffer; 4642bf215546Sopenharmony_ci 4643bf215546Sopenharmony_ci switch (state->index_buffer_binding.type) { 4644bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT32: 4645bf215546Sopenharmony_ci list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B32); 4646bf215546Sopenharmony_ci index_stride = 4; 4647bf215546Sopenharmony_ci break; 4648bf215546Sopenharmony_ci 4649bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT16: 4650bf215546Sopenharmony_ci list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B16); 4651bf215546Sopenharmony_ci index_stride = 2; 4652bf215546Sopenharmony_ci break; 4653bf215546Sopenharmony_ci 4654bf215546Sopenharmony_ci default: 4655bf215546Sopenharmony_ci unreachable("Invalid index type"); 4656bf215546Sopenharmony_ci } 4657bf215546Sopenharmony_ci 4658bf215546Sopenharmony_ci list0.index_addr_present = true; 4659bf215546Sopenharmony_ci index_buffer_addr = PVR_DEV_ADDR_OFFSET( 4660bf215546Sopenharmony_ci buffer->dev_addr, 4661bf215546Sopenharmony_ci state->index_buffer_binding.offset + first_index * index_stride); 4662bf215546Sopenharmony_ci list0.index_base_addrmsb = index_buffer_addr; 4663bf215546Sopenharmony_ci } 4664bf215546Sopenharmony_ci 4665bf215546Sopenharmony_ci list0.degen_cull_enable = 4666bf215546Sopenharmony_ci PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info, 4667bf215546Sopenharmony_ci vdm_degenerate_culling) && 4668bf215546Sopenharmony_ci !vertex_shader_has_side_effects; 4669bf215546Sopenharmony_ci 4670bf215546Sopenharmony_ci list_hdr = list0; 4671bf215546Sopenharmony_ci } 4672bf215546Sopenharmony_ci 4673bf215546Sopenharmony_ci if (list_hdr.index_addr_present) { 4674bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_INDEX_LIST1, list1) { 4675bf215546Sopenharmony_ci list1.index_base_addrlsb = index_buffer_addr; 4676bf215546Sopenharmony_ci } 4677bf215546Sopenharmony_ci } 4678bf215546Sopenharmony_ci 4679bf215546Sopenharmony_ci if (list_hdr.index_count_present) { 4680bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_INDEX_LIST2, list2) { 4681bf215546Sopenharmony_ci list2.index_count = vertex_count | index_count; 4682bf215546Sopenharmony_ci } 4683bf215546Sopenharmony_ci } 4684bf215546Sopenharmony_ci 4685bf215546Sopenharmony_ci if (list_hdr.index_instance_count_present) { 4686bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_INDEX_LIST3, list3) { 4687bf215546Sopenharmony_ci list3.instance_count = instance_count - 1; 4688bf215546Sopenharmony_ci } 4689bf215546Sopenharmony_ci } 4690bf215546Sopenharmony_ci 4691bf215546Sopenharmony_ci if (list_hdr.index_offset_present) { 4692bf215546Sopenharmony_ci pvr_csb_emit (csb, VDMCTRL_INDEX_LIST4, list4) { 4693bf215546Sopenharmony_ci list4.index_offset = first_vertex; 4694bf215546Sopenharmony_ci } 4695bf215546Sopenharmony_ci } 4696bf215546Sopenharmony_ci 4697bf215546Sopenharmony_ci /* TODO: See if we need list_words[5-9]. */ 4698bf215546Sopenharmony_ci} 4699bf215546Sopenharmony_ci 4700bf215546Sopenharmony_civoid pvr_CmdDraw(VkCommandBuffer commandBuffer, 4701bf215546Sopenharmony_ci uint32_t vertexCount, 4702bf215546Sopenharmony_ci uint32_t instanceCount, 4703bf215546Sopenharmony_ci uint32_t firstVertex, 4704bf215546Sopenharmony_ci uint32_t firstInstance) 4705bf215546Sopenharmony_ci{ 4706bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 4707bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 4708bf215546Sopenharmony_ci struct pvr_cmd_buffer_draw_state draw_state; 4709bf215546Sopenharmony_ci VkResult result; 4710bf215546Sopenharmony_ci 4711bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 4712bf215546Sopenharmony_ci 4713bf215546Sopenharmony_ci draw_state.base_vertex = firstVertex; 4714bf215546Sopenharmony_ci draw_state.base_instance = firstInstance; 4715bf215546Sopenharmony_ci draw_state.draw_indirect = false; 4716bf215546Sopenharmony_ci draw_state.draw_indexed = false; 4717bf215546Sopenharmony_ci pvr_update_draw_state(state, &draw_state); 4718bf215546Sopenharmony_ci 4719bf215546Sopenharmony_ci result = pvr_validate_draw_state(cmd_buffer); 4720bf215546Sopenharmony_ci if (result != VK_SUCCESS) 4721bf215546Sopenharmony_ci return; 4722bf215546Sopenharmony_ci 4723bf215546Sopenharmony_ci /* Write the VDM control stream for the primitive. */ 4724bf215546Sopenharmony_ci pvr_emit_vdm_index_list(cmd_buffer, 4725bf215546Sopenharmony_ci &state->current_sub_cmd->gfx, 4726bf215546Sopenharmony_ci state->gfx_pipeline->input_asm_state.topology, 4727bf215546Sopenharmony_ci firstVertex, 4728bf215546Sopenharmony_ci vertexCount, 4729bf215546Sopenharmony_ci 0U, 4730bf215546Sopenharmony_ci 0U, 4731bf215546Sopenharmony_ci instanceCount); 4732bf215546Sopenharmony_ci} 4733bf215546Sopenharmony_ci 4734bf215546Sopenharmony_civoid pvr_CmdDrawIndexed(VkCommandBuffer commandBuffer, 4735bf215546Sopenharmony_ci uint32_t indexCount, 4736bf215546Sopenharmony_ci uint32_t instanceCount, 4737bf215546Sopenharmony_ci uint32_t firstIndex, 4738bf215546Sopenharmony_ci int32_t vertexOffset, 4739bf215546Sopenharmony_ci uint32_t firstInstance) 4740bf215546Sopenharmony_ci{ 4741bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 4742bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 4743bf215546Sopenharmony_ci struct pvr_cmd_buffer_draw_state draw_state; 4744bf215546Sopenharmony_ci VkResult result; 4745bf215546Sopenharmony_ci 4746bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 4747bf215546Sopenharmony_ci 4748bf215546Sopenharmony_ci draw_state.base_vertex = vertexOffset; 4749bf215546Sopenharmony_ci draw_state.base_instance = firstInstance; 4750bf215546Sopenharmony_ci draw_state.draw_indirect = false; 4751bf215546Sopenharmony_ci draw_state.draw_indexed = true; 4752bf215546Sopenharmony_ci pvr_update_draw_state(state, &draw_state); 4753bf215546Sopenharmony_ci 4754bf215546Sopenharmony_ci result = pvr_validate_draw_state(cmd_buffer); 4755bf215546Sopenharmony_ci if (result != VK_SUCCESS) 4756bf215546Sopenharmony_ci return; 4757bf215546Sopenharmony_ci 4758bf215546Sopenharmony_ci /* Write the VDM control stream for the primitive. */ 4759bf215546Sopenharmony_ci pvr_emit_vdm_index_list(cmd_buffer, 4760bf215546Sopenharmony_ci &state->current_sub_cmd->gfx, 4761bf215546Sopenharmony_ci state->gfx_pipeline->input_asm_state.topology, 4762bf215546Sopenharmony_ci vertexOffset, 4763bf215546Sopenharmony_ci 0, 4764bf215546Sopenharmony_ci firstIndex, 4765bf215546Sopenharmony_ci indexCount, 4766bf215546Sopenharmony_ci instanceCount); 4767bf215546Sopenharmony_ci} 4768bf215546Sopenharmony_ci 4769bf215546Sopenharmony_civoid pvr_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, 4770bf215546Sopenharmony_ci VkBuffer _buffer, 4771bf215546Sopenharmony_ci VkDeviceSize offset, 4772bf215546Sopenharmony_ci uint32_t drawCount, 4773bf215546Sopenharmony_ci uint32_t stride) 4774bf215546Sopenharmony_ci{ 4775bf215546Sopenharmony_ci assert(!"Unimplemented"); 4776bf215546Sopenharmony_ci} 4777bf215546Sopenharmony_ci 4778bf215546Sopenharmony_civoid pvr_CmdDrawIndirect(VkCommandBuffer commandBuffer, 4779bf215546Sopenharmony_ci VkBuffer _buffer, 4780bf215546Sopenharmony_ci VkDeviceSize offset, 4781bf215546Sopenharmony_ci uint32_t drawCount, 4782bf215546Sopenharmony_ci uint32_t stride) 4783bf215546Sopenharmony_ci{ 4784bf215546Sopenharmony_ci assert(!"Unimplemented"); 4785bf215546Sopenharmony_ci} 4786bf215546Sopenharmony_ci 4787bf215546Sopenharmony_cistatic VkResult 4788bf215546Sopenharmony_cipvr_resolve_unemitted_resolve_attachments(struct pvr_cmd_buffer *cmd_buffer) 4789bf215546Sopenharmony_ci{ 4790bf215546Sopenharmony_ci pvr_finishme("Add attachment resolve support!"); 4791bf215546Sopenharmony_ci return pvr_cmd_buffer_end_sub_cmd(cmd_buffer); 4792bf215546Sopenharmony_ci} 4793bf215546Sopenharmony_ci 4794bf215546Sopenharmony_civoid pvr_CmdEndRenderPass2(VkCommandBuffer commandBuffer, 4795bf215546Sopenharmony_ci const VkSubpassEndInfo *pSubpassEndInfo) 4796bf215546Sopenharmony_ci{ 4797bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 4798bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 4799bf215546Sopenharmony_ci struct pvr_image_view **attachments; 4800bf215546Sopenharmony_ci VkClearValue *clear_values; 4801bf215546Sopenharmony_ci VkResult result; 4802bf215546Sopenharmony_ci 4803bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 4804bf215546Sopenharmony_ci 4805bf215546Sopenharmony_ci assert(state->render_pass_info.pass); 4806bf215546Sopenharmony_ci assert(state->render_pass_info.framebuffer); 4807bf215546Sopenharmony_ci 4808bf215546Sopenharmony_ci /* TODO: Investigate why pvr_cmd_buffer_end_sub_cmd/EndSubCommand is called 4809bf215546Sopenharmony_ci * twice in this path, one here and one from 4810bf215546Sopenharmony_ci * pvr_resolve_unemitted_resolve_attachments. 4811bf215546Sopenharmony_ci */ 4812bf215546Sopenharmony_ci result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer); 4813bf215546Sopenharmony_ci if (result != VK_SUCCESS) 4814bf215546Sopenharmony_ci return; 4815bf215546Sopenharmony_ci 4816bf215546Sopenharmony_ci result = pvr_resolve_unemitted_resolve_attachments(cmd_buffer); 4817bf215546Sopenharmony_ci if (result != VK_SUCCESS) 4818bf215546Sopenharmony_ci return; 4819bf215546Sopenharmony_ci 4820bf215546Sopenharmony_ci /* Save the required fields before clearing render_pass_info struct. */ 4821bf215546Sopenharmony_ci attachments = state->render_pass_info.attachments; 4822bf215546Sopenharmony_ci clear_values = state->render_pass_info.clear_values; 4823bf215546Sopenharmony_ci 4824bf215546Sopenharmony_ci memset(&state->render_pass_info, 0, sizeof(state->render_pass_info)); 4825bf215546Sopenharmony_ci 4826bf215546Sopenharmony_ci state->render_pass_info.attachments = attachments; 4827bf215546Sopenharmony_ci state->render_pass_info.clear_values = clear_values; 4828bf215546Sopenharmony_ci} 4829bf215546Sopenharmony_ci 4830bf215546Sopenharmony_civoid pvr_CmdExecuteCommands(VkCommandBuffer commandBuffer, 4831bf215546Sopenharmony_ci uint32_t commandBufferCount, 4832bf215546Sopenharmony_ci const VkCommandBuffer *pCommandBuffers) 4833bf215546Sopenharmony_ci{ 4834bf215546Sopenharmony_ci assert(!"Unimplemented"); 4835bf215546Sopenharmony_ci} 4836bf215546Sopenharmony_ci 4837bf215546Sopenharmony_civoid pvr_CmdNextSubpass2(VkCommandBuffer commandBuffer, 4838bf215546Sopenharmony_ci const VkSubpassBeginInfo *pSubpassBeginInfo, 4839bf215546Sopenharmony_ci const VkSubpassEndInfo *pSubpassEndInfo) 4840bf215546Sopenharmony_ci{ 4841bf215546Sopenharmony_ci assert(!"Unimplemented"); 4842bf215546Sopenharmony_ci} 4843bf215546Sopenharmony_ci 4844bf215546Sopenharmony_ci/* This is just enough to handle vkCmdPipelineBarrier(). 4845bf215546Sopenharmony_ci * TODO: Complete? 4846bf215546Sopenharmony_ci */ 4847bf215546Sopenharmony_civoid pvr_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, 4848bf215546Sopenharmony_ci const VkDependencyInfo *pDependencyInfo) 4849bf215546Sopenharmony_ci{ 4850bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 4851bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; 4852bf215546Sopenharmony_ci const struct pvr_render_pass *const render_pass = 4853bf215546Sopenharmony_ci state->render_pass_info.pass; 4854bf215546Sopenharmony_ci VkPipelineStageFlags vk_src_stage_mask = 0U; 4855bf215546Sopenharmony_ci VkPipelineStageFlags vk_dst_stage_mask = 0U; 4856bf215546Sopenharmony_ci uint32_t required_stage_mask = 0U; 4857bf215546Sopenharmony_ci uint32_t src_stage_mask; 4858bf215546Sopenharmony_ci uint32_t dst_stage_mask; 4859bf215546Sopenharmony_ci bool is_barrier_needed; 4860bf215546Sopenharmony_ci 4861bf215546Sopenharmony_ci PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); 4862bf215546Sopenharmony_ci 4863bf215546Sopenharmony_ci for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) { 4864bf215546Sopenharmony_ci vk_src_stage_mask |= pDependencyInfo->pMemoryBarriers[i].srcStageMask; 4865bf215546Sopenharmony_ci vk_dst_stage_mask |= pDependencyInfo->pMemoryBarriers[i].dstStageMask; 4866bf215546Sopenharmony_ci } 4867bf215546Sopenharmony_ci 4868bf215546Sopenharmony_ci for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) { 4869bf215546Sopenharmony_ci vk_src_stage_mask |= 4870bf215546Sopenharmony_ci pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask; 4871bf215546Sopenharmony_ci vk_dst_stage_mask |= 4872bf215546Sopenharmony_ci pDependencyInfo->pBufferMemoryBarriers[i].dstStageMask; 4873bf215546Sopenharmony_ci } 4874bf215546Sopenharmony_ci 4875bf215546Sopenharmony_ci for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) { 4876bf215546Sopenharmony_ci vk_src_stage_mask |= 4877bf215546Sopenharmony_ci pDependencyInfo->pImageMemoryBarriers[i].srcStageMask; 4878bf215546Sopenharmony_ci vk_dst_stage_mask |= 4879bf215546Sopenharmony_ci pDependencyInfo->pImageMemoryBarriers[i].dstStageMask; 4880bf215546Sopenharmony_ci } 4881bf215546Sopenharmony_ci 4882bf215546Sopenharmony_ci src_stage_mask = pvr_stage_mask_src(vk_src_stage_mask); 4883bf215546Sopenharmony_ci dst_stage_mask = pvr_stage_mask_dst(vk_dst_stage_mask); 4884bf215546Sopenharmony_ci 4885bf215546Sopenharmony_ci for (uint32_t stage = 0U; stage != PVR_NUM_SYNC_PIPELINE_STAGES; stage++) { 4886bf215546Sopenharmony_ci if (!(dst_stage_mask & BITFIELD_BIT(stage))) 4887bf215546Sopenharmony_ci continue; 4888bf215546Sopenharmony_ci 4889bf215546Sopenharmony_ci required_stage_mask |= state->barriers_needed[stage]; 4890bf215546Sopenharmony_ci } 4891bf215546Sopenharmony_ci 4892bf215546Sopenharmony_ci src_stage_mask &= required_stage_mask; 4893bf215546Sopenharmony_ci for (uint32_t stage = 0U; stage != PVR_NUM_SYNC_PIPELINE_STAGES; stage++) { 4894bf215546Sopenharmony_ci if (!(dst_stage_mask & BITFIELD_BIT(stage))) 4895bf215546Sopenharmony_ci continue; 4896bf215546Sopenharmony_ci 4897bf215546Sopenharmony_ci state->barriers_needed[stage] &= ~src_stage_mask; 4898bf215546Sopenharmony_ci } 4899bf215546Sopenharmony_ci 4900bf215546Sopenharmony_ci if (src_stage_mask == 0 || dst_stage_mask == 0) { 4901bf215546Sopenharmony_ci is_barrier_needed = false; 4902bf215546Sopenharmony_ci } else if (src_stage_mask == PVR_PIPELINE_STAGE_GEOM_BIT && 4903bf215546Sopenharmony_ci dst_stage_mask == PVR_PIPELINE_STAGE_FRAG_BIT) { 4904bf215546Sopenharmony_ci /* This is implicit so no need to barrier. */ 4905bf215546Sopenharmony_ci is_barrier_needed = false; 4906bf215546Sopenharmony_ci } else if (src_stage_mask == dst_stage_mask && 4907bf215546Sopenharmony_ci util_bitcount(src_stage_mask) == 1) { 4908bf215546Sopenharmony_ci switch (src_stage_mask) { 4909bf215546Sopenharmony_ci case PVR_PIPELINE_STAGE_FRAG_BIT: 4910bf215546Sopenharmony_ci pvr_finishme("Handle fragment stage pipeline barrier."); 4911bf215546Sopenharmony_ci is_barrier_needed = true; 4912bf215546Sopenharmony_ci break; 4913bf215546Sopenharmony_ci 4914bf215546Sopenharmony_ci case PVR_PIPELINE_STAGE_COMPUTE_BIT: { 4915bf215546Sopenharmony_ci struct pvr_sub_cmd *const current_sub_cmd = state->current_sub_cmd; 4916bf215546Sopenharmony_ci 4917bf215546Sopenharmony_ci is_barrier_needed = false; 4918bf215546Sopenharmony_ci 4919bf215546Sopenharmony_ci if (!current_sub_cmd || 4920bf215546Sopenharmony_ci current_sub_cmd->type != PVR_SUB_CMD_TYPE_COMPUTE) { 4921bf215546Sopenharmony_ci break; 4922bf215546Sopenharmony_ci } 4923bf215546Sopenharmony_ci 4924bf215546Sopenharmony_ci /* Multiple dispatches can be merged into a single job. When back to 4925bf215546Sopenharmony_ci * back dispatches have a sequential dependency (CDM -> CDM pipeline 4926bf215546Sopenharmony_ci * barrier) we need to do the following. 4927bf215546Sopenharmony_ci * - Dispatch a kernel which fences all previous memory writes and 4928bf215546Sopenharmony_ci * flushes the MADD cache. 4929bf215546Sopenharmony_ci * - Issue a CDM fence which ensures all previous tasks emitted by 4930bf215546Sopenharmony_ci * the CDM are completed before starting anything new. 4931bf215546Sopenharmony_ci */ 4932bf215546Sopenharmony_ci 4933bf215546Sopenharmony_ci /* Issue Data Fence, Wait for Data Fence (IDFWDF) makes the PDS wait 4934bf215546Sopenharmony_ci * for data. 4935bf215546Sopenharmony_ci */ 4936bf215546Sopenharmony_ci pvr_compute_generate_idfwdf(cmd_buffer, ¤t_sub_cmd->compute); 4937bf215546Sopenharmony_ci 4938bf215546Sopenharmony_ci pvr_compute_generate_fence(cmd_buffer, 4939bf215546Sopenharmony_ci ¤t_sub_cmd->compute, 4940bf215546Sopenharmony_ci false); 4941bf215546Sopenharmony_ci break; 4942bf215546Sopenharmony_ci } 4943bf215546Sopenharmony_ci 4944bf215546Sopenharmony_ci default: 4945bf215546Sopenharmony_ci is_barrier_needed = false; 4946bf215546Sopenharmony_ci break; 4947bf215546Sopenharmony_ci }; 4948bf215546Sopenharmony_ci } else { 4949bf215546Sopenharmony_ci is_barrier_needed = true; 4950bf215546Sopenharmony_ci } 4951bf215546Sopenharmony_ci 4952bf215546Sopenharmony_ci if (render_pass) { 4953bf215546Sopenharmony_ci pvr_finishme("Insert mid fragment stage barrier if needed."); 4954bf215546Sopenharmony_ci } else { 4955bf215546Sopenharmony_ci if (is_barrier_needed) 4956bf215546Sopenharmony_ci pvr_finishme("Insert barrier if needed."); 4957bf215546Sopenharmony_ci } 4958bf215546Sopenharmony_ci} 4959bf215546Sopenharmony_ci 4960bf215546Sopenharmony_civoid pvr_CmdResetEvent2KHR(VkCommandBuffer commandBuffer, 4961bf215546Sopenharmony_ci VkEvent _event, 4962bf215546Sopenharmony_ci VkPipelineStageFlags2 stageMask) 4963bf215546Sopenharmony_ci{ 4964bf215546Sopenharmony_ci assert(!"Unimplemented"); 4965bf215546Sopenharmony_ci} 4966bf215546Sopenharmony_ci 4967bf215546Sopenharmony_civoid pvr_CmdSetEvent2KHR(VkCommandBuffer commandBuffer, 4968bf215546Sopenharmony_ci VkEvent _event, 4969bf215546Sopenharmony_ci const VkDependencyInfo *pDependencyInfo) 4970bf215546Sopenharmony_ci{ 4971bf215546Sopenharmony_ci assert(!"Unimplemented"); 4972bf215546Sopenharmony_ci} 4973bf215546Sopenharmony_ci 4974bf215546Sopenharmony_civoid pvr_CmdWaitEvents2KHR(VkCommandBuffer commandBuffer, 4975bf215546Sopenharmony_ci uint32_t eventCount, 4976bf215546Sopenharmony_ci const VkEvent *pEvents, 4977bf215546Sopenharmony_ci const VkDependencyInfo *pDependencyInfos) 4978bf215546Sopenharmony_ci{ 4979bf215546Sopenharmony_ci assert(!"Unimplemented"); 4980bf215546Sopenharmony_ci} 4981bf215546Sopenharmony_ci 4982bf215546Sopenharmony_civoid pvr_CmdWriteTimestamp2KHR(VkCommandBuffer commandBuffer, 4983bf215546Sopenharmony_ci VkPipelineStageFlags2 stage, 4984bf215546Sopenharmony_ci VkQueryPool queryPool, 4985bf215546Sopenharmony_ci uint32_t query) 4986bf215546Sopenharmony_ci{ 4987bf215546Sopenharmony_ci unreachable("Timestamp queries are not supported."); 4988bf215546Sopenharmony_ci} 4989bf215546Sopenharmony_ci 4990bf215546Sopenharmony_ciVkResult pvr_EndCommandBuffer(VkCommandBuffer commandBuffer) 4991bf215546Sopenharmony_ci{ 4992bf215546Sopenharmony_ci PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); 4993bf215546Sopenharmony_ci struct pvr_cmd_buffer_state *state = &cmd_buffer->state; 4994bf215546Sopenharmony_ci VkResult result; 4995bf215546Sopenharmony_ci 4996bf215546Sopenharmony_ci /* From the Vulkan 1.0 spec: 4997bf215546Sopenharmony_ci * 4998bf215546Sopenharmony_ci * CommandBuffer must be in the recording state. 4999bf215546Sopenharmony_ci */ 5000bf215546Sopenharmony_ci assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_RECORDING); 5001bf215546Sopenharmony_ci 5002bf215546Sopenharmony_ci if (state->status != VK_SUCCESS) 5003bf215546Sopenharmony_ci return state->status; 5004bf215546Sopenharmony_ci 5005bf215546Sopenharmony_ci result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer); 5006bf215546Sopenharmony_ci if (result != VK_SUCCESS) 5007bf215546Sopenharmony_ci return result; 5008bf215546Sopenharmony_ci 5009bf215546Sopenharmony_ci cmd_buffer->status = PVR_CMD_BUFFER_STATUS_EXECUTABLE; 5010bf215546Sopenharmony_ci 5011bf215546Sopenharmony_ci return VK_SUCCESS; 5012bf215546Sopenharmony_ci} 5013