1/* 2 * Copyright © 2022 Imagination Technologies Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#ifndef PVR_PDS_H 25#define PVR_PDS_H 26 27#include <stdbool.h> 28 29#include "pvr_device_info.h" 30#include "pvr_limits.h" 31#include "pds/pvr_rogue_pds_defs.h" 32#include "util/macros.h" 33 34#ifdef __cplusplus 35# define restrict __restrict__ 36#endif 37 38/***************************************************************************** 39 Macro definitions 40*****************************************************************************/ 41 42/* Based on Maximum number of passes that may emit DOUTW x Maximum number that 43 * might be emitted. 44 */ 45#define PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW 6 46/* Based on Maximum number of passes that may emit DOUTW x Maximum number that 47 * might be emitted. 
48 */ 49#define PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW 3 50/* Based on max(max(UBOs,cbuffers), numTextures). */ 51#define PVR_PDS_MAX_NUM_DMA_KICKS 32 52#define PVR_PDS_NUM_VERTEX_STREAMS 32 53#define PVR_PDS_NUM_VERTEX_ELEMENTS 32 54#define PVR_MAXIMUM_ITERATIONS 128 55 56#define PVR_PDS_NUM_COMPUTE_INPUT_REGS 3 57 58#define PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) \ 59 PVR_HAS_FEATURE(dev_info, compute_morton_capable) && \ 60 !PVR_HAS_ERN(dev_info, 45493) 61 62/* FIXME: Change BIL to SPV. */ 63/* Any variable location can have at most 4 32-bit components. */ 64#define BIL_COMPONENTS_PER_LOCATION 4 65 66/* Maximum number of DDMAD's that may be performed (Num attribs * Num DMA's per 67 * attribute). 68 */ 69#define PVR_MAX_VERTEX_ATTRIB_DMAS \ 70 (PVR_MAX_VERTEX_INPUT_BINDINGS * BIL_COMPONENTS_PER_LOCATION) 71 72/***************************************************************************** 73 Typedefs 74*****************************************************************************/ 75 76/* FIXME: We might need to change some bools to this. 
*/ 77typedef uint32_t PVR_PDS_BOOL; 78 79/***************************************************************************** 80 Enums 81*****************************************************************************/ 82 83enum pvr_pds_generate_mode { 84 PDS_GENERATE_SIZES, 85 PDS_GENERATE_CODE_SEGMENT, 86 PDS_GENERATE_DATA_SEGMENT, 87 PDS_GENERATE_CODEDATA_SEGMENTS 88}; 89 90enum pvr_pds_store_type { PDS_COMMON_STORE, PDS_UNIFIED_STORE }; 91 92enum pvr_pds_vertex_attrib_program_type { 93 PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC, 94 PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE, 95 PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT, 96 PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT 97}; 98 99/***************************************************************************** 100 Structure definitions 101*****************************************************************************/ 102 103struct pvr_psc_register { 104 uint32_t num; 105 106 unsigned int size; /* size of each element. */ 107 unsigned int dim : 4; /* max number of elements. */ 108 unsigned int index; /* offset into array. */ 109 110 unsigned int cast; 111 112 unsigned int type; 113 uint64_t name; 114 bool auto_assign; 115 unsigned int original_type; 116}; 117 118struct pvr_psc_program_output { 119 const uint32_t *code; 120 121 struct pvr_psc_register *data; 122 unsigned int data_count; 123 124 unsigned int data_size_aligned; 125 unsigned int code_size_aligned; 126 unsigned int temp_size_aligned; 127 128 unsigned int data_size; 129 unsigned int code_size; 130 unsigned int temp_size; 131 132 void (*write_data)(void *data, uint32_t *buffer); 133}; 134 135struct pvr_pds_usc_task_control { 136 uint64_t src0; 137}; 138 139/* Up to 4 64-bit state words currently supported. */ 140#define PVR_PDS_MAX_NUM_DOUTW_CONSTANTS 4 141 142/* Structure for DOUTW. 
*/ 143struct pvr_pds_doutw_control { 144 enum pvr_pds_store_type dest_store; 145 uint32_t num_const64; 146 uint64_t doutw_data[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS]; 147 bool last_instruction; 148 149 uint32_t *data_segment; 150 uint32_t data_size; 151 uint32_t code_size; 152}; 153 154/* Structure representing the PDS pixel event program. 155 * 156 * data_segment - pointer to the data segment 157 * task_control - USC task control words 158 * emit_words - array of Emit words 159 * data_size - size of data segment 160 * code_size - size of code segment 161 */ 162struct pvr_pds_event_program { 163 uint32_t *data_segment; 164 struct pvr_pds_usc_task_control task_control; 165 166 uint32_t num_emit_word_pairs; 167 uint32_t *emit_words; 168 169 uint32_t data_size; 170 uint32_t code_size; 171}; 172 173/* 174 * Structure representing the PDS pixel shader secondary attribute program. 175 * 176 * data_segment - pointer to the data segment 177 * 178 * num_uniform_dma_kicks - number of Uniform DMA kicks 179 * uniform_dma_control - array of Uniform DMA control words 180 * uniform_dma_address - array of Uniform DMA address words 181 * 182 * num_texture_dma_kicks - number of Texture State DMA kicks 183 * texture_dma_control - array of Texture State DMA control words 184 * texture_dma_address - array of Texture State DMA address words 185 * 186 * data_size - size of data segment 187 * code_size - size of code segment 188 * 189 * temps_used - PDS Temps 190 */ 191struct pvr_pds_pixel_shader_sa_program { 192 uint32_t *data_segment; 193 194 uint32_t num_dword_doutw; 195 uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW]; 196 uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW]; 197 198 uint32_t num_q_word_doutw; 199 uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW]; 200 uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW]; 201 202 uint32_t num_uniform_dma_kicks; 203 uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS]; 204 uint32_t 
uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS]; 205 206 uint32_t num_texture_dma_kicks; 207 uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS]; 208 uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS]; 209 210 bool kick_usc; 211 bool write_tile_position; 212 uint32_t tile_position_attr_dest; 213 struct pvr_pds_usc_task_control usc_task_control; 214 215 bool clear; 216 uint32_t *clear_color; 217 uint32_t clear_color_dest_reg; 218 bool packed_clear; 219 220 uint32_t data_size; 221 uint32_t code_size; 222 223 uint32_t temps_used; 224}; 225 226/* Structure representing the PDS pixel shader program. 227 * 228 * data_segment - pointer to the data segment 229 * usc_task_control - array of USC task control words 230 * 231 * data_size - size of data segment 232 * code_size - size of code segment 233 */ 234struct pvr_pds_kickusc_program { 235 uint32_t *data_segment; 236 struct pvr_pds_usc_task_control usc_task_control; 237 238 uint32_t data_size; 239 uint32_t code_size; 240}; 241 242/* Structure representing the PDS fence/doutc program. 243 * 244 * data_segment - pointer to the data segment 245 * data_size - size of data segment 246 * code_size - size of code segment 247 */ 248struct pvr_pds_fence_program { 249 uint32_t *data_segment; 250 uint32_t fence_constant_word; 251 uint32_t data_size; 252 uint32_t code_size; 253}; 254 255/* Structure representing the PDS coefficient loading. 
256 * 257 * data_segment - pointer to the data segment 258 * num_fpu_iterators - number of FPU iterators 259 * FPU_iterators - array of FPU iterator control words 260 * destination - array of Common Store destinations 261 * 262 * data_size - size of data segment 263 * code_size - size of code segment 264 */ 265struct pvr_pds_coeff_loading_program { 266 uint32_t *data_segment; 267 uint32_t num_fpu_iterators; 268 uint32_t FPU_iterators[PVR_MAXIMUM_ITERATIONS]; 269 uint32_t destination[PVR_MAXIMUM_ITERATIONS]; 270 271 uint32_t data_size; 272 uint32_t code_size; 273 274 uint32_t temps_used; 275}; 276 277/* Structure representing the PDS vertex shader secondary attribute program. 278 * 279 * data_segment - pointer to the data segment 280 * num_dma_kicks - number of DMA kicks 281 * dma_control - array of DMA control words 282 * dma_address - array of DMA address words 283 * 284 * data_size - size of data segment 285 * code_size - size of code segment 286 */ 287struct pvr_pds_vertex_shader_sa_program { 288 uint32_t *data_segment; 289 290 /* num_uniform_dma_kicks, uniform_dma_address, uniform_dma_control, are not 291 * used for generating PDS data section and code section, they are currently 292 * only used to simpler the driver implementation. The driver should correct 293 * these information into num_dma_kicks, dma_address and dma_control to get 294 * the PDS properly generated. 
295 */ 296 297 uint32_t num_dword_doutw; 298 uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW]; 299 uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW]; 300 301 uint32_t num_q_word_doutw; 302 uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW]; 303 uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW]; 304 305 uint32_t num_uniform_dma_kicks; 306 uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS]; 307 uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS]; 308 309 uint32_t num_texture_dma_kicks; 310 uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS]; 311 uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS]; 312 313 uint32_t num_dma_kicks; 314 uint64_t dma_address[PVR_PDS_MAX_NUM_DMA_KICKS]; 315 uint32_t dma_control[PVR_PDS_MAX_NUM_DMA_KICKS]; 316 317 bool kick_usc; 318 struct pvr_pds_usc_task_control usc_task_control; 319 320 /* Shared register buffer base address (VDM/CDM context load case only). */ 321 bool clear_pds_barrier; 322 323 uint32_t data_size; 324 uint32_t code_size; 325}; 326 327/* Structure representing a PDS vertex stream element. 328 * 329 * There are two types of element, repeat DMA and non-repeat DMA. 330 * 331 * Non repeat DMA are the classic DMA of some number of bytes from an offset 332 * into contiguous registers. It is assumed the address and size are dword 333 * aligned. To use this, specify 0 for the component size. Each four bytes read 334 * will go to the next HW register. 335 * 336 * Repeat DMA enables copying of sub dword amounts at non dword aligned 337 * addresses. To use this, specify the component size as either 1,2,3 or 4 338 * bytes. Size specifies the number of components, and each component read 339 * will go to the next HW register. 340 * 341 * In both cases, HW registers are written contiguously. 
342 * 343 * offset - offset of the vertex stream element 344 * size - size of the vertex stream element in bytes for non repeat DMA, or 345 * number of components for repeat DMA. 346 * reg - first vertex stream element register to DMA to. 347 * component_size - Size of component for repeat DMA, or 0 for non repeat dma. 348 */ 349struct pvr_pds_vertex_element { 350 uint32_t offset; 351 uint32_t size; 352 uint16_t reg; 353 uint16_t component_size; 354}; 355 356/* Structure representing a PDS vertex stream. 357 * 358 * instance_data - flag whether the vertex stream is indexed or instance data 359 * read_back - If True, vertex is reading back data output by GPU earlier in 360 * same kick. This will enable MCU coherency if relevant. 361 * multiplier - vertex stream frequency multiplier 362 * shift - vertex stream frequency shift 363 * address - vertex stream address in bytes 364 * buffer_size_in_bytes - buffer size in bytes if vertex attribute is sourced 365 * from buffer object 366 * stride - vertex stream stride in bytes 367 * num_vertices - number of vertices in buffer. Used for OOB checking. 368 - 0 = disable oob checking. 369 * num_elements - number of vertex stream elements 370 * elements - array of vertex stream elements 371 * use_ddmadt - When the has_pds_ddmadt feature is enabled. Boolean allowing 372 * DDMADT to be use per stream element. 373 */ 374struct pvr_pds_vertex_stream { 375 bool current_state; 376 bool instance_data; 377 bool read_back; 378 uint32_t multiplier; 379 uint32_t shift; 380 uint64_t address; 381 uint32_t buffer_size_in_bytes; 382 uint32_t stride; 383 uint32_t num_vertices; 384 uint32_t num_elements; 385 struct pvr_pds_vertex_element elements[PVR_PDS_NUM_VERTEX_ELEMENTS]; 386 387 bool use_ddmadt; 388}; 389 390/* Structure representing the PDS vertex shader program. 391 * 392 * This structure describes the USC code and vertex buffers required 393 * by the PDS vertex loading program. 394 * 395 * data_segment - Pointer to the data segment. 
 * usc_task_control - Description of USC task for vertex shader program.
 * num_streams - Number of vertex streams.
 * iterate_vtx_id - If set, the vertex id should be iterated.
 * vtx_id_register - The register to iterate the VertexID into (if
 *                   applicable).
 * vtx_id_modifier - Value to add to/subtract from the index value received by
 *                   PDS. This is used because the index value received by PDS
 *                   has INDEX_OFFSET added, and generally VertexID wouldn't.
 * vtx_id_sub_modifier - If true, vtx_id_modifier is subtracted, else added.
 * iterate_instance_id - If set, the instance id should be iterated.
 * instance_id_register - The register to iterate the InstanceID into (if
 *                        applicable). The vertex and instance id will both be
 *                        iterated as unsigned ints.
 *
 * iterate_remap_id - Should be set to true if vertex shader needs
 *                    VS_REMAPPED_INDEX_ID (e.g. another TA shader runs after
 *                    it).
 * null_idx - Indicates no index buffer is bound, so every index should be
 *            null_idx_value.
 * null_idx_value - The value to use as index if null_idx is set.
 * data_size - Size of data segment, in dwords. Output by call to
 *             pvr_pds_vertex_shader, and used as input when generating data.
 * code_size - Size of code segment. Output by call to pvr_pds_vertex_shader.
 *             This is the number of dword instructions that are/were
 *             generated.
 * temps_used - Number of temporaries used. Output by call to
 *              pvr_pds_vertex_shader.
421 */ 422struct pvr_pds_vertex_shader_program { 423 uint32_t *data_segment; 424 struct pvr_pds_usc_task_control usc_task_control; 425 uint32_t num_streams; 426 427 bool iterate_vtx_id; 428 uint32_t vtx_id_register; 429 uint32_t vtx_id_modifier; 430 bool vtx_id_sub_modifier; 431 432 bool iterate_instance_id; 433 uint32_t instance_id_register; 434 uint32_t instance_ID_modifier; 435 uint32_t base_instance; 436 437 bool iterate_remap_id; 438 439 bool null_idx; 440 uint32_t null_idx_value; 441 442 uint32_t *stream_patch_offsets; 443 uint32_t num_stream_patches; 444 445 uint32_t data_size; 446 uint32_t code_size; 447 uint32_t temps_used; 448 uint32_t ddmadt_enables; 449 uint32_t skip_stream_flag; 450 451 bool draw_indirect; 452 bool indexed; 453 454 struct pvr_pds_vertex_stream streams[PVR_PDS_NUM_VERTEX_STREAMS]; 455}; 456 457/* Structure representing PDS shared reg storing program. */ 458struct pvr_pds_shared_storing_program { 459 struct pvr_pds_doutw_control doutw_control; /*!< DOUTW state */ 460 struct pvr_pds_kickusc_program usc_task; /*!< DOUTU state */ 461 bool cc_enable; /*!< cc bit is set on the doutu instruction. */ 462 uint32_t data_size; /*!< total data size, non-aligned. */ 463 uint32_t code_size; /*!< total code size, non-aligned. */ 464}; 465 466#define PVR_MAX_STREAMOUT_BUFFERS 4 467 468/* Structure representing stream out init PDS programs. */ 469struct pvr_pds_stream_out_init_program { 470 /* --- Input to PDS_STREAM_OUT_INT_PROGRAM --- */ 471 472 /* Number of buffers to load/store. 473 * This indicates the number of entries in the next two arrays. 474 * Data is loaded/stored contiguously to persistent temps. 475 */ 476 uint32_t num_buffers; 477 478 /* Number of persistent temps in dword to load/store for each buffer. */ 479 uint32_t pds_buffer_data_size[PVR_MAX_STREAMOUT_BUFFERS]; 480 /* The device address for loading/storing persistent temps for each buffer. 
481 * If address is zero, then no data is loaded/stored 482 * into pt registers for the buffer. 483 */ 484 uint64_t dev_address_for_buffer_data[PVR_MAX_STREAMOUT_BUFFERS]; 485 486 /* PDS state update Stream Out Init Programs. */ 487 uint32_t stream_out_init_pds_data_size; 488 uint32_t stream_out_init_pds_code_size; 489}; 490 491/* Structure representing stream out terminate PDS program. */ 492struct pvr_pds_stream_out_terminate_program { 493 /* Input to PDS_STREAM_OUT_TERMINATE_PROGRAM. 494 * 495 * Number of persistent temps in dword used in stream out PDS programs needs 496 * to be stored. 497 * The terminate program writes pds_persistent_temp_size_to_store number 498 * persistent temps to dev_address_for_storing_persistent_temp. 499 */ 500 uint32_t pds_persistent_temp_size_to_store; 501 502 /* The device address for storing persistent temps. */ 503 uint64_t dev_address_for_storing_persistent_temp; 504 505 /* PPP state update Stream Out Program for stream out terminate. */ 506 uint32_t stream_out_terminate_pds_data_size; 507 uint32_t stream_out_terminate_pds_code_size; 508}; 509 510/* Structure representing the PDS compute shader program. 511 * This structure describes the USC code and compute buffers required 512 * by the PDS compute task loading program 513 * 514 * data_segment 515 * pointer to the data segment 516 * usc_task_control 517 * Description of USC task for compute shader program. 518 * data_size 519 * Size of data segment, in dwords. 520 * Output by call to pvr_pds_compute_shader, and used as input when 521 * generating data. code_size Size of code segment. Output by call to 522 * pvr_pds_compute_shader. This is the number of dword instructions that 523 * are/were generated. temps_used Number of temporaries used. Output by call 524 *to pvr_pds_compute_shader. highest_temp The highest temp number used. 
Output 525 *by call to pvr_pds_compute_shader coeff_update_task_branch_size The number of 526 * instructions we need to branch over to skip the coefficient update task. 527 */ 528 529struct pvr_pds_compute_shader_program { 530 uint32_t *data_segment; 531 struct pvr_pds_usc_task_control usc_task_control; 532 struct pvr_pds_usc_task_control usc_task_control_coeff_update; 533 534 uint32_t data_size; 535 uint32_t code_size; 536 537 uint32_t temps_used; 538 uint32_t highest_temp; 539 540 uint32_t local_input_regs[3]; 541 uint32_t work_group_input_regs[3]; 542 uint32_t global_input_regs[3]; 543 544 uint32_t barrier_coefficient; 545 546 bool fence; 547 548 bool flattened_work_groups; 549 550 bool clear_pds_barrier; 551 552 bool has_coefficient_update_task; 553 554 uint32_t coeff_update_task_branch_size; 555 556 bool add_base_workgroup; 557 uint32_t base_workgroup_constant_offset_in_dwords[3]; 558 559 bool kick_usc; 560 561 bool conditional_render; 562 uint32_t cond_render_const_offset_in_dwords; 563 uint32_t cond_render_pred_temp; 564}; 565struct pvr_pds_ldst_control { 566 uint64_t cache_control_const; 567}; 568 569/* Define a value we can use as a register number in the driver to denote that 570 * the value is unused. 
571 */ 572#define PVR_PDS_COMPUTE_INPUT_REG_UNUSED 0xFFFFFFFFU 573 574/***************************************************************************** 575 function declarations 576*****************************************************************************/ 577 578/***************************************************************************** 579 Constructors 580*****************************************************************************/ 581 582void pvr_pds_pixel_shader_sa_initialize( 583 struct pvr_pds_pixel_shader_sa_program *program); 584void pvr_pds_compute_shader_initialize( 585 struct pvr_pds_compute_shader_program *program); 586 587/* Utility */ 588 589uint32_t pvr_pds_append_constant64(uint32_t *constants, 590 uint64_t constant_value, 591 uint32_t *data_size); 592 593uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control, 594 uint64_t *dma_address, 595 uint32_t dest_offset, 596 uint32_t dma_size, 597 uint64_t src_address, 598 const struct pvr_device_info *dev_info); 599 600void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control, 601 uint64_t execution_address, 602 uint32_t usc_temps, 603 uint32_t sample_rate, 604 bool phase_rate_change); 605 606/* Pixel */ 607#define pvr_pds_set_sizes_pixel_shader(X) \ 608 pvr_pds_kick_usc(X, NULL, 0, false, PDS_GENERATE_SIZES) 609#define pvr_pds_generate_pixel_shader_program(X, Y) \ 610 pvr_pds_kick_usc(X, Y, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS) 611 612#define pvr_pds_generate_VDM_sync_program(X, Y) \ 613 pvr_pds_kick_usc(X, Y, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS) 614 615uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program, 616 uint32_t *restrict buffer, 617 enum pvr_pds_generate_mode gen_mode); 618 619uint32_t * 620pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict psControl, 621 uint32_t *restrict buffer, 622 enum pvr_pds_generate_mode gen_mode, 623 const struct pvr_device_info *dev_info); 624 625uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program 
*restrict program, 626 uint32_t *restrict buffer, 627 uint32_t start_next_constant, 628 bool cc_enabled, 629 enum pvr_pds_generate_mode gen_mode); 630 631/* Pixel Secondary */ 632#define pvr_pds_set_sizes_pixel_shader_sa_uniform_data(X, Y) \ 633 pvr_pds_pixel_shader_uniform_texture_data(X, \ 634 NULL, \ 635 PDS_GENERATE_SIZES, \ 636 true, \ 637 Y) 638#define pvr_pds_set_sizes_pixel_shader_sa_texture_data(X, Y) \ 639 pvr_pds_pixel_shader_uniform_texture_data(X, \ 640 NULL, \ 641 PDS_GENERATE_SIZES, \ 642 false, \ 643 Y) 644#define pvr_pds_set_sizes_pixel_shader_uniform_texture_code(X) \ 645 pvr_pds_pixel_shader_uniform_texture_code(X, NULL, PDS_GENERATE_SIZES) 646 647#define pvr_pds_generate_pixel_shader_sa_texture_state_data(X, Y, Z) \ 648 pvr_pds_pixel_shader_uniform_texture_data(X, \ 649 Y, \ 650 PDS_GENERATE_DATA_SEGMENT, \ 651 false, \ 652 Z) 653 654#define pvr_pds_generate_pixel_shader_sa_code_segment(X, Y) \ 655 pvr_pds_pixel_shader_uniform_texture_code(X, Y, PDS_GENERATE_CODE_SEGMENT) 656 657uint32_t *pvr_pds_pixel_shader_uniform_texture_data( 658 struct pvr_pds_pixel_shader_sa_program *restrict program, 659 uint32_t *restrict buffer, 660 enum pvr_pds_generate_mode gen_mode, 661 bool uniform, 662 const struct pvr_device_info *dev_info); 663 664uint32_t *pvr_pds_pixel_shader_uniform_texture_code( 665 struct pvr_pds_pixel_shader_sa_program *restrict program, 666 uint32_t *restrict buffer, 667 enum pvr_pds_generate_mode gen_mode); 668 669/* Vertex */ 670#define pvr_pds_set_sizes_vertex_shader(X, Y) \ 671 pvr_pds_vertex_shader(X, NULL, PDS_GENERATE_SIZES, Y) 672 673#define pvr_pds_generate_vertex_shader_data_segment(X, Y, Z) \ 674 pvr_pds_vertex_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z) 675 676#define pvr_pds_generate_vertex_shader_code_segment(X, Y, Z) \ 677 pvr_pds_vertex_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z) 678 679uint32_t * 680pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program, 681 uint32_t *restrict buffer, 682 enum 
pvr_pds_generate_mode gen_mode, 683 const struct pvr_device_info *dev_info); 684 685/* Compute */ 686uint32_t * 687pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, 688 uint32_t *restrict buffer, 689 enum pvr_pds_generate_mode gen_mode, 690 const struct pvr_device_info *dev_info); 691 692#define pvr_pds_set_sizes_compute_shader(X, Y) \ 693 pvr_pds_compute_shader(X, NULL, PDS_GENERATE_SIZES, Y) 694 695#define pvr_pds_generate_compute_shader_data_segment(X, Y, Z) \ 696 pvr_pds_compute_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z) 697 698#define pvr_pds_generate_compute_shader_code_segment(X, Y, Z) \ 699 pvr_pds_compute_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z) 700 701/* Vertex Secondary */ 702#define pvr_pds_set_sizes_vertex_shader_sa(X, Y) \ 703 pvr_pds_vertex_shader_sa(X, NULL, PDS_GENERATE_SIZES, Y) 704 705#define pvr_pds_generate_vertex_shader_sa_data_segment(X, Y, Z) \ 706 pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_DATA_SEGMENT, Z) 707 708#define pvr_pds_generate_vertex_shader_sa_code_segment(X, Y, Z) \ 709 pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_CODE_SEGMENT, Z) 710 711uint32_t *pvr_pds_vertex_shader_sa( 712 struct pvr_pds_vertex_shader_sa_program *restrict program, 713 uint32_t *restrict buffer, 714 enum pvr_pds_generate_mode gen_mode, 715 const struct pvr_device_info *dev_info); 716 717/* Pixel Event */ 718#define pvr_pds_set_sizes_pixel_event(X, Y) \ 719 pvr_pds_generate_pixel_event(X, NULL, PDS_GENERATE_SIZES, Y) 720 721#define pvr_pds_generate_pixel_event_data_segment(X, Y, Z) \ 722 pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_DATA_SEGMENT, Z) 723 724#define pvr_pds_generate_pixel_event_code_segment(X, Y, Z) \ 725 pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_CODE_SEGMENT, Z) 726 727uint32_t * 728pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program, 729 uint32_t *restrict buffer, 730 enum pvr_pds_generate_mode gen_mode, 731 const struct pvr_device_info *dev_info); 732 733/* Coefficient Loading */ 
734#define pvr_pds_set_sizes_coeff_loading(X) \ 735 pvr_pds_coefficient_loading(X, NULL, PDS_GENERATE_SIZES) 736 737#define pvr_pds_generate_coeff_loading_program(X, Y) \ 738 pvr_pds_coefficient_loading(X, Y, PDS_GENERATE_CODE_SEGMENT) 739 740uint32_t *pvr_pds_coefficient_loading( 741 struct pvr_pds_coeff_loading_program *restrict program, 742 uint32_t *restrict buffer, 743 enum pvr_pds_generate_mode gen_mode); 744 745/* Compute DM barrier-specific conditional code */ 746uint32_t *pvr_pds_generate_compute_barrier_conditional( 747 uint32_t *buffer, 748 enum pvr_pds_generate_mode gen_mode); 749 750/* Shared register storing */ 751uint32_t *pvr_pds_generate_shared_storing_program( 752 struct pvr_pds_shared_storing_program *restrict program, 753 uint32_t *restrict buffer, 754 enum pvr_pds_generate_mode gen_mode, 755 const struct pvr_device_info *dev_info); 756 757/*Shared register loading */ 758uint32_t *pvr_pds_generate_fence_terminate_program( 759 struct pvr_pds_fence_program *restrict program, 760 uint32_t *restrict buffer, 761 enum pvr_pds_generate_mode gen_mode, 762 const struct pvr_device_info *dev_info); 763 764/* CDM Shared register loading */ 765uint32_t *pvr_pds_generate_compute_shared_loading_program( 766 struct pvr_pds_shared_storing_program *restrict program, 767 uint32_t *restrict buffer, 768 enum pvr_pds_generate_mode gen_mode, 769 const struct pvr_device_info *dev_info); 770 771/* Stream out */ 772uint32_t *pvr_pds_generate_stream_out_init_program( 773 struct pvr_pds_stream_out_init_program *restrict program, 774 uint32_t *restrict buffer, 775 bool store_mode, 776 enum pvr_pds_generate_mode gen_mode, 777 const struct pvr_device_info *dev_info); 778 779uint32_t *pvr_pds_generate_stream_out_terminate_program( 780 struct pvr_pds_stream_out_terminate_program *restrict program, 781 uint32_t *restrict buffer, 782 enum pvr_pds_generate_mode gen_mode, 783 const struct pvr_device_info *dev_info); 784 785/* Structure representing DrawIndirect PDS programs. 
*/ 786struct pvr_pds_drawindirect_program { 787 /* --- Input to pvr_pds_drawindirect_program --- */ 788 789 /* Address of the index list block in the VDM control stream. 790 * This must point to a 128-bit aligned index list header. 791 */ 792 uint64_t index_list_addr_buffer; 793 /* Address of arguments for Draw call. Layout is defined by eArgFormat. */ 794 uint64_t arg_buffer; 795 796 /* Address of index buffer. */ 797 uint64_t index_buffer; 798 799 /* The raw (without addr msb in [7:0]) index block header. */ 800 uint32_t index_block_header; 801 802 /* Number of bytes per index. */ 803 uint32_t index_stride; 804 805 /* Used during/after compilation to fill in constant buffer. */ 806 struct pvr_psc_register data[32]; 807 808 /* Results of compilation. */ 809 struct pvr_psc_program_output program; 810 811 /* This is used for ARB_multi_draw_indirect. */ 812 unsigned int count; 813 unsigned int stride; 814 815 /* Internal stuff. */ 816 unsigned int num_views; 817 818 bool support_base_instance; 819 bool increment_draw_id; 820}; 821 822void pvr_pds_generate_draw_arrays_indirect( 823 struct pvr_pds_drawindirect_program *restrict program, 824 uint32_t *restrict buffer, 825 enum pvr_pds_generate_mode gen_mode, 826 const struct pvr_device_info *dev_info); 827void pvr_pds_generate_draw_elements_indirect( 828 struct pvr_pds_drawindirect_program *restrict program, 829 uint32_t *restrict buffer, 830 enum pvr_pds_generate_mode gen_mode, 831 const struct pvr_device_info *dev_info); 832 833uint64_t pvr_pds_encode_st_src0(uint64_t src, 834 uint64_t count4, 835 uint64_t dst_add, 836 bool write_through, 837 const struct pvr_device_info *dev_info); 838 839uint64_t pvr_pds_encode_ld_src0(uint64_t dest, 840 uint64_t count8, 841 uint64_t src_add, 842 bool cached, 843 const struct pvr_device_info *dev_info); 844 845uint32_t *pvr_pds_generate_single_ldst_instruction( 846 bool ld, 847 const struct pvr_pds_ldst_control *control, 848 uint32_t temp_index, 849 uint64_t address, 850 uint32_t 
count, 851 uint32_t *next_constant, 852 uint32_t *total_data_size, 853 uint32_t *total_code_size, 854 uint32_t *buffer, 855 bool data_fence, 856 enum pvr_pds_generate_mode gen_mode, 857 const struct pvr_device_info *dev_info); 858struct pvr_pds_descriptor_set { 859 unsigned int descriptor_set; /* id of the descriptor set. */ 860 unsigned int size_in_dwords; /* Number of dwords to transfer. */ 861 unsigned int destination; /* Destination shared register to which 862 * descriptor entries should be loaded. 863 */ 864 bool primary; /* Primary or secondary? */ 865 unsigned int offset_in_dwords; /* Offset from the start of the descriptor 866 * set to start DMA'ing from. 867 */ 868}; 869 870#define PVR_BUFFER_TYPE_UBO (0) 871#define PVR_BUFFER_TYPES_COMPILE_TIME (1) 872#define PVR_BUFFER_TYPE_BLEND_CONSTS (2) 873#define PVR_BUFFER_TYPE_PUSH_CONSTS (3) 874#define PVR_BUFFER_TYPES_BUFFER_LENGTHS (4) 875#define PVR_BUFFER_TYPE_DYNAMIC (5) 876#define PVR_BUFFER_TYPES_UBO_ZEROING (6) 877#define PVR_BUFFER_TYPE_INVALID (~0) 878 879struct pvr_pds_buffer { 880 uint16_t type; 881 882 uint16_t size_in_dwords; 883 uint32_t destination; 884 885 union { 886 uint32_t *data; 887 struct { 888 uint32_t buffer_id; 889 uint16_t desc_set; 890 uint16_t binding; 891 uint32_t source_offset; 892 }; 893 }; 894}; 895 896#define PVR_PDS_MAX_BUFFERS (24) 897 898struct pvr_descriptor_program_input { 899 /* User-specified descriptor sets. */ 900 unsigned int descriptor_set_count; 901 struct pvr_pds_descriptor_set descriptor_sets[8]; 902 903 /* "State" buffers, including: 904 * compile-time constants 905 * blend constants 906 * push constants 907 * UBOs that have been hoisted. 
908 */ 909 uint32_t buffer_count; 910 struct pvr_pds_buffer buffers[PVR_PDS_MAX_BUFFERS]; 911 912 uint32_t blend_constants_used_mask; 913 914 bool secondary_program_present; 915 struct pvr_pds_usc_task_control secondary_task_control; 916 917 bool must_not_be_empty; 918}; 919 920#define PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED BITFIELD_BIT(0U) 921#define PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED BITFIELD_BIT(1U) 922#define PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT BITFIELD_BIT(2U) 923#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT BITFIELD_BIT(3U) 924#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED BITFIELD_BIT(4U) 925 926/* BaseVertex is used in shader. */ 927#define PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED BITFIELD_BIT(5U) 928 929#define PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED BITFIELD_BIT(6U) 930 931#define PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE BITFIELD_BIT(0U) 932 933struct pvr_pds_vertex_dma { 934 /* Try and keep this structure packing as small as possible. */ 935 uint16_t offset; 936 uint16_t stride; 937 938 uint8_t flags; 939 uint8_t size_in_dwords; 940 uint8_t component_size_in_bytes; 941 uint8_t destination; 942 uint8_t binding_index; 943 uint32_t divisor; 944 945 uint16_t robustness_buffer_offset; 946}; 947 948struct pvr_pds_vertex_primary_program_input { 949 /* Control for the DOUTU that kicks the vertex USC shader. */ 950 struct pvr_pds_usc_task_control usc_task_control; 951 /* List of DMAs (of size dma_count). */ 952 struct pvr_pds_vertex_dma *dma_list; 953 uint32_t dma_count; 954 955 /* ORd bitfield of PVR_PDS_VERTEX_FLAGS_* */ 956 uint32_t flags; 957 958 uint16_t vertex_id_register; 959 uint16_t instance_id_register; 960 961 /* API provided baseInstance (i.e. not from drawIndirect). 
*/ 962 uint32_t base_instance; 963 964 uint16_t base_instance_register; 965 uint16_t base_vertex_register; 966 uint16_t draw_index_register; 967}; 968 969#define PVR_PDS_CONST_MAP_ENTRY_TYPE_NULL (0) 970#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64 (1) 971#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32 (2) 972#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET (3) 973#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER (4) 974#define PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER (5) 975#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS (6) 976#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS (7) 977#define PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS (8) 978 979/* Use if pds_ddmadt is enabled. */ 980#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE (9) 981 982/* Use if pds_ddmadt is not enabled. */ 983#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX (9) 984 985#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE (10) 986#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING (11) 987#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX (12) 988#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_WORKGROUP (13) 989#define PVR_PDS_CONST_MAP_ENTRY_TYPE_COND_RENDER (14) 990 991/* We pack all the following structs tightly into a buffer using += sizeof(x) 992 * offsets, this can lead to data that is not native aligned. Supplying the 993 * packed attribute indicates that unaligned accesses may be required, and the 994 * aligned attribute causes the size of the structure to be aligned to a 995 * specific boundary. 
996 */ 997#define PVR_ALIGNED __attribute__((packed, aligned(1))) 998 999struct pvr_const_map_entry { 1000 uint8_t type; 1001 uint8_t const_offset; 1002} PVR_ALIGNED; 1003 1004struct pvr_const_map_entry_literal32 { 1005 uint8_t type; 1006 uint8_t const_offset; 1007 1008 uint32_t literal_value; 1009} PVR_ALIGNED; 1010 1011struct pvr_const_map_entry_literal64 { 1012 uint8_t type; 1013 uint8_t const_offset; 1014 1015 uint64_t literal_value; 1016} PVR_ALIGNED; 1017 1018struct pvr_const_map_entry_descriptor_set { 1019 uint8_t type; 1020 uint8_t const_offset; 1021 1022 uint32_t descriptor_set; 1023 PVR_PDS_BOOL primary; 1024 uint32_t offset_in_dwords; 1025} PVR_ALIGNED; 1026 1027struct pvr_const_map_entry_constant_buffer { 1028 uint8_t type; 1029 uint8_t const_offset; 1030 1031 uint16_t buffer_id; 1032 uint16_t desc_set; 1033 uint16_t binding; 1034 uint32_t offset; 1035 uint32_t size_in_dwords; 1036} PVR_ALIGNED; 1037 1038struct pvr_const_map_entry_constant_buffer_zeroing { 1039 uint8_t type; 1040 uint8_t const_offset; 1041 1042 uint16_t buffer_id; 1043 uint32_t offset; 1044 uint32_t size_in_dwords; 1045} PVR_ALIGNED; 1046 1047struct pvr_const_map_entry_special_buffer { 1048 uint8_t type; 1049 uint8_t const_offset; 1050 1051 uint8_t buffer_type; 1052 uint32_t buffer_index; 1053} PVR_ALIGNED; 1054 1055struct pvr_const_map_entry_doutu_address { 1056 uint8_t type; 1057 uint8_t const_offset; 1058 1059 uint64_t doutu_control; 1060} PVR_ALIGNED; 1061 1062struct pvr_const_map_entry_vertex_attribute_address { 1063 uint8_t type; 1064 uint8_t const_offset; 1065 1066 uint16_t offset; 1067 uint16_t stride; 1068 uint8_t binding_index; 1069 uint8_t size_in_dwords; 1070} PVR_ALIGNED; 1071 1072struct pvr_const_map_entry_robust_vertex_attribute_address { 1073 uint8_t type; 1074 uint8_t const_offset; 1075 1076 uint16_t offset; 1077 uint16_t stride; 1078 uint8_t binding_index; 1079 uint8_t size_in_dwords; 1080 uint16_t robustness_buffer_offset; 1081 uint8_t component_size_in_bytes; 1082} 
PVR_ALIGNED; 1083 1084struct pvr_const_map_entry_vertex_attribute_max_index { 1085 uint8_t type; 1086 uint8_t const_offset; 1087 1088 uint8_t binding_index; 1089 uint8_t size_in_dwords; 1090 uint16_t offset; 1091 uint16_t stride; 1092 uint8_t component_size_in_bytes; 1093} PVR_ALIGNED; 1094 1095struct pvr_const_map_entry_base_instance { 1096 uint8_t type; 1097 uint8_t const_offset; 1098} PVR_ALIGNED; 1099 1100struct pvr_const_map_entry_base_vertex { 1101 uint8_t type; 1102 uint8_t const_offset; 1103}; 1104 1105struct pvr_pds_const_map_entry_base_workgroup { 1106 uint8_t type; 1107 uint8_t const_offset; 1108 uint8_t workgroup_component; 1109} PVR_ALIGNED; 1110 1111struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size { 1112 uint8_t type; 1113 uint8_t const_offset; 1114 uint8_t binding_index; 1115} PVR_ALIGNED; 1116 1117struct pvr_pds_const_map_entry_cond_render { 1118 uint8_t type; 1119 uint8_t const_offset; 1120 1121 uint32_t cond_render_pred_temp; 1122} PVR_ALIGNED; 1123 1124struct pvr_pds_info { 1125 uint32_t temps_required; 1126 uint32_t code_size_in_dwords; 1127 uint32_t data_size_in_dwords; 1128 1129 uint32_t entry_count; 1130 size_t entries_size_in_bytes; 1131 size_t entries_written_size_in_bytes; 1132 struct pvr_const_map_entry *entries; 1133}; 1134 1135void pvr_pds_generate_descriptor_upload_program( 1136 struct pvr_descriptor_program_input *input_program, 1137 uint32_t *code_section, 1138 struct pvr_pds_info *info); 1139void pvr_pds_generate_vertex_primary_program( 1140 struct pvr_pds_vertex_primary_program_input *input_program, 1141 uint32_t *code, 1142 struct pvr_pds_info *info, 1143 bool use_robust_vertex_fetch, 1144 const struct pvr_device_info *dev_info); 1145 1146/** 1147 * Generate USC address. 1148 * 1149 * \param doutu Location to write the generated address. 1150 * \param execution_address Address to generate from. 
1151 */ 1152static ALWAYS_INLINE void 1153pvr_set_usc_execution_address64(uint64_t *doutu, uint64_t execution_address) 1154{ 1155 doutu[0] |= (((execution_address >> 1156 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSHIFT) 1157 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_SHIFT) & 1158 ~PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_CLRMSK); 1159} 1160 1161#endif /* PVR_PDS_H */ 1162