1/* 2 * Copyright © 2022 Imagination Technologies Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 */ 23 24#include <assert.h> 25#include <stdbool.h> 26#include <stdint.h> 27#include <stdio.h> 28#include <string.h> 29 30#include "pvr_device_info.h" 31#include "pvr_pds.h" 32#include "pvr_rogue_pds_defs.h" 33#include "pvr_rogue_pds_disasm.h" 34#include "pvr_rogue_pds_encode.h" 35#include "util/log.h" 36#include "util/macros.h" 37 38#define H32(X) (uint32_t)((((X) >> 32U) & 0xFFFFFFFFUL)) 39#define L32(X) (uint32_t)(((X)&0xFFFFFFFFUL)) 40 41/***************************************************************************** 42 Macro definitions 43*****************************************************************************/ 44 45#define PVR_PDS_DWORD_SHIFT 2 46 47#define PVR_PDS_CONSTANTS_BLOCK_BASE 0 48#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128 49#define PVR_PDS_TEMPS_BLOCK_BASE 128 50#define PVR_PDS_TEMPS_BLOCK_SIZE 32 51 52#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK 53#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK 54 55/* Map PDS temp registers to the CDM values they contain Work-group IDs are only 56 * available in the coefficient sync task. 57 */ 58#define PVR_PDS_CDM_WORK_GROUP_ID_X 0 59#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1 60#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2 61/* Local IDs are available in every task. 
*/ 62#define PVR_PDS_CDM_LOCAL_ID_X 0 63#define PVR_PDS_CDM_LOCAL_ID_YZ 1 64 65#define PVR_PDS_DOUTW_LOWER32 0x0 66#define PVR_PDS_DOUTW_UPPER32 0x1 67#define PVR_PDS_DOUTW_LOWER64 0x2 68#define PVR_PDS_DOUTW_LOWER128 0x3 69#define PVR_PDS_DOUTW_MAXMASK 0x4 70 71#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U 72#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U) 73 74/***************************************************************************** 75 Static variables 76*****************************************************************************/ 77 78static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = { 79 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER, 80 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER, 81 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64, 82 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64 83}; 84 85/* If has_slc_mcu_cache_control is enabled use cache_control_const[0], else use 86 * cache_control_const[1]. 87 */ 88static const uint32_t cache_control_const[2][2] = { 89 { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS, 90 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED }, 91 { 0, 0 } 92}; 93 94/***************************************************************************** 95 Function definitions 96*****************************************************************************/ 97 98uint64_t pvr_pds_encode_ld_src0(uint64_t dest, 99 uint64_t count8, 100 uint64_t src_add, 101 bool cached, 102 const struct pvr_device_info *dev_info) 103{ 104 uint64_t encoded = 0; 105 106 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) { 107 encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED 108 : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS); 109 } 110 111 encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK) 112 << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT); 113 encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK) 114 << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT); 115 encoded |= (cached ? 
PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED 116 : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS); 117 encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK) 118 << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT); 119 120 return encoded; 121} 122 123uint64_t pvr_pds_encode_st_src0(uint64_t src, 124 uint64_t count4, 125 uint64_t dst_add, 126 bool write_through, 127 const struct pvr_device_info *device_info) 128{ 129 uint64_t encoded = 0; 130 131 if (device_info->features.has_slc_mcu_cache_controls) { 132 encoded |= (write_through 133 ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH 134 : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK); 135 } 136 137 encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK) 138 << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT); 139 encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK) 140 << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT); 141 encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH 142 : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK); 143 encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK) 144 << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT); 145 146 return encoded; 147} 148 149static ALWAYS_INLINE uint32_t 150pvr_pds_encode_doutw_src1(uint32_t dest, 151 uint32_t dword_mask, 152 uint32_t flags, 153 bool cached, 154 const struct pvr_device_info *dev_info) 155{ 156 assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) || 157 ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) || 158 (dword_mask < PVR_PDS_DOUTW_LOWER64)); 159 160 uint32_t encoded = 161 (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT); 162 163 encoded |= dword_mask_const[dword_mask]; 164 165 encoded |= flags; 166 167 encoded |= 168 cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0 169 : 1] 170 [cached ? 
1 : 0]; 171 return encoded; 172} 173 174static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc, 175 uint32_t end, 176 uint32_t src1, 177 uint32_t src0) 178{ 179 return pvr_pds_inst_encode_dout(cc, 180 end, 181 src1, 182 src0, 183 PVR_ROGUE_PDSINST_DSTDOUT_DOUTW); 184} 185 186static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc, 187 uint32_t end, 188 uint32_t src0) 189{ 190 return pvr_pds_inst_encode_dout(cc, 191 end, 192 0, 193 src0, 194 PVR_ROGUE_PDSINST_DSTDOUT_DOUTU); 195} 196 197static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc, 198 uint32_t end) 199{ 200 return pvr_pds_inst_encode_dout(cc, 201 end, 202 0, 203 0, 204 PVR_ROGUE_PDSINST_DSTDOUT_DOUTC); 205} 206 207static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc, 208 uint32_t end, 209 uint32_t src1, 210 uint32_t src0) 211{ 212 return pvr_pds_inst_encode_dout(cc, 213 end, 214 src1, 215 src0, 216 PVR_ROGUE_PDSINST_DSTDOUT_DOUTD); 217} 218 219static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc, 220 uint32_t end, 221 uint32_t src0) 222{ 223 return pvr_pds_inst_encode_dout(cc, 224 end, 225 0, 226 src0, 227 PVR_ROGUE_PDSINST_DSTDOUT_DOUTI); 228} 229 230static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc, 231 uint32_t neg, 232 uint32_t setc, 233 int32_t relative_address) 234{ 235 /* Address should be signed but API only allows unsigned value. */ 236 return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address); 237} 238 239/** 240 * Gets the next constant address and moves the next constant pointer along. 241 * 242 * \param next_constant Pointer to the next constant address. 243 * \param num_constants The number of constants required. 244 * \param count The number of constants allocated. 245 * \return The address of the next constant. 246 */ 247static uint32_t pvr_pds_get_constants(uint32_t *next_constant, 248 uint32_t num_constants, 249 uint32_t *count) 250{ 251 uint32_t constant; 252 253 /* Work out starting constant number. 
For even number of constants, start on 254 * a 64-bit boundary. 255 */ 256 if (num_constants & 1) 257 constant = *next_constant; 258 else 259 constant = (*next_constant + 1) & ~1; 260 261 /* Update the count with the number of constants actually allocated. */ 262 *count += constant + num_constants - *next_constant; 263 264 /* Move the next constant pointer. */ 265 *next_constant = constant + num_constants; 266 267 assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE); 268 269 return constant; 270} 271 272/** 273 * Gets the next temp address and moves the next temp pointer along. 274 * 275 * \param next_temp Pointer to the next temp address. 276 * \param num_temps The number of temps required. 277 * \param count The number of temps allocated. 278 * \return The address of the next temp. 279 */ 280static uint32_t 281pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count) 282{ 283 uint32_t temp; 284 285 /* Work out starting temp number. For even number of temps, start on a 286 * 64-bit boundary. 287 */ 288 if (num_temps & 1) 289 temp = *next_temp; 290 else 291 temp = (*next_temp + 1) & ~1; 292 293 /* Update the count with the number of temps actually allocated. */ 294 *count += temp + num_temps - *next_temp; 295 296 /* Move the next temp pointer. */ 297 *next_temp = temp + num_temps; 298 299 assert((temp + num_temps) <= 300 (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE)); 301 302 return temp; 303} 304 305/** 306 * Write a 32-bit constant indexed by the long range. 307 * 308 * \param data_block Pointer to data block to write to. 309 * \param index Index within the data to write to. 310 * \param dword The 32-bit constant to write. 311 */ 312static void 313pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0) 314{ 315 /* Check range. 
*/ 316 assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER - 317 PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER)); 318 319 data_block[index + 0] = dword0; 320 321 PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index); 322} 323 324/** 325 * Write a 64-bit constant indexed by the long range. 326 * 327 * \param data_block Pointer to data block to write to. 328 * \param index Index within the data to write to. 329 * \param dword0 Lower half of the 64 bit constant. 330 * \param dword1 Upper half of the 64 bit constant. 331 */ 332static void pvr_pds_write_constant64(uint32_t *data_block, 333 uint32_t index, 334 uint32_t dword0, 335 uint32_t dword1) 336{ 337 /* Has to be on 64 bit boundary. */ 338 assert((index & 1) == 0); 339 340 /* Check range. */ 341 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER - 342 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)); 343 344 data_block[index + 0] = dword0; 345 data_block[index + 1] = dword1; 346 347 PVR_PDS_PRINT_DATA("WriteConstant64", 348 ((uint64_t)dword0 << 32) | (uint64_t)dword1, 349 index); 350} 351 352/** 353 * Write a 64-bit constant from a single wide word indexed by the long-range 354 * number. 355 * 356 * \param data_block Pointer to data block to write to. 357 * \param index Index within the data to write to. 358 * \param word The 64-bit constant to write. 359 */ 360 361static void 362pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word) 363{ 364 /* Has to be on 64 bit boundary. */ 365 assert((index & 1) == 0); 366 367 /* Check range. 
*/ 368 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER - 369 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)); 370 371 data_block[index + 0] = L32(word); 372 data_block[index + 1] = H32(word); 373 374 PVR_PDS_PRINT_DATA("WriteWideConstant", word, index); 375} 376 377static void pvr_pds_write_dma_address(uint32_t *data_block, 378 uint32_t index, 379 uint64_t address, 380 bool coherent, 381 const struct pvr_device_info *dev_info) 382{ 383 /* Has to be on 64 bit boundary. */ 384 assert((index & 1) == 0); 385 386 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) 387 address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED; 388 389 /* Check range. */ 390 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER - 391 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)); 392 393 data_block[index + 0] = L32(address); 394 data_block[index + 1] = H32(address); 395 396 PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index); 397} 398 399/** 400 * External API to append a 64-bit constant to an existing data segment 401 * allocation. 402 * 403 * \param constants Pointer to start of data segment. 404 * \param constant_value Value to write to constant. 405 * \param data_size The number of constants allocated. 406 * \returns The address of the next constant. 407 */ 408uint32_t pvr_pds_append_constant64(uint32_t *constants, 409 uint64_t constant_value, 410 uint32_t *data_size) 411{ 412 /* Calculate next constant from current data size. */ 413 uint32_t next_constant = *data_size; 414 uint32_t constant = pvr_pds_get_constants(&next_constant, 2, data_size); 415 416 /* Set the value. */ 417 pvr_pds_write_wide_constant(constants, constant, constant_value); 418 419 return constant; 420} 421 422void pvr_pds_pixel_shader_sa_initialize( 423 struct pvr_pds_pixel_shader_sa_program *program) 424{ 425 memset(program, 0, sizeof(*program)); 426} 427 428/** 429 * Encode a DMA burst. 430 * 431 * \param dma_control DMA control words. 432 * \param dma_address DMA address. 
433 * \param dest_offset Destination offset in the attribute. 434 * \param dma_size The size of the DMA in words. 435 * \param src_address Source address for the burst. 436 * \param dev_info PVR device info structure. 437 * \returns The number of DMA transfers required. 438 */ 439 440uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control, 441 uint64_t *dma_address, 442 uint32_t dest_offset, 443 uint32_t dma_size, 444 uint64_t src_address, 445 const struct pvr_device_info *dev_info) 446{ 447 /* Simplified for MS2. */ 448 449 /* Force to 1 DMA. */ 450 const uint32_t num_kicks = 1; 451 452 dma_control[0] = dma_size 453 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT; 454 dma_control[0] |= dest_offset 455 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT; 456 457 dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED | 458 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE; 459 460 dma_address[0] = src_address; 461 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) { 462 dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED; 463 } 464 465 return num_kicks; 466} 467 468/* FIXME: use the csbgen interface and pvr_csb_pack. 469 * FIXME: use bool for phase_rate_change. 470 */ 471/** 472 * Sets up the USC control words for a DOUTU. 473 * 474 * \param usc_task_control USC task control structure to be setup. 475 * \param execution_address USC execution virtual address. 476 * \param usc_temps Number of USC temps. 477 * \param sample_rate Sample rate for the DOUTU. 478 * \param phase_rate_change Phase rate change for the DOUTU. 479 */ 480void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control, 481 uint64_t execution_address, 482 uint32_t usc_temps, 483 uint32_t sample_rate, 484 bool phase_rate_change) 485{ 486 usc_task_control->src0 = UINT64_C(0); 487 488 /* Set the execution address. 
*/ 489 pvr_set_usc_execution_address64(&(usc_task_control->src0), 490 execution_address); 491 492 if (usc_temps > 0) { 493 /* Temps are allocated in blocks of 4 dwords. */ 494 usc_temps = 495 DIV_ROUND_UP(usc_temps, 496 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE); 497 498 /* Check for losing temps due to too many requested. */ 499 assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) == 500 usc_temps); 501 502 usc_task_control->src0 |= 503 ((uint64_t)(usc_temps & 504 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK)) 505 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT; 506 } 507 508 if (sample_rate > 0) { 509 usc_task_control->src0 |= 510 ((uint64_t)sample_rate) 511 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT; 512 } 513 514 if (phase_rate_change) { 515 usc_task_control->src0 |= 516 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN; 517 } 518} 519 520/** 521 * Generates the PDS pixel event program. 522 * 523 * \param program Pointer to the PDS pixel event program. 524 * \param buffer Pointer to the buffer for the program. 525 * \param gen_mode Generate either a data segment or code segment. 526 * \param dev_info PVR device info structure. 527 * \returns Pointer to just beyond the buffer for the program. 528 */ 529uint32_t * 530pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program, 531 uint32_t *restrict buffer, 532 enum pvr_pds_generate_mode gen_mode, 533 const struct pvr_device_info *dev_info) 534{ 535 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 536 uint32_t *constants = buffer; 537 538 uint32_t data_size = 0; 539 540 /* Copy the DMA control words and USC task control words to constants, then 541 * arrange them so that the 64-bit words are together followed by the 32-bit 542 * words. 
543 */ 544 uint32_t control_constant = 545 pvr_pds_get_constants(&next_constant, 2, &data_size); 546 uint32_t emit_constant = 547 pvr_pds_get_constants(&next_constant, 548 (2 * program->num_emit_word_pairs), 549 &data_size); 550 551 uint32_t control_word_constant = 552 pvr_pds_get_constants(&next_constant, 553 program->num_emit_word_pairs, 554 &data_size); 555 556 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 557 /* Src0 for DOUTU. */ 558 pvr_pds_write_wide_constant(buffer, 559 control_constant, 560 program->task_control.src0); /* DOUTU */ 561 /* 64-bit Src0. */ 562 563 /* Emit words for end of tile program. */ 564 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) { 565 pvr_pds_write_constant64(constants, 566 emit_constant + (2 * i), 567 program->emit_words[(2 * i) + 0], 568 program->emit_words[(2 * i) + 1]); 569 } 570 571 /* Control words. */ 572 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) { 573 uint32_t doutw = pvr_pds_encode_doutw_src1( 574 (2 * i), 575 PVR_PDS_DOUTW_LOWER64, 576 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, 577 false, 578 dev_info); 579 580 if (i == (program->num_emit_word_pairs - 1)) 581 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 582 583 pvr_pds_write_constant32(constants, control_word_constant + i, doutw); 584 } 585 } 586 587 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 588 /* DOUTW the state into the shared register. */ 589 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) { 590 *buffer++ = pvr_pds_encode_doutw64( 591 /* cc */ 0, 592 /* END */ 0, 593 /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */ 594 /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0 595 */ 596 } 597 598 /* Kick the USC. */ 599 *buffer++ = pvr_pds_encode_doutu( 600 /* cc */ 0, 601 /* END */ 1, 602 /* SRC0 */ control_constant >> 1); 603 } 604 605 uint32_t code_size = 1 + program->num_emit_word_pairs; 606 607 /* Save the data segment Pointer and size. 
*/ 608 program->data_segment = constants; 609 program->data_size = data_size; 610 program->code_size = code_size; 611 612 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) 613 return (constants + next_constant); 614 615 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) 616 return buffer; 617 618 return NULL; 619} 620 621/** 622 * Checks if any of the vertex streams contains instance data. 623 * 624 * \param streams Streams contained in the vertex shader. 625 * \param num_streams Number of vertex streams. 626 * \returns true if one or more of the given vertex streams contains 627 * instance data, otherwise false. 628 */ 629static bool pvr_pds_vertex_streams_contains_instance_data( 630 const struct pvr_pds_vertex_stream *streams, 631 uint32_t num_streams) 632{ 633 for (uint32_t i = 0; i < num_streams; i++) { 634 const struct pvr_pds_vertex_stream *vertex_stream = &streams[i]; 635 if (vertex_stream->instance_data) 636 return true; 637 } 638 639 return false; 640} 641 642static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs, 643 uint32_t *next_constant, 644 uint32_t num_constants, 645 uint32_t *count) 646{ 647 /* Allocate constant for PDS vertex shader where constant is divided into 648 * banks. 
649 */ 650 uint32_t constant; 651 652 assert(num_constants == 1 || num_constants == 2); 653 654 if (*next_constant >= (num_backs << 3)) 655 return pvr_pds_get_constants(next_constant, num_constants, count); 656 657 if ((*next_constant % 8) == 0) { 658 constant = *next_constant; 659 660 if (num_constants == 1) 661 *next_constant += 1; 662 else 663 *next_constant += 8; 664 } else if (num_constants == 1) { 665 constant = *next_constant; 666 *next_constant += 7; 667 } else { 668 *next_constant += 7; 669 constant = *next_constant; 670 671 if (*next_constant >= (num_backs << 3)) { 672 *next_constant += 2; 673 *count += 2; 674 } else { 675 *next_constant += 8; 676 } 677 } 678 return constant; 679} 680 681/** 682 * Generates a PDS program to load USC vertex inputs based from one or more 683 * vertex buffers, each containing potentially multiple elements, and then a 684 * DOUTU to execute the USC. 685 * 686 * \param program Pointer to the description of the program which should be 687 * generated. 688 * \param buffer Pointer to buffer that receives the output of this function. 689 * Will either be the data segment or code segment depending on 690 * gen_mode. 691 * \param gen_mode Which part to generate, either data segment or 692 * code segment. If PDS_GENERATE_SIZES is specified, nothing is 693 * written, but size information in program is updated. 694 * \param dev_info PVR device info structure. 695 * \returns Pointer to just beyond the buffer for the data - i.e the value 696 * of the buffer after writing its contents. 
697 */ 698uint32_t * 699pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program, 700 uint32_t *restrict buffer, 701 enum pvr_pds_generate_mode gen_mode, 702 const struct pvr_device_info *dev_info) 703{ 704 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 705 uint32_t next_stream_constant; 706 uint32_t next_temp; 707 uint32_t usc_control_constant64; 708 uint32_t stride_constant32 = 0; 709 uint32_t dma_address_constant64 = 0; 710 uint32_t dma_control_constant64; 711 uint32_t multiplier_constant32 = 0; 712 uint32_t base_instance_const32 = 0; 713 714 uint32_t temp = 0; 715 uint32_t index_temp64 = 0; 716 uint32_t num_vertices_temp64 = 0; 717 uint32_t pre_index_temp = (uint32_t)(-1); 718 bool first_ddmadt = true; 719 uint32_t input_register0; 720 uint32_t input_register1; 721 uint32_t input_register2; 722 723 struct pvr_pds_vertex_stream *vertex_stream; 724 struct pvr_pds_vertex_element *vertex_element; 725 uint32_t shift_2s_comp; 726 727 uint32_t data_size = 0; 728 uint32_t code_size = 0; 729 uint32_t temps_used = 0; 730 731 bool direct_writes_needed = false; 732 733 uint32_t consts_size = 0; 734 uint32_t vertex_id_control_word_const32 = 0; 735 uint32_t instance_id_control_word_const32 = 0; 736 uint32_t instance_id_modifier_word_const32 = 0; 737 uint32_t geometry_id_control_word_const64 = 0; 738 uint32_t empty_dma_control_constant64 = 0; 739 740 bool any_instanced_stream = 741 pvr_pds_vertex_streams_contains_instance_data(program->streams, 742 program->num_streams); 743 744 uint32_t base_instance_register = 0; 745 uint32_t ddmadt_enables = 0; 746 747 bool issue_empty_ddmad = false; 748 uint32_t last_stream_index = program->num_streams - 1; 749 bool current_p0 = false; 750 uint32_t skip_stream_flag = 0; 751 752 /* Generate the PDS vertex shader data. 
*/ 753 754#if defined(DEBUG) 755 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 756 for (uint32_t i = 0; i < program->data_size; i++) 757 buffer[i] = 0xDEADBEEF; 758 } 759#endif 760 761 /* Generate the PDS vertex shader program */ 762 next_temp = PVR_PDS_TEMPS_BLOCK_BASE; 763 /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */ 764 input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used); 765 /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */ 766 input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used); 767 768 if (program->iterate_remap_id) 769 input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used); 770 else 771 input_register2 = 0; /* Not used, but need to silence the compiler. */ 772 773 /* Generate the PDS vertex shader code. The constants in the data block are 774 * arranged as follows: 775 * 776 * 64 bit bank 0 64 bit bank 1 64 bit bank 2 64 bit bank 777 * 3 Not used (tmps) Stride | Multiplier Address Control 778 */ 779 780 /* Find out how many constants are needed by streams. */ 781 for (uint32_t stream = 0; stream < program->num_streams; stream++) { 782 pvr_pds_get_constants(&next_constant, 783 8 * program->streams[stream].num_elements, 784 &consts_size); 785 } 786 787 /* If there are no vertex streams allocate the first bank for USC Code 788 * Address. 789 */ 790 if (consts_size == 0) 791 pvr_pds_get_constants(&next_constant, 2, &consts_size); 792 else 793 next_constant = 8; 794 795 direct_writes_needed = program->iterate_instance_id || 796 program->iterate_vtx_id || program->iterate_remap_id; 797 798 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { 799 /* Evaluate what config of DDMAD should be used for each stream. 
*/ 800 for (uint32_t stream = 0; stream < program->num_streams; stream++) { 801 vertex_stream = &program->streams[stream]; 802 803 if (vertex_stream->use_ddmadt) { 804 ddmadt_enables |= (1 << stream); 805 806 /* The condition for index value is: 807 * index * stride + size <= bufferSize (all in unit of byte) 808 */ 809 if (vertex_stream->stride == 0) { 810 if (vertex_stream->elements[0].size <= 811 vertex_stream->buffer_size_in_bytes) { 812 /* index can be any value -> no need to use DDMADT. */ 813 ddmadt_enables &= (~(1 << stream)); 814 } else { 815 /* No index works -> no need to issue DDMAD instruction. 816 */ 817 skip_stream_flag |= (1 << stream); 818 } 819 } else { 820 /* index * stride + size <= bufferSize 821 * 822 * can be converted to: 823 * index <= (bufferSize - size) / stride 824 * 825 * where maximum index is: 826 * integer((bufferSize - size) / stride). 827 */ 828 if (vertex_stream->buffer_size_in_bytes < 829 vertex_stream->elements[0].size) { 830 /* No index works -> no need to issue DDMAD instruction. 831 */ 832 skip_stream_flag |= (1 << stream); 833 } else { 834 uint32_t max_index = (vertex_stream->buffer_size_in_bytes - 835 vertex_stream->elements[0].size) / 836 vertex_stream->stride; 837 if (max_index == 0xFFFFFFFFu) { 838 /* No need to use DDMADT as all possible indices can 839 * pass the test. 840 */ 841 ddmadt_enables &= (~(1 << stream)); 842 } else { 843 /* In this case, test condition can be changed to 844 * index < max_index + 1. 
845 */ 846 program->streams[stream].num_vertices = 847 pvr_pds_get_bank_based_constants(program->num_streams, 848 &next_constant, 849 1, 850 &consts_size); 851 852 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 853 pvr_pds_write_constant32( 854 buffer, 855 program->streams[stream].num_vertices, 856 max_index + 1); 857 } 858 } 859 } 860 } 861 } 862 863 if ((skip_stream_flag & (1 << stream)) == 0) { 864 issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0; 865 last_stream_index = stream; 866 } 867 } 868 } else { 869 if (program->num_streams > 0 && 870 program->streams[program->num_streams - 1].use_ddmadt) { 871 issue_empty_ddmad = true; 872 } 873 } 874 875 if (direct_writes_needed) 876 issue_empty_ddmad = false; 877 878 if (issue_empty_ddmad) { 879 /* An empty DMA control const (DMA size = 0) is required in case the 880 * last DDMADD is predicated out and last flag does not have any usage. 881 */ 882 empty_dma_control_constant64 = 883 pvr_pds_get_bank_based_constants(program->num_streams, 884 &next_constant, 885 2, 886 &consts_size); 887 } 888 889 /* Assign constants for non stream or base instance if there is any 890 * instanced stream. 
891 */ 892 if (direct_writes_needed || any_instanced_stream || 893 program->instance_ID_modifier) { 894 if (program->iterate_vtx_id) { 895 vertex_id_control_word_const32 = 896 pvr_pds_get_bank_based_constants(program->num_streams, 897 &next_constant, 898 1, 899 &consts_size); 900 } 901 902 if (program->iterate_instance_id || program->instance_ID_modifier) { 903 if (program->instance_ID_modifier == 0) { 904 instance_id_control_word_const32 = 905 pvr_pds_get_bank_based_constants(program->num_streams, 906 &next_constant, 907 1, 908 &consts_size); 909 } else { 910 instance_id_modifier_word_const32 = 911 pvr_pds_get_bank_based_constants(program->num_streams, 912 &next_constant, 913 1, 914 &consts_size); 915 if ((instance_id_modifier_word_const32 % 2) == 0) { 916 instance_id_control_word_const32 = 917 pvr_pds_get_bank_based_constants(program->num_streams, 918 &next_constant, 919 1, 920 &consts_size); 921 } else { 922 instance_id_control_word_const32 = 923 instance_id_modifier_word_const32; 924 instance_id_modifier_word_const32 = 925 pvr_pds_get_bank_based_constants(program->num_streams, 926 &next_constant, 927 1, 928 &consts_size); 929 } 930 } 931 } 932 933 if (program->base_instance != 0) { 934 base_instance_const32 = 935 pvr_pds_get_bank_based_constants(program->num_streams, 936 &next_constant, 937 1, 938 &consts_size); 939 } 940 941 if (program->iterate_remap_id) { 942 geometry_id_control_word_const64 = 943 pvr_pds_get_bank_based_constants(program->num_streams, 944 &next_constant, 945 2, 946 &consts_size); 947 } 948 } 949 950 if (program->instance_ID_modifier != 0) { 951 /* This instanceID modifier is used when a draw array instanced call 952 * sourcing from client data cannot fit into vertex buffer and needs to 953 * be broken down into several draw calls. 
954 */ 955 956 code_size += 1; 957 958 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 959 pvr_pds_write_constant32(buffer, 960 instance_id_modifier_word_const32, 961 program->instance_ID_modifier); 962 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 963 *buffer++ = pvr_pds_inst_encode_add32( 964 /* cc */ 0x0, 965 /* ALUM */ 0, /* Unsigned */ 966 /* SNA */ 0, /* Add */ 967 /* SRC0 32b */ instance_id_modifier_word_const32, 968 /* SRC1 32b */ input_register1, 969 /* DST 32b */ input_register1); 970 } 971 } 972 973 /* Adjust instanceID if necessary. */ 974 if (any_instanced_stream || program->iterate_instance_id) { 975 if (program->base_instance != 0) { 976 assert(!program->draw_indirect); 977 978 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 979 pvr_pds_write_constant32(buffer, 980 base_instance_const32, 981 program->base_instance); 982 } 983 984 base_instance_register = base_instance_const32; 985 } 986 987 if (program->draw_indirect) { 988 assert((program->instance_ID_modifier == 0) && 989 (program->base_instance == 0)); 990 991 base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1; 992 } 993 } 994 995 next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 996 usc_control_constant64 = 997 pvr_pds_get_constants(&next_stream_constant, 2, &data_size); 998 999 for (uint32_t stream = 0; stream < program->num_streams; stream++) { 1000 bool instance_data_with_base_instance; 1001 1002 if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) && 1003 ((skip_stream_flag & (1 << stream)) != 0)) { 1004 continue; 1005 } 1006 1007 vertex_stream = &program->streams[stream]; 1008 1009 instance_data_with_base_instance = 1010 ((vertex_stream->instance_data) && 1011 ((program->base_instance > 0) || (program->draw_indirect))); 1012 1013 /* Get all 8 32-bit constants at once, only 6 for first stream due to 1014 * USC constants. 
1015 */ 1016 if (stream == 0) { 1017 stride_constant32 = 1018 pvr_pds_get_constants(&next_stream_constant, 6, &data_size); 1019 } else { 1020 next_constant = 1021 pvr_pds_get_constants(&next_stream_constant, 8, &data_size); 1022 1023 /* Skip bank 0. */ 1024 stride_constant32 = next_constant + 2; 1025 } 1026 1027 multiplier_constant32 = stride_constant32 + 1; 1028 1029 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1030 pvr_pds_write_constant32(buffer, 1031 stride_constant32, 1032 vertex_stream->stride); 1033 1034 /* Vertex stream frequency multiplier. */ 1035 if (vertex_stream->multiplier) 1036 pvr_pds_write_constant32(buffer, 1037 multiplier_constant32, 1038 vertex_stream->multiplier); 1039 } 1040 1041 /* Update the code size count and temps count for the above code 1042 * segment. 1043 */ 1044 if (vertex_stream->current_state) { 1045 code_size += 1; 1046 temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */ 1047 } else { 1048 unsigned int num_temps_required = 0; 1049 1050 if (vertex_stream->multiplier) { 1051 num_temps_required += 2; 1052 code_size += 3; 1053 1054 if (vertex_stream->shift) { 1055 code_size += 1; 1056 1057 if ((int32_t)vertex_stream->shift > 0) 1058 code_size += 1; 1059 } 1060 } else if (vertex_stream->shift) { 1061 code_size += 1; 1062 num_temps_required += 1; 1063 } else if (instance_data_with_base_instance) { 1064 num_temps_required += 1; 1065 } 1066 1067 if (num_temps_required != 0) { 1068 temp = pvr_pds_get_temps(&next_temp, 1069 num_temps_required, 1070 &temps_used); /* 64-bit */ 1071 } else { 1072 temp = vertex_stream->instance_data ? input_register1 1073 : input_register0; 1074 } 1075 1076 if (instance_data_with_base_instance) 1077 code_size += 1; 1078 } 1079 1080 /* The real code segment. */ 1081 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1082 /* If it's current state stream, then index = 0 always. */ 1083 if (vertex_stream->current_state) { 1084 /* Put zero in temp. 
*/ 1085 *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0); 1086 } else if (vertex_stream->multiplier) { 1087 /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24) 1088 * new: Iout = (Iin * Multiplier) >> (shift+31) 1089 */ 1090 1091 /* Put zero in temp. Need zero for add part of the following 1092 * MAD. MAD source is 64 bit, so need two LIMMs. 1093 */ 1094 *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0); 1095 /* Put zero in temp. Need zero for add part of the following 1096 * MAD. 1097 */ 1098 *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0); 1099 1100 /* old: (Iin * (Multiplier+2^24)) 1101 * new: (Iin * Multiplier) 1102 */ 1103 *buffer++ = pvr_rogue_inst_encode_mad( 1104 0, /* Sign of add is positive. */ 1105 0, /* Unsigned ALU mode */ 1106 0, /* Unconditional */ 1107 multiplier_constant32, 1108 vertex_stream->instance_data ? input_register1 : input_register0, 1109 temp / 2, 1110 temp / 2); 1111 1112 if (vertex_stream->shift) { 1113 int32_t shift = (int32_t)vertex_stream->shift; 1114 1115 /* new: >> (shift + 31) */ 1116 shift += 31; 1117 shift *= -1; 1118 1119 if (shift < -31) { 1120 /* >> (31) */ 1121 shift_2s_comp = 0xFFFE1; 1122 *buffer++ = pvr_pds_inst_encode_stflp64( 1123 /* cc */ 0, 1124 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, 1125 /* IM */ 1, /* enable immediate */ 1126 /* SRC0 */ temp / 2, 1127 /* SRC1 */ input_register0, /* This won't be used in 1128 * a shift operation. 1129 */ 1130 /* SRC2 (Shift) */ shift_2s_comp, 1131 /* DST */ temp / 2); 1132 shift += 31; 1133 } 1134 1135 /* old: >> (Shift+24) 1136 * new: >> (shift + 31) 1137 */ 1138 shift_2s_comp = *((uint32_t *)&shift); 1139 *buffer++ = pvr_pds_inst_encode_stflp64( 1140 /* cc */ 0, 1141 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, 1142 /* IM */ 1, /*enable immediate */ 1143 /* SRC0 */ temp / 2, 1144 /* SRC1 */ input_register0, /* This won't be used in 1145 * a shift operation. 
1146 */ 1147 /* SRC2 (Shift) */ shift_2s_comp, 1148 /* DST */ temp / 2); 1149 } 1150 1151 if (instance_data_with_base_instance) { 1152 *buffer++ = 1153 pvr_pds_inst_encode_add32(0, /* cc */ 1154 0, /* ALNUM */ 1155 0, /* SNA */ 1156 base_instance_register, /* src0 1157 */ 1158 temp, /* src1 */ 1159 temp /* dst */ 1160 ); 1161 } 1162 } else { /* NOT vertex_stream->multiplier */ 1163 if (vertex_stream->shift) { 1164 /* Shift Index/InstanceNum Right by shift bits. Put result 1165 * in a Temp. 1166 */ 1167 1168 /* 2's complement of shift as this will be a right shift. */ 1169 shift_2s_comp = ~(vertex_stream->shift) + 1; 1170 1171 *buffer++ = pvr_pds_inst_encode_stflp32( 1172 /* IM */ 1, /* enable immediate. */ 1173 /* cc */ 0, 1174 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, 1175 /* SRC0 */ vertex_stream->instance_data ? input_register1 1176 : input_register0, 1177 /* SRC1 */ input_register0, /* This won't be used in 1178 * a shift operation. 1179 */ 1180 /* SRC2 (Shift) */ shift_2s_comp, 1181 /* DST */ temp); 1182 1183 if (instance_data_with_base_instance) { 1184 *buffer++ = 1185 pvr_pds_inst_encode_add32(0, /* cc */ 1186 0, /* ALNUM */ 1187 0, /* SNA */ 1188 base_instance_register, /* src0 1189 */ 1190 temp, /* src1 */ 1191 temp /* dst */ 1192 ); 1193 } 1194 } else { 1195 if (instance_data_with_base_instance) { 1196 *buffer++ = 1197 pvr_pds_inst_encode_add32(0, /* cc */ 1198 0, /* ALNUM */ 1199 0, /* SNA */ 1200 base_instance_register, /* src0 1201 */ 1202 input_register1, /* src1 */ 1203 temp /* dst */ 1204 ); 1205 } else { 1206 /* If the shift instruction doesn't happen, use the IR 1207 * directly into the following MAD. 1208 */ 1209 temp = vertex_stream->instance_data ? 
input_register1 1210 : input_register0; 1211 } 1212 } 1213 } 1214 } 1215 1216 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { 1217 if (vertex_stream->use_ddmadt) 1218 ddmadt_enables |= (1 << stream); 1219 } else { 1220 if ((ddmadt_enables & (1 << stream)) != 0) { 1221 /* Emulate what DDMADT does for range checking. */ 1222 if (first_ddmadt) { 1223 /* Get an 64 bits temp such that cmp current index with 1224 * allowed vertex number can work. 1225 */ 1226 index_temp64 = 1227 pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit 1228 */ 1229 num_vertices_temp64 = 1230 pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit 1231 */ 1232 1233 index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER; 1234 num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER; 1235 1236 code_size += 3; 1237 current_p0 = true; 1238 } 1239 1240 code_size += (temp == pre_index_temp ? 1 : 2); 1241 1242 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1243 if (first_ddmadt) { 1244 /* Set predicate to be P0. */ 1245 *buffer++ = pvr_pds_encode_bra( 1246 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC 1247 */ 1248 0, /* Neg */ 1249 PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC 1250 */ 1251 1); /* Addr */ 1252 1253 *buffer++ = 1254 pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0); 1255 *buffer++ = 1256 pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0); 1257 } 1258 1259 if (temp != pre_index_temp) { 1260 *buffer++ = pvr_pds_inst_encode_stflp32( 1261 /* IM */ 1, /* enable immediate. */ 1262 /* cc */ 0, 1263 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE, 1264 /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER, 1265 /* SRC1 */ 0, 1266 /* SRC2 (Shift) */ 0, 1267 /* DST */ index_temp64); 1268 } 1269 1270 *buffer++ = pvr_pds_inst_encode_stflp32( 1271 /* IM */ 1, /* enable immediate. 
*/ 1272 /* cc */ 0, 1273 /* LOP */ PVR_ROGUE_PDSINST_LOP_OR, 1274 /* SRC0 */ num_vertices_temp64 + 1, 1275 /* SRC1 */ vertex_stream->num_vertices, 1276 /* SRC2 (Shift) */ 0, 1277 /* DST */ num_vertices_temp64); 1278 } 1279 1280 first_ddmadt = false; 1281 1282 pre_index_temp = temp; 1283 } 1284 } 1285 1286 /* Process the elements in the stream. */ 1287 for (uint32_t element = 0; element < vertex_stream->num_elements; 1288 element++) { 1289 bool terminate = false; 1290 1291 vertex_element = &vertex_stream->elements[element]; 1292 /* Check if last DDMAD needs terminate or not. */ 1293 if ((element == (vertex_stream->num_elements - 1)) && 1294 (stream == last_stream_index)) { 1295 terminate = !issue_empty_ddmad && !direct_writes_needed; 1296 } 1297 1298 /* Get a new set of constants for this element. */ 1299 if (element) { 1300 /* Get all 8 32 bit constants at once. */ 1301 next_constant = 1302 pvr_pds_get_constants(&next_stream_constant, 8, &data_size); 1303 } 1304 1305 dma_address_constant64 = next_constant + 4; 1306 dma_control_constant64 = dma_address_constant64 + 2; 1307 1308 if (vertex_element->component_size == 0) { 1309 /* Standard DMA. 1310 * 1311 * Write the DMA transfer control words into the PDS data 1312 * section. 1313 * 1314 * DMA Address is 40-bit. 1315 */ 1316 1317 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1318 uint32_t dma_control_word; 1319 uint64_t dma_control_word64 = 0; 1320 uint32_t dma_size; 1321 1322 /* Write the address to the constant. */ 1323 pvr_pds_write_dma_address(buffer, 1324 dma_address_constant64, 1325 vertex_stream->address + 1326 (uint64_t)vertex_element->offset, 1327 false, 1328 dev_info); 1329 { 1330 if (program->stream_patch_offsets) { 1331 program 1332 ->stream_patch_offsets[program->num_stream_patches++] = 1333 (stream << 16) | (dma_address_constant64 >> 1); 1334 } 1335 } 1336 1337 /* Size is in bytes - round up to nearest 32 bit word. 
*/ 1338 dma_size = 1339 (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >> 1340 PVR_PDS_DWORD_SHIFT; 1341 1342 assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER); 1343 1344 /* Set up the dma transfer control word. */ 1345 dma_control_word = 1346 dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT; 1347 1348 dma_control_word |= 1349 vertex_element->reg 1350 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT; 1351 1352 dma_control_word |= 1353 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE | 1354 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED; 1355 1356 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { 1357 if ((ddmadt_enables & (1 << stream)) != 0) { 1358 assert( 1359 ((((uint64_t)vertex_stream->buffer_size_in_bytes 1360 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) & 1361 ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >> 1362 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) == 1363 (uint64_t)vertex_stream->buffer_size_in_bytes); 1364 dma_control_word64 = 1365 (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN | 1366 (((uint64_t)vertex_stream->buffer_size_in_bytes 1367 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) & 1368 ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK)); 1369 } 1370 } 1371 /* If this is the last dma then also set the last flag. */ 1372 if (terminate) { 1373 dma_control_word |= 1374 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN; 1375 } 1376 1377 /* Write the 32-Bit SRC3 word to a 64-bit constant as per 1378 * spec. 
1379 */ 1380 pvr_pds_write_wide_constant(buffer, 1381 dma_control_constant64, 1382 dma_control_word64 | 1383 (uint64_t)dma_control_word); 1384 } 1385 1386 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1387 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { 1388 if ((ddmadt_enables & (1 << stream)) != 0) { 1389 *buffer++ = pvr_pds_inst_encode_cmp( 1390 0, /* cc enable */ 1391 PVR_ROGUE_PDSINST_COP_LT, /* Operation */ 1392 index_temp64 >> 1, /* SRC0 (REGS64TP) */ 1393 (num_vertices_temp64 >> 1) + 1394 PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1 1395 (REGS64) 1396 */ 1397 } 1398 } 1399 /* Multiply by the vertex stream stride and add the base 1400 * followed by a DOUTD. 1401 * 1402 * dmad32 (C0 * T0) + C1, C2 1403 * src0 = stride src1 = index src2 = baseaddr src3 = 1404 * doutd part 1405 */ 1406 1407 uint32_t cc; 1408 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) 1409 cc = 0; 1410 else 1411 cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0; 1412 1413 *buffer++ = pvr_pds_inst_encode_ddmad( 1414 /* cc */ cc, 1415 /* END */ 0, 1416 /* SRC0 */ stride_constant32, /* Stride 32-bit*/ 1417 /* SRC1 */ temp, /* Index 32-bit*/ 1418 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream 1419 * Address 1420 * + 1421 * Offset 1422 */ 1423 /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA 1424 * Transfer 1425 * Control 1426 * Word. 1427 */ 1428 ); 1429 } 1430 1431 if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) && 1432 ((ddmadt_enables & (1 << stream)) != 0)) { 1433 code_size += 1; 1434 } 1435 code_size += 1; 1436 } else { 1437 /* Repeat DMA. 1438 * 1439 * Write the DMA transfer control words into the PDS data 1440 * section. 1441 * 1442 * DMA address is 40-bit. 1443 */ 1444 1445 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1446 uint32_t dma_control_word; 1447 1448 /* Write the address to the constant. 
*/ 1449 pvr_pds_write_dma_address(buffer, 1450 dma_address_constant64, 1451 vertex_stream->address + 1452 (uint64_t)vertex_element->offset, 1453 false, 1454 dev_info); 1455 1456 /* Set up the DMA transfer control word. */ 1457 dma_control_word = 1458 vertex_element->size 1459 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT; 1460 1461 dma_control_word |= 1462 vertex_element->reg 1463 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT; 1464 1465 switch (vertex_element->component_size) { 1466 case 4: { 1467 dma_control_word |= 1468 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR; 1469 break; 1470 } 1471 case 3: { 1472 dma_control_word |= 1473 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE; 1474 break; 1475 } 1476 case 2: { 1477 dma_control_word |= 1478 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO; 1479 break; 1480 } 1481 default: { 1482 dma_control_word |= 1483 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE; 1484 break; 1485 } 1486 } 1487 1488 dma_control_word |= 1489 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT; 1490 1491 dma_control_word |= 1492 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE | 1493 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED; 1494 1495 /* If this is the last dma then also set the last flag. */ 1496 if (terminate) { 1497 dma_control_word |= 1498 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN; 1499 } 1500 1501 /* Write the 32-Bit SRC3 word to a 64-bit constant as per 1502 * spec. 1503 */ 1504 pvr_pds_write_wide_constant(buffer, 1505 dma_control_constant64, 1506 (uint64_t)dma_control_word); 1507 } 1508 1509 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1510 /* Multiply by the vertex stream stride and add the base 1511 * followed by a DOUTD. 
1512 * 1513 * dmad32 (C0 * T0) + C1, C2 1514 * src0 = stride src1 = index src2 = baseaddr src3 = 1515 * doutd part 1516 */ 1517 *buffer++ = pvr_pds_inst_encode_ddmad( 1518 /* cc */ 0, 1519 /* END */ 0, 1520 /* SRC0 */ stride_constant32, /* Stride 32-bit*/ 1521 /* SRC1 */ temp, /* Index 32-bit*/ 1522 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream 1523 * Address 1524 * + 1525 * Offset. 1526 */ 1527 /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA 1528 * Transfer 1529 * Control 1530 * Word. 1531 */ 1532 ); 1533 } 1534 1535 code_size += 1; 1536 } /* End of repeat DMA. */ 1537 } /* Element loop */ 1538 } /* Stream loop */ 1539 1540 if (issue_empty_ddmad) { 1541 /* Issue an empty last DDMAD, always executed. */ 1542 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1543 pvr_pds_write_wide_constant( 1544 buffer, 1545 empty_dma_control_constant64, 1546 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN); 1547 } 1548 1549 code_size += 1; 1550 1551 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1552 *buffer++ = pvr_pds_inst_encode_ddmad( 1553 /* cc */ 0, 1554 /* END */ 0, 1555 /* SRC0 */ stride_constant32, /* Stride 32-bit*/ 1556 /* SRC1 */ temp, /* Index 32-bit*/ 1557 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream 1558 *Address + 1559 *Offset. 1560 */ 1561 /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA 1562 * Transfer 1563 * Control 1564 * Word. 1565 */ 1566 ); 1567 } 1568 } 1569 1570 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) { 1571 if (current_p0) { 1572 code_size += 1; 1573 1574 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1575 /* Revert predicate back to IF0 which is required by DOUTU. */ 1576 *buffer++ = 1577 pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC 1578 */ 1579 0, /* Neg */ 1580 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC 1581 */ 1582 1); /* Addr */ 1583 } 1584 } 1585 } 1586 /* Send VertexID if requested. 
*/ 1587 if (program->iterate_vtx_id) { 1588 if (program->draw_indirect) { 1589 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1590 *buffer++ = pvr_pds_inst_encode_add32( 1591 /* cc */ 0x0, 1592 /* ALUM */ 0, /* Unsigned */ 1593 /* SNA */ 1, /* Minus */ 1594 /* SRC0 32b */ input_register0, /* vertexID */ 1595 /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base 1596 * vertexID. 1597 */ 1598 /* DST 32b */ input_register0); 1599 } 1600 1601 code_size += 1; 1602 } 1603 1604 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1605 uint32_t doutw = pvr_pds_encode_doutw_src1( 1606 program->vtx_id_register, 1607 PVR_PDS_DOUTW_LOWER32, 1608 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, 1609 false, 1610 dev_info); 1611 1612 if (!program->iterate_instance_id && !program->iterate_remap_id) 1613 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 1614 1615 pvr_pds_write_constant32(buffer, 1616 vertex_id_control_word_const32, 1617 doutw); 1618 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1619 *buffer++ = pvr_pds_encode_doutw64( 1620 /* cc */ 0, 1621 /* END */ 0, 1622 /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1 1623 */ 1624 /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */ 1625 } 1626 1627 code_size += 1; 1628 } 1629 1630 /* Send InstanceID if requested. 
*/ 1631 if (program->iterate_instance_id) { 1632 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1633 uint32_t doutw = pvr_pds_encode_doutw_src1( 1634 program->instance_id_register, 1635 PVR_PDS_DOUTW_UPPER32, 1636 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, 1637 true, 1638 dev_info); 1639 1640 if (!program->iterate_remap_id) 1641 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 1642 1643 pvr_pds_write_constant32(buffer, 1644 instance_id_control_word_const32, 1645 doutw); 1646 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1647 *buffer++ = pvr_pds_encode_doutw64( 1648 /* cc */ 0, 1649 /* END */ 0, 1650 /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */ 1651 /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */ 1652 } 1653 1654 code_size += 1; 1655 } 1656 1657 /* Send remapped index number to vi0. */ 1658 if (program->iterate_remap_id) { 1659 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1660 uint32_t doutw = pvr_pds_encode_doutw_src1( 1661 0 /* vi0 */, 1662 PVR_PDS_DOUTW_LOWER32, 1663 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE | 1664 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN, 1665 false, 1666 dev_info); 1667 1668 pvr_pds_write_constant64(buffer, 1669 geometry_id_control_word_const64, 1670 doutw, 1671 0); 1672 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1673 *buffer++ = pvr_pds_encode_doutw64( 1674 /* cc */ 0, 1675 /* END */ 0, 1676 /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit 1677 * Src1 1678 */ 1679 /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */ 1680 } 1681 1682 code_size += 1; 1683 } 1684 1685 /* Copy the USC task control words to constants. */ 1686 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1687 pvr_pds_write_wide_constant(buffer, 1688 usc_control_constant64, 1689 program->usc_task_control.src0); /* 64-bit 1690 * Src0 1691 */ 1692 if (program->stream_patch_offsets) { 1693 /* USC TaskControl is always the first patch. 
*/ 1694 program->stream_patch_offsets[0] = usc_control_constant64 >> 1; 1695 } 1696 } 1697 1698 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 1699 /* Conditionally (if last in task) issue the task to the USC 1700 * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2. 1701 */ 1702 1703 *buffer++ = pvr_pds_encode_doutu( 1704 /* cc */ 1, 1705 /* END */ 1, 1706 /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */ 1707 1708 /* End the program if the Dout did not already end it. */ 1709 *buffer++ = pvr_pds_inst_encode_halt(0); 1710 } 1711 1712 code_size += 2; 1713 1714 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1715 /* Set the data segment pointer and ensure we return 1 past the buffer 1716 * ptr. 1717 */ 1718 program->data_segment = buffer; 1719 1720 buffer += consts_size; 1721 } 1722 1723 program->temps_used = temps_used; 1724 program->data_size = consts_size; 1725 program->code_size = code_size; 1726 program->ddmadt_enables = ddmadt_enables; 1727 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) 1728 program->skip_stream_flag = skip_stream_flag; 1729 1730 return buffer; 1731} 1732 1733/** 1734 * Generates a PDS program to load USC compute shader global/local/workgroup 1735 * sizes/ids and then a DOUTU to execute the USC. 1736 * 1737 * \param program Pointer to description of the program that should be 1738 * generated. 1739 * \param buffer Pointer to buffer that receives the output of this function. 1740 * This will be either the data segment, or the code depending on 1741 * gen_mode. 1742 * \param gen_mode Which part to generate, either data segment or code segment. 1743 * If PDS_GENERATE_SIZES is specified, nothing is written, but 1744 * size information in program is updated. 1745 * \param dev_info PVR device info struct. 1746 * \returns Pointer to just beyond the buffer for the data - i.e. the value of 1747 * the buffer after writing its contents. 
1748 */ 1749uint32_t * 1750pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, 1751 uint32_t *restrict buffer, 1752 enum pvr_pds_generate_mode gen_mode, 1753 const struct pvr_device_info *dev_info) 1754{ 1755 uint32_t usc_control_constant64; 1756 uint32_t usc_control_constant64_coeff_update = 0; 1757 uint32_t zero_constant64 = 0; 1758 1759 uint32_t data_size = 0; 1760 uint32_t code_size = 0; 1761 uint32_t temps_used = 0; 1762 uint32_t doutw = 0; 1763 1764 uint32_t barrier_ctrl_word = 0; 1765 uint32_t barrier_ctrl_word2 = 0; 1766 1767 /* Even though there are 3 IDs for local and global we only need max one 1768 * DOUTW for local, and two for global. 1769 */ 1770 uint32_t work_group_id_ctrl_words[2] = { 0 }; 1771 uint32_t local_id_ctrl_word = 0; 1772 uint32_t local_input_register; 1773 1774 /* For the constant value to load into ptemp (SW fence). */ 1775 uint64_t predicate_ld_src0_constant = 0; 1776 uint32_t cond_render_negate_constant = 0; 1777 1778 uint32_t cond_render_pred_temp; 1779 uint32_t cond_render_negate_temp; 1780 1781 /* 2x 64 bit registers that will mask out the Predicate load. */ 1782 uint32_t cond_render_pred_mask_constant = 0; 1783 1784#if defined(DEBUG) 1785 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1786 for (uint32_t j = 0; j < program->data_size; j++) 1787 buffer[j] = 0xDEADBEEF; 1788 } 1789#endif 1790 1791 /* All the compute input registers are in temps. */ 1792 temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS; 1793 1794 uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used; 1795 1796 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 1797 1798 if (program->kick_usc) { 1799 /* Copy the USC task control words to constants. 
*/ 1800 usc_control_constant64 = 1801 pvr_pds_get_constants(&next_constant, 2, &data_size); 1802 } 1803 1804 if (program->has_coefficient_update_task) { 1805 usc_control_constant64_coeff_update = 1806 pvr_pds_get_constants(&next_constant, 2, &data_size); 1807 } 1808 1809 if (program->conditional_render) { 1810 predicate_ld_src0_constant = 1811 pvr_pds_get_constants(&next_constant, 2, &data_size); 1812 cond_render_negate_constant = 1813 pvr_pds_get_constants(&next_constant, 2, &data_size); 1814 cond_render_pred_mask_constant = 1815 pvr_pds_get_constants(&next_constant, 4, &data_size); 1816 1817 /* LD will load a 64 bit value. */ 1818 cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used); 1819 cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used); 1820 1821 program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant; 1822 program->cond_render_pred_temp = cond_render_pred_temp; 1823 } 1824 1825 if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || 1826 (program->clear_pds_barrier) || 1827 (program->kick_usc && program->conditional_render)) { 1828 zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size); 1829 } 1830 1831 if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1832 barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size); 1833 if (PVR_HAS_QUIRK(dev_info, 51210)) { 1834 barrier_ctrl_word2 = 1835 pvr_pds_get_constants(&next_constant, 1, &data_size); 1836 } 1837 } 1838 1839 if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED || 1840 program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1841 work_group_id_ctrl_words[0] = 1842 pvr_pds_get_constants(&next_constant, 1, &data_size); 1843 } 1844 1845 if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1846 work_group_id_ctrl_words[1] = 1847 pvr_pds_get_constants(&next_constant, 1, &data_size); 1848 } 1849 1850 if 
((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || 1851 (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || 1852 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { 1853 local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size); 1854 } 1855 1856 if (program->add_base_workgroup) { 1857 for (uint32_t workgroup_component = 0; workgroup_component < 3; 1858 workgroup_component++) { 1859 if (program->work_group_input_regs[workgroup_component] != 1860 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1861 program 1862 ->base_workgroup_constant_offset_in_dwords[workgroup_component] = 1863 pvr_pds_get_constants(&next_constant, 1, &data_size); 1864 } 1865 } 1866 } 1867 1868 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 1869 if (program->kick_usc) { 1870 /* Src0 for DOUTU */ 1871 pvr_pds_write_wide_constant(buffer, 1872 usc_control_constant64, 1873 program->usc_task_control.src0); /* 64-bit 1874 * Src0. 1875 */ 1876 } 1877 1878 if (program->has_coefficient_update_task) { 1879 /* Src0 for DOUTU. */ 1880 pvr_pds_write_wide_constant( 1881 buffer, 1882 usc_control_constant64_coeff_update, 1883 program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */ 1884 } 1885 1886 if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || 1887 (program->clear_pds_barrier) || 1888 (program->kick_usc && program->conditional_render)) { 1889 pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit 1890 * Src0 1891 */ 1892 } 1893 1894 if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1895 if (PVR_HAS_QUIRK(dev_info, 51210)) { 1896 /* Write the constant for the coefficient register write. 
*/ 1897 doutw = pvr_pds_encode_doutw_src1( 1898 program->barrier_coefficient + 4, 1899 PVR_PDS_DOUTW_LOWER64, 1900 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, 1901 true, 1902 dev_info); 1903 pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw); 1904 } 1905 /* Write the constant for the coefficient register write. */ 1906 doutw = pvr_pds_encode_doutw_src1( 1907 program->barrier_coefficient, 1908 PVR_PDS_DOUTW_LOWER64, 1909 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, 1910 true, 1911 dev_info); 1912 1913 /* Check whether the barrier is going to be the last DOUTW done by 1914 * the coefficient sync task. 1915 */ 1916 if ((program->work_group_input_regs[0] == 1917 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) && 1918 (program->work_group_input_regs[1] == 1919 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) && 1920 (program->work_group_input_regs[2] == 1921 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { 1922 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 1923 } 1924 1925 pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw); 1926 } 1927 1928 /* If we want work-group id X, see if we also want work-group id Y. */ 1929 if (program->work_group_input_regs[0] != 1930 PVR_PDS_COMPUTE_INPUT_REG_UNUSED && 1931 program->work_group_input_regs[1] != 1932 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1933 /* Make sure we are going to DOUTW them into adjacent registers 1934 * otherwise we can't do it in one. 1935 */ 1936 assert(program->work_group_input_regs[1] == 1937 (program->work_group_input_regs[0] + 1)); 1938 1939 doutw = pvr_pds_encode_doutw_src1( 1940 program->work_group_input_regs[0], 1941 PVR_PDS_DOUTW_LOWER64, 1942 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, 1943 true, 1944 dev_info); 1945 1946 /* If we don't want the Z work-group id then this is the last one. 
1947 */ 1948 if (program->work_group_input_regs[2] == 1949 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1950 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 1951 } 1952 1953 pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw); 1954 } 1955 /* If we only want one of X or Y then handle them separately. */ 1956 else { 1957 if (program->work_group_input_regs[0] != 1958 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1959 doutw = pvr_pds_encode_doutw_src1( 1960 program->work_group_input_regs[0], 1961 PVR_PDS_DOUTW_LOWER32, 1962 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, 1963 true, 1964 dev_info); 1965 1966 /* If we don't want the Z work-group id then this is the last 1967 * one. 1968 */ 1969 if (program->work_group_input_regs[2] == 1970 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1971 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 1972 } 1973 1974 pvr_pds_write_constant32(buffer, 1975 work_group_id_ctrl_words[0], 1976 doutw); 1977 } else if (program->work_group_input_regs[1] != 1978 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1979 doutw = pvr_pds_encode_doutw_src1( 1980 program->work_group_input_regs[1], 1981 PVR_PDS_DOUTW_UPPER32, 1982 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, 1983 true, 1984 dev_info); 1985 1986 /* If we don't want the Z work-group id then this is the last 1987 * one. 1988 */ 1989 if (program->work_group_input_regs[2] == 1990 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 1991 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 1992 } 1993 1994 pvr_pds_write_constant32(buffer, 1995 work_group_id_ctrl_words[0], 1996 doutw); 1997 } 1998 } 1999 2000 /* Handle work-group id Z. 
*/ 2001 if (program->work_group_input_regs[2] != 2002 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 2003 doutw = pvr_pds_encode_doutw_src1( 2004 program->work_group_input_regs[2], 2005 PVR_PDS_DOUTW_UPPER32, 2006 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE | 2007 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN, 2008 true, 2009 dev_info); 2010 2011 pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw); 2012 } 2013 2014 /* Handle the local IDs. */ 2015 if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || 2016 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { 2017 uint32_t dest_reg; 2018 2019 /* If we want local id Y and Z make sure the compiler wants them in 2020 * the same register. 2021 */ 2022 if (!program->flattened_work_groups) { 2023 if ((program->local_input_regs[1] != 2024 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) && 2025 (program->local_input_regs[2] != 2026 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { 2027 assert(program->local_input_regs[1] == 2028 program->local_input_regs[2]); 2029 } 2030 } 2031 2032 if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) 2033 dest_reg = program->local_input_regs[1]; 2034 else 2035 dest_reg = program->local_input_regs[2]; 2036 2037 /* If we want local id X and (Y or Z) then we can do that in a 2038 * single 64-bit DOUTW. 2039 */ 2040 if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 2041 assert(dest_reg == (program->local_input_regs[0] + 1)); 2042 2043 doutw = pvr_pds_encode_doutw_src1( 2044 program->local_input_regs[0], 2045 PVR_PDS_DOUTW_LOWER64, 2046 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, 2047 true, 2048 dev_info); 2049 2050 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 2051 2052 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw); 2053 } 2054 /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW. 
2055 */ 2056 else { 2057 doutw = pvr_pds_encode_doutw_src1( 2058 dest_reg, 2059 PVR_PDS_DOUTW_UPPER32, 2060 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, 2061 true, 2062 dev_info); 2063 2064 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 2065 2066 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw); 2067 } 2068 } 2069 /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW. 2070 */ 2071 else if (program->local_input_regs[0] != 2072 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 2073 doutw = pvr_pds_encode_doutw_src1( 2074 program->local_input_regs[0], 2075 PVR_PDS_DOUTW_LOWER32, 2076 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE | 2077 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN, 2078 true, 2079 dev_info); 2080 2081 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw); 2082 } 2083 } 2084 2085 if (gen_mode == PDS_GENERATE_CODE_SEGMENT || 2086 gen_mode == PDS_GENERATE_SIZES) { 2087 const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT); 2088#define APPEND(X) \ 2089 if (encode) { \ 2090 *buffer = X; \ 2091 buffer++; \ 2092 } else { \ 2093 code_size += sizeof(uint32_t); \ 2094 } 2095 2096 /* Assert that coeff_update_task_branch_size is > 0 because if it is 0 2097 * then we will be doing an infinite loop. 2098 */ 2099 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) 2100 assert(program->coeff_update_task_branch_size > 0); 2101 2102 /* Test whether this is the coefficient update task or not. */ 2103 APPEND( 2104 pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */ 2105 PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */ 2106 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */ 2107 program->coeff_update_task_branch_size /* ADDR */)); 2108 2109 /* Do we need to initialize the barrier coefficient? */ 2110 if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 2111 if (PVR_HAS_QUIRK(dev_info, 51210)) { 2112 /* Initialize the second barrier coefficient registers to zero. 
2113 */ 2114 APPEND(pvr_pds_encode_doutw64(0, /* cc */ 2115 0, /* END */ 2116 barrier_ctrl_word2, /* SRC1 */ 2117 zero_constant64 >> 1)); /* SRC0 */ 2118 } 2119 /* Initialize the coefficient register to zero. */ 2120 APPEND(pvr_pds_encode_doutw64(0, /* cc */ 2121 0, /* END */ 2122 barrier_ctrl_word, /* SRC1 */ 2123 zero_constant64 >> 1)); /* SRC0 */ 2124 } 2125 2126 if (program->add_base_workgroup) { 2127 const uint32_t temp_values[3] = { 0, 1, 3 }; 2128 for (uint32_t workgroup_component = 0; workgroup_component < 3; 2129 workgroup_component++) { 2130 if (program->work_group_input_regs[workgroup_component] == 2131 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) 2132 continue; 2133 2134 APPEND(pvr_pds_inst_encode_add32( 2135 /* cc */ 0x0, 2136 /* ALUM */ 0, 2137 /* SNA */ 0, 2138 /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER + 2139 program->base_workgroup_constant_offset_in_dwords 2140 [workgroup_component], 2141 /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER + 2142 PVR_PDS_CDM_WORK_GROUP_ID_X + 2143 temp_values[workgroup_component], 2144 /* DST (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER + 2145 PVR_PDS_CDM_WORK_GROUP_ID_X + 2146 temp_values[workgroup_component])); 2147 } 2148 } 2149 2150 /* If we are going to put the work-group IDs in coefficients then we 2151 * just need to do the DOUTWs. 
2152 */ 2153 if ((program->work_group_input_regs[0] != 2154 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || 2155 (program->work_group_input_regs[1] != 2156 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { 2157 uint32_t dest_reg; 2158 2159 if (program->work_group_input_regs[0] != 2160 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 2161 dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X; 2162 } else { 2163 dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y; 2164 } 2165 2166 APPEND(pvr_pds_encode_doutw64(0, /* cc */ 2167 0, /* END */ 2168 work_group_id_ctrl_words[0], /* SRC1 2169 */ 2170 dest_reg >> 1)); /* SRC0 */ 2171 } 2172 2173 if (program->work_group_input_regs[2] != 2174 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 2175 APPEND(pvr_pds_encode_doutw64( 2176 0, /* cc */ 2177 0, /* END */ 2178 work_group_id_ctrl_words[1], /* SRC1 */ 2179 (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >> 2180 1)); /* SRC0 */ 2181 } 2182 2183 /* Issue the task to the USC. */ 2184 if (program->kick_usc && program->has_coefficient_update_task) { 2185 APPEND(pvr_pds_encode_doutu(0, /* cc */ 2186 1, /* END */ 2187 usc_control_constant64_coeff_update >> 2188 1)); /* SRC0; DOUTU 64-bit Src0 */ 2189 } 2190 2191 /* Encode a HALT */ 2192 APPEND(pvr_pds_inst_encode_halt(0)); 2193 2194 /* Set the branch size used to skip the coefficient sync task. */ 2195 program->coeff_update_task_branch_size = code_size / sizeof(uint32_t); 2196 2197 /* DOUTW in the local IDs. */ 2198 2199 /* If we want X and Y or Z, we only need one DOUTW. */ 2200 if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) && 2201 ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || 2202 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) { 2203 local_input_register = 2204 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X; 2205 } else { 2206 /* If we just want X. 
*/ 2207 if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 2208 local_input_register = 2209 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X; 2210 } 2211 /* If we just want Y or Z. */ 2212 else if (program->local_input_regs[1] != 2213 PVR_PDS_COMPUTE_INPUT_REG_UNUSED || 2214 program->local_input_regs[2] != 2215 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) { 2216 local_input_register = 2217 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ; 2218 } 2219 } 2220 2221 if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || 2222 (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) || 2223 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) { 2224 APPEND(pvr_pds_encode_doutw64(0, /* cc */ 2225 0, /* END */ 2226 local_id_ctrl_word, /* SRC1 */ 2227 local_input_register >> 1)); /* SRC0 2228 */ 2229 } 2230 2231 if (program->clear_pds_barrier) { 2232 /* Zero the persistent temp (SW fence for context switch). */ 2233 APPEND(pvr_pds_inst_encode_add64( 2234 0, /* cc */ 2235 PVR_ROGUE_PDSINST_ALUM_UNSIGNED, 2236 PVR_ROGUE_PDSINST_MAD_SNA_ADD, 2237 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + 2238 (zero_constant64 >> 1), /* src0 = 0 */ 2239 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + 2240 (zero_constant64 >> 1), /* src1 = 0 */ 2241 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest = 2242 * ptemp64[0] 2243 */ 2244 } 2245 2246 /* If this is a fence, issue the DOUTC. */ 2247 if (program->fence) { 2248 APPEND(pvr_pds_inst_encode_doutc(0, /* cc */ 2249 0 /* END */)); 2250 } 2251 2252 if (program->kick_usc) { 2253 if (program->conditional_render) { 2254 /* Skip if coefficient update task. */ 2255 APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, 2256 0, 2257 PVR_ROGUE_PDSINST_PREDICATE_KEEP, 2258 16)); 2259 2260 /* Load the predicate. */ 2261 APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1)); 2262 2263 /* Load negate constant into temp for CMP. 
*/ 2264 APPEND(pvr_pds_inst_encode_add64( 2265 0, /* cc */ 2266 PVR_ROGUE_PDSINST_ALUM_UNSIGNED, 2267 PVR_ROGUE_PDSINST_MAD_SNA_ADD, 2268 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + 2269 (cond_render_negate_constant >> 1), /* src0 = 0 */ 2270 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + 2271 (zero_constant64 >> 1), /* src1 = 0 */ 2272 PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER + 2273 (cond_render_negate_temp >> 1))); /* dest = ptemp64[0] 2274 */ 2275 2276 APPEND(pvr_pds_inst_encode_wdf(0)); 2277 2278 for (uint32_t i = 0; i < 4; i++) { 2279 APPEND(pvr_pds_inst_encode_stflp32( 2280 1, /* enable immediate */ 2281 0, /* cc */ 2282 PVR_ROGUE_PDSINST_LOP_AND, /* LOP */ 2283 cond_render_pred_temp + i, /* SRC0 */ 2284 cond_render_pred_mask_constant + i, /* SRC1 */ 2285 0, /* SRC2 (Shift) */ 2286 cond_render_pred_temp + i)); /* DST */ 2287 2288 APPEND( 2289 pvr_pds_inst_encode_stflp32(1, /* enable immediate */ 2290 0, /* cc */ 2291 PVR_ROGUE_PDSINST_LOP_OR, /* LOP 2292 */ 2293 cond_render_pred_temp + i, /* SRC0 2294 */ 2295 cond_render_pred_temp, /* SRC1 */ 2296 0, /* SRC2 (Shift) */ 2297 cond_render_pred_temp)); /* DST */ 2298 } 2299 2300 APPEND(pvr_pds_inst_encode_limm(0, /* cc */ 2301 cond_render_pred_temp + 1, /* SRC1 2302 */ 2303 0, /* SRC0 */ 2304 0)); /* GLOBALREG */ 2305 2306 APPEND(pvr_pds_inst_encode_stflp32(1, /* enable immediate */ 2307 0, /* cc */ 2308 PVR_ROGUE_PDSINST_LOP_XOR, /* LOP 2309 */ 2310 cond_render_pred_temp, /* SRC0 */ 2311 cond_render_negate_temp, /* SRC1 2312 */ 2313 0, /* SRC2 (Shift) */ 2314 cond_render_pred_temp)); /* DST 2315 */ 2316 2317 /* Check that the predicate is 0. */ 2318 APPEND(pvr_pds_inst_encode_cmpi( 2319 0, /* cc */ 2320 PVR_ROGUE_PDSINST_COP_EQ, /* LOP */ 2321 (cond_render_pred_temp >> 1) + 2322 PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */ 2323 0)); /* SRC1 */ 2324 2325 /* If predicate is 0, skip DOUTU. 
*/ 2326 APPEND(pvr_pds_inst_encode_bra( 2327 PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC: 2328 P0 */ 2329 0, /* NEG */ 2330 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC: 2331 keep 2332 */ 2333 2)); 2334 } 2335 2336 /* Issue the task to the USC. 2337 * DoutU src1=USC Code Base address, src2=doutu word 2. 2338 */ 2339 APPEND(pvr_pds_encode_doutu(1, /* cc */ 2340 1, /* END */ 2341 usc_control_constant64 >> 1)); /* SRC0; 2342 * DOUTU 2343 * 64-bit 2344 * Src0. 2345 */ 2346 } 2347 2348 /* End the program if the Dout did not already end it. */ 2349 APPEND(pvr_pds_inst_encode_halt(0)); 2350#undef APPEND 2351 } 2352 2353 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 2354 /* Set the data segment pointer and ensure we return 1 past the buffer 2355 * ptr. 2356 */ 2357 program->data_segment = buffer; 2358 2359 buffer += next_constant; 2360 } 2361 2362 /* Require at least one DWORD of PDS data so the program runs. */ 2363 data_size = MAX2(1, data_size); 2364 2365 program->temps_used = temps_used; 2366 program->highest_temp = temps_used; 2367 program->data_size = data_size; 2368 if (gen_mode == PDS_GENERATE_SIZES) 2369 program->code_size = code_size; 2370 2371 return buffer; 2372} 2373 2374/** 2375 * Generates the PDS vertex shader data or code block. This program will do a 2376 * DMA into USC Constants followed by a DOUTU. 2377 * 2378 * \param program Pointer to the PDS vertex shader program. 2379 * \param buffer Pointer to the buffer for the program. 2380 * \param gen_mode Generate code or data. 2381 * \param dev_info PVR device information struct. 2382 * \returns Pointer to just beyond the code/data. 
2383 */ 2384uint32_t *pvr_pds_vertex_shader_sa( 2385 struct pvr_pds_vertex_shader_sa_program *restrict program, 2386 uint32_t *restrict buffer, 2387 enum pvr_pds_generate_mode gen_mode, 2388 const struct pvr_device_info *dev_info) 2389{ 2390 uint32_t next_constant; 2391 uint32_t data_size = 0; 2392 uint32_t code_size = 0; 2393 2394 uint32_t usc_control_constant64 = 0; 2395 uint32_t dma_address_constant64 = 0; 2396 uint32_t dma_control_constant32 = 0; 2397 uint32_t doutw_value_constant64 = 0; 2398 uint32_t doutw_control_constant32 = 0; 2399 uint32_t fence_constant_word = 0; 2400 uint32_t *buffer_base; 2401 uint32_t kick_index; 2402 2403 uint32_t total_num_doutw = 2404 program->num_dword_doutw + program->num_q_word_doutw; 2405 uint32_t total_size_dma = 2406 program->num_dword_doutw + 2 * program->num_q_word_doutw; 2407 2408 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 2409 2410 /* Copy the DMA control words and USC task control words to constants. 2411 * 2412 * Arrange them so that the 64-bit words are together followed by the 32-bit 2413 * words. 2414 */ 2415 if (program->kick_usc) { 2416 usc_control_constant64 = 2417 pvr_pds_get_constants(&next_constant, 2, &data_size); 2418 } 2419 2420 if (program->clear_pds_barrier) { 2421 fence_constant_word = 2422 pvr_pds_get_constants(&next_constant, 2, &data_size); 2423 } 2424 dma_address_constant64 = pvr_pds_get_constants(&next_constant, 2425 2 * program->num_dma_kicks, 2426 &data_size); 2427 2428 /* Assign all unaligned constants together to avoid alignment issues caused 2429 * by pvr_pds_get_constants with even allocation sizes. 
2430 */ 2431 doutw_value_constant64 = pvr_pds_get_constants( 2432 &next_constant, 2433 total_size_dma + total_num_doutw + program->num_dma_kicks, 2434 &data_size); 2435 doutw_control_constant32 = doutw_value_constant64 + total_size_dma; 2436 dma_control_constant32 = doutw_control_constant32 + total_num_doutw; 2437 2438 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 2439 buffer_base = buffer; 2440 2441 if (program->kick_usc) { 2442 /* Src0 for DOUTU. */ 2443 pvr_pds_write_wide_constant(buffer_base, 2444 usc_control_constant64, 2445 program->usc_task_control.src0); /* DOUTU 2446 * 64-bit 2447 * Src0. 2448 */ 2449 buffer += 2; 2450 } 2451 2452 if (program->clear_pds_barrier) { 2453 /* Encode the fence constant src0. Fence barrier is initialized to 2454 * zero. 2455 */ 2456 pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0); 2457 buffer += 2; 2458 } 2459 2460 if (total_num_doutw > 0) { 2461 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) { 2462 /* Write the constant for the coefficient register write. */ 2463 pvr_pds_write_constant64(buffer_base, 2464 doutw_value_constant64, 2465 program->q_word_doutw_value[2 * i], 2466 program->q_word_doutw_value[2 * i + 1]); 2467 pvr_pds_write_constant32( 2468 buffer_base, 2469 doutw_control_constant32, 2470 program->q_word_doutw_control[i] | 2471 ((!program->num_dma_kicks && i == total_num_doutw - 1) 2472 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN 2473 : 0)); 2474 2475 doutw_value_constant64 += 2; 2476 doutw_control_constant32 += 1; 2477 } 2478 2479 for (uint32_t i = 0; i < program->num_dword_doutw; i++) { 2480 /* Write the constant for the coefficient register write. */ 2481 pvr_pds_write_constant32(buffer_base, 2482 doutw_value_constant64, 2483 program->dword_doutw_value[i]); 2484 pvr_pds_write_constant32( 2485 buffer_base, 2486 doutw_control_constant32, 2487 program->dword_doutw_control[i] | 2488 ((!program->num_dma_kicks && i == program->num_dword_doutw - 1) 2489 ? 
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN 2490 : 0)); 2491 2492 doutw_value_constant64 += 1; 2493 doutw_control_constant32 += 1; 2494 } 2495 2496 buffer += total_size_dma + total_num_doutw; 2497 } 2498 2499 if (program->num_dma_kicks == 1) /* Most-common case. */ 2500 { 2501 /* Src0 for DOUTD - Address. */ 2502 pvr_pds_write_dma_address(buffer_base, 2503 dma_address_constant64, 2504 program->dma_address[0], 2505 false, 2506 dev_info); 2507 2508 /* Src1 for DOUTD - Control Word. */ 2509 pvr_pds_write_constant32( 2510 buffer_base, 2511 dma_control_constant32, 2512 program->dma_control[0] | 2513 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN); 2514 2515 /* Move the buffer ptr along as we will return 1 past the buffer. */ 2516 buffer += 3; 2517 } else if (program->num_dma_kicks > 1) { 2518 for (kick_index = 0; kick_index < program->num_dma_kicks - 1; 2519 kick_index++) { 2520 /* Src0 for DOUTD - Address. */ 2521 pvr_pds_write_dma_address(buffer_base, 2522 dma_address_constant64, 2523 program->dma_address[kick_index], 2524 false, 2525 dev_info); 2526 2527 /* Src1 for DOUTD - Control Word. */ 2528 pvr_pds_write_constant32(buffer_base, 2529 dma_control_constant32, 2530 program->dma_control[kick_index]); 2531 dma_address_constant64 += 2; 2532 dma_control_constant32 += 1; 2533 } 2534 2535 /* Src0 for DOUTD - Address. */ 2536 pvr_pds_write_dma_address(buffer_base, 2537 dma_address_constant64, 2538 program->dma_address[kick_index], 2539 false, 2540 dev_info); 2541 2542 /* Src1 for DOUTD - Control Word. */ 2543 pvr_pds_write_constant32( 2544 buffer_base, 2545 dma_control_constant32, 2546 program->dma_control[kick_index] | 2547 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN); 2548 2549 buffer += 3 * program->num_dma_kicks; 2550 } 2551 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 2552 if (program->clear_pds_barrier) { 2553 /* Zero the persistent temp (SW fence for context switch). 
*/ 2554 *buffer++ = pvr_pds_inst_encode_add64( 2555 0, /* cc */ 2556 PVR_ROGUE_PDSINST_ALUM_UNSIGNED, 2557 PVR_ROGUE_PDSINST_MAD_SNA_ADD, 2558 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + 2559 (fence_constant_word >> 1), /* src0 = 0 */ 2560 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + 2561 (fence_constant_word >> 1), /* src1 = 0 */ 2562 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = 2563 * ptemp[0] 2564 */ 2565 } 2566 2567 if (total_num_doutw > 0) { 2568 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) { 2569 /* Set the coefficient register to data value. */ 2570 *buffer++ = pvr_pds_encode_doutw64( 2571 /* cc */ 0, 2572 /* END */ !program->num_dma_kicks && !program->kick_usc && 2573 (i == total_num_doutw - 1), 2574 /* SRC1 */ doutw_control_constant32, 2575 /* SRC0 */ doutw_value_constant64 >> 1); 2576 2577 doutw_value_constant64 += 2; 2578 doutw_control_constant32 += 1; 2579 } 2580 2581 for (uint32_t i = 0; i < program->num_dword_doutw; i++) { 2582 /* Set the coefficient register to data value. */ 2583 *buffer++ = pvr_pds_encode_doutw64( 2584 /* cc */ 0, 2585 /* END */ !program->num_dma_kicks && !program->kick_usc && 2586 (i == program->num_dword_doutw - 1), 2587 /* SRC1 */ doutw_control_constant32, 2588 /* SRC0 */ doutw_value_constant64 >> 1); 2589 2590 doutw_value_constant64 += 1; 2591 doutw_control_constant32 += 1; 2592 } 2593 } 2594 2595 if (program->num_dma_kicks != 0) { 2596 /* DMA the state into the secondary attributes. */ 2597 2598 if (program->num_dma_kicks == 1) /* Most-common case. */ 2599 { 2600 *buffer++ = pvr_pds_encode_doutd( 2601 /* cc */ 0, 2602 /* END */ !program->kick_usc, 2603 /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */ 2604 /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit 2605 * Src0. 
2606 */ 2607 } else { 2608 for (kick_index = 0; kick_index < program->num_dma_kicks; 2609 kick_index++) { 2610 *buffer++ = pvr_pds_encode_doutd( 2611 /* cc */ 0, 2612 /* END */ (!program->kick_usc) && 2613 (kick_index + 1 == program->num_dma_kicks), 2614 /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit 2615 * Src1. 2616 */ 2617 /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 2618 * 64-bit 2619 * Src0. 2620 */ 2621 dma_address_constant64 += 2; 2622 dma_control_constant32 += 1; 2623 } 2624 } 2625 } 2626 2627 if (program->kick_usc) { 2628 /* Kick the USC. */ 2629 *buffer++ = pvr_pds_encode_doutu( 2630 /* cc */ 0, 2631 /* END */ 1, 2632 /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0. 2633 */ 2634 } 2635 2636 if (!program->kick_usc && program->num_dma_kicks == 0 && 2637 total_num_doutw == 0) { 2638 *buffer++ = pvr_pds_inst_encode_halt(0); 2639 } 2640 } 2641 2642 code_size = program->num_dma_kicks + total_num_doutw; 2643 if (program->clear_pds_barrier) 2644 code_size++; /* ADD64 instruction. */ 2645 2646 if (program->kick_usc) 2647 code_size++; 2648 2649 /* If there are no DMAs and no USC kick then code is HALT only. */ 2650 if (code_size == 0) 2651 code_size = 1; 2652 2653 program->data_size = data_size; 2654 program->code_size = code_size; 2655 2656 return buffer; 2657} 2658 2659/** 2660 * Writes the Uniform Data block for the PDS pixel shader secondary attributes 2661 * program. 2662 * 2663 * \param program Pointer to the PDS pixel shader secondary attributes program. 2664 * \param buffer Pointer to the buffer for the code/data. 2665 * \param gen_mode Either code or data can be generated or sizes only updated. 2666 * \returns Pointer to just beyond the buffer for the program/data. 
2667 */ 2668uint32_t *pvr_pds_pixel_shader_uniform_texture_code( 2669 struct pvr_pds_pixel_shader_sa_program *restrict program, 2670 uint32_t *restrict buffer, 2671 enum pvr_pds_generate_mode gen_mode) 2672{ 2673 uint32_t *instruction; 2674 uint32_t code_size = 0; 2675 uint32_t data_size = 0; 2676 uint32_t temps_used = 0; 2677 uint32_t next_constant; 2678 2679 assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) == 2680 0); 2681 2682 assert(gen_mode != PDS_GENERATE_DATA_SEGMENT); 2683 2684 /* clang-format off */ 2685 /* Shape of code segment (note: clear is different) 2686 * 2687 * Code 2688 * +------------+ 2689 * | BRA if0 | 2690 * | DOUTD | 2691 * | ... | 2692 * | DOUTD.halt | 2693 * | uniform | 2694 * | DOUTD | 2695 * | ... | 2696 * | ... | 2697 * | DOUTW | 2698 * | ... | 2699 * | ... | 2700 * | DOUTU.halt | 2701 * | HALT | 2702 * +------------+ 2703 */ 2704 /* clang-format on */ 2705 instruction = buffer; 2706 2707 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 2708 2709 /* The clear color can arrive packed in the right form in the first (or 2710 * first 2) dwords of the shared registers and the program will issue a 2711 * single doutw for this. 2712 */ 2713 if (program->clear && program->packed_clear) { 2714 uint32_t color_constant1 = 2715 pvr_pds_get_constants(&next_constant, 2, &data_size); 2716 2717 uint32_t control_word_constant1 = 2718 pvr_pds_get_constants(&next_constant, 2, &data_size); 2719 2720 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 2721 /* DOUTW the clear color to the USC constants. Predicate with 2722 * uniform loading flag (IF0). 2723 */ 2724 *instruction++ = pvr_pds_encode_doutw64( 2725 /* cc */ 1, /* Only for uniform loading program. */ 2726 /* END */ program->kick_usc ? 0 : 1, /* Last 2727 * instruction 2728 * for a clear. 
2729 */ 2730 /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */ 2731 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */ 2732 2733 code_size += 1; 2734 } 2735 } else if (program->clear) { 2736 uint32_t color_constant1, color_constant2; 2737 2738 if (program->clear_color_dest_reg & 0x1) { 2739 uint32_t color_constant3, control_word_constant1, 2740 control_word_constant2, color_constant4; 2741 2742 color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size); 2743 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size); 2744 color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size); 2745 2746 control_word_constant1 = 2747 pvr_pds_get_constants(&next_constant, 2, &data_size); 2748 control_word_constant2 = 2749 pvr_pds_get_constants(&next_constant, 2, &data_size); 2750 color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size); 2751 2752 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 2753 /* DOUTW the clear color to the USSE constants. Predicate with 2754 * uniform loading flag (IF0). 2755 */ 2756 *instruction++ = pvr_pds_encode_doutw64( 2757 /* cc */ 1, /* Only for Uniform Loading program */ 2758 /* END */ 0, 2759 /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */ 2760 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */ 2761 2762 *instruction++ = pvr_pds_encode_doutw64( 2763 /* cc */ 1, /* Only for Uniform Loading program */ 2764 /* END */ 0, 2765 /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */ 2766 /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */ 2767 2768 *instruction++ = pvr_pds_encode_doutw64( 2769 /* cc */ 1, /* Only for uniform loading program */ 2770 /* END */ program->kick_usc ? 0 : 1, /* Last 2771 * instruction 2772 * for a clear. 
2773 */ 2774 /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */ 2775 /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */ 2776 } 2777 2778 code_size += 3; 2779 } else { 2780 uint32_t control_word_constant, control_word_last_constant; 2781 2782 /* Put the clear color and control words into the first 8 2783 * constants. 2784 */ 2785 color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size); 2786 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size); 2787 control_word_constant = 2788 pvr_pds_get_constants(&next_constant, 2, &data_size); 2789 control_word_last_constant = 2790 pvr_pds_get_constants(&next_constant, 2, &data_size); 2791 2792 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 2793 /* DOUTW the clear color to the USSE constants. Predicate with 2794 * uniform loading flag (IF0). 2795 */ 2796 *instruction++ = pvr_pds_encode_doutw64( 2797 /* cc */ 1, /* Only for Uniform Loading program */ 2798 /* END */ 0, 2799 /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */ 2800 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */ 2801 2802 *instruction++ = pvr_pds_encode_doutw64( 2803 /* cc */ 1, /* Only for uniform loading program */ 2804 /* END */ program->kick_usc ? 0 : 1, /* Last 2805 * instruction 2806 * for a clear. 2807 */ 2808 /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1 2809 */ 2810 /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */ 2811 } 2812 2813 code_size += 2; 2814 } 2815 2816 if (program->kick_usc) { 2817 uint32_t doutu_constant64; 2818 2819 doutu_constant64 = 2820 pvr_pds_get_constants(&next_constant, 2, &data_size); 2821 2822 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 2823 /* Issue the task to the USC. 
2824 * 2825 * dout ds1[constant_use], ds0[constant_use], 2826 * ds1[constant_use], emit 2827 */ 2828 *instruction++ = pvr_pds_encode_doutu( 2829 /* cc */ 0, 2830 /* END */ 1, 2831 /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 2832 */ 2833 } 2834 2835 code_size += 1; 2836 } 2837 2838 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 2839 /* End the program. */ 2840 *instruction++ = pvr_pds_inst_encode_halt(0); 2841 } 2842 code_size += 1; 2843 } else { 2844 uint32_t total_num_doutw = 2845 program->num_dword_doutw + program->num_q_word_doutw; 2846 bool both_textures_and_uniforms = 2847 ((program->num_texture_dma_kicks > 0) && 2848 ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) || 2849 program->kick_usc)); 2850 uint32_t doutu_constant64 = 0; 2851 2852 if (both_textures_and_uniforms) { 2853 /* If the size of a PDS data section is 0, the hardware won't run 2854 * it. We therefore don't need to branch when there is only a 2855 * texture OR a uniform update program. 2856 */ 2857 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 2858 uint32_t branch_address = 2859 MAX2(1 + program->num_texture_dma_kicks, 2); 2860 2861 /* Use If0 to BRAnch to uniform code. */ 2862 *instruction++ = pvr_pds_encode_bra( 2863 /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0, 2864 /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE, 2865 /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP, 2866 /* ADDR */ branch_address); 2867 } 2868 2869 code_size += 1; 2870 } 2871 2872 if (program->num_texture_dma_kicks > 0) { 2873 uint32_t dma_address_constant64; 2874 uint32_t dma_control_constant32; 2875 /* Allocate 3 constant spaces for each kick. The 64-bit constants 2876 * come first followed by the 32-bit constants. 
2877 */ 2878 dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE; 2879 dma_control_constant32 = 2880 dma_address_constant64 + (program->num_texture_dma_kicks * 2); 2881 2882 for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) { 2883 code_size += 1; 2884 if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction) 2885 continue; 2886 2887 /* DMA the state into the secondary attributes. */ 2888 *instruction++ = pvr_pds_encode_doutd( 2889 /* cc */ 0, 2890 /* END */ dma == (program->num_texture_dma_kicks - 1), 2891 /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */ 2892 /* SRC0 */ dma_address_constant64 >> 1); /* DOUT 2893 * 64-bit 2894 * Src0 2895 */ 2896 dma_address_constant64 += 2; 2897 dma_control_constant32 += 1; 2898 } 2899 } else if (both_textures_and_uniforms) { 2900 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 2901 /* End the program. */ 2902 *instruction++ = pvr_pds_inst_encode_halt(0); 2903 } 2904 2905 code_size += 1; 2906 } 2907 2908 /* Reserve space at the beginning of the data segment for the DOUTU Task 2909 * Control if one is needed. 2910 */ 2911 if (program->kick_usc) { 2912 doutu_constant64 = 2913 pvr_pds_get_constants(&next_constant, 2, &data_size); 2914 } 2915 2916 /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The 2917 * 64-bit constants come first followed by the 32-bit constants. 
2918 */ 2919 uint32_t total_size_dma = 2920 program->num_dword_doutw + 2 * program->num_q_word_doutw; 2921 2922 uint32_t dma_address_constant64 = pvr_pds_get_constants( 2923 &next_constant, 2924 program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw, 2925 &data_size); 2926 uint32_t doutw_value_constant64 = 2927 dma_address_constant64 + program->num_uniform_dma_kicks * 2; 2928 uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma; 2929 uint32_t doutw_control_constant32 = 2930 dma_control_constant32 + program->num_uniform_dma_kicks; 2931 2932 if (total_num_doutw > 0) { 2933 pvr_pds_get_constants(&next_constant, 0, &data_size); 2934 2935 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 2936 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) { 2937 /* Set the coefficient register to data value. */ 2938 *instruction++ = pvr_pds_encode_doutw64( 2939 /* cc */ 0, 2940 /* END */ !program->num_uniform_dma_kicks && 2941 !program->kick_usc && (i == total_num_doutw - 1), 2942 /* SRC1 */ doutw_control_constant32, 2943 /* SRC0 */ doutw_value_constant64 >> 1); 2944 2945 doutw_value_constant64 += 2; 2946 doutw_control_constant32 += 1; 2947 } 2948 2949 for (uint32_t i = 0; i < program->num_dword_doutw; i++) { 2950 /* Set the coefficient register to data value. 
*/ 2951 *instruction++ = pvr_pds_encode_doutw64( 2952 /* cc */ 0, 2953 /* END */ !program->num_uniform_dma_kicks && 2954 !program->kick_usc && (i == program->num_dword_doutw - 1), 2955 /* SRC1 */ doutw_control_constant32, 2956 /* SRC0 */ doutw_value_constant64 >> 1); 2957 2958 doutw_value_constant64 += 1; 2959 doutw_control_constant32 += 1; 2960 } 2961 } 2962 code_size += total_num_doutw; 2963 } 2964 2965 if (program->num_uniform_dma_kicks > 0) { 2966 for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) { 2967 code_size += 1; 2968 2969 if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction) 2970 continue; 2971 2972 bool last_instruction = false; 2973 if (!program->kick_usc && 2974 (dma == program->num_uniform_dma_kicks - 1)) { 2975 last_instruction = true; 2976 } 2977 /* DMA the state into the secondary attributes. */ 2978 *instruction++ = pvr_pds_encode_doutd( 2979 /* cc */ 0, 2980 /* END */ last_instruction, 2981 /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 2982 */ 2983 /* SRC0 */ dma_address_constant64 >> 1); /* DOUT 2984 * 64-bit 2985 * Src0 2986 */ 2987 dma_address_constant64 += 2; 2988 dma_control_constant32 += 1; 2989 } 2990 } 2991 2992 if (program->kick_usc) { 2993 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 2994 /* Issue the task to the USC. 2995 * 2996 * dout ds1[constant_use], ds0[constant_use], 2997 * ds1[constant_use], emit 2998 */ 2999 3000 *instruction++ = pvr_pds_encode_doutu( 3001 /* cc */ 0, 3002 /* END */ 1, 3003 /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */ 3004 } 3005 3006 code_size += 1; 3007 } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) { 3008 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 3009 /* End the program. */ 3010 *instruction++ = pvr_pds_inst_encode_halt(0); 3011 } 3012 3013 code_size += 1; 3014 } 3015 } 3016 3017 /* Minimum temp count is 1. 
/**
 * Writes the Uniform Data block for the PDS pixel shader secondary attributes
 * program.
 *
 * Constants are allocated in every mode so that program->data_size is always
 * updated; the constant values themselves are only written to the buffer when
 * gen_mode is PDS_GENERATE_DATA_SEGMENT. Code generation is not handled by
 * this function (asserted).
 *
 * \param program Pointer to the PDS pixel shader secondary attributes program.
 * \param buffer Pointer to the buffer for the code/data. Asserted to be
 *               aligned to PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE.
 * \param gen_mode Either data can be generated or sizes only updated.
 * \param uniform If true the uniform variant of the data segment is laid out
 *                (clear color, DOUTU task control, DOUTWs and uniform DMA
 *                kicks); otherwise only the texture DMA kicks are laid out.
 * \param dev_info PVR device information struct.
 * \returns Pointer to just beyond the constants allocated when gen_mode is
 *          PDS_GENERATE_DATA_SEGMENT, NULL otherwise.
 */
uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
   struct pvr_pds_pixel_shader_sa_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   bool uniform,
   const struct pvr_device_info *dev_info)
{
   uint32_t *constants = buffer;
   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
   /* Never incremented below; the MAX2() at the end reports 1 temp. */
   uint32_t temps_used = 0;
   uint32_t data_size = 0;

   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
          0);

   assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);

   /* Shape of data segment (note: clear is different).
    *
    *   Uniform            Texture
    * +--------------+   +-------------+
    * | USC Task   L |   | USC Task  L |
    * |            H |   |           H |
    * | DMA1 Src0  L |   | DMA1 Src0 L |
    * |            H |   |           H |
    * | DMA2 Src0  L |   |             |
    * |            H |   |             |
    * | DMA1 Src1    |   | DMA1 Src1   |
    * | DMA2 Src1    |   |             |
    * | DOUTW0 Src1  |   |             |
    * | DOUTW1 Src1  |   |             |
    * |   ...        |   |             |
    * | DOUTWn Srcn  |   |             |
    * | other data   |   |             |
    * +--------------+   +-------------+
    */

   /* Generate the PDS pixel shader secondary attributes data.
    *
    * Packed Clear
    * The clear color can arrive packed in the right form in the first (or
    * first 2) dwords of the shared registers and the program will issue a
    * single DOUTW for this.
    */
   if (program->clear && uniform && program->packed_clear) {
      uint32_t color_constant1 =
         pvr_pds_get_constants(&next_constant, 2, &data_size);

      uint32_t control_word_constant1 =
         pvr_pds_get_constants(&next_constant, 2, &data_size);

      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
         uint32_t doutw;

         pvr_pds_write_constant64(constants,
                                  color_constant1,
                                  program->clear_color[0],
                                  program->clear_color[1]);

         /* Load into first constant in common store. */
         doutw = pvr_pds_encode_doutw_src1(
            program->clear_color_dest_reg,
            PVR_PDS_DOUTW_LOWER64,
            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
            false,
            dev_info);

         /* Set the last flag. */
         doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
         pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
      }
   } else if (program->clear && uniform) {
      /* Unpacked clear: all four clear color dwords are written. */
      uint32_t color_constant1, color_constant2;

      if (program->clear_color_dest_reg & 0x1) {
         /* Odd destination register: the color is split into a 32-bit write
          * (dest), a 64-bit write (dest + 1) and a 32-bit write (dest + 3).
          * NOTE(review): presumably because 64-bit DOUTWs need an even
          * destination register - confirm.
          */
         uint32_t color_constant3, control_word_constant1,
            control_word_constant2, color_constant4;

         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);

         control_word_constant1 =
            pvr_pds_get_constants(&next_constant, 2, &data_size);
         control_word_constant2 =
            pvr_pds_get_constants(&next_constant, 2, &data_size);
         /* Despite its name this holds the third DOUTW control word. */
         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);

         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
            uint32_t doutw;

            pvr_pds_write_constant32(constants,
                                     color_constant1,
                                     program->clear_color[0]);

            pvr_pds_write_constant64(constants,
                                     color_constant2,
                                     program->clear_color[1],
                                     program->clear_color[2]);

            pvr_pds_write_constant32(constants,
                                     color_constant3,
                                     program->clear_color[3]);

            /* Load into first constant in common store. */
            doutw = pvr_pds_encode_doutw_src1(
               program->clear_color_dest_reg,
               PVR_PDS_DOUTW_LOWER32,
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
               false,
               dev_info);

            pvr_pds_write_constant64(constants,
                                     control_word_constant1,
                                     doutw,
                                     0);

            /* Move the destination register along. */
            doutw = pvr_pds_encode_doutw_src1(
               program->clear_color_dest_reg + 1,
               PVR_PDS_DOUTW_LOWER64,
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
               false,
               dev_info);

            pvr_pds_write_constant64(constants,
                                     control_word_constant2,
                                     doutw,
                                     0);

            /* Move the destination register along. */
            doutw = pvr_pds_encode_doutw_src1(
               program->clear_color_dest_reg + 3,
               PVR_PDS_DOUTW_LOWER32,
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
               false,
               dev_info);

            /* Set the last flag. */
            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
            pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
         }
      } else {
         /* Even destination register: two 64-bit writes cover the color. */
         uint32_t control_word_constant, control_word_last_constant;

         /* Put the clear color and control words into the first 8
          * constants.
          */
         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
         control_word_constant =
            pvr_pds_get_constants(&next_constant, 2, &data_size);
         control_word_last_constant =
            pvr_pds_get_constants(&next_constant, 2, &data_size);

         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
            uint32_t doutw;
            pvr_pds_write_constant64(constants,
                                     color_constant1,
                                     program->clear_color[0],
                                     program->clear_color[1]);

            pvr_pds_write_constant64(constants,
                                     color_constant2,
                                     program->clear_color[2],
                                     program->clear_color[3]);

            /* Load into first constant in common store. */
            doutw = pvr_pds_encode_doutw_src1(
               program->clear_color_dest_reg,
               PVR_PDS_DOUTW_LOWER64,
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
               false,
               dev_info);

            pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);

            /* Move the destination register along: the first control word is
             * reused with only its AO field replaced.
             */
            doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
            doutw |= (program->clear_color_dest_reg + 2)
                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;

            /* Set the last flag. */
            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
            pvr_pds_write_constant64(constants,
                                     control_word_last_constant,
                                     doutw,
                                     0);
         }
      }

      /* Constants for the DOUTU Task Control, if needed. */
      if (program->kick_usc) {
         uint32_t doutu_constant64 =
            pvr_pds_get_constants(&next_constant, 2, &data_size);

         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
            pvr_pds_write_wide_constant(
               constants,
               doutu_constant64,
               program->usc_task_control.src0); /* 64-bit Src0 */
         }
      }
   } else {
      if (uniform) {
         /* Reserve space at the beginning of the data segment for the DOUTU
          * Task Control if one is needed.
          */
         if (program->kick_usc) {
            uint32_t doutu_constant64 =
               pvr_pds_get_constants(&next_constant, 2, &data_size);

            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
               pvr_pds_write_wide_constant(
                  constants,
                  doutu_constant64,
                  program->usc_task_control.src0); /* 64-bit Src0 */
            }
         }

         /* Number of DOUTW control words and number of dwords of DOUTW
          * values (a q-word value takes two dwords).
          */
         uint32_t total_num_doutw =
            program->num_dword_doutw + program->num_q_word_doutw;
         uint32_t total_size_dma =
            program->num_dword_doutw + 2 * program->num_q_word_doutw;

         /* Allocate 3 constant spaces for each kick. The 64-bit constants
          * come first followed by the 32-bit constants.
          *
          * Resulting order within the single allocation: DMA addresses
          * (2 dwords per kick), DOUTW values, DMA control words (1 dword per
          * kick), DOUTW control words.
          */
         uint32_t dma_address_constant64 =
            pvr_pds_get_constants(&next_constant,
                                  program->num_uniform_dma_kicks * 3 +
                                     total_size_dma + total_num_doutw,
                                  &data_size);
         uint32_t doutw_value_constant64 =
            dma_address_constant64 + program->num_uniform_dma_kicks * 2;
         uint32_t dma_control_constant32 =
            doutw_value_constant64 + total_size_dma;
         uint32_t doutw_control_constant32 =
            dma_control_constant32 + program->num_uniform_dma_kicks;

         if (total_num_doutw > 0) {
            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
               /* Q-word DOUTWs first. LAST_EN is only set when no uniform
                * DMA kicks follow and this is the final DOUTW overall, i.e.
                * there are no dword DOUTWs after it.
                */
               for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
                  pvr_pds_write_constant64(
                     constants,
                     doutw_value_constant64,
                     program->q_word_doutw_value[2 * i],
                     program->q_word_doutw_value[2 * i + 1]);
                  pvr_pds_write_constant32(
                     constants,
                     doutw_control_constant32,
                     program->q_word_doutw_control[i] |
                        ((!program->num_uniform_dma_kicks &&
                          i == total_num_doutw - 1)
                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
                            : 0));

                  doutw_value_constant64 += 2;
                  doutw_control_constant32 += 1;
               }

               /* Dword DOUTWs follow; the last one carries LAST_EN when no
                * uniform DMA kicks follow.
                */
               for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
                  pvr_pds_write_constant32(constants,
                                           doutw_value_constant64,
                                           program->dword_doutw_value[i]);
                  pvr_pds_write_constant32(
                     constants,
                     doutw_control_constant32,
                     program->dword_doutw_control[i] |
                        ((!program->num_uniform_dma_kicks &&
                          i == program->num_dword_doutw - 1)
                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
                            : 0));

                  doutw_value_constant64 += 1;
                  doutw_control_constant32 += 1;
               }
            }
         }

         if (program->num_uniform_dma_kicks > 0) {
            uint32_t kick;

            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
               /* All kicks except the final one are written without the
                * LAST flag.
                */
               for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
                    kick++) {
                  /* Copy the dma control words to constants. */
                  pvr_pds_write_dma_address(constants,
                                            dma_address_constant64,
                                            program->uniform_dma_address[kick],
                                            false,
                                            dev_info);
                  pvr_pds_write_constant32(constants,
                                           dma_control_constant32,
                                           program->uniform_dma_control[kick]);

                  dma_address_constant64 += 2;
                  dma_control_constant32 += 1;
               }

               /* Final kick: same as above with LAST_EN set. */
               pvr_pds_write_dma_address(constants,
                                         dma_address_constant64,
                                         program->uniform_dma_address[kick],
                                         false,
                                         dev_info);
               pvr_pds_write_constant32(
                  constants,
                  dma_control_constant32,
                  program->uniform_dma_control[kick] |
                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
            }
         }

      } else if (program->num_texture_dma_kicks > 0) {
         /* Allocate 3 constant spaces for each kick. The 64-bit constants
          * come first followed by the 32-bit constants.
          */
         uint32_t dma_address_constant64 =
            pvr_pds_get_constants(&next_constant,
                                  program->num_texture_dma_kicks * 3,
                                  &data_size);
         uint32_t dma_control_constant32 =
            dma_address_constant64 + (program->num_texture_dma_kicks * 2);

         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
            uint32_t kick;
            for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
               /* Copy the DMA control words to constants. */
               pvr_pds_write_dma_address(constants,
                                         dma_address_constant64,
                                         program->texture_dma_address[kick],
                                         false,
                                         dev_info);

               pvr_pds_write_constant32(constants,
                                        dma_control_constant32,
                                        program->texture_dma_control[kick]);

               dma_address_constant64 += 2;
               dma_control_constant32 += 1;
            }

            /* Final kick: same as above with LAST_EN set. */
            pvr_pds_write_dma_address(constants,
                                      dma_address_constant64,
                                      program->texture_dma_address[kick],
                                      false,
                                      dev_info);

            pvr_pds_write_constant32(
               constants,
               dma_control_constant32,
               program->texture_dma_control[kick] |
                  PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
         }
      }
   }

   /* Save the data segment pointer and size. */
   program->data_segment = constants;

   /* Minimum temp count is 1. */
   program->temps_used = MAX2(temps_used, 1);
   program->data_size = data_size;

   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
      return (constants + next_constant);
   else
      return NULL;
}
3416 */ 3417uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program, 3418 uint32_t *restrict buffer, 3419 enum pvr_pds_generate_mode gen_mode) 3420{ 3421 uint32_t constant = 0; 3422 3423 /* Automatically get a data size of 1x 128bit chunks. */ 3424 uint32_t data_size = 0, code_size = 0; 3425 3426 /* Setup the data part. */ 3427 uint32_t *constants = buffer; /* Constants placed at front of buffer. */ 3428 uint32_t *instruction = buffer; 3429 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in 3430 * dwords. 3431 */ 3432 3433 /* Update the program sizes. */ 3434 program->data_size = data_size; 3435 program->code_size = code_size; 3436 program->data_segment = constants; 3437 3438 if (gen_mode == PDS_GENERATE_SIZES) 3439 return NULL; 3440 3441 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 3442 /* Copy the USC task control words to constants. */ 3443 3444 constant = pvr_pds_get_constants(&next_constant, 2, &data_size); 3445 pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit 3446 * Src0 3447 */ 3448 3449 uint32_t control_word_constant = 3450 pvr_pds_get_constants(&next_constant, 2, &data_size); 3451 pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit 3452 * Src1 3453 */ 3454 3455 program->data_size = data_size; 3456 buffer += data_size; 3457 3458 return buffer; 3459 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 3460 *instruction++ = pvr_pds_inst_encode_doutc( 3461 /* cc */ 0, 3462 /* END */ 0); 3463 3464 code_size++; 3465 3466 /* End the program. */ 3467 *instruction++ = pvr_pds_inst_encode_halt(0); 3468 code_size++; 3469 3470 program->code_size = code_size; 3471 } 3472 3473 return instruction; 3474} 3475 3476/** 3477 * Generates generic kick DOUTU PDS program in a single data+code block. 3478 * 3479 * \param control Pointer to the PDS kick USC. 3480 * \param buffer Pointer to the buffer for the program. 
3481 * \param gen_mode Either code and data can be generated or sizes only updated. 3482 * \param dev_info PVR device information structure. 3483 * \returns Pointer to just beyond the buffer for the code or program segment. 3484 */ 3485uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control, 3486 uint32_t *restrict buffer, 3487 enum pvr_pds_generate_mode gen_mode, 3488 const struct pvr_device_info *dev_info) 3489{ 3490 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 3491 uint32_t doutw; 3492 uint32_t data_size = 0, code_size = 0; 3493 uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS]; 3494 uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS]; 3495 3496 /* Assert if buffer is exceeded. */ 3497 assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS); 3498 3499 uint32_t *constants = buffer; 3500 uint32_t *instruction = buffer; 3501 3502 /* Put the constants and control words interleaved in the data region. */ 3503 for (uint32_t const_pair = 0; const_pair < control->num_const64; 3504 const_pair++) { 3505 constant[const_pair] = 3506 pvr_pds_get_constants(&next_constant, 2, &data_size); 3507 control_word_constant[const_pair] = 3508 pvr_pds_get_constants(&next_constant, 2, &data_size); 3509 } 3510 3511 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 3512 /* Data segment points to start of constants. */ 3513 control->data_segment = constants; 3514 3515 for (uint32_t const_pair = 0; const_pair < control->num_const64; 3516 const_pair++) { 3517 pvr_pds_write_constant64(constants, 3518 constant[const_pair], 3519 H32(control->doutw_data[const_pair]), 3520 L32(control->doutw_data[const_pair])); 3521 3522 /* Start loading at offset 0. 
*/ 3523 if (control->dest_store == PDS_COMMON_STORE) { 3524 doutw = pvr_pds_encode_doutw_src1( 3525 (2 * const_pair), 3526 PVR_PDS_DOUTW_LOWER64, 3527 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, 3528 false, 3529 dev_info); 3530 } else { 3531 doutw = pvr_pds_encode_doutw_src1( 3532 (2 * const_pair), 3533 PVR_PDS_DOUTW_LOWER64, 3534 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE, 3535 false, 3536 dev_info); 3537 } 3538 3539 if (const_pair + 1 == control->num_const64) { 3540 /* Set the last flag for the MCU (assume there are no following 3541 * DOUTD's). 3542 */ 3543 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; 3544 } 3545 pvr_pds_write_constant64(constants, 3546 control_word_constant[const_pair], 3547 doutw, 3548 0); 3549 } 3550 3551 control->data_size = data_size; 3552 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) { 3553 /* Code section. */ 3554 3555 for (uint32_t const_pair = 0; const_pair < control->num_const64; 3556 const_pair++) { 3557 /* DOUTW the PDS data to the USC constants. */ 3558 *instruction++ = pvr_pds_encode_doutw64( 3559 /* cc */ 0, 3560 /* END */ control->last_instruction && 3561 (const_pair + 1 == control->num_const64), 3562 /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit 3563 * Src1. 3564 */ 3565 /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */ 3566 3567 code_size++; 3568 } 3569 3570 if (control->last_instruction) { 3571 /* End the program. */ 3572 *instruction++ = pvr_pds_inst_encode_halt(0); 3573 code_size++; 3574 } 3575 3576 control->code_size = code_size; 3577 } 3578 3579 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) 3580 return (constants + next_constant); 3581 else 3582 return instruction; 3583} 3584 3585/** 3586 * Generates generic kick DOUTU PDS program in a single data+code block. 3587 * 3588 * \param program Pointer to the PDS kick USC. 3589 * \param buffer Pointer to the buffer for the program. 
3590 * \param start_next_constant Next constant in data segment. Non-zero if another 3591 * instruction precedes the DOUTU. 3592 * \param cc_enabled If true then the DOUTU is predicated (cc set). 3593 * \param gen_mode Either code and data can be generated or sizes only updated. 3594 * \returns Pointer to just beyond the buffer for the code or program segment. 3595 */ 3596uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program, 3597 uint32_t *restrict buffer, 3598 uint32_t start_next_constant, 3599 bool cc_enabled, 3600 enum pvr_pds_generate_mode gen_mode) 3601{ 3602 uint32_t constant = 0; 3603 3604 /* Automatically get a data size of 2 128bit chunks. */ 3605 uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE; 3606 uint32_t code_size = 1; /* Single doutu */ 3607 uint32_t dummy_count = 0; 3608 3609 /* Setup the data part. */ 3610 uint32_t *constants = buffer; /* Constants placed at front of buffer. */ 3611 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in 3612 * dwords. 3613 */ 3614 3615 /* Update the program sizes. */ 3616 program->data_size = data_size; 3617 program->code_size = code_size; 3618 program->data_segment = constants; 3619 3620 if (gen_mode == PDS_GENERATE_SIZES) 3621 return NULL; 3622 3623 if (gen_mode == PDS_GENERATE_DATA_SEGMENT || 3624 gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) { 3625 /* Copy the USC task control words to constants. */ 3626 3627 constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count); 3628 3629 pvr_pds_write_wide_constant(constants, 3630 constant + 0, 3631 program->usc_task_control.src0); /* 64-bit 3632 * Src0. 3633 */ 3634 buffer += data_size; 3635 3636 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) 3637 return buffer; 3638 } 3639 3640 if (gen_mode == PDS_GENERATE_CODE_SEGMENT || 3641 gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) { 3642 /* Generate the PDS pixel shader code. */ 3643 3644 /* Setup the instruction pointer. 
*/ 3645 uint32_t *instruction = buffer; 3646 3647 /* Issue the task to the USC. 3648 * 3649 * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ; 3650 * halt halt 3651 */ 3652 3653 *instruction++ = pvr_pds_encode_doutu( 3654 /* cc */ cc_enabled, 3655 /* END */ 1, 3656 /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU 3657 * 64-bit Src0 3658 */ 3659 3660 /* Return pointer to just after last instruction. */ 3661 return instruction; 3662 } 3663 3664 /* Execution should never reach here; keep compiler happy. */ 3665 return NULL; 3666} 3667 3668uint32_t *pvr_pds_generate_compute_barrier_conditional( 3669 uint32_t *buffer, 3670 enum pvr_pds_generate_mode gen_mode) 3671{ 3672 /* Compute barriers supported. Need to test for coeff sync task. */ 3673 3674 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) 3675 return buffer; /* No data segment. */ 3676 3677 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 3678 /* Test whether this is the coefficient update task or not. */ 3679 *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC 3680 */ 3681 PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG 3682 */ 3683 PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC 3684 */ 3685 1 /* ADDR */); 3686 3687 /* Encode a HALT. */ 3688 *buffer++ = pvr_pds_inst_encode_halt(1); 3689 3690 /* Reset the default predicate to IF0. */ 3691 *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC 3692 */ 3693 PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG 3694 */ 3695 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC 3696 */ 3697 1 /* ADDR */); 3698 } 3699 3700 return buffer; 3701} 3702 3703/** 3704 * Generates program to kick the USC task to store shared. 3705 * 3706 * \param program Pointer to the PDS shared register. 3707 * \param buffer Pointer to the buffer for the program. 3708 * \param gen_mode Either code and data can be generated or sizes only updated. 3709 * \param dev_info PVR device information structure. 3710 * \returns Pointer to just beyond the buffer for the program. 
3711 */ 3712uint32_t *pvr_pds_generate_shared_storing_program( 3713 struct pvr_pds_shared_storing_program *restrict program, 3714 uint32_t *restrict buffer, 3715 enum pvr_pds_generate_mode gen_mode, 3716 const struct pvr_device_info *dev_info) 3717{ 3718 struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task; 3719 struct pvr_pds_doutw_control *doutw_control = &program->doutw_control; 3720 3721 if (gen_mode == PDS_GENERATE_SIZES) 3722 return NULL; 3723 3724 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 3725 uint32_t *constants = buffer; 3726 3727 constants = 3728 pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info); 3729 program->data_size = doutw_control->data_size; 3730 3731 constants = pvr_pds_kick_usc(kick_usc_program, 3732 constants, 3733 0, 3734 program->cc_enable, 3735 gen_mode); 3736 program->data_size += kick_usc_program->data_size; 3737 3738 return constants; 3739 } 3740 3741 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 3742 /* Generate PDS code segment. */ 3743 uint32_t *instruction = buffer; 3744 3745 /* doutw vi1, vi0 3746 * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use], 3747 * emit 3748 */ 3749 instruction = 3750 pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info); 3751 program->code_size = doutw_control->code_size; 3752 3753 /* Offset into data segment follows on from doutw data segment. */ 3754 instruction = pvr_pds_kick_usc(kick_usc_program, 3755 instruction, 3756 doutw_control->data_size, 3757 program->cc_enable, 3758 gen_mode); 3759 program->code_size += kick_usc_program->code_size; 3760 3761 return instruction; 3762 } 3763 3764 /* Execution should never reach here. 
*/ 3765 return NULL; 3766} 3767 3768uint32_t *pvr_pds_generate_fence_terminate_program( 3769 struct pvr_pds_fence_program *restrict program, 3770 uint32_t *restrict buffer, 3771 enum pvr_pds_generate_mode gen_mode, 3772 const struct pvr_device_info *dev_info) 3773{ 3774 uint32_t data_size = 0; 3775 uint32_t code_size = 0; 3776 3777 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 3778 /* Data segment. */ 3779 uint32_t *constants, *constants_base; 3780 3781 constants = constants_base = (uint32_t *)buffer; 3782 3783 /* DOUTC sources are not used, but they must be valid. */ 3784 pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT); 3785 data_size += program->data_size; 3786 3787 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { 3788 /* Append a 64-bit constant with value 1. Used to increment ptemp. 3789 * Return the offset into the data segment. 3790 */ 3791 program->fence_constant_word = 3792 pvr_pds_append_constant64(constants_base, 1, &data_size); 3793 } 3794 3795 program->data_size = data_size; 3796 return constants; 3797 } 3798 3799 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 3800 /* Code segment. 
*/ 3801 uint32_t *instruction = (uint32_t *)buffer; 3802 3803 instruction = pvr_pds_generate_compute_barrier_conditional( 3804 instruction, 3805 PDS_GENERATE_CODE_SEGMENT); 3806 code_size += 3; 3807 3808 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { 3809 /* lock */ 3810 *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */ 3811 3812 /* add64 pt[0], pt[0], #1 */ 3813 *instruction++ = pvr_pds_inst_encode_add64( 3814 0, /* cc */ 3815 PVR_ROGUE_PDSINST_ALUM_UNSIGNED, 3816 PVR_ROGUE_PDSINST_MAD_SNA_ADD, 3817 PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0] 3818 */ 3819 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + 3820 (program->fence_constant_word >> 1), /* src1 = 1 */ 3821 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = 3822 * ptemp[0] 3823 */ 3824 3825 /* release */ 3826 *instruction++ = pvr_pds_inst_encode_release(0); /* cc */ 3827 3828 /* cmp pt[0] EQ 0x4 == Number of USC clusters per phantom */ 3829 *instruction++ = pvr_pds_inst_encode_cmpi( 3830 0, /* cc */ 3831 PVR_ROGUE_PDSINST_COP_EQ, 3832 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0 3833 * = ptemp[0] 3834 */ 3835 PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0)); 3836 3837 /* bra -1 */ 3838 *instruction++ = 3839 pvr_pds_encode_bra(0, /* cc */ 3840 1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE 3841 */ 3842 0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0 3843 */ 3844 -1); /* bra PC */ 3845 code_size += 5; 3846 } 3847 3848 /* DOUTC */ 3849 instruction = pvr_pds_generate_doutc(program, 3850 instruction, 3851 PDS_GENERATE_CODE_SEGMENT); 3852 code_size += program->code_size; 3853 3854 program->code_size = code_size; 3855 return instruction; 3856 } 3857 3858 /* Execution should never reach here. */ 3859 return NULL; 3860} 3861 3862/** 3863 * Generates program to kick the USC task to load shared registers from memory. 3864 * 3865 * \param program Pointer to the PDS shared register. 3866 * \param buffer Pointer to the buffer for the program. 
3867 * \param gen_mode Either code and data can be generated or sizes only updated. 3868 * \param dev_info PVR device information struct. 3869 * \returns Pointer to just beyond the buffer for the program. 3870 */ 3871uint32_t *pvr_pds_generate_compute_shared_loading_program( 3872 struct pvr_pds_shared_storing_program *restrict program, 3873 uint32_t *restrict buffer, 3874 enum pvr_pds_generate_mode gen_mode, 3875 const struct pvr_device_info *dev_info) 3876{ 3877 struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task; 3878 struct pvr_pds_doutw_control *doutw_control = &program->doutw_control; 3879 3880 uint32_t next_constant; 3881 uint32_t data_size = 0; 3882 uint32_t code_size = 0; 3883 3884 /* This needs to persist to the CODE_SEGMENT call. */ 3885 static uint32_t fence_constant_word = 0; 3886 uint64_t zero_constant64 = 0; 3887 3888 if (gen_mode == PDS_GENERATE_SIZES) 3889 return NULL; 3890 3891 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 3892 uint32_t *constants = buffer; 3893 3894 constants = pvr_pds_generate_doutw(doutw_control, 3895 constants, 3896 PDS_GENERATE_DATA_SEGMENT, 3897 dev_info); 3898 data_size += doutw_control->data_size; 3899 3900 constants = pvr_pds_kick_usc(kick_usc_program, 3901 constants, 3902 0, 3903 program->cc_enable, 3904 gen_mode); 3905 data_size += kick_usc_program->data_size; 3906 3907 /* Copy the fence constant value (64-bit). */ 3908 next_constant = data_size; /* Assumes data words fully packed. */ 3909 fence_constant_word = 3910 pvr_pds_get_constants(&next_constant, 2, &data_size); 3911 3912 /* Encode the fence constant src0 (offset measured from start of data 3913 * buffer). Fence barrier is initialized to zero. 3914 */ 3915 pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64); 3916 /* Update the const size. 
*/ 3917 data_size += 2; 3918 constants += 2; 3919 3920 program->data_size = data_size; 3921 return constants; 3922 } 3923 3924 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 3925 /* Generate PDS code segment. */ 3926 uint32_t *instruction = buffer; 3927 3928 /* add64 pt0, c0, c0 3929 * IF [2x Phantoms] 3930 * add64 pt1, c0, c0 3931 * st [constant_mem_addr], pt0, 4 3932 * ENDIF 3933 * doutw vi1, vi0 3934 * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use], 3935 * emit 3936 * 3937 * Zero the persistent temp (SW fence for context switch). 3938 */ 3939 *instruction++ = pvr_pds_inst_encode_add64( 3940 0, /* cc */ 3941 PVR_ROGUE_PDSINST_ALUM_UNSIGNED, 3942 PVR_ROGUE_PDSINST_MAD_SNA_ADD, 3943 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + 3944 (fence_constant_word >> 1), /* src0 3945 * = 0 3946 */ 3947 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER + 3948 (fence_constant_word >> 1), /* src1 3949 * = 0 3950 */ 3951 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0] 3952 */ 3953 code_size++; 3954 3955 instruction = pvr_pds_generate_doutw(doutw_control, 3956 instruction, 3957 PDS_GENERATE_CODE_SEGMENT, 3958 dev_info); 3959 code_size += doutw_control->code_size; 3960 3961 /* Offset into data segment follows on from doutw data segment. */ 3962 instruction = pvr_pds_kick_usc(kick_usc_program, 3963 instruction, 3964 doutw_control->data_size, 3965 program->cc_enable, 3966 gen_mode); 3967 code_size += kick_usc_program->code_size; 3968 3969 program->code_size = code_size; 3970 return instruction; 3971 } 3972 3973 /* Execution should never reach here. */ 3974 return NULL; 3975} 3976 3977/** 3978 * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES. 3979 * Relies on num_fpu_iterators being initialized for size calculation. 3980 * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being 3981 * initialized for program generation. 3982 * 3983 * \param program Pointer to the PDS pixel shader program. 
3984 * \param buffer Pointer to the buffer for the program. 3985 * \param gen_mode Either code and data can be generated or sizes only updated. 3986 * \returns Pointer to just beyond the buffer for the program. 3987 */ 3988uint32_t *pvr_pds_coefficient_loading( 3989 struct pvr_pds_coeff_loading_program *restrict program, 3990 uint32_t *restrict buffer, 3991 enum pvr_pds_generate_mode gen_mode) 3992{ 3993 uint32_t constant; 3994 uint32_t *instruction; 3995 uint32_t total_data_size, code_size; 3996 3997 /* Place constants at the front of the buffer. */ 3998 uint32_t *constants = buffer; 3999 /* Start counting constants from 0. */ 4000 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 4001 4002 /* Save the data segment pointer and size. */ 4003 program->data_segment = constants; 4004 4005 total_data_size = 0; 4006 code_size = 0; 4007 4008 total_data_size += 2 * program->num_fpu_iterators; 4009 code_size += program->num_fpu_iterators; 4010 4011 /* Instructions start where constants finished, but we must take note of 4012 * alignment. 4013 * 4014 * 128-bit boundary = 4 dwords. 4015 */ 4016 total_data_size = ALIGN_POT(total_data_size, 4); 4017 if (gen_mode != PDS_GENERATE_SIZES) { 4018 uint32_t data_size = 0; 4019 uint32_t iterator = 0; 4020 4021 instruction = buffer + total_data_size; 4022 4023 while (iterator < program->num_fpu_iterators) { 4024 uint64_t iterator_word; 4025 4026 /* Copy the USC task control words to constants. */ 4027 constant = pvr_pds_get_constants(&next_constant, 2, &data_size); 4028 4029 /* Write the first iterator. */ 4030 iterator_word = 4031 (uint64_t)program->FPU_iterators[iterator] 4032 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT; 4033 4034 /* Write the destination. */ 4035 iterator_word |= 4036 (uint64_t)program->destination[iterator++] 4037 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT; 4038 4039 /* If this is the last DOUTI word the "Last Issue" bit should be 4040 * set. 
4041 */ 4042 if (iterator >= program->num_fpu_iterators) { 4043 iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN; 4044 } 4045 4046 /* Write the word to the buffer. */ 4047 pvr_pds_write_wide_constant(constants, 4048 constant, 4049 iterator_word); /* 64-bit 4050 Src0 4051 */ 4052 4053 /* Write the DOUT instruction. */ 4054 *instruction++ = pvr_pds_encode_douti( 4055 /* cc */ 0, 4056 /* END */ 0, 4057 /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */ 4058 } 4059 4060 /* Update the last DOUTI instruction to have the END flag set. */ 4061 *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT; 4062 } else { 4063 instruction = NULL; 4064 } 4065 4066 /* Update the data size and code size. Minimum temp count is 1. */ 4067 program->temps_used = 1; 4068 program->data_size = total_data_size; 4069 program->code_size = code_size; 4070 4071 return instruction; 4072} 4073 4074/** 4075 * Generate a single ld/st instruction. This can correspond to one or more 4076 * real ld/st instructions based on the value of count. 4077 * 4078 * \param ld true to generate load, false to generate store. 4079 * \param control Cache mode control. 4080 * \param temp_index Dest temp for load/source temp for store, in 32bits 4081 * register index. 4082 * \param address Source for load/dest for store in bytes. 4083 * \param count Number of dwords for load/store. 4084 * \param next_constant 4085 * \param total_data_size 4086 * \param total_code_size 4087 * \param buffer Pointer to the buffer for the program. 4088 * \param data_fence Issue data fence. 4089 * \param gen_mode Either code and data can be generated or sizes only updated. 4090 * \param dev_info PVR device information structure. 4091 * \returns Pointer to just beyond the buffer for the program. 
4092 */ 4093uint32_t *pvr_pds_generate_single_ldst_instruction( 4094 bool ld, 4095 const struct pvr_pds_ldst_control *control, 4096 uint32_t temp_index, 4097 uint64_t address, 4098 uint32_t count, 4099 uint32_t *next_constant, 4100 uint32_t *total_data_size, 4101 uint32_t *total_code_size, 4102 uint32_t *restrict buffer, 4103 bool data_fence, 4104 enum pvr_pds_generate_mode gen_mode, 4105 const struct pvr_device_info *dev_info) 4106{ 4107 /* A single ld/ST here does NOT actually correspond to a single ld/ST 4108 * instruction, but may needs multiple ld/ST instructions because each ld/ST 4109 * instruction can only ld/ST a restricted max number of dwords which may 4110 * less than count passed here. 4111 */ 4112 4113 uint32_t num_inst; 4114 uint32_t constant; 4115 4116 if (ld) { 4117 /* ld must operate on 64bits unit, and it needs to load from and to 128 4118 * bits aligned. Apart from the last ld, all the other need to ld 2x(x = 4119 * 1, 2, ...) times 64bits unit. 4120 */ 4121 uint32_t per_inst_count = 0; 4122 uint32_t last_inst_count; 4123 4124 assert((gen_mode == PDS_GENERATE_SIZES) || 4125 (((count % 2) == 0) && ((address % 16) == 0) && 4126 (temp_index % 2) == 0)); 4127 4128 count >>= 1; 4129 temp_index >>= 1; 4130 4131 /* Found out how many ld instructions are needed and ld size for the all 4132 * possible ld instructions. 4133 */ 4134 if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) { 4135 num_inst = 1; 4136 last_inst_count = count; 4137 } else { 4138 per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE; 4139 if ((per_inst_count % 2) != 0) 4140 per_inst_count -= 1; 4141 4142 num_inst = count / per_inst_count; 4143 last_inst_count = count - per_inst_count * num_inst; 4144 num_inst += 1; 4145 } 4146 4147 /* Generate all the instructions. */ 4148 for (uint32_t i = 0; i < num_inst; i++) { 4149 if ((i == (num_inst - 1)) && (last_inst_count == 0)) 4150 break; 4151 4152 /* A single load instruction. 
*/ 4153 constant = pvr_pds_get_constants(next_constant, 2, total_data_size); 4154 4155 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 4156 uint64_t ld_src0 = 0; 4157 4158 ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK) 4159 << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT); 4160 ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count 4161 : per_inst_count) & 4162 PVR_ROGUE_PDSINST_LD_COUNT8_MASK) 4163 << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT); 4164 ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK) 4165 << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT); 4166 4167 if (!control) { 4168 ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED; 4169 4170 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) 4171 ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED; 4172 4173 } else { 4174 ld_src0 |= control->cache_control_const; 4175 } 4176 4177 /* Write it to the constant. */ 4178 pvr_pds_write_constant64(buffer, 4179 constant, 4180 (uint32_t)(ld_src0), 4181 (uint32_t)(ld_src0 >> 32)); 4182 4183 /* Adjust value for next ld instruction. */ 4184 temp_index += per_inst_count; 4185 address += (((uint64_t)(per_inst_count)) << 3); 4186 } 4187 4188 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 4189 *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1); 4190 4191 if (data_fence) 4192 *buffer++ = pvr_pds_inst_encode_wdf(0); 4193 } 4194 } 4195 } else { 4196 /* ST needs source memory address to be 32bits aligned. */ 4197 assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0)); 4198 4199 /* Found out how many ST instructions are needed, each ST can only store 4200 * PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits. 4201 */ 4202 num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE; 4203 num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1); 4204 4205 /* Generate all the instructions. */ 4206 for (uint32_t i = 0; i < num_inst; i++) { 4207 /* A single store instruction. 
*/ 4208 constant = pvr_pds_get_constants(next_constant, 2, total_data_size); 4209 4210 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { 4211 uint32_t per_inst_count = 4212 (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE 4213 ? count 4214 : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE); 4215 uint64_t st_src0 = 0; 4216 4217 st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK) 4218 << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT); 4219 st_src0 |= 4220 (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK) 4221 << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT); 4222 st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK) 4223 << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT); 4224 4225 if (!control) { 4226 st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH; 4227 4228 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) { 4229 st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH; 4230 } 4231 4232 } else { 4233 st_src0 |= control->cache_control_const; 4234 } 4235 4236 /* Write it to the constant. */ 4237 pvr_pds_write_constant64(buffer, 4238 constant, 4239 (uint32_t)(st_src0), 4240 (uint32_t)(st_src0 >> 32)); 4241 4242 /* Adjust value for next ST instruction. */ 4243 temp_index += per_inst_count; 4244 count -= per_inst_count; 4245 address += (((uint64_t)(per_inst_count)) << 2); 4246 } 4247 4248 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 4249 *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1); 4250 4251 if (data_fence) 4252 *buffer++ = pvr_pds_inst_encode_wdf(0); 4253 } 4254 } 4255 } 4256 4257 (*total_code_size) += num_inst; 4258 if (data_fence) 4259 (*total_code_size) += num_inst; 4260 4261 if (gen_mode != PDS_GENERATE_SIZES) 4262 return buffer; 4263 return NULL; 4264} 4265 4266/** 4267 * Generate programs used to prepare stream out, i.e., clear stream out buffer 4268 * overflow flags and update Persistent temps by a ld instruction. 4269 * 4270 * This must be used in PPP state update. 
4271 * 4272 * \param program Pointer to the stream out program. 4273 * \param buffer Pointer to the buffer for the program. 4274 * \param store_mode If true then the data is stored to memory. If false then 4275 * the data is loaded from memory. 4276 * \param gen_mode Either code and data can be generated or sizes only updated. 4277 * \param dev_info PVR device information structure. 4278 * \returns Pointer to just beyond the buffer for the program. 4279 */ 4280uint32_t *pvr_pds_generate_stream_out_init_program( 4281 struct pvr_pds_stream_out_init_program *restrict program, 4282 uint32_t *restrict buffer, 4283 bool store_mode, 4284 enum pvr_pds_generate_mode gen_mode, 4285 const struct pvr_device_info *dev_info) 4286{ 4287 uint32_t total_data_size = 0; 4288 uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER; 4289 4290 /* Start counting constants from 0. */ 4291 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 4292 4293 uint32_t total_code_size = 1; 4294 4295 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 4296 /* We only need to clear global stream out predicate, other predicates 4297 * are not used during the stream out buffer overflow test. 4298 */ 4299 *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10); 4300 } 4301 4302 for (uint32_t index = 0; index < program->num_buffers; index++) { 4303 if (program->dev_address_for_buffer_data[index] != 0) { 4304 /* Generate load/store program to load/store persistent temps. */ 4305 4306 /* NOTE: store_mode == true case should be handled by 4307 * StreamOutTerminate. 
4308 */ 4309 buffer = pvr_pds_generate_single_ldst_instruction( 4310 !store_mode, 4311 NULL, 4312 PTDst, 4313 program->dev_address_for_buffer_data[index], 4314 program->pds_buffer_data_size[index], 4315 &next_constant, 4316 &total_data_size, 4317 &total_code_size, 4318 buffer, 4319 false, 4320 gen_mode, 4321 dev_info); 4322 } 4323 4324 PTDst += program->pds_buffer_data_size[index]; 4325 } 4326 4327 total_code_size += 2; 4328 4329 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 4330 /* We need to fence the loading. */ 4331 *buffer++ = pvr_pds_inst_encode_wdf(0); 4332 *buffer++ = pvr_pds_inst_encode_halt(0); 4333 } 4334 4335 /* Save size information to program */ 4336 program->stream_out_init_pds_data_size = 4337 ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */ 4338 /* PDS program code size. */ 4339 program->stream_out_init_pds_code_size = total_code_size; 4340 4341 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) 4342 return buffer + program->stream_out_init_pds_data_size; 4343 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) 4344 return buffer; 4345 4346 return NULL; 4347} 4348 4349/** 4350 * Generate stream out terminate program for stream out. 4351 * 4352 * If pds_persistent_temp_size_to_store is 0, the final primitive written value 4353 * will be stored. 4354 * 4355 * If pds_persistent_temp_size_to_store is non 0, the value of persistent temps 4356 * will be stored into memory. 4357 * 4358 * The stream out terminate program is used to update the PPP state and the data 4359 * and code section cannot be separate. 4360 * 4361 * \param program Pointer to the stream out program. 4362 * \param buffer Pointer to the buffer for the program. 4363 * \param gen_mode Either code and data can be generated or sizes only updated. 4364 * \param dev_info PVR device info structure. 4365 * \returns Pointer to just beyond the buffer for the program. 
4366 */ 4367uint32_t *pvr_pds_generate_stream_out_terminate_program( 4368 struct pvr_pds_stream_out_terminate_program *restrict program, 4369 uint32_t *restrict buffer, 4370 enum pvr_pds_generate_mode gen_mode, 4371 const struct pvr_device_info *dev_info) 4372{ 4373 uint32_t next_constant; 4374 uint32_t total_data_size = 0, total_code_size = 0; 4375 4376 /* Start counting constants from 0. */ 4377 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; 4378 4379 /* Generate store program to store persistent temps. */ 4380 buffer = pvr_pds_generate_single_ldst_instruction( 4381 false, 4382 NULL, 4383 PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER, 4384 program->dev_address_for_storing_persistent_temp, 4385 program->pds_persistent_temp_size_to_store, 4386 &next_constant, 4387 &total_data_size, 4388 &total_code_size, 4389 buffer, 4390 false, 4391 gen_mode, 4392 dev_info); 4393 4394 total_code_size += 2; 4395 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 4396 *buffer++ = pvr_pds_inst_encode_wdf(0); 4397 *buffer++ = pvr_pds_inst_encode_halt(0); 4398 } 4399 4400 /* Save size information to program. */ 4401 program->stream_out_terminate_pds_data_size = 4402 ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */ 4403 /* PDS program code size. */ 4404 program->stream_out_terminate_pds_code_size = total_code_size; 4405 4406 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) 4407 return buffer + program->stream_out_terminate_pds_data_size; 4408 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) 4409 return buffer; 4410 4411 return NULL; 4412} 4413 4414/* DrawArrays works in several steps: 4415 * 4416 * 1) load data from draw_indirect buffer 4417 * 2) tweak data to match hardware formats 4418 * 3) write data to indexblock 4419 * 4) signal the VDM to continue 4420 * 4421 * This is complicated by HW limitations on alignment, as well as a HWBRN. 4422 * 4423 * 1) Load data. 4424 * Loads _must_ be 128-bit aligned. 
 * Because there is no such limitation in the spec we must deal with this by
 * choosing an appropriate earlier address and loading enough dwords that we
 * load the entirety of the buffer.
 *
 * if addr & 0xf:
 *   load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
 *   data = tmp[0 + (addr & 0xf) >> 2]...
 * else:
 *   load [addr] 4 dwords -> tmp[0, 1, 2, 3]
 *   data = tmp[0]...
 *
 *
 * 2) Tweak data.
 * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
 * the VDM control stream. We must subtract 1 from the loaded primCount.
 *
 * However, there is a HWBRN that disallows the ADD32 instruction from sourcing
 * a tmp that is non-64-bit-aligned. To work around this, we must move primCount
 * into another tmp that has the correct alignment. Note: this is only required
 * when data = tmp[even], as primCount is data+1:
 *
 * if data = tmp[even]:
 *   primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
 * else:
 *   primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
 *
 * This boils down to:
 *
 * primCount = data[1]
 * primCountSrc = data[1]
 * if brn_present && (data is even):
 *   mov scratch, primCount
 *   primCountSrc = scratch
 * endif
 * sub primCount, primCountSrc, 1
 *
 * 3) Store Data.
 * Write the now-tweaked data over the top of the indexblock.
 * To ensure the write completes before the VDM re-reads the data, we must cause
 * a data hazard by doing a dummy (dummy meaning we don't care about the
 * returned data) load from the same addresses. Again, because the ld must
 * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
 * index block is 128-bit aligned. This is the client driver's responsibility.
4467 * 4468 * st data[0, 1, 2] -> (idxblock + 4) 4469 * load [idxblock] 4 dwords 4470 * 4471 * 4) Signal the VDM 4472 * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue 4473 * where it is currently fenced on a dummy idxblock that has been inserted by 4474 * the driver. 4475 */ 4476 4477#include "pvr_draw_indirect_arrays0.h" 4478#include "pvr_draw_indirect_arrays1.h" 4479#include "pvr_draw_indirect_arrays2.h" 4480#include "pvr_draw_indirect_arrays3.h" 4481 4482#include "pvr_draw_indirect_arrays_base_instance0.h" 4483#include "pvr_draw_indirect_arrays_base_instance1.h" 4484#include "pvr_draw_indirect_arrays_base_instance2.h" 4485#include "pvr_draw_indirect_arrays_base_instance3.h" 4486 4487#include "pvr_draw_indirect_arrays_base_instance_drawid0.h" 4488#include "pvr_draw_indirect_arrays_base_instance_drawid1.h" 4489#include "pvr_draw_indirect_arrays_base_instance_drawid2.h" 4490#include "pvr_draw_indirect_arrays_base_instance_drawid3.h" 4491 4492#define ENABLE_SLC_MCU_CACHE_CONTROLS(device) \ 4493 ((device)->features.has_slc_mcu_cache_controls \ 4494 ? 
PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED \ 4495 : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS) 4496 4497void pvr_pds_generate_draw_arrays_indirect( 4498 struct pvr_pds_drawindirect_program *restrict program, 4499 uint32_t *restrict buffer, 4500 enum pvr_pds_generate_mode gen_mode, 4501 const struct pvr_device_info *dev_info) 4502{ 4503 if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) || 4504 (gen_mode == PDS_GENERATE_SIZES)) { 4505 const struct pvr_psc_program_output *psc_program = NULL; 4506 switch ((program->arg_buffer >> 2) % 4) { 4507 case 0: 4508 if (program->support_base_instance) { 4509 if (program->increment_draw_id) { 4510 psc_program = 4511 &pvr_draw_indirect_arrays_base_instance_drawid0_program; 4512 } else { 4513 psc_program = &pvr_draw_indirect_arrays_base_instance0_program; 4514 } 4515 } else { 4516 psc_program = &pvr_draw_indirect_arrays0_program; 4517 } 4518 break; 4519 case 1: 4520 if (program->support_base_instance) { 4521 if (program->increment_draw_id) { 4522 psc_program = 4523 &pvr_draw_indirect_arrays_base_instance_drawid1_program; 4524 } else { 4525 psc_program = &pvr_draw_indirect_arrays_base_instance1_program; 4526 } 4527 } else { 4528 psc_program = &pvr_draw_indirect_arrays1_program; 4529 } 4530 break; 4531 case 2: 4532 if (program->support_base_instance) { 4533 if (program->increment_draw_id) { 4534 psc_program = 4535 &pvr_draw_indirect_arrays_base_instance_drawid2_program; 4536 } else { 4537 psc_program = &pvr_draw_indirect_arrays_base_instance2_program; 4538 } 4539 } else { 4540 psc_program = &pvr_draw_indirect_arrays2_program; 4541 } 4542 break; 4543 case 3: 4544 if (program->support_base_instance) { 4545 if (program->increment_draw_id) { 4546 psc_program = 4547 &pvr_draw_indirect_arrays_base_instance_drawid3_program; 4548 } else { 4549 psc_program = &pvr_draw_indirect_arrays_base_instance3_program; 4550 } 4551 } else { 4552 psc_program = &pvr_draw_indirect_arrays3_program; 4553 } 4554 break; 4555 } 4556 4557 if (gen_mode == 
PDS_GENERATE_CODE_SEGMENT) { 4558 memcpy(buffer, 4559 psc_program->code, 4560 psc_program->code_size * sizeof(uint32_t)); 4561#if defined(DUMP_PDS) 4562 for (uint32_t i = 0; i < psc_program->code_size; i++) 4563 PVR_PDS_PRINT_INST(buffer[i]); 4564#endif 4565 } 4566 4567 program->program = *psc_program; 4568 } else { 4569 switch ((program->arg_buffer >> 2) % 4) { 4570 case 0: 4571 if (program->support_base_instance) { 4572 if (program->increment_draw_id) { 4573 pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data( 4574 buffer, 4575 program->arg_buffer & ~0xfull, 4576 dev_info); 4577 pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm( 4578 buffer, 4579 program->index_list_addr_buffer + 4); 4580 pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm( 4581 buffer, 4582 program->index_list_addr_buffer); 4583 pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views( 4584 buffer, 4585 program->num_views); 4586 pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates( 4587 buffer); 4588 } else { 4589 pvr_write_draw_indirect_arrays_base_instance0_di_data( 4590 buffer, 4591 program->arg_buffer & ~0xfull, 4592 dev_info); 4593 pvr_write_draw_indirect_arrays_base_instance0_write_vdm( 4594 buffer, 4595 program->index_list_addr_buffer + 4); 4596 pvr_write_draw_indirect_arrays_base_instance0_flush_vdm( 4597 buffer, 4598 program->index_list_addr_buffer); 4599 pvr_write_draw_indirect_arrays_base_instance0_num_views( 4600 buffer, 4601 program->num_views); 4602 pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer); 4603 } 4604 } else { 4605 pvr_write_draw_indirect_arrays0_di_data(buffer, 4606 program->arg_buffer & 4607 ~0xfull, 4608 dev_info); 4609 pvr_write_draw_indirect_arrays0_write_vdm( 4610 buffer, 4611 program->index_list_addr_buffer + 4); 4612 pvr_write_draw_indirect_arrays0_flush_vdm( 4613 buffer, 4614 program->index_list_addr_buffer); 4615 pvr_write_draw_indirect_arrays0_num_views(buffer, 4616 program->num_views); 4617 
pvr_write_draw_indirect_arrays0_immediates(buffer); 4618 } 4619 break; 4620 case 1: 4621 if (program->support_base_instance) { 4622 if (program->increment_draw_id) { 4623 pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data( 4624 buffer, 4625 program->arg_buffer & ~0xfull, 4626 dev_info); 4627 pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm( 4628 buffer, 4629 program->index_list_addr_buffer + 4); 4630 pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm( 4631 buffer, 4632 program->index_list_addr_buffer); 4633 pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views( 4634 buffer, 4635 program->num_views); 4636 pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates( 4637 buffer); 4638 } else { 4639 pvr_write_draw_indirect_arrays_base_instance1_di_data( 4640 buffer, 4641 program->arg_buffer & ~0xfull, 4642 dev_info); 4643 pvr_write_draw_indirect_arrays_base_instance1_write_vdm( 4644 buffer, 4645 program->index_list_addr_buffer + 4); 4646 pvr_write_draw_indirect_arrays_base_instance1_flush_vdm( 4647 buffer, 4648 program->index_list_addr_buffer); 4649 pvr_write_draw_indirect_arrays_base_instance1_num_views( 4650 buffer, 4651 program->num_views); 4652 pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer); 4653 } 4654 } else { 4655 pvr_write_draw_indirect_arrays1_di_data(buffer, 4656 program->arg_buffer & 4657 ~0xfull, 4658 dev_info); 4659 pvr_write_draw_indirect_arrays1_write_vdm( 4660 buffer, 4661 program->index_list_addr_buffer + 4); 4662 pvr_write_draw_indirect_arrays1_flush_vdm( 4663 buffer, 4664 program->index_list_addr_buffer); 4665 pvr_write_draw_indirect_arrays1_num_views(buffer, 4666 program->num_views); 4667 pvr_write_draw_indirect_arrays1_immediates(buffer); 4668 } 4669 break; 4670 case 2: 4671 if (program->support_base_instance) { 4672 if (program->increment_draw_id) { 4673 pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data( 4674 buffer, 4675 program->arg_buffer & ~0xfull, 4676 dev_info); 
4677 pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm( 4678 buffer, 4679 program->index_list_addr_buffer + 4); 4680 pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm( 4681 buffer, 4682 program->index_list_addr_buffer); 4683 pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views( 4684 buffer, 4685 program->num_views); 4686 pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates( 4687 buffer); 4688 } else { 4689 pvr_write_draw_indirect_arrays_base_instance2_di_data( 4690 buffer, 4691 program->arg_buffer & ~0xfull, 4692 dev_info); 4693 pvr_write_draw_indirect_arrays_base_instance2_write_vdm( 4694 buffer, 4695 program->index_list_addr_buffer + 4); 4696 pvr_write_draw_indirect_arrays_base_instance2_flush_vdm( 4697 buffer, 4698 program->index_list_addr_buffer); 4699 pvr_write_draw_indirect_arrays_base_instance2_num_views( 4700 buffer, 4701 program->num_views); 4702 pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer); 4703 } 4704 } else { 4705 pvr_write_draw_indirect_arrays2_di_data(buffer, 4706 program->arg_buffer & 4707 ~0xfull, 4708 dev_info); 4709 pvr_write_draw_indirect_arrays2_write_vdm( 4710 buffer, 4711 program->index_list_addr_buffer + 4); 4712 pvr_write_draw_indirect_arrays2_flush_vdm( 4713 buffer, 4714 program->index_list_addr_buffer); 4715 pvr_write_draw_indirect_arrays2_num_views(buffer, 4716 program->num_views); 4717 pvr_write_draw_indirect_arrays2_immediates(buffer); 4718 } 4719 break; 4720 case 3: 4721 if (program->support_base_instance) { 4722 if (program->increment_draw_id) { 4723 pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data( 4724 buffer, 4725 program->arg_buffer & ~0xfull, 4726 dev_info); 4727 pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm( 4728 buffer, 4729 program->index_list_addr_buffer + 4); 4730 pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm( 4731 buffer, 4732 program->index_list_addr_buffer); 4733 
pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views( 4734 buffer, 4735 program->num_views); 4736 pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates( 4737 buffer); 4738 } else { 4739 pvr_write_draw_indirect_arrays_base_instance3_di_data( 4740 buffer, 4741 program->arg_buffer & ~0xfull, 4742 dev_info); 4743 pvr_write_draw_indirect_arrays_base_instance3_write_vdm( 4744 buffer, 4745 program->index_list_addr_buffer + 4); 4746 pvr_write_draw_indirect_arrays_base_instance3_flush_vdm( 4747 buffer, 4748 program->index_list_addr_buffer); 4749 pvr_write_draw_indirect_arrays_base_instance3_num_views( 4750 buffer, 4751 program->num_views); 4752 pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer); 4753 } 4754 } else { 4755 pvr_write_draw_indirect_arrays3_di_data(buffer, 4756 program->arg_buffer & 4757 ~0xfull, 4758 dev_info); 4759 pvr_write_draw_indirect_arrays3_write_vdm( 4760 buffer, 4761 program->index_list_addr_buffer + 4); 4762 pvr_write_draw_indirect_arrays3_flush_vdm( 4763 buffer, 4764 program->index_list_addr_buffer); 4765 pvr_write_draw_indirect_arrays3_num_views(buffer, 4766 program->num_views); 4767 pvr_write_draw_indirect_arrays3_immediates(buffer); 4768 } 4769 break; 4770 } 4771 } 4772} 4773 4774#include "pvr_draw_indirect_elements0.h" 4775#include "pvr_draw_indirect_elements1.h" 4776#include "pvr_draw_indirect_elements2.h" 4777#include "pvr_draw_indirect_elements3.h" 4778#include "pvr_draw_indirect_elements_base_instance0.h" 4779#include "pvr_draw_indirect_elements_base_instance1.h" 4780#include "pvr_draw_indirect_elements_base_instance2.h" 4781#include "pvr_draw_indirect_elements_base_instance3.h" 4782#include "pvr_draw_indirect_elements_base_instance_drawid0.h" 4783#include "pvr_draw_indirect_elements_base_instance_drawid1.h" 4784#include "pvr_draw_indirect_elements_base_instance_drawid2.h" 4785#include "pvr_draw_indirect_elements_base_instance_drawid3.h" 4786 4787void pvr_pds_generate_draw_elements_indirect( 4788 struct 
pvr_pds_drawindirect_program *restrict program, 4789 uint32_t *restrict buffer, 4790 enum pvr_pds_generate_mode gen_mode, 4791 const struct pvr_device_info *dev_info) 4792{ 4793 if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) || 4794 (gen_mode == PDS_GENERATE_SIZES)) { 4795 const struct pvr_psc_program_output *psc_program = NULL; 4796 switch ((program->arg_buffer >> 2) % 4) { 4797 case 0: 4798 if (program->support_base_instance) { 4799 if (program->increment_draw_id) { 4800 psc_program = 4801 &pvr_draw_indirect_elements_base_instance_drawid0_program; 4802 } else { 4803 psc_program = &pvr_draw_indirect_elements_base_instance0_program; 4804 } 4805 } else { 4806 psc_program = &pvr_draw_indirect_elements0_program; 4807 } 4808 break; 4809 case 1: 4810 if (program->support_base_instance) { 4811 if (program->increment_draw_id) { 4812 psc_program = 4813 &pvr_draw_indirect_elements_base_instance_drawid1_program; 4814 } else { 4815 psc_program = &pvr_draw_indirect_elements_base_instance1_program; 4816 } 4817 } else { 4818 psc_program = &pvr_draw_indirect_elements1_program; 4819 } 4820 break; 4821 case 2: 4822 if (program->support_base_instance) { 4823 if (program->increment_draw_id) { 4824 psc_program = 4825 &pvr_draw_indirect_elements_base_instance_drawid2_program; 4826 } else { 4827 psc_program = &pvr_draw_indirect_elements_base_instance2_program; 4828 } 4829 } else { 4830 psc_program = &pvr_draw_indirect_elements2_program; 4831 } 4832 break; 4833 case 3: 4834 if (program->support_base_instance) { 4835 if (program->increment_draw_id) { 4836 psc_program = 4837 &pvr_draw_indirect_elements_base_instance_drawid3_program; 4838 } else { 4839 psc_program = &pvr_draw_indirect_elements_base_instance3_program; 4840 } 4841 } else { 4842 psc_program = &pvr_draw_indirect_elements3_program; 4843 } 4844 break; 4845 } 4846 4847 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) { 4848 memcpy(buffer, 4849 psc_program->code, 4850 psc_program->code_size * sizeof(uint32_t)); 4851 4852#if 
defined(DUMP_PDS) 4853 for (uint32_t i = 0; i < psc_program->code_size; i++) 4854 PVR_PDS_PRINT_INST(buffer[i]); 4855#endif 4856 } 4857 4858 program->program = *psc_program; 4859 } else { 4860 switch ((program->arg_buffer >> 2) % 4) { 4861 case 0: 4862 if (program->support_base_instance) { 4863 if (program->increment_draw_id) { 4864 pvr_write_draw_indirect_elements_base_instance_drawid0_di_data( 4865 buffer, 4866 program->arg_buffer & ~0xfull, 4867 dev_info); 4868 pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm( 4869 buffer, 4870 program->index_list_addr_buffer); 4871 pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm( 4872 buffer, 4873 program->index_list_addr_buffer); 4874 pvr_write_draw_indirect_elements_base_instance_drawid0_num_views( 4875 buffer, 4876 program->num_views); 4877 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride( 4878 buffer, 4879 program->index_stride); 4880 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base( 4881 buffer, 4882 program->index_buffer); 4883 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header( 4884 buffer, 4885 program->index_block_header); 4886 pvr_write_draw_indirect_elements_base_instance_drawid0_immediates( 4887 buffer); 4888 } else { 4889 pvr_write_draw_indirect_elements_base_instance0_di_data( 4890 buffer, 4891 program->arg_buffer & ~0xfull, 4892 dev_info); 4893 pvr_write_draw_indirect_elements_base_instance0_write_vdm( 4894 buffer, 4895 program->index_list_addr_buffer); 4896 pvr_write_draw_indirect_elements_base_instance0_flush_vdm( 4897 buffer, 4898 program->index_list_addr_buffer); 4899 pvr_write_draw_indirect_elements_base_instance0_num_views( 4900 buffer, 4901 program->num_views); 4902 pvr_write_draw_indirect_elements_base_instance0_idx_stride( 4903 buffer, 4904 program->index_stride); 4905 pvr_write_draw_indirect_elements_base_instance0_idx_base( 4906 buffer, 4907 program->index_buffer); 4908 
pvr_write_draw_indirect_elements_base_instance0_idx_header( 4909 buffer, 4910 program->index_block_header); 4911 pvr_write_draw_indirect_elements_base_instance0_immediates( 4912 buffer); 4913 } 4914 } else { 4915 pvr_write_draw_indirect_elements0_di_data(buffer, 4916 program->arg_buffer & 4917 ~0xfull, 4918 dev_info); 4919 pvr_write_draw_indirect_elements0_write_vdm( 4920 buffer, 4921 program->index_list_addr_buffer); 4922 pvr_write_draw_indirect_elements0_flush_vdm( 4923 buffer, 4924 program->index_list_addr_buffer); 4925 pvr_write_draw_indirect_elements0_num_views(buffer, 4926 program->num_views); 4927 pvr_write_draw_indirect_elements0_idx_stride(buffer, 4928 program->index_stride); 4929 pvr_write_draw_indirect_elements0_idx_base(buffer, 4930 program->index_buffer); 4931 pvr_write_draw_indirect_elements0_idx_header( 4932 buffer, 4933 program->index_block_header); 4934 pvr_write_draw_indirect_elements0_immediates(buffer); 4935 } 4936 break; 4937 case 1: 4938 if (program->support_base_instance) { 4939 if (program->increment_draw_id) { 4940 pvr_write_draw_indirect_elements_base_instance_drawid1_di_data( 4941 buffer, 4942 program->arg_buffer & ~0xfull, 4943 dev_info); 4944 pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm( 4945 buffer, 4946 program->index_list_addr_buffer); 4947 pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm( 4948 buffer, 4949 program->index_list_addr_buffer); 4950 pvr_write_draw_indirect_elements_base_instance_drawid1_num_views( 4951 buffer, 4952 program->num_views); 4953 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride( 4954 buffer, 4955 program->index_stride); 4956 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base( 4957 buffer, 4958 program->index_buffer); 4959 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header( 4960 buffer, 4961 program->index_block_header); 4962 pvr_write_draw_indirect_elements_base_instance_drawid1_immediates( 4963 buffer); 4964 } else { 4965 
pvr_write_draw_indirect_elements_base_instance1_di_data( 4966 buffer, 4967 program->arg_buffer & ~0xfull, 4968 dev_info); 4969 pvr_write_draw_indirect_elements_base_instance1_write_vdm( 4970 buffer, 4971 program->index_list_addr_buffer); 4972 pvr_write_draw_indirect_elements_base_instance1_flush_vdm( 4973 buffer, 4974 program->index_list_addr_buffer); 4975 pvr_write_draw_indirect_elements_base_instance1_num_views( 4976 buffer, 4977 program->num_views); 4978 pvr_write_draw_indirect_elements_base_instance1_idx_stride( 4979 buffer, 4980 program->index_stride); 4981 pvr_write_draw_indirect_elements_base_instance1_idx_base( 4982 buffer, 4983 program->index_buffer); 4984 pvr_write_draw_indirect_elements_base_instance1_idx_header( 4985 buffer, 4986 program->index_block_header); 4987 pvr_write_draw_indirect_elements_base_instance1_immediates( 4988 buffer); 4989 } 4990 } else { 4991 pvr_write_draw_indirect_elements1_di_data(buffer, 4992 program->arg_buffer & 4993 ~0xfull, 4994 dev_info); 4995 pvr_write_draw_indirect_elements1_write_vdm( 4996 buffer, 4997 program->index_list_addr_buffer); 4998 pvr_write_draw_indirect_elements1_flush_vdm( 4999 buffer, 5000 program->index_list_addr_buffer); 5001 pvr_write_draw_indirect_elements1_num_views(buffer, 5002 program->num_views); 5003 pvr_write_draw_indirect_elements1_idx_stride(buffer, 5004 program->index_stride); 5005 pvr_write_draw_indirect_elements1_idx_base(buffer, 5006 program->index_buffer); 5007 pvr_write_draw_indirect_elements1_idx_header( 5008 buffer, 5009 program->index_block_header); 5010 pvr_write_draw_indirect_elements1_immediates(buffer); 5011 } 5012 break; 5013 case 2: 5014 if (program->support_base_instance) { 5015 if (program->increment_draw_id) { 5016 pvr_write_draw_indirect_elements_base_instance_drawid2_di_data( 5017 buffer, 5018 program->arg_buffer & ~0xfull, 5019 dev_info); 5020 pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm( 5021 buffer, 5022 program->index_list_addr_buffer); 5023 
pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm( 5024 buffer, 5025 program->index_list_addr_buffer); 5026 pvr_write_draw_indirect_elements_base_instance_drawid2_num_views( 5027 buffer, 5028 program->num_views); 5029 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride( 5030 buffer, 5031 program->index_stride); 5032 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base( 5033 buffer, 5034 program->index_buffer); 5035 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header( 5036 buffer, 5037 program->index_block_header); 5038 pvr_write_draw_indirect_elements_base_instance_drawid2_immediates( 5039 buffer); 5040 } else { 5041 pvr_write_draw_indirect_elements_base_instance2_di_data( 5042 buffer, 5043 program->arg_buffer & ~0xfull, 5044 dev_info); 5045 pvr_write_draw_indirect_elements_base_instance2_write_vdm( 5046 buffer, 5047 program->index_list_addr_buffer); 5048 pvr_write_draw_indirect_elements_base_instance2_flush_vdm( 5049 buffer, 5050 program->index_list_addr_buffer); 5051 pvr_write_draw_indirect_elements_base_instance2_num_views( 5052 buffer, 5053 program->num_views); 5054 pvr_write_draw_indirect_elements_base_instance2_idx_stride( 5055 buffer, 5056 program->index_stride); 5057 pvr_write_draw_indirect_elements_base_instance2_idx_base( 5058 buffer, 5059 program->index_buffer); 5060 pvr_write_draw_indirect_elements_base_instance2_idx_header( 5061 buffer, 5062 program->index_block_header); 5063 pvr_write_draw_indirect_elements_base_instance2_immediates( 5064 buffer); 5065 } 5066 } else { 5067 pvr_write_draw_indirect_elements2_di_data(buffer, 5068 program->arg_buffer & 5069 ~0xfull, 5070 dev_info); 5071 pvr_write_draw_indirect_elements2_write_vdm( 5072 buffer, 5073 program->index_list_addr_buffer); 5074 pvr_write_draw_indirect_elements2_flush_vdm( 5075 buffer, 5076 program->index_list_addr_buffer); 5077 pvr_write_draw_indirect_elements2_num_views(buffer, 5078 program->num_views); 5079 
pvr_write_draw_indirect_elements2_idx_stride(buffer, 5080 program->index_stride); 5081 pvr_write_draw_indirect_elements2_idx_base(buffer, 5082 program->index_buffer); 5083 pvr_write_draw_indirect_elements2_idx_header( 5084 buffer, 5085 program->index_block_header); 5086 pvr_write_draw_indirect_elements2_immediates(buffer); 5087 } 5088 break; 5089 case 3: 5090 if (program->support_base_instance) { 5091 if (program->increment_draw_id) { 5092 pvr_write_draw_indirect_elements_base_instance_drawid3_di_data( 5093 buffer, 5094 program->arg_buffer & ~0xfull, 5095 dev_info); 5096 pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm( 5097 buffer, 5098 program->index_list_addr_buffer); 5099 pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm( 5100 buffer, 5101 program->index_list_addr_buffer); 5102 pvr_write_draw_indirect_elements_base_instance_drawid3_num_views( 5103 buffer, 5104 program->num_views); 5105 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride( 5106 buffer, 5107 program->index_stride); 5108 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base( 5109 buffer, 5110 program->index_buffer); 5111 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header( 5112 buffer, 5113 program->index_block_header); 5114 pvr_write_draw_indirect_elements_base_instance_drawid3_immediates( 5115 buffer); 5116 } else { 5117 pvr_write_draw_indirect_elements_base_instance3_di_data( 5118 buffer, 5119 program->arg_buffer & ~0xfull, 5120 dev_info); 5121 pvr_write_draw_indirect_elements_base_instance3_write_vdm( 5122 buffer, 5123 program->index_list_addr_buffer); 5124 pvr_write_draw_indirect_elements_base_instance3_flush_vdm( 5125 buffer, 5126 program->index_list_addr_buffer); 5127 pvr_write_draw_indirect_elements_base_instance3_num_views( 5128 buffer, 5129 program->num_views); 5130 pvr_write_draw_indirect_elements_base_instance3_idx_stride( 5131 buffer, 5132 program->index_stride); 5133 
pvr_write_draw_indirect_elements_base_instance3_idx_base( 5134 buffer, 5135 program->index_buffer); 5136 pvr_write_draw_indirect_elements_base_instance3_idx_header( 5137 buffer, 5138 program->index_block_header); 5139 pvr_write_draw_indirect_elements_base_instance3_immediates( 5140 buffer); 5141 } 5142 } else { 5143 pvr_write_draw_indirect_elements3_di_data(buffer, 5144 program->arg_buffer & 5145 ~0xfull, 5146 dev_info); 5147 pvr_write_draw_indirect_elements3_write_vdm( 5148 buffer, 5149 program->index_list_addr_buffer); 5150 pvr_write_draw_indirect_elements3_flush_vdm( 5151 buffer, 5152 program->index_list_addr_buffer); 5153 pvr_write_draw_indirect_elements3_num_views(buffer, 5154 program->num_views); 5155 pvr_write_draw_indirect_elements3_idx_stride(buffer, 5156 program->index_stride); 5157 pvr_write_draw_indirect_elements3_idx_base(buffer, 5158 program->index_buffer); 5159 pvr_write_draw_indirect_elements3_idx_header( 5160 buffer, 5161 program->index_block_header); 5162 pvr_write_draw_indirect_elements3_immediates(buffer); 5163 } 5164 break; 5165 } 5166 } 5167} 5168