1/*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26#include <stdint.h>
27#include <stdio.h>
28#include <string.h>
29
30#include "pvr_device_info.h"
31#include "pvr_pds.h"
32#include "pvr_rogue_pds_defs.h"
33#include "pvr_rogue_pds_disasm.h"
34#include "pvr_rogue_pds_encode.h"
35#include "util/log.h"
36#include "util/macros.h"
37
/* Extract the upper (H32) or lower (L32) 32 bits of a 64-bit value as a
 * uint32_t. X must be at least 64 bits wide for H32, since it is shifted right
 * by 32. The expansions are fully parenthesized so that they compose safely
 * with adjacent operators (e.g. sizeof or postfix expressions).
 */
#define H32(X) ((uint32_t)(((X) >> 32U) & 0xFFFFFFFFUL))
#define L32(X) ((uint32_t)((X) & 0xFFFFFFFFUL))
40
41/*****************************************************************************
42 Macro definitions
43*****************************************************************************/
44
/* NOTE(review): presumably the shift converting between dword and byte units
 * (1 << 2 == 4 bytes per dword) - confirm against its uses.
 */
#define PVR_PDS_DWORD_SHIFT 2

/* Index ranges handed out by the constant and temp allocators in this file:
 * constant allocation starts at PVR_PDS_CONSTANTS_BLOCK_BASE and is asserted
 * not to exceed PVR_PDS_CONSTANTS_BLOCK_SIZE; temp allocation starts at
 * PVR_PDS_TEMPS_BLOCK_BASE and is asserted not to exceed
 * PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_TEMPS_BLOCK_SIZE.
 */
#define PVR_PDS_CONSTANTS_BLOCK_BASE 0
#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128
#define PVR_PDS_TEMPS_BLOCK_BASE 128
#define PVR_PDS_TEMPS_BLOCK_SIZE 32

/* The maximum ST/LD counts are defined as the full count field masks. */
#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK
#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK

/* Map PDS temp registers to the CDM values they contain. Work-group IDs are
 * only available in the coefficient sync task.
 */
#define PVR_PDS_CDM_WORK_GROUP_ID_X 0
#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
/* Local IDs are available in every task. */
#define PVR_PDS_CDM_LOCAL_ID_X 0
#define PVR_PDS_CDM_LOCAL_ID_YZ 1

/* DOUTW dword mask selectors. These are used as indices into
 * dword_mask_const[], which has PVR_PDS_DOUTW_MAXMASK entries.
 */
#define PVR_PDS_DOUTW_LOWER32 0x0
#define PVR_PDS_DOUTW_UPPER32 0x1
#define PVR_PDS_DOUTW_LOWER64 0x2
#define PVR_PDS_DOUTW_LOWER128 0x3
#define PVR_PDS_DOUTW_MAXMASK 0x4

#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U
#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U)
73
74/*****************************************************************************
75 Static variables
76*****************************************************************************/
77
78static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = {
79   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER,
80   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER,
81   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64,
82   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64
83};
84
85/* If has_slc_mcu_cache_control is enabled use cache_control_const[0], else use
86 * cache_control_const[1].
87 */
88static const uint32_t cache_control_const[2][2] = {
89   { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS,
90     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED },
91   { 0, 0 }
92};
93
94/*****************************************************************************
95 Function definitions
96*****************************************************************************/
97
98uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
99                                uint64_t count8,
100                                uint64_t src_add,
101                                bool cached,
102                                const struct pvr_device_info *dev_info)
103{
104   uint64_t encoded = 0;
105
106   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
107      encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED
108                         : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS);
109   }
110
111   encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
112               << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
113   encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
114               << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
115   encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED
116                      : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS);
117   encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK)
118               << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
119
120   return encoded;
121}
122
123uint64_t pvr_pds_encode_st_src0(uint64_t src,
124                                uint64_t count4,
125                                uint64_t dst_add,
126                                bool write_through,
127                                const struct pvr_device_info *device_info)
128{
129   uint64_t encoded = 0;
130
131   if (device_info->features.has_slc_mcu_cache_controls) {
132      encoded |= (write_through
133                     ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH
134                     : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK);
135   }
136
137   encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
138               << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
139   encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
140               << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
141   encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH
142                             : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK);
143   encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK)
144               << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
145
146   return encoded;
147}
148
149static ALWAYS_INLINE uint32_t
150pvr_pds_encode_doutw_src1(uint32_t dest,
151                          uint32_t dword_mask,
152                          uint32_t flags,
153                          bool cached,
154                          const struct pvr_device_info *dev_info)
155{
156   assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) ||
157          ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) ||
158          (dword_mask < PVR_PDS_DOUTW_LOWER64));
159
160   uint32_t encoded =
161      (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT);
162
163   encoded |= dword_mask_const[dword_mask];
164
165   encoded |= flags;
166
167   encoded |=
168      cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0
169                                                                            : 1]
170                         [cached ? 1 : 0];
171   return encoded;
172}
173
174static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc,
175                                                     uint32_t end,
176                                                     uint32_t src1,
177                                                     uint32_t src0)
178{
179   return pvr_pds_inst_encode_dout(cc,
180                                   end,
181                                   src1,
182                                   src0,
183                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
184}
185
186static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
187                                                   uint32_t end,
188                                                   uint32_t src0)
189{
190   return pvr_pds_inst_encode_dout(cc,
191                                   end,
192                                   0,
193                                   src0,
194                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
195}
196
197static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc,
198                                                        uint32_t end)
199{
200   return pvr_pds_inst_encode_dout(cc,
201                                   end,
202                                   0,
203                                   0,
204                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTC);
205}
206
207static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc,
208                                                   uint32_t end,
209                                                   uint32_t src1,
210                                                   uint32_t src0)
211{
212   return pvr_pds_inst_encode_dout(cc,
213                                   end,
214                                   src1,
215                                   src0,
216                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
217}
218
219static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc,
220                                                   uint32_t end,
221                                                   uint32_t src0)
222{
223   return pvr_pds_inst_encode_dout(cc,
224                                   end,
225                                   0,
226                                   src0,
227                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTI);
228}
229
230static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc,
231                                                 uint32_t neg,
232                                                 uint32_t setc,
233                                                 int32_t relative_address)
234{
235   /* Address should be signed but API only allows unsigned value. */
236   return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address);
237}
238
239/**
240 * Gets the next constant address and moves the next constant pointer along.
241 *
242 * \param next_constant Pointer to the next constant address.
243 * \param num_constants The number of constants required.
244 * \param count The number of constants allocated.
245 * \return The address of the next constant.
246 */
247static uint32_t pvr_pds_get_constants(uint32_t *next_constant,
248                                      uint32_t num_constants,
249                                      uint32_t *count)
250{
251   uint32_t constant;
252
253   /* Work out starting constant number. For even number of constants, start on
254    * a 64-bit boundary.
255    */
256   if (num_constants & 1)
257      constant = *next_constant;
258   else
259      constant = (*next_constant + 1) & ~1;
260
261   /* Update the count with the number of constants actually allocated. */
262   *count += constant + num_constants - *next_constant;
263
264   /* Move the next constant pointer. */
265   *next_constant = constant + num_constants;
266
267   assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE);
268
269   return constant;
270}
271
272/**
273 * Gets the next temp address and moves the next temp pointer along.
274 *
275 * \param next_temp Pointer to the next temp address.
276 * \param num_temps The number of temps required.
277 * \param count The number of temps allocated.
278 * \return The address of the next temp.
279 */
280static uint32_t
281pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count)
282{
283   uint32_t temp;
284
285   /* Work out starting temp number. For even number of temps, start on a
286    * 64-bit boundary.
287    */
288   if (num_temps & 1)
289      temp = *next_temp;
290   else
291      temp = (*next_temp + 1) & ~1;
292
293   /* Update the count with the number of temps actually allocated. */
294   *count += temp + num_temps - *next_temp;
295
296   /* Move the next temp pointer. */
297   *next_temp = temp + num_temps;
298
299   assert((temp + num_temps) <=
300          (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE));
301
302   return temp;
303}
304
305/**
306 * Write a 32-bit constant indexed by the long range.
307 *
308 * \param data_block Pointer to data block to write to.
309 * \param index Index within the data to write to.
310 * \param dword The 32-bit constant to write.
311 */
312static void
313pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0)
314{
315   /* Check range. */
316   assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER -
317                    PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER));
318
319   data_block[index + 0] = dword0;
320
321   PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index);
322}
323
324/**
325 * Write a 64-bit constant indexed by the long range.
326 *
327 * \param data_block Pointer to data block to write to.
328 * \param index Index within the data to write to.
329 * \param dword0 Lower half of the 64 bit constant.
330 * \param dword1 Upper half of the 64 bit constant.
331 */
332static void pvr_pds_write_constant64(uint32_t *data_block,
333                                     uint32_t index,
334                                     uint32_t dword0,
335                                     uint32_t dword1)
336{
337   /* Has to be on 64 bit boundary. */
338   assert((index & 1) == 0);
339
340   /* Check range. */
341   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
342                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
343
344   data_block[index + 0] = dword0;
345   data_block[index + 1] = dword1;
346
347   PVR_PDS_PRINT_DATA("WriteConstant64",
348                      ((uint64_t)dword0 << 32) | (uint64_t)dword1,
349                      index);
350}
351
352/**
353 * Write a 64-bit constant from a single wide word indexed by the long-range
354 * number.
355 *
356 * \param data_block Pointer to data block to write to.
357 * \param index Index within the data to write to.
358 * \param word The 64-bit constant to write.
359 */
360
361static void
362pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word)
363{
364   /* Has to be on 64 bit boundary. */
365   assert((index & 1) == 0);
366
367   /* Check range. */
368   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
369                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
370
371   data_block[index + 0] = L32(word);
372   data_block[index + 1] = H32(word);
373
374   PVR_PDS_PRINT_DATA("WriteWideConstant", word, index);
375}
376
377static void pvr_pds_write_dma_address(uint32_t *data_block,
378                                      uint32_t index,
379                                      uint64_t address,
380                                      bool coherent,
381                                      const struct pvr_device_info *dev_info)
382{
383   /* Has to be on 64 bit boundary. */
384   assert((index & 1) == 0);
385
386   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
387      address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
388
389   /* Check range. */
390   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
391                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
392
393   data_block[index + 0] = L32(address);
394   data_block[index + 1] = H32(address);
395
396   PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index);
397}
398
/**
 * External API to append a 64-bit constant to an existing data segment
 * allocation.
 *
 * \param constants Pointer to start of data segment.
 * \param constant_value Value to write to constant.
 * \param data_size In/out number of constants allocated so far; used as the
 *                  starting point for the allocation and incremented by the
 *                  number of constants consumed.
 * \returns The index of the constant that was written.
 */
uint32_t pvr_pds_append_constant64(uint32_t *constants,
                                   uint64_t constant_value,
                                   uint32_t *data_size)
{
   /* The current data size doubles as the next free constant index. */
   uint32_t cursor = *data_size;
   const uint32_t slot = pvr_pds_get_constants(&cursor, 2, data_size);

   pvr_pds_write_wide_constant(constants, slot, constant_value);

   return slot;
}
421
422void pvr_pds_pixel_shader_sa_initialize(
423   struct pvr_pds_pixel_shader_sa_program *program)
424{
425   memset(program, 0, sizeof(*program));
426}
427
428/**
429 * Encode a DMA burst.
430 *
431 * \param dma_control DMA control words.
432 * \param dma_address DMA address.
433 * \param dest_offset Destination offset in the attribute.
434 * \param dma_size The size of the DMA in words.
435 * \param src_address Source address for the burst.
436 * \param dev_info PVR device info structure.
437 * \returns The number of DMA transfers required.
438 */
439
440uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
441                                  uint64_t *dma_address,
442                                  uint32_t dest_offset,
443                                  uint32_t dma_size,
444                                  uint64_t src_address,
445                                  const struct pvr_device_info *dev_info)
446{
447   /* Simplified for MS2. */
448
449   /* Force to 1 DMA. */
450   const uint32_t num_kicks = 1;
451
452   dma_control[0] = dma_size
453                    << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
454   dma_control[0] |= dest_offset
455                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
456
457   dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
458                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
459
460   dma_address[0] = src_address;
461   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
462      dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
463   }
464
465   return num_kicks;
466}
467
468/* FIXME: use the csbgen interface and pvr_csb_pack.
469 * FIXME: use bool for phase_rate_change.
470 */
471/**
472 * Sets up the USC control words for a DOUTU.
473 *
474 * \param usc_task_control USC task control structure to be setup.
475 * \param execution_address USC execution virtual address.
476 * \param usc_temps Number of USC temps.
477 * \param sample_rate Sample rate for the DOUTU.
478 * \param phase_rate_change Phase rate change for the DOUTU.
479 */
480void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
481                         uint64_t execution_address,
482                         uint32_t usc_temps,
483                         uint32_t sample_rate,
484                         bool phase_rate_change)
485{
486   usc_task_control->src0 = UINT64_C(0);
487
488   /* Set the execution address. */
489   pvr_set_usc_execution_address64(&(usc_task_control->src0),
490                                   execution_address);
491
492   if (usc_temps > 0) {
493      /* Temps are allocated in blocks of 4 dwords. */
494      usc_temps =
495         DIV_ROUND_UP(usc_temps,
496                      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE);
497
498      /* Check for losing temps due to too many requested. */
499      assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) ==
500             usc_temps);
501
502      usc_task_control->src0 |=
503         ((uint64_t)(usc_temps &
504                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK))
505         << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT;
506   }
507
508   if (sample_rate > 0) {
509      usc_task_control->src0 |=
510         ((uint64_t)sample_rate)
511         << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT;
512   }
513
514   if (phase_rate_change) {
515      usc_task_control->src0 |=
516         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN;
517   }
518}
519
520/**
521 * Generates the PDS pixel event program.
522 *
523 * \param program Pointer to the PDS pixel event program.
524 * \param buffer Pointer to the buffer for the program.
525 * \param gen_mode Generate either a data segment or code segment.
526 * \param dev_info PVR device info structure.
527 * \returns Pointer to just beyond the buffer for the program.
528 */
529uint32_t *
530pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
531                             uint32_t *restrict buffer,
532                             enum pvr_pds_generate_mode gen_mode,
533                             const struct pvr_device_info *dev_info)
534{
535   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
536   uint32_t *constants = buffer;
537
538   uint32_t data_size = 0;
539
540   /* Copy the DMA control words and USC task control words to constants, then
541    * arrange them so that the 64-bit words are together followed by the 32-bit
542    * words.
543    */
544   uint32_t control_constant =
545      pvr_pds_get_constants(&next_constant, 2, &data_size);
546   uint32_t emit_constant =
547      pvr_pds_get_constants(&next_constant,
548                            (2 * program->num_emit_word_pairs),
549                            &data_size);
550
551   uint32_t control_word_constant =
552      pvr_pds_get_constants(&next_constant,
553                            program->num_emit_word_pairs,
554                            &data_size);
555
556   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
557      /* Src0 for DOUTU. */
558      pvr_pds_write_wide_constant(buffer,
559                                  control_constant,
560                                  program->task_control.src0); /* DOUTU */
561      /* 64-bit Src0. */
562
563      /* Emit words for end of tile program. */
564      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
565         pvr_pds_write_constant64(constants,
566                                  emit_constant + (2 * i),
567                                  program->emit_words[(2 * i) + 0],
568                                  program->emit_words[(2 * i) + 1]);
569      }
570
571      /* Control words. */
572      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
573         uint32_t doutw = pvr_pds_encode_doutw_src1(
574            (2 * i),
575            PVR_PDS_DOUTW_LOWER64,
576            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
577            false,
578            dev_info);
579
580         if (i == (program->num_emit_word_pairs - 1))
581            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
582
583         pvr_pds_write_constant32(constants, control_word_constant + i, doutw);
584      }
585   }
586
587   else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
588      /* DOUTW the state into the shared register. */
589      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
590         *buffer++ = pvr_pds_encode_doutw64(
591            /* cc */ 0,
592            /* END */ 0,
593            /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */
594            /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0
595                                                         */
596      }
597
598      /* Kick the USC. */
599      *buffer++ = pvr_pds_encode_doutu(
600         /* cc */ 0,
601         /* END */ 1,
602         /* SRC0 */ control_constant >> 1);
603   }
604
605   uint32_t code_size = 1 + program->num_emit_word_pairs;
606
607   /* Save the data segment Pointer and size. */
608   program->data_segment = constants;
609   program->data_size = data_size;
610   program->code_size = code_size;
611
612   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
613      return (constants + next_constant);
614
615   if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
616      return buffer;
617
618   return NULL;
619}
620
621/**
622 * Checks if any of the vertex streams contains instance data.
623 *
624 * \param streams Streams contained in the vertex shader.
625 * \param num_streams Number of vertex streams.
626 * \returns true if one or more of the given vertex streams contains
627 *          instance data, otherwise false.
628 */
629static bool pvr_pds_vertex_streams_contains_instance_data(
630   const struct pvr_pds_vertex_stream *streams,
631   uint32_t num_streams)
632{
633   for (uint32_t i = 0; i < num_streams; i++) {
634      const struct pvr_pds_vertex_stream *vertex_stream = &streams[i];
635      if (vertex_stream->instance_data)
636         return true;
637   }
638
639   return false;
640}
641
/**
 * Allocates one or two constants for the PDS vertex shader, where the first
 * (num_backs * 8) constants are divided into banks of 8.
 *
 * Once the cursor is at or past the banked region the request is forwarded to
 * pvr_pds_get_constants(). Inside the banked region the cursor is stepped as
 * follows:
 *  - cursor on a multiple of 8: allocate there, then advance by 1 for a
 *    single constant or by 8 for a pair;
 *  - otherwise, single constant: allocate at the cursor, then advance by 7;
 *  - otherwise, pair: advance by 7 first and allocate there, then advance by 2
 *    (also adding 2 to count) if that position is at or past the banked
 *    region, or by 8 if it is still inside it.
 *
 * \param num_backs Number of banks; the banked region spans num_backs * 8
 *                  constants.
 * \param next_constant In/out cursor holding the next constant index.
 * \param num_constants The number of constants required; must be 1 or 2.
 * \param count Running count of allocated constants. Within the banked region
 *              it is only updated in the one case described above.
 * \returns The index of the first allocated constant.
 */
static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs,
                                                 uint32_t *next_constant,
                                                 uint32_t num_constants,
                                                 uint32_t *count)
{
   const uint32_t banked_end = num_backs << 3;
   const bool single = (num_constants == 1);
   uint32_t allocated;

   assert(num_constants == 1 || num_constants == 2);

   /* Past the banked region the ordinary allocator takes over. */
   if (*next_constant >= banked_end)
      return pvr_pds_get_constants(next_constant, num_constants, count);

   /* Cursor at the start of a bank. */
   if ((*next_constant % 8) == 0) {
      allocated = *next_constant;
      *next_constant += single ? 1 : 8;

      return allocated;
   }

   /* Cursor inside a bank, single constant requested. */
   if (single) {
      allocated = *next_constant;
      *next_constant += 7;

      return allocated;
   }

   /* Cursor inside a bank, pair requested: step forward before allocating. */
   *next_constant += 7;
   allocated = *next_constant;

   if (allocated >= banked_end) {
      *next_constant += 2;
      *count += 2;
   } else {
      *next_constant += 8;
   }

   return allocated;
}
680
681/**
682 * Generates a PDS program to load USC vertex inputs based from one or more
683 * vertex buffers, each containing potentially multiple elements, and then a
684 * DOUTU to execute the USC.
685 *
686 * \param program Pointer to the description of the program which should be
687 *                generated.
688 * \param buffer Pointer to buffer that receives the output of this function.
689 *               Will either be the data segment or code segment depending on
690 *               gen_mode.
691 * \param gen_mode Which part to generate, either data segment or
692 *                 code segment. If PDS_GENERATE_SIZES is specified, nothing is
693 *                 written, but size information in program is updated.
694 * \param dev_info PVR device info structure.
695 * \returns Pointer to just beyond the buffer for the data - i.e the value
696 *          of the buffer after writing its contents.
697 */
698uint32_t *
699pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
700                      uint32_t *restrict buffer,
701                      enum pvr_pds_generate_mode gen_mode,
702                      const struct pvr_device_info *dev_info)
703{
704   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
705   uint32_t next_stream_constant;
706   uint32_t next_temp;
707   uint32_t usc_control_constant64;
708   uint32_t stride_constant32 = 0;
709   uint32_t dma_address_constant64 = 0;
710   uint32_t dma_control_constant64;
711   uint32_t multiplier_constant32 = 0;
712   uint32_t base_instance_const32 = 0;
713
714   uint32_t temp = 0;
715   uint32_t index_temp64 = 0;
716   uint32_t num_vertices_temp64 = 0;
717   uint32_t pre_index_temp = (uint32_t)(-1);
718   bool first_ddmadt = true;
719   uint32_t input_register0;
720   uint32_t input_register1;
721   uint32_t input_register2;
722
723   struct pvr_pds_vertex_stream *vertex_stream;
724   struct pvr_pds_vertex_element *vertex_element;
725   uint32_t shift_2s_comp;
726
727   uint32_t data_size = 0;
728   uint32_t code_size = 0;
729   uint32_t temps_used = 0;
730
731   bool direct_writes_needed = false;
732
733   uint32_t consts_size = 0;
734   uint32_t vertex_id_control_word_const32 = 0;
735   uint32_t instance_id_control_word_const32 = 0;
736   uint32_t instance_id_modifier_word_const32 = 0;
737   uint32_t geometry_id_control_word_const64 = 0;
738   uint32_t empty_dma_control_constant64 = 0;
739
740   bool any_instanced_stream =
741      pvr_pds_vertex_streams_contains_instance_data(program->streams,
742                                                    program->num_streams);
743
744   uint32_t base_instance_register = 0;
745   uint32_t ddmadt_enables = 0;
746
747   bool issue_empty_ddmad = false;
748   uint32_t last_stream_index = program->num_streams - 1;
749   bool current_p0 = false;
750   uint32_t skip_stream_flag = 0;
751
752   /* Generate the PDS vertex shader data. */
753
754#if defined(DEBUG)
755   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
756      for (uint32_t i = 0; i < program->data_size; i++)
757         buffer[i] = 0xDEADBEEF;
758   }
759#endif
760
761   /* Generate the PDS vertex shader program */
762   next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
763   /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */
764   input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
765   /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */
766   input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
767
768   if (program->iterate_remap_id)
769      input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
770   else
771      input_register2 = 0; /* Not used, but need to silence the compiler. */
772
773   /* Generate the PDS vertex shader code. The constants in the data block are
774    * arranged as follows:
775    *
776    * 64 bit bank 0        64 bit bank 1          64 bit bank 2    64 bit bank
777    * 3 Not used (tmps)    Stride | Multiplier    Address          Control
778    */
779
780   /* Find out how many constants are needed by streams. */
781   for (uint32_t stream = 0; stream < program->num_streams; stream++) {
782      pvr_pds_get_constants(&next_constant,
783                            8 * program->streams[stream].num_elements,
784                            &consts_size);
785   }
786
787   /* If there are no vertex streams allocate the first bank for USC Code
788    * Address.
789    */
790   if (consts_size == 0)
791      pvr_pds_get_constants(&next_constant, 2, &consts_size);
792   else
793      next_constant = 8;
794
795   direct_writes_needed = program->iterate_instance_id ||
796                          program->iterate_vtx_id || program->iterate_remap_id;
797
798   if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
799      /* Evaluate what config of DDMAD should be used for each stream. */
800      for (uint32_t stream = 0; stream < program->num_streams; stream++) {
801         vertex_stream = &program->streams[stream];
802
803         if (vertex_stream->use_ddmadt) {
804            ddmadt_enables |= (1 << stream);
805
806            /* The condition for index value is:
807             * index * stride + size <= bufferSize (all in unit of byte)
808             */
809            if (vertex_stream->stride == 0) {
810               if (vertex_stream->elements[0].size <=
811                   vertex_stream->buffer_size_in_bytes) {
812                  /* index can be any value -> no need to use DDMADT. */
813                  ddmadt_enables &= (~(1 << stream));
814               } else {
815                  /* No index works -> no need to issue DDMAD instruction.
816                   */
817                  skip_stream_flag |= (1 << stream);
818               }
819            } else {
820               /* index * stride + size <= bufferSize
821                *
822                * can be converted to:
823                * index <= (bufferSize - size) / stride
824                *
825                * where maximum index is:
826                * integer((bufferSize - size) / stride).
827                */
828               if (vertex_stream->buffer_size_in_bytes <
829                   vertex_stream->elements[0].size) {
830                  /* No index works -> no need to issue DDMAD instruction.
831                   */
832                  skip_stream_flag |= (1 << stream);
833               } else {
834                  uint32_t max_index = (vertex_stream->buffer_size_in_bytes -
835                                        vertex_stream->elements[0].size) /
836                                       vertex_stream->stride;
837                  if (max_index == 0xFFFFFFFFu) {
838                     /* No need to use DDMADT as all possible indices can
839                      * pass the test.
840                      */
841                     ddmadt_enables &= (~(1 << stream));
842                  } else {
843                     /* In this case, test condition can be changed to
844                      * index < max_index + 1.
845                      */
846                     program->streams[stream].num_vertices =
847                        pvr_pds_get_bank_based_constants(program->num_streams,
848                                                         &next_constant,
849                                                         1,
850                                                         &consts_size);
851
852                     if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
853                        pvr_pds_write_constant32(
854                           buffer,
855                           program->streams[stream].num_vertices,
856                           max_index + 1);
857                     }
858                  }
859               }
860            }
861         }
862
863         if ((skip_stream_flag & (1 << stream)) == 0) {
864            issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0;
865            last_stream_index = stream;
866         }
867      }
868   } else {
869      if (program->num_streams > 0 &&
870          program->streams[program->num_streams - 1].use_ddmadt) {
871         issue_empty_ddmad = true;
872      }
873   }
874
875   if (direct_writes_needed)
876      issue_empty_ddmad = false;
877
878   if (issue_empty_ddmad) {
879      /* An empty DMA control const (DMA size = 0) is required in case the
880       * last DDMAD is predicated out and its last flag has no effect.
881       */
882      empty_dma_control_constant64 =
883         pvr_pds_get_bank_based_constants(program->num_streams,
884                                          &next_constant,
885                                          2,
886                                          &consts_size);
887   }
888
889   /* Assign constants for non stream or base instance if there is any
890    * instanced stream.
891    */
892   if (direct_writes_needed || any_instanced_stream ||
893       program->instance_ID_modifier) {
894      if (program->iterate_vtx_id) {
895         vertex_id_control_word_const32 =
896            pvr_pds_get_bank_based_constants(program->num_streams,
897                                             &next_constant,
898                                             1,
899                                             &consts_size);
900      }
901
902      if (program->iterate_instance_id || program->instance_ID_modifier) {
903         if (program->instance_ID_modifier == 0) {
904            instance_id_control_word_const32 =
905               pvr_pds_get_bank_based_constants(program->num_streams,
906                                                &next_constant,
907                                                1,
908                                                &consts_size);
909         } else {
910            instance_id_modifier_word_const32 =
911               pvr_pds_get_bank_based_constants(program->num_streams,
912                                                &next_constant,
913                                                1,
914                                                &consts_size);
915            if ((instance_id_modifier_word_const32 % 2) == 0) {
916               instance_id_control_word_const32 =
917                  pvr_pds_get_bank_based_constants(program->num_streams,
918                                                   &next_constant,
919                                                   1,
920                                                   &consts_size);
921            } else {
922               instance_id_control_word_const32 =
923                  instance_id_modifier_word_const32;
924               instance_id_modifier_word_const32 =
925                  pvr_pds_get_bank_based_constants(program->num_streams,
926                                                   &next_constant,
927                                                   1,
928                                                   &consts_size);
929            }
930         }
931      }
932
933      if (program->base_instance != 0) {
934         base_instance_const32 =
935            pvr_pds_get_bank_based_constants(program->num_streams,
936                                             &next_constant,
937                                             1,
938                                             &consts_size);
939      }
940
941      if (program->iterate_remap_id) {
942         geometry_id_control_word_const64 =
943            pvr_pds_get_bank_based_constants(program->num_streams,
944                                             &next_constant,
945                                             2,
946                                             &consts_size);
947      }
948   }
949
950   if (program->instance_ID_modifier != 0) {
951      /* This instanceID modifier is used when a draw array instanced call
952       * sourcing from client data cannot fit into vertex buffer and needs to
953       * be broken down into several draw calls.
954       */
955
956      code_size += 1;
957
958      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
959         pvr_pds_write_constant32(buffer,
960                                  instance_id_modifier_word_const32,
961                                  program->instance_ID_modifier);
962      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
963         *buffer++ = pvr_pds_inst_encode_add32(
964            /* cc */ 0x0,
965            /* ALUM */ 0, /* Unsigned */
966            /* SNA */ 0, /* Add */
967            /* SRC0 32b */ instance_id_modifier_word_const32,
968            /* SRC1 32b */ input_register1,
969            /* DST 32b */ input_register1);
970      }
971   }
972
973   /* Adjust instanceID if necessary. */
974   if (any_instanced_stream || program->iterate_instance_id) {
975      if (program->base_instance != 0) {
976         assert(!program->draw_indirect);
977
978         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
979            pvr_pds_write_constant32(buffer,
980                                     base_instance_const32,
981                                     program->base_instance);
982         }
983
984         base_instance_register = base_instance_const32;
985      }
986
987      if (program->draw_indirect) {
988         assert((program->instance_ID_modifier == 0) &&
989                (program->base_instance == 0));
990
991         base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1;
992      }
993   }
994
995   next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
996   usc_control_constant64 =
997      pvr_pds_get_constants(&next_stream_constant, 2, &data_size);
998
999   for (uint32_t stream = 0; stream < program->num_streams; stream++) {
1000      bool instance_data_with_base_instance;
1001
1002      if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1003          ((skip_stream_flag & (1 << stream)) != 0)) {
1004         continue;
1005      }
1006
1007      vertex_stream = &program->streams[stream];
1008
1009      instance_data_with_base_instance =
1010         ((vertex_stream->instance_data) &&
1011          ((program->base_instance > 0) || (program->draw_indirect)));
1012
1013      /* Get all 8 32-bit constants at once, only 6 for first stream due to
1014       * USC constants.
1015       */
1016      if (stream == 0) {
1017         stride_constant32 =
1018            pvr_pds_get_constants(&next_stream_constant, 6, &data_size);
1019      } else {
1020         next_constant =
1021            pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1022
1023         /* Skip bank 0. */
1024         stride_constant32 = next_constant + 2;
1025      }
1026
1027      multiplier_constant32 = stride_constant32 + 1;
1028
1029      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1030         pvr_pds_write_constant32(buffer,
1031                                  stride_constant32,
1032                                  vertex_stream->stride);
1033
1034         /* Vertex stream frequency multiplier. */
1035         if (vertex_stream->multiplier)
1036            pvr_pds_write_constant32(buffer,
1037                                     multiplier_constant32,
1038                                     vertex_stream->multiplier);
1039      }
1040
1041      /* Update the code size count and temps count for the above code
1042       * segment.
1043       */
1044      if (vertex_stream->current_state) {
1045         code_size += 1;
1046         temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */
1047      } else {
1048         unsigned int num_temps_required = 0;
1049
1050         if (vertex_stream->multiplier) {
1051            num_temps_required += 2;
1052            code_size += 3;
1053
1054            if (vertex_stream->shift) {
1055               code_size += 1;
1056
1057               if ((int32_t)vertex_stream->shift > 0)
1058                  code_size += 1;
1059            }
1060         } else if (vertex_stream->shift) {
1061            code_size += 1;
1062            num_temps_required += 1;
1063         } else if (instance_data_with_base_instance) {
1064            num_temps_required += 1;
1065         }
1066
1067         if (num_temps_required != 0) {
1068            temp = pvr_pds_get_temps(&next_temp,
1069                                     num_temps_required,
1070                                     &temps_used); /* 64-bit */
1071         } else {
1072            temp = vertex_stream->instance_data ? input_register1
1073                                                : input_register0;
1074         }
1075
1076         if (instance_data_with_base_instance)
1077            code_size += 1;
1078      }
1079
1080      /* The real code segment. */
1081      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1082         /* If it's current state stream, then index = 0 always. */
1083         if (vertex_stream->current_state) {
1084            /* Put zero in temp. */
1085            *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1086         } else if (vertex_stream->multiplier) {
1087            /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24)
1088             * new: Iout = (Iin * Multiplier) >> (shift+31)
1089             */
1090
1091            /* Put zero in temp. Need zero for add part of the following
1092             * MAD. MAD source is 64 bit, so need two LIMMs.
1093             */
1094            *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1095            /* Put zero in temp. Need zero for add part of the following
1096             * MAD.
1097             */
1098            *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0);
1099
1100            /* old: (Iin * (Multiplier+2^24))
1101             * new: (Iin * Multiplier)
1102             */
1103            *buffer++ = pvr_rogue_inst_encode_mad(
1104               0, /* Sign of add is positive. */
1105               0, /* Unsigned ALU mode */
1106               0, /* Unconditional */
1107               multiplier_constant32,
1108               vertex_stream->instance_data ? input_register1 : input_register0,
1109               temp / 2,
1110               temp / 2);
1111
1112            if (vertex_stream->shift) {
1113               int32_t shift = (int32_t)vertex_stream->shift;
1114
1115               /* new: >> (shift + 31) */
1116               shift += 31;
1117               shift *= -1;
1118
1119               if (shift < -31) {
1120                  /* >> (31) */
1121                  shift_2s_comp = 0xFFFE1;
1122                  *buffer++ = pvr_pds_inst_encode_stflp64(
1123                     /* cc */ 0,
1124                     /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1125                     /* IM */ 1, /*  enable immediate */
1126                     /* SRC0 */ temp / 2,
1127                     /* SRC1 */ input_register0, /* This won't be used in
1128                                                  * a shift operation.
1129                                                  */
1130                     /* SRC2 (Shift) */ shift_2s_comp,
1131                     /* DST */ temp / 2);
1132                  shift += 31;
1133               }
1134
1135               /* old: >> (Shift+24)
1136                * new: >> (shift + 31)
1137                */
1138               shift_2s_comp = *((uint32_t *)&shift);
1139               *buffer++ = pvr_pds_inst_encode_stflp64(
1140                  /* cc */ 0,
1141                  /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1142                  /* IM */ 1, /*enable immediate */
1143                  /* SRC0 */ temp / 2,
1144                  /* SRC1 */ input_register0, /* This won't be used in
1145                                               * a shift operation.
1146                                               */
1147                  /* SRC2 (Shift) */ shift_2s_comp,
1148                  /* DST */ temp / 2);
1149            }
1150
1151            if (instance_data_with_base_instance) {
1152               *buffer++ =
1153                  pvr_pds_inst_encode_add32(0, /* cc */
1154                                            0, /* ALNUM */
1155                                            0, /* SNA */
1156                                            base_instance_register, /* src0
1157                                                                     */
1158                                            temp, /* src1 */
1159                                            temp /* dst */
1160                  );
1161            }
1162         } else { /* NOT vertex_stream->multiplier */
1163            if (vertex_stream->shift) {
1164               /* Shift Index/InstanceNum Right by shift bits. Put result
1165                * in a Temp.
1166                */
1167
1168               /* 2's complement of shift as this will be a right shift. */
1169               shift_2s_comp = ~(vertex_stream->shift) + 1;
1170
1171               *buffer++ = pvr_pds_inst_encode_stflp32(
1172                  /* IM */ 1, /*  enable immediate. */
1173                  /* cc */ 0,
1174                  /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1175                  /* SRC0 */ vertex_stream->instance_data ? input_register1
1176                                                          : input_register0,
1177                  /* SRC1 */ input_register0, /* This won't be used in
1178                                               * a shift operation.
1179                                               */
1180                  /* SRC2 (Shift) */ shift_2s_comp,
1181                  /* DST */ temp);
1182
1183               if (instance_data_with_base_instance) {
1184                  *buffer++ =
1185                     pvr_pds_inst_encode_add32(0, /* cc */
1186                                               0, /* ALNUM */
1187                                               0, /* SNA */
1188                                               base_instance_register, /* src0
1189                                                                        */
1190                                               temp, /* src1 */
1191                                               temp /* dst */
1192                     );
1193               }
1194            } else {
1195               if (instance_data_with_base_instance) {
1196                  *buffer++ =
1197                     pvr_pds_inst_encode_add32(0, /* cc */
1198                                               0, /* ALNUM */
1199                                               0, /* SNA */
1200                                               base_instance_register, /* src0
1201                                                                        */
1202                                               input_register1, /* src1 */
1203                                               temp /* dst */
1204                     );
1205               } else {
1206                  /* If the shift instruction doesn't happen, use the IR
1207                   * directly into the following MAD.
1208                   */
1209                  temp = vertex_stream->instance_data ? input_register1
1210                                                      : input_register0;
1211               }
1212            }
1213         }
1214      }
1215
1216      if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1217         if (vertex_stream->use_ddmadt)
1218            ddmadt_enables |= (1 << stream);
1219      } else {
1220         if ((ddmadt_enables & (1 << stream)) != 0) {
1221            /* Emulate what DDMADT does for range checking. */
1222            if (first_ddmadt) {
1223               /* Get a 64-bit temp so that the CMP of the current index
1224                * against the allowed vertex count can work.
1225                */
1226               index_temp64 =
1227                  pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1228                                                                  */
1229               num_vertices_temp64 =
1230                  pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1231                                                                  */
1232
1233               index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1234               num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1235
1236               code_size += 3;
1237               current_p0 = true;
1238            }
1239
1240            code_size += (temp == pre_index_temp ? 1 : 2);
1241
1242            if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1243               if (first_ddmadt) {
1244                  /* Set predicate to be P0. */
1245                  *buffer++ = pvr_pds_encode_bra(
1246                     PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1247                                                        */
1248                     0, /* Neg */
1249                     PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC
1250                                                      */
1251                     1); /* Addr */
1252
1253                  *buffer++ =
1254                     pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0);
1255                  *buffer++ =
1256                     pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0);
1257               }
1258
1259               if (temp != pre_index_temp) {
1260                  *buffer++ = pvr_pds_inst_encode_stflp32(
1261                     /* IM */ 1, /*  enable immediate. */
1262                     /* cc */ 0,
1263                     /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1264                     /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER,
1265                     /* SRC1 */ 0,
1266                     /* SRC2 (Shift) */ 0,
1267                     /* DST */ index_temp64);
1268               }
1269
1270               *buffer++ = pvr_pds_inst_encode_stflp32(
1271                  /* IM */ 1, /*  enable immediate. */
1272                  /* cc */ 0,
1273                  /* LOP */ PVR_ROGUE_PDSINST_LOP_OR,
1274                  /* SRC0 */ num_vertices_temp64 + 1,
1275                  /* SRC1 */ vertex_stream->num_vertices,
1276                  /* SRC2 (Shift) */ 0,
1277                  /* DST */ num_vertices_temp64);
1278            }
1279
1280            first_ddmadt = false;
1281
1282            pre_index_temp = temp;
1283         }
1284      }
1285
1286      /* Process the elements in the stream. */
1287      for (uint32_t element = 0; element < vertex_stream->num_elements;
1288           element++) {
1289         bool terminate = false;
1290
1291         vertex_element = &vertex_stream->elements[element];
1292         /* Check if last DDMAD needs terminate or not. */
1293         if ((element == (vertex_stream->num_elements - 1)) &&
1294             (stream == last_stream_index)) {
1295            terminate = !issue_empty_ddmad && !direct_writes_needed;
1296         }
1297
1298         /* Get a new set of constants for this element. */
1299         if (element) {
1300            /* Get all 8 32 bit constants at once. */
1301            next_constant =
1302               pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1303         }
1304
1305         dma_address_constant64 = next_constant + 4;
1306         dma_control_constant64 = dma_address_constant64 + 2;
1307
1308         if (vertex_element->component_size == 0) {
1309            /* Standard DMA.
1310             *
1311             * Write the DMA transfer control words into the PDS data
1312             * section.
1313             *
1314             * DMA Address is 40-bit.
1315             */
1316
1317            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1318               uint32_t dma_control_word;
1319               uint64_t dma_control_word64 = 0;
1320               uint32_t dma_size;
1321
1322               /* Write the address to the constant. */
1323               pvr_pds_write_dma_address(buffer,
1324                                         dma_address_constant64,
1325                                         vertex_stream->address +
1326                                            (uint64_t)vertex_element->offset,
1327                                         false,
1328                                         dev_info);
1329               {
1330                  if (program->stream_patch_offsets) {
1331                     program
1332                        ->stream_patch_offsets[program->num_stream_patches++] =
1333                        (stream << 16) | (dma_address_constant64 >> 1);
1334                  }
1335               }
1336
1337               /* Size is in bytes - round up to nearest 32 bit word. */
1338               dma_size =
1339                  (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >>
1340                  PVR_PDS_DWORD_SHIFT;
1341
1342               assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER);
1343
1344               /* Set up the dma transfer control word. */
1345               dma_control_word =
1346                  dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1347
1348               dma_control_word |=
1349                  vertex_element->reg
1350                  << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1351
1352               dma_control_word |=
1353                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1354                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1355
1356               if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1357                  if ((ddmadt_enables & (1 << stream)) != 0) {
1358                     assert(
1359                        ((((uint64_t)vertex_stream->buffer_size_in_bytes
1360                           << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1361                          ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >>
1362                         PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) ==
1363                        (uint64_t)vertex_stream->buffer_size_in_bytes);
1364                     dma_control_word64 =
1365                        (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN |
1366                         (((uint64_t)vertex_stream->buffer_size_in_bytes
1367                           << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1368                          ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK));
1369                  }
1370               }
1371               /* If this is the last dma then also set the last flag. */
1372               if (terminate) {
1373                  dma_control_word |=
1374                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1375               }
1376
1377               /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1378                * spec.
1379                */
1380               pvr_pds_write_wide_constant(buffer,
1381                                           dma_control_constant64,
1382                                           dma_control_word64 |
1383                                              (uint64_t)dma_control_word);
1384            }
1385
1386            if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1387               if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1388                  if ((ddmadt_enables & (1 << stream)) != 0) {
1389                     *buffer++ = pvr_pds_inst_encode_cmp(
1390                        0, /* cc enable */
1391                        PVR_ROGUE_PDSINST_COP_LT, /* Operation */
1392                        index_temp64 >> 1, /* SRC0 (REGS64TP) */
1393                        (num_vertices_temp64 >> 1) +
1394                           PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1
1395                                                                      (REGS64)
1396                                                                    */
1397                  }
1398               }
1399               /* Multiply by the vertex stream stride and add the base
1400                * followed by a DOUTD.
1401                *
1402                * dmad32 (C0 * T0) + C1, C2
1403                * src0 = stride  src1 = index  src2 = baseaddr src3 =
1404                * doutd part
1405                */
1406
1407               uint32_t cc;
1408               if (PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1409                  cc = 0;
1410               else
1411                  cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0;
1412
1413               *buffer++ = pvr_pds_inst_encode_ddmad(
1414                  /* cc */ cc,
1415                  /* END */ 0,
1416                  /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1417                  /* SRC1 */ temp, /* Index 32-bit*/
1418                  /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1419                                                                  * Address
1420                                                                  * +
1421                                                                  * Offset
1422                                                                  */
1423                  /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1424                                                                 * Transfer
1425                                                                 * Control
1426                                                                 * Word.
1427                                                                 */
1428               );
1429            }
1430
1431            if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1432                ((ddmadt_enables & (1 << stream)) != 0)) {
1433               code_size += 1;
1434            }
1435            code_size += 1;
1436         } else {
1437            /* Repeat DMA.
1438             *
1439             * Write the DMA transfer control words into the PDS data
1440             * section.
1441             *
1442             * DMA address is 40-bit.
1443             */
1444
1445            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1446               uint32_t dma_control_word;
1447
1448               /* Write the address to the constant. */
1449               pvr_pds_write_dma_address(buffer,
1450                                         dma_address_constant64,
1451                                         vertex_stream->address +
1452                                            (uint64_t)vertex_element->offset,
1453                                         false,
1454                                         dev_info);
1455
1456               /* Set up the DMA transfer control word. */
1457               dma_control_word =
1458                  vertex_element->size
1459                  << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1460
1461               dma_control_word |=
1462                  vertex_element->reg
1463                  << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1464
1465               switch (vertex_element->component_size) {
1466               case 4: {
1467                  dma_control_word |=
1468                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR;
1469                  break;
1470               }
1471               case 3: {
1472                  dma_control_word |=
1473                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE;
1474                  break;
1475               }
1476               case 2: {
1477                  dma_control_word |=
1478                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO;
1479                  break;
1480               }
1481               default: {
1482                  dma_control_word |=
1483                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE;
1484                  break;
1485               }
1486               }
1487
1488               dma_control_word |=
1489                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT;
1490
1491               dma_control_word |=
1492                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1493                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1494
1495               /* If this is the last dma then also set the last flag. */
1496               if (terminate) {
1497                  dma_control_word |=
1498                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1499               }
1500
1501               /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1502                * spec.
1503                */
1504               pvr_pds_write_wide_constant(buffer,
1505                                           dma_control_constant64,
1506                                           (uint64_t)dma_control_word);
1507            }
1508
1509            if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1510               /* Multiply by the vertex stream stride and add the base
1511                * followed by a DOUTD.
1512                *
1513                * dmad32 (C0 * T0) + C1, C2
1514                * src0 = stride  src1 = index  src2 = baseaddr src3 =
1515                * doutd part
1516                */
1517               *buffer++ = pvr_pds_inst_encode_ddmad(
1518                  /* cc */ 0,
1519                  /* END */ 0,
1520                  /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1521                  /* SRC1 */ temp, /* Index 32-bit*/
1522                  /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1523                                                                  * Address
1524                                                                  * +
1525                                                                  * Offset.
1526                                                                  */
1527                  /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1528                                                                 * Transfer
1529                                                                 * Control
1530                                                                 * Word.
1531                                                                 */
1532               );
1533            }
1534
1535            code_size += 1;
1536         } /* End of repeat DMA. */
1537      } /* Element loop */
1538   } /* Stream loop */
1539
1540   if (issue_empty_ddmad) {
1541      /* Issue an empty last DDMAD, always executed. */
1542      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1543         pvr_pds_write_wide_constant(
1544            buffer,
1545            empty_dma_control_constant64,
1546            PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN);
1547      }
1548
1549      code_size += 1;
1550
1551      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1552         *buffer++ = pvr_pds_inst_encode_ddmad(
1553            /* cc */ 0,
1554            /* END */ 0,
1555            /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1556            /* SRC1 */ temp, /* Index 32-bit*/
1557            /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1558                                                            *Address +
1559                                                            *Offset.
1560                                                            */
1561            /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA
1562                                                                 * Transfer
1563                                                                 * Control
1564                                                                 * Word.
1565                                                                 */
1566         );
1567      }
1568   }
1569
1570   if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1571      if (current_p0) {
1572         code_size += 1;
1573
1574         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1575            /* Revert predicate back to IF0 which is required by DOUTU. */
1576            *buffer++ =
1577               pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1578                                                                     */
1579                                  0, /* Neg */
1580                                  PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC
1581                                                                    */
1582                                  1); /* Addr */
1583         }
1584      }
1585   }
1586   /* Send VertexID if requested. */
1587   if (program->iterate_vtx_id) {
1588      if (program->draw_indirect) {
1589         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1590            *buffer++ = pvr_pds_inst_encode_add32(
1591               /* cc */ 0x0,
1592               /* ALUM */ 0, /* Unsigned */
1593               /* SNA */ 1, /* Minus */
1594               /* SRC0 32b */ input_register0, /* vertexID */
1595               /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base
1596                                                                       * vertexID.
1597                                                                       */
1598               /* DST 32b */ input_register0);
1599         }
1600
1601         code_size += 1;
1602      }
1603
1604      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1605         uint32_t doutw = pvr_pds_encode_doutw_src1(
1606            program->vtx_id_register,
1607            PVR_PDS_DOUTW_LOWER32,
1608            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1609            false,
1610            dev_info);
1611
1612         if (!program->iterate_instance_id && !program->iterate_remap_id)
1613            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1614
1615         pvr_pds_write_constant32(buffer,
1616                                  vertex_id_control_word_const32,
1617                                  doutw);
1618      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1619         *buffer++ = pvr_pds_encode_doutw64(
1620            /* cc */ 0,
1621            /* END */ 0,
1622            /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1
1623                                                        */
1624            /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */
1625      }
1626
1627      code_size += 1;
1628   }
1629
1630   /* Send InstanceID if requested. */
1631   if (program->iterate_instance_id) {
1632      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1633         uint32_t doutw = pvr_pds_encode_doutw_src1(
1634            program->instance_id_register,
1635            PVR_PDS_DOUTW_UPPER32,
1636            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1637            true,
1638            dev_info);
1639
1640         if (!program->iterate_remap_id)
1641            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1642
1643         pvr_pds_write_constant32(buffer,
1644                                  instance_id_control_word_const32,
1645                                  doutw);
1646      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1647         *buffer++ = pvr_pds_encode_doutw64(
1648            /* cc */ 0,
1649            /* END */ 0,
1650            /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */
1651            /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */
1652      }
1653
1654      code_size += 1;
1655   }
1656
1657   /* Send remapped index number to vi0. */
1658   if (program->iterate_remap_id) {
1659      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1660         uint32_t doutw = pvr_pds_encode_doutw_src1(
1661            0 /* vi0 */,
1662            PVR_PDS_DOUTW_LOWER32,
1663            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
1664               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
1665            false,
1666            dev_info);
1667
1668         pvr_pds_write_constant64(buffer,
1669                                  geometry_id_control_word_const64,
1670                                  doutw,
1671                                  0);
1672      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1673         *buffer++ = pvr_pds_encode_doutw64(
1674            /* cc */ 0,
1675            /* END */ 0,
1676            /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit
1677                                                          * Src1
1678                                                          */
1679            /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */
1680      }
1681
1682      code_size += 1;
1683   }
1684
1685   /* Copy the USC task control words to constants. */
1686   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1687      pvr_pds_write_wide_constant(buffer,
1688                                  usc_control_constant64,
1689                                  program->usc_task_control.src0); /* 64-bit
1690                                                                    * Src0
1691                                                                    */
1692      if (program->stream_patch_offsets) {
1693         /* USC TaskControl is always the first patch. */
1694         program->stream_patch_offsets[0] = usc_control_constant64 >> 1;
1695      }
1696   }
1697
1698   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1699      /* Conditionally (if last in task) issue the task to the USC
1700       * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2.
1701       */
1702
1703      *buffer++ = pvr_pds_encode_doutu(
1704         /* cc */ 1,
1705         /* END */ 1,
1706         /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */
1707
1708      /* End the program if the Dout did not already end it. */
1709      *buffer++ = pvr_pds_inst_encode_halt(0);
1710   }
1711
1712   code_size += 2;
1713
1714   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1715      /* Set the data segment pointer and ensure we return 1 past the buffer
1716       * ptr.
1717       */
1718      program->data_segment = buffer;
1719
1720      buffer += consts_size;
1721   }
1722
1723   program->temps_used = temps_used;
1724   program->data_size = consts_size;
1725   program->code_size = code_size;
1726   program->ddmadt_enables = ddmadt_enables;
1727   if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1728      program->skip_stream_flag = skip_stream_flag;
1729
1730   return buffer;
1731}
1732
1733/**
1734 * Generates a PDS program to load USC compute shader global/local/workgroup
1735 * sizes/ids and then a DOUTU to execute the USC.
1736 *
1737 * \param program Pointer to description of the program that should be
1738 *                generated.
 * \param buffer Pointer to buffer that receives the output of this function.
 *               This will be either the data segment or the code segment,
 *               depending on gen_mode.
1742 * \param gen_mode Which part to generate, either data segment or code segment.
1743 *                 If PDS_GENERATE_SIZES is specified, nothing is written, but
1744 *                 size information in program is updated.
1745 * \param dev_info PVR device info struct.
 * \returns Pointer to just past the end of the generated output, i.e. the
 *          value of buffer after its contents have been written.
1748 */
1749uint32_t *
1750pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
1751                       uint32_t *restrict buffer,
1752                       enum pvr_pds_generate_mode gen_mode,
1753                       const struct pvr_device_info *dev_info)
1754{
1755   uint32_t usc_control_constant64;
1756   uint32_t usc_control_constant64_coeff_update = 0;
1757   uint32_t zero_constant64 = 0;
1758
1759   uint32_t data_size = 0;
1760   uint32_t code_size = 0;
1761   uint32_t temps_used = 0;
1762   uint32_t doutw = 0;
1763
1764   uint32_t barrier_ctrl_word = 0;
1765   uint32_t barrier_ctrl_word2 = 0;
1766
1767   /* Even though there are 3 IDs for local and global we only need max one
1768    * DOUTW for local, and two for global.
1769    */
1770   uint32_t work_group_id_ctrl_words[2] = { 0 };
1771   uint32_t local_id_ctrl_word = 0;
1772   uint32_t local_input_register;
1773
1774   /* For the constant value to load into ptemp (SW fence). */
1775   uint64_t predicate_ld_src0_constant = 0;
1776   uint32_t cond_render_negate_constant = 0;
1777
1778   uint32_t cond_render_pred_temp;
1779   uint32_t cond_render_negate_temp;
1780
1781   /* 2x 64 bit registers that will mask out the Predicate load. */
1782   uint32_t cond_render_pred_mask_constant = 0;
1783
1784#if defined(DEBUG)
1785   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1786      for (uint32_t j = 0; j < program->data_size; j++)
1787         buffer[j] = 0xDEADBEEF;
1788   }
1789#endif
1790
1791   /* All the compute input registers are in temps. */
1792   temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS;
1793
1794   uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used;
1795
1796   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
1797
1798   if (program->kick_usc) {
1799      /* Copy the USC task control words to constants. */
1800      usc_control_constant64 =
1801         pvr_pds_get_constants(&next_constant, 2, &data_size);
1802   }
1803
1804   if (program->has_coefficient_update_task) {
1805      usc_control_constant64_coeff_update =
1806         pvr_pds_get_constants(&next_constant, 2, &data_size);
1807   }
1808
1809   if (program->conditional_render) {
1810      predicate_ld_src0_constant =
1811         pvr_pds_get_constants(&next_constant, 2, &data_size);
1812      cond_render_negate_constant =
1813         pvr_pds_get_constants(&next_constant, 2, &data_size);
1814      cond_render_pred_mask_constant =
1815         pvr_pds_get_constants(&next_constant, 4, &data_size);
1816
1817      /* LD will load a 64 bit value. */
1818      cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used);
1819      cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used);
1820
1821      program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant;
1822      program->cond_render_pred_temp = cond_render_pred_temp;
1823   }
1824
1825   if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1826       (program->clear_pds_barrier) ||
1827       (program->kick_usc && program->conditional_render)) {
1828      zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
1829   }
1830
1831   if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1832      barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1833      if (PVR_HAS_QUIRK(dev_info, 51210)) {
1834         barrier_ctrl_word2 =
1835            pvr_pds_get_constants(&next_constant, 1, &data_size);
1836      }
1837   }
1838
1839   if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
1840       program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1841      work_group_id_ctrl_words[0] =
1842         pvr_pds_get_constants(&next_constant, 1, &data_size);
1843   }
1844
1845   if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1846      work_group_id_ctrl_words[1] =
1847         pvr_pds_get_constants(&next_constant, 1, &data_size);
1848   }
1849
1850   if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1851       (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1852       (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1853      local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1854   }
1855
1856   if (program->add_base_workgroup) {
1857      for (uint32_t workgroup_component = 0; workgroup_component < 3;
1858           workgroup_component++) {
1859         if (program->work_group_input_regs[workgroup_component] !=
1860             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1861            program
1862               ->base_workgroup_constant_offset_in_dwords[workgroup_component] =
1863               pvr_pds_get_constants(&next_constant, 1, &data_size);
1864         }
1865      }
1866   }
1867
1868   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1869      if (program->kick_usc) {
1870         /* Src0 for DOUTU */
1871         pvr_pds_write_wide_constant(buffer,
1872                                     usc_control_constant64,
1873                                     program->usc_task_control.src0); /* 64-bit
1874                                                                       * Src0.
1875                                                                       */
1876      }
1877
1878      if (program->has_coefficient_update_task) {
1879         /* Src0 for DOUTU. */
1880         pvr_pds_write_wide_constant(
1881            buffer,
1882            usc_control_constant64_coeff_update,
1883            program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
1884      }
1885
1886      if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1887          (program->clear_pds_barrier) ||
1888          (program->kick_usc && program->conditional_render)) {
1889         pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
1890                                                                   * Src0
1891                                                                   */
1892      }
1893
1894      if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1895         if (PVR_HAS_QUIRK(dev_info, 51210)) {
1896            /* Write the constant for the coefficient register write. */
1897            doutw = pvr_pds_encode_doutw_src1(
1898               program->barrier_coefficient + 4,
1899               PVR_PDS_DOUTW_LOWER64,
1900               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1901               true,
1902               dev_info);
1903            pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw);
1904         }
1905         /* Write the constant for the coefficient register write. */
1906         doutw = pvr_pds_encode_doutw_src1(
1907            program->barrier_coefficient,
1908            PVR_PDS_DOUTW_LOWER64,
1909            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1910            true,
1911            dev_info);
1912
1913         /* Check whether the barrier is going to be the last DOUTW done by
1914          * the coefficient sync task.
1915          */
1916         if ((program->work_group_input_regs[0] ==
1917              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1918             (program->work_group_input_regs[1] ==
1919              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1920             (program->work_group_input_regs[2] ==
1921              PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1922            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1923         }
1924
1925         pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
1926      }
1927
1928      /* If we want work-group id X, see if we also want work-group id Y. */
1929      if (program->work_group_input_regs[0] !=
1930             PVR_PDS_COMPUTE_INPUT_REG_UNUSED &&
1931          program->work_group_input_regs[1] !=
1932             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1933         /* Make sure we are going to DOUTW them into adjacent registers
1934          * otherwise we can't do it in one.
1935          */
1936         assert(program->work_group_input_regs[1] ==
1937                (program->work_group_input_regs[0] + 1));
1938
1939         doutw = pvr_pds_encode_doutw_src1(
1940            program->work_group_input_regs[0],
1941            PVR_PDS_DOUTW_LOWER64,
1942            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1943            true,
1944            dev_info);
1945
1946         /* If we don't want the Z work-group id then this is the last one.
1947          */
1948         if (program->work_group_input_regs[2] ==
1949             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1950            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1951         }
1952
1953         pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
1954      }
1955      /* If we only want one of X or Y then handle them separately. */
1956      else {
1957         if (program->work_group_input_regs[0] !=
1958             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1959            doutw = pvr_pds_encode_doutw_src1(
1960               program->work_group_input_regs[0],
1961               PVR_PDS_DOUTW_LOWER32,
1962               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1963               true,
1964               dev_info);
1965
1966            /* If we don't want the Z work-group id then this is the last
1967             * one.
1968             */
1969            if (program->work_group_input_regs[2] ==
1970                PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1971               doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1972            }
1973
1974            pvr_pds_write_constant32(buffer,
1975                                     work_group_id_ctrl_words[0],
1976                                     doutw);
1977         } else if (program->work_group_input_regs[1] !=
1978                    PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1979            doutw = pvr_pds_encode_doutw_src1(
1980               program->work_group_input_regs[1],
1981               PVR_PDS_DOUTW_UPPER32,
1982               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1983               true,
1984               dev_info);
1985
1986            /* If we don't want the Z work-group id then this is the last
1987             * one.
1988             */
1989            if (program->work_group_input_regs[2] ==
1990                PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1991               doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1992            }
1993
1994            pvr_pds_write_constant32(buffer,
1995                                     work_group_id_ctrl_words[0],
1996                                     doutw);
1997         }
1998      }
1999
2000      /* Handle work-group id Z. */
2001      if (program->work_group_input_regs[2] !=
2002          PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2003         doutw = pvr_pds_encode_doutw_src1(
2004            program->work_group_input_regs[2],
2005            PVR_PDS_DOUTW_UPPER32,
2006            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
2007               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2008            true,
2009            dev_info);
2010
2011         pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
2012      }
2013
2014      /* Handle the local IDs. */
2015      if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2016          (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2017         uint32_t dest_reg;
2018
2019         /* If we want local id Y and Z make sure the compiler wants them in
2020          * the same register.
2021          */
2022         if (!program->flattened_work_groups) {
2023            if ((program->local_input_regs[1] !=
2024                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2025                (program->local_input_regs[2] !=
2026                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2027               assert(program->local_input_regs[1] ==
2028                      program->local_input_regs[2]);
2029            }
2030         }
2031
2032         if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2033            dest_reg = program->local_input_regs[1];
2034         else
2035            dest_reg = program->local_input_regs[2];
2036
2037         /* If we want local id X and (Y or Z) then we can do that in a
2038          * single 64-bit DOUTW.
2039          */
2040         if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2041            assert(dest_reg == (program->local_input_regs[0] + 1));
2042
2043            doutw = pvr_pds_encode_doutw_src1(
2044               program->local_input_regs[0],
2045               PVR_PDS_DOUTW_LOWER64,
2046               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2047               true,
2048               dev_info);
2049
2050            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2051
2052            pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2053         }
2054         /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW.
2055          */
2056         else {
2057            doutw = pvr_pds_encode_doutw_src1(
2058               dest_reg,
2059               PVR_PDS_DOUTW_UPPER32,
2060               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2061               true,
2062               dev_info);
2063
2064            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2065
2066            pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2067         }
2068      }
2069      /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
2070       */
2071      else if (program->local_input_regs[0] !=
2072               PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2073         doutw = pvr_pds_encode_doutw_src1(
2074            program->local_input_regs[0],
2075            PVR_PDS_DOUTW_LOWER32,
2076            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
2077               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2078            true,
2079            dev_info);
2080
2081         pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2082      }
2083   }
2084
2085   if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
2086       gen_mode == PDS_GENERATE_SIZES) {
2087      const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
2088#define APPEND(X)                    \
2089   if (encode) {                     \
2090      *buffer = X;                   \
2091      buffer++;                      \
2092   } else {                          \
2093      code_size += sizeof(uint32_t); \
2094   }
2095
2096      /* Assert that coeff_update_task_branch_size is > 0 because if it is 0
2097       * then we will be doing an infinite loop.
2098       */
2099      if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
2100         assert(program->coeff_update_task_branch_size > 0);
2101
2102      /* Test whether this is the coefficient update task or not. */
2103      APPEND(
2104         pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */
2105                            PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */
2106                            PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */
2107                            program->coeff_update_task_branch_size /* ADDR */));
2108
2109      /* Do we need to initialize the barrier coefficient? */
2110      if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2111         if (PVR_HAS_QUIRK(dev_info, 51210)) {
2112            /* Initialize the second barrier coefficient registers to zero.
2113             */
2114            APPEND(pvr_pds_encode_doutw64(0, /* cc */
2115                                          0, /* END */
2116                                          barrier_ctrl_word2, /* SRC1 */
2117                                          zero_constant64 >> 1)); /* SRC0 */
2118         }
2119         /* Initialize the coefficient register to zero. */
2120         APPEND(pvr_pds_encode_doutw64(0, /* cc */
2121                                       0, /* END */
2122                                       barrier_ctrl_word, /* SRC1 */
2123                                       zero_constant64 >> 1)); /* SRC0 */
2124      }
2125
2126      if (program->add_base_workgroup) {
2127         const uint32_t temp_values[3] = { 0, 1, 3 };
2128         for (uint32_t workgroup_component = 0; workgroup_component < 3;
2129              workgroup_component++) {
2130            if (program->work_group_input_regs[workgroup_component] ==
2131                PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2132               continue;
2133
2134            APPEND(pvr_pds_inst_encode_add32(
2135               /* cc */ 0x0,
2136               /* ALUM */ 0,
2137               /* SNA */ 0,
2138               /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
2139                  program->base_workgroup_constant_offset_in_dwords
2140                     [workgroup_component],
2141               /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
2142                  PVR_PDS_CDM_WORK_GROUP_ID_X +
2143                  temp_values[workgroup_component],
2144               /* DST  (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
2145                  PVR_PDS_CDM_WORK_GROUP_ID_X +
2146                  temp_values[workgroup_component]));
2147         }
2148      }
2149
2150      /* If we are going to put the work-group IDs in coefficients then we
2151       * just need to do the DOUTWs.
2152       */
2153      if ((program->work_group_input_regs[0] !=
2154           PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2155          (program->work_group_input_regs[1] !=
2156           PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2157         uint32_t dest_reg;
2158
2159         if (program->work_group_input_regs[0] !=
2160             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2161            dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
2162         } else {
2163            dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
2164         }
2165
2166         APPEND(pvr_pds_encode_doutw64(0, /* cc */
2167                                       0, /* END */
2168                                       work_group_id_ctrl_words[0], /* SRC1
2169                                                                     */
2170                                       dest_reg >> 1)); /* SRC0 */
2171      }
2172
2173      if (program->work_group_input_regs[2] !=
2174          PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2175         APPEND(pvr_pds_encode_doutw64(
2176            0, /* cc */
2177            0, /* END */
2178            work_group_id_ctrl_words[1], /* SRC1 */
2179            (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >>
2180               1)); /* SRC0 */
2181      }
2182
2183      /* Issue the task to the USC. */
2184      if (program->kick_usc && program->has_coefficient_update_task) {
2185         APPEND(pvr_pds_encode_doutu(0, /* cc */
2186                                     1, /* END */
2187                                     usc_control_constant64_coeff_update >>
2188                                        1)); /* SRC0; DOUTU 64-bit Src0 */
2189      }
2190
2191      /* Encode a HALT */
2192      APPEND(pvr_pds_inst_encode_halt(0));
2193
2194      /* Set the branch size used to skip the coefficient sync task. */
2195      program->coeff_update_task_branch_size = code_size / sizeof(uint32_t);
2196
2197      /* DOUTW in the local IDs. */
2198
2199      /* If we want X and Y or Z, we only need one DOUTW. */
2200      if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2201          ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2202           (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) {
2203         local_input_register =
2204            PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2205      } else {
2206         /* If we just want X. */
2207         if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2208            local_input_register =
2209               PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2210         }
2211         /* If we just want Y or Z. */
2212         else if (program->local_input_regs[1] !=
2213                     PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
2214                  program->local_input_regs[2] !=
2215                     PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2216            local_input_register =
2217               PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
2218         }
2219      }
2220
2221      if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2222          (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2223          (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2224         APPEND(pvr_pds_encode_doutw64(0, /* cc */
2225                                       0, /* END */
2226                                       local_id_ctrl_word, /* SRC1 */
2227                                       local_input_register >> 1)); /* SRC0
2228                                                                     */
2229      }
2230
2231      if (program->clear_pds_barrier) {
2232         /* Zero the persistent temp (SW fence for context switch). */
2233         APPEND(pvr_pds_inst_encode_add64(
2234            0, /* cc */
2235            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2236            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2237            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2238               (zero_constant64 >> 1), /* src0 = 0 */
2239            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2240               (zero_constant64 >> 1), /* src1 = 0 */
2241            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest =
2242                                                             * ptemp64[0]
2243                                                             */
2244      }
2245
2246      /* If this is a fence, issue the DOUTC. */
2247      if (program->fence) {
2248         APPEND(pvr_pds_inst_encode_doutc(0, /* cc */
2249                                          0 /* END */));
2250      }
2251
2252      if (program->kick_usc) {
2253         if (program->conditional_render) {
2254            /* Skip if coefficient update task. */
2255            APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1,
2256                                           0,
2257                                           PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2258                                           16));
2259
2260            /* Load the predicate. */
2261            APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1));
2262
2263            /* Load negate constant into temp for CMP. */
2264            APPEND(pvr_pds_inst_encode_add64(
2265               0, /* cc */
2266               PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2267               PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2268               PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2269                  (cond_render_negate_constant >> 1), /* src0 = 0 */
2270               PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2271                  (zero_constant64 >> 1), /* src1 = 0 */
2272               PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER +
2273                  (cond_render_negate_temp >> 1))); /* dest = ptemp64[0]
2274                                                     */
2275
2276            APPEND(pvr_pds_inst_encode_wdf(0));
2277
2278            for (uint32_t i = 0; i < 4; i++) {
2279               APPEND(pvr_pds_inst_encode_stflp32(
2280                  1, /* enable immediate */
2281                  0, /* cc */
2282                  PVR_ROGUE_PDSINST_LOP_AND, /* LOP */
2283                  cond_render_pred_temp + i, /* SRC0 */
2284                  cond_render_pred_mask_constant + i, /* SRC1 */
2285                  0, /* SRC2 (Shift) */
2286                  cond_render_pred_temp + i)); /* DST */
2287
2288               APPEND(
2289                  pvr_pds_inst_encode_stflp32(1, /* enable immediate */
2290                                              0, /* cc */
2291                                              PVR_ROGUE_PDSINST_LOP_OR, /* LOP
2292                                                                         */
2293                                              cond_render_pred_temp + i, /* SRC0
2294                                                                          */
2295                                              cond_render_pred_temp, /* SRC1 */
2296                                              0, /* SRC2 (Shift) */
2297                                              cond_render_pred_temp)); /* DST */
2298            }
2299
2300            APPEND(pvr_pds_inst_encode_limm(0, /* cc */
2301                                            cond_render_pred_temp + 1, /* SRC1
2302                                                                        */
2303                                            0, /* SRC0 */
2304                                            0)); /* GLOBALREG */
2305
2306            APPEND(pvr_pds_inst_encode_stflp32(1, /* enable immediate */
2307                                               0, /* cc */
2308                                               PVR_ROGUE_PDSINST_LOP_XOR, /* LOP
2309                                                                           */
2310                                               cond_render_pred_temp, /* SRC0 */
2311                                               cond_render_negate_temp, /* SRC1
2312                                                                         */
2313                                               0, /* SRC2 (Shift) */
2314                                               cond_render_pred_temp)); /* DST
2315                                                                         */
2316
2317            /* Check that the predicate is 0. */
2318            APPEND(pvr_pds_inst_encode_cmpi(
2319               0, /* cc */
2320               PVR_ROGUE_PDSINST_COP_EQ, /* LOP */
2321               (cond_render_pred_temp >> 1) +
2322                  PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */
2323               0)); /* SRC1 */
2324
2325            /* If predicate is 0, skip DOUTU. */
2326            APPEND(pvr_pds_inst_encode_bra(
2327               PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC:
2328                                                  P0 */
2329               0, /* NEG */
2330               PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC:
2331                                                    keep
2332                                                  */
2333               2));
2334         }
2335
2336         /* Issue the task to the USC.
2337          * DoutU src1=USC Code Base address, src2=doutu word 2.
2338          */
2339         APPEND(pvr_pds_encode_doutu(1, /* cc */
2340                                     1, /* END */
2341                                     usc_control_constant64 >> 1)); /* SRC0;
2342                                                                     * DOUTU
2343                                                                     * 64-bit
2344                                                                     * Src0.
2345                                                                     */
2346      }
2347
2348      /* End the program if the Dout did not already end it. */
2349      APPEND(pvr_pds_inst_encode_halt(0));
2350#undef APPEND
2351   }
2352
2353   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2354      /* Set the data segment pointer and ensure we return 1 past the buffer
2355       * ptr.
2356       */
2357      program->data_segment = buffer;
2358
2359      buffer += next_constant;
2360   }
2361
2362   /* Require at least one DWORD of PDS data so the program runs. */
2363   data_size = MAX2(1, data_size);
2364
2365   program->temps_used = temps_used;
2366   program->highest_temp = temps_used;
2367   program->data_size = data_size;
2368   if (gen_mode == PDS_GENERATE_SIZES)
2369      program->code_size = code_size;
2370
2371   return buffer;
2372}
2373
2374/**
2375 * Generates the PDS vertex shader data or code block. This program will do a
2376 * DMA into USC Constants followed by a DOUTU.
2377 *
2378 * \param program Pointer to the PDS vertex shader program.
2379 * \param buffer Pointer to the buffer for the program.
2380 * \param gen_mode Generate code or data.
2381 * \param dev_info PVR device information struct.
2382 * \returns Pointer to just beyond the code/data.
2383 */
2384uint32_t *pvr_pds_vertex_shader_sa(
2385   struct pvr_pds_vertex_shader_sa_program *restrict program,
2386   uint32_t *restrict buffer,
2387   enum pvr_pds_generate_mode gen_mode,
2388   const struct pvr_device_info *dev_info)
2389{
2390   uint32_t next_constant;
2391   uint32_t data_size = 0;
2392   uint32_t code_size = 0;
2393
2394   uint32_t usc_control_constant64 = 0;
2395   uint32_t dma_address_constant64 = 0;
2396   uint32_t dma_control_constant32 = 0;
2397   uint32_t doutw_value_constant64 = 0;
2398   uint32_t doutw_control_constant32 = 0;
2399   uint32_t fence_constant_word = 0;
2400   uint32_t *buffer_base;
2401   uint32_t kick_index;
2402
2403   uint32_t total_num_doutw =
2404      program->num_dword_doutw + program->num_q_word_doutw;
2405   uint32_t total_size_dma =
2406      program->num_dword_doutw + 2 * program->num_q_word_doutw;
2407
2408   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2409
2410   /* Copy the DMA control words and USC task control words to constants.
2411    *
2412    * Arrange them so that the 64-bit words are together followed by the 32-bit
2413    * words.
2414    */
2415   if (program->kick_usc) {
2416      usc_control_constant64 =
2417         pvr_pds_get_constants(&next_constant, 2, &data_size);
2418   }
2419
2420   if (program->clear_pds_barrier) {
2421      fence_constant_word =
2422         pvr_pds_get_constants(&next_constant, 2, &data_size);
2423   }
2424   dma_address_constant64 = pvr_pds_get_constants(&next_constant,
2425                                                  2 * program->num_dma_kicks,
2426                                                  &data_size);
2427
2428   /* Assign all unaligned constants together to avoid alignment issues caused
2429    * by pvr_pds_get_constants with even allocation sizes.
2430    */
2431   doutw_value_constant64 = pvr_pds_get_constants(
2432      &next_constant,
2433      total_size_dma + total_num_doutw + program->num_dma_kicks,
2434      &data_size);
2435   doutw_control_constant32 = doutw_value_constant64 + total_size_dma;
2436   dma_control_constant32 = doutw_control_constant32 + total_num_doutw;
2437
2438   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2439      buffer_base = buffer;
2440
2441      if (program->kick_usc) {
2442         /* Src0 for DOUTU. */
2443         pvr_pds_write_wide_constant(buffer_base,
2444                                     usc_control_constant64,
2445                                     program->usc_task_control.src0); /* DOUTU
2446                                                                       * 64-bit
2447                                                                       * Src0.
2448                                                                       */
2449         buffer += 2;
2450      }
2451
2452      if (program->clear_pds_barrier) {
2453         /* Encode the fence constant src0. Fence barrier is initialized to
2454          * zero.
2455          */
2456         pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0);
2457         buffer += 2;
2458      }
2459
2460      if (total_num_doutw > 0) {
2461         for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2462            /* Write the constant for the coefficient register write. */
2463            pvr_pds_write_constant64(buffer_base,
2464                                     doutw_value_constant64,
2465                                     program->q_word_doutw_value[2 * i],
2466                                     program->q_word_doutw_value[2 * i + 1]);
2467            pvr_pds_write_constant32(
2468               buffer_base,
2469               doutw_control_constant32,
2470               program->q_word_doutw_control[i] |
2471                  ((!program->num_dma_kicks && i == total_num_doutw - 1)
2472                      ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2473                      : 0));
2474
2475            doutw_value_constant64 += 2;
2476            doutw_control_constant32 += 1;
2477         }
2478
2479         for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2480            /* Write the constant for the coefficient register write. */
2481            pvr_pds_write_constant32(buffer_base,
2482                                     doutw_value_constant64,
2483                                     program->dword_doutw_value[i]);
2484            pvr_pds_write_constant32(
2485               buffer_base,
2486               doutw_control_constant32,
2487               program->dword_doutw_control[i] |
2488                  ((!program->num_dma_kicks && i == program->num_dword_doutw - 1)
2489                      ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2490                      : 0));
2491
2492            doutw_value_constant64 += 1;
2493            doutw_control_constant32 += 1;
2494         }
2495
2496         buffer += total_size_dma + total_num_doutw;
2497      }
2498
2499      if (program->num_dma_kicks == 1) /* Most-common case. */
2500      {
2501         /* Src0 for DOUTD - Address. */
2502         pvr_pds_write_dma_address(buffer_base,
2503                                   dma_address_constant64,
2504                                   program->dma_address[0],
2505                                   false,
2506                                   dev_info);
2507
2508         /* Src1 for DOUTD - Control Word. */
2509         pvr_pds_write_constant32(
2510            buffer_base,
2511            dma_control_constant32,
2512            program->dma_control[0] |
2513               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2514
2515         /* Move the buffer ptr along as we will return 1 past the buffer. */
2516         buffer += 3;
2517      } else if (program->num_dma_kicks > 1) {
2518         for (kick_index = 0; kick_index < program->num_dma_kicks - 1;
2519              kick_index++) {
2520            /* Src0 for DOUTD - Address. */
2521            pvr_pds_write_dma_address(buffer_base,
2522                                      dma_address_constant64,
2523                                      program->dma_address[kick_index],
2524                                      false,
2525                                      dev_info);
2526
2527            /* Src1 for DOUTD - Control Word. */
2528            pvr_pds_write_constant32(buffer_base,
2529                                     dma_control_constant32,
2530                                     program->dma_control[kick_index]);
2531            dma_address_constant64 += 2;
2532            dma_control_constant32 += 1;
2533         }
2534
2535         /* Src0 for DOUTD - Address. */
2536         pvr_pds_write_dma_address(buffer_base,
2537                                   dma_address_constant64,
2538                                   program->dma_address[kick_index],
2539                                   false,
2540                                   dev_info);
2541
2542         /* Src1 for DOUTD - Control Word. */
2543         pvr_pds_write_constant32(
2544            buffer_base,
2545            dma_control_constant32,
2546            program->dma_control[kick_index] |
2547               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2548
2549         buffer += 3 * program->num_dma_kicks;
2550      }
2551   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2552      if (program->clear_pds_barrier) {
2553         /* Zero the persistent temp (SW fence for context switch). */
2554         *buffer++ = pvr_pds_inst_encode_add64(
2555            0, /* cc */
2556            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2557            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2558            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2559               (fence_constant_word >> 1), /* src0 = 0 */
2560            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2561               (fence_constant_word >> 1), /* src1 = 0 */
2562            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
2563                                                            * ptemp[0]
2564                                                            */
2565      }
2566
2567      if (total_num_doutw > 0) {
2568         for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2569            /* Set the coefficient register to data value. */
2570            *buffer++ = pvr_pds_encode_doutw64(
2571               /* cc */ 0,
2572               /* END */ !program->num_dma_kicks && !program->kick_usc &&
2573                  (i == total_num_doutw - 1),
2574               /* SRC1 */ doutw_control_constant32,
2575               /* SRC0 */ doutw_value_constant64 >> 1);
2576
2577            doutw_value_constant64 += 2;
2578            doutw_control_constant32 += 1;
2579         }
2580
2581         for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2582            /* Set the coefficient register to data value. */
2583            *buffer++ = pvr_pds_encode_doutw64(
2584               /* cc */ 0,
2585               /* END */ !program->num_dma_kicks && !program->kick_usc &&
2586                  (i == program->num_dword_doutw - 1),
2587               /* SRC1 */ doutw_control_constant32,
2588               /* SRC0 */ doutw_value_constant64 >> 1);
2589
2590            doutw_value_constant64 += 1;
2591            doutw_control_constant32 += 1;
2592         }
2593      }
2594
2595      if (program->num_dma_kicks != 0) {
2596         /* DMA the state into the secondary attributes. */
2597
2598         if (program->num_dma_kicks == 1) /* Most-common case. */
2599         {
2600            *buffer++ = pvr_pds_encode_doutd(
2601               /* cc */ 0,
2602               /* END */ !program->kick_usc,
2603               /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
2604               /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
2605                                                         * Src0.
2606                                                         */
2607         } else {
2608            for (kick_index = 0; kick_index < program->num_dma_kicks;
2609                 kick_index++) {
2610               *buffer++ = pvr_pds_encode_doutd(
2611                  /* cc */ 0,
2612                  /* END */ (!program->kick_usc) &&
2613                     (kick_index + 1 == program->num_dma_kicks),
2614                  /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
2615                                                      * Src1.
2616                                                      */
2617                  /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
2618                                                            * 64-bit
2619                                                            * Src0.
2620                                                            */
2621               dma_address_constant64 += 2;
2622               dma_control_constant32 += 1;
2623            }
2624         }
2625      }
2626
2627      if (program->kick_usc) {
2628         /* Kick the USC. */
2629         *buffer++ = pvr_pds_encode_doutu(
2630            /* cc */ 0,
2631            /* END */ 1,
2632            /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
2633                                                      */
2634      }
2635
2636      if (!program->kick_usc && program->num_dma_kicks == 0 &&
2637          total_num_doutw == 0) {
2638         *buffer++ = pvr_pds_inst_encode_halt(0);
2639      }
2640   }
2641
2642   code_size = program->num_dma_kicks + total_num_doutw;
2643   if (program->clear_pds_barrier)
2644      code_size++; /* ADD64 instruction. */
2645
2646   if (program->kick_usc)
2647      code_size++;
2648
2649   /* If there are no DMAs and no USC kick then code is HALT only. */
2650   if (code_size == 0)
2651      code_size = 1;
2652
2653   program->data_size = data_size;
2654   program->code_size = code_size;
2655
2656   return buffer;
2657}
2658
2659/**
2660 * Writes the Uniform Data block for the PDS pixel shader secondary attributes
2661 * program.
2662 *
2663 * \param program Pointer to the PDS pixel shader secondary attributes program.
2664 * \param buffer Pointer to the buffer for the code/data.
2665 * \param gen_mode Either code or data can be generated or sizes only updated.
2666 * \returns Pointer to just beyond the buffer for the program/data.
2667 */
2668uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
2669   struct pvr_pds_pixel_shader_sa_program *restrict program,
2670   uint32_t *restrict buffer,
2671   enum pvr_pds_generate_mode gen_mode)
2672{
2673   uint32_t *instruction;
2674   uint32_t code_size = 0;
2675   uint32_t data_size = 0;
2676   uint32_t temps_used = 0;
2677   uint32_t next_constant;
2678
2679   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
2680          0);
2681
2682   assert(gen_mode != PDS_GENERATE_DATA_SEGMENT);
2683
2684   /* clang-format off */
2685   /* Shape of code segment (note: clear is different)
2686    *
2687    *      Code
2688    *    +------------+
2689    *    | BRA if0    |
2690    *    | DOUTD      |
2691    *    |  ...       |
2692    *    | DOUTD.halt |
2693    *    | uniform    |
2694    *    | DOUTD      |
2695    *    |  ...       |
2696    *    |  ...       |
2697    *    | DOUTW      |
2698    *    |  ...       |
2699    *    |  ...       |
2700    *    | DOUTU.halt |
2701    *    | HALT       |
2702    *    +------------+
2703    */
2704   /* clang-format on */
2705   instruction = buffer;
2706
2707   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2708
2709   /* The clear color can arrive packed in the right form in the first (or
2710    * first 2) dwords of the shared registers and the program will issue a
2711    * single doutw for this.
2712    */
2713   if (program->clear && program->packed_clear) {
2714      uint32_t color_constant1 =
2715         pvr_pds_get_constants(&next_constant, 2, &data_size);
2716
2717      uint32_t control_word_constant1 =
2718         pvr_pds_get_constants(&next_constant, 2, &data_size);
2719
2720      if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2721         /* DOUTW the clear color to the USC constants. Predicate with
2722          * uniform loading flag (IF0).
2723          */
2724         *instruction++ = pvr_pds_encode_doutw64(
2725            /* cc */ 1, /* Only for uniform loading program. */
2726            /* END */ program->kick_usc ? 0 : 1, /* Last
2727                                                  * instruction
2728                                                  * for a clear.
2729                                                  */
2730            /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2731            /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2732
2733         code_size += 1;
2734      }
2735   } else if (program->clear) {
2736      uint32_t color_constant1, color_constant2;
2737
2738      if (program->clear_color_dest_reg & 0x1) {
2739         uint32_t color_constant3, control_word_constant1,
2740            control_word_constant2, color_constant4;
2741
2742         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2743         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2744         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2745
2746         control_word_constant1 =
2747            pvr_pds_get_constants(&next_constant, 2, &data_size);
2748         control_word_constant2 =
2749            pvr_pds_get_constants(&next_constant, 2, &data_size);
2750         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2751
2752         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2753            /* DOUTW the clear color to the USSE constants. Predicate with
2754             * uniform loading flag (IF0).
2755             */
2756            *instruction++ = pvr_pds_encode_doutw64(
2757               /* cc */ 1, /* Only for Uniform Loading program */
2758               /* END */ 0,
2759               /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2760               /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2761
2762            *instruction++ = pvr_pds_encode_doutw64(
2763               /* cc */ 1, /* Only for Uniform Loading program */
2764               /* END */ 0,
2765               /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */
2766               /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2767
2768            *instruction++ = pvr_pds_encode_doutw64(
2769               /* cc */ 1, /* Only for uniform loading program */
2770               /* END */ program->kick_usc ? 0 : 1, /* Last
2771                                                     * instruction
2772                                                     * for a clear.
2773                                                     */
2774               /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */
2775               /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */
2776         }
2777
2778         code_size += 3;
2779      } else {
2780         uint32_t control_word_constant, control_word_last_constant;
2781
2782         /* Put the clear color and control words into the first 8
2783          * constants.
2784          */
2785         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2786         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2787         control_word_constant =
2788            pvr_pds_get_constants(&next_constant, 2, &data_size);
2789         control_word_last_constant =
2790            pvr_pds_get_constants(&next_constant, 2, &data_size);
2791
2792         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2793            /* DOUTW the clear color to the USSE constants. Predicate with
2794             * uniform loading flag (IF0).
2795             */
2796            *instruction++ = pvr_pds_encode_doutw64(
2797               /* cc */ 1, /* Only for Uniform Loading program */
2798               /* END */ 0,
2799               /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */
2800               /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2801
2802            *instruction++ = pvr_pds_encode_doutw64(
2803               /* cc */ 1, /* Only for uniform loading program */
2804               /* END */ program->kick_usc ? 0 : 1, /* Last
2805                                                     * instruction
2806                                                     * for a clear.
2807                                                     */
2808               /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1
2809                                                       */
2810               /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2811         }
2812
2813         code_size += 2;
2814      }
2815
2816      if (program->kick_usc) {
2817         uint32_t doutu_constant64;
2818
2819         doutu_constant64 =
2820            pvr_pds_get_constants(&next_constant, 2, &data_size);
2821
2822         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2823            /* Issue the task to the USC.
2824             *
2825             * dout ds1[constant_use], ds0[constant_use],
2826             * ds1[constant_use], emit
2827             */
2828            *instruction++ = pvr_pds_encode_doutu(
2829               /* cc */ 0,
2830               /* END */ 1,
2831               /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0
2832                                                   */
2833         }
2834
2835         code_size += 1;
2836      }
2837
2838      if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2839         /* End the program. */
2840         *instruction++ = pvr_pds_inst_encode_halt(0);
2841      }
2842      code_size += 1;
2843   } else {
2844      uint32_t total_num_doutw =
2845         program->num_dword_doutw + program->num_q_word_doutw;
2846      bool both_textures_and_uniforms =
2847         ((program->num_texture_dma_kicks > 0) &&
2848          ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) ||
2849           program->kick_usc));
2850      uint32_t doutu_constant64 = 0;
2851
2852      if (both_textures_and_uniforms) {
2853         /* If the size of a PDS data section is 0, the hardware won't run
2854          * it. We therefore don't need to branch when there is only a
2855          * texture OR a uniform update program.
2856          */
2857         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2858            uint32_t branch_address =
2859               MAX2(1 + program->num_texture_dma_kicks, 2);
2860
2861            /* Use If0 to BRAnch to uniform code. */
2862            *instruction++ = pvr_pds_encode_bra(
2863               /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0,
2864               /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE,
2865               /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2866               /* ADDR */ branch_address);
2867         }
2868
2869         code_size += 1;
2870      }
2871
2872      if (program->num_texture_dma_kicks > 0) {
2873         uint32_t dma_address_constant64;
2874         uint32_t dma_control_constant32;
2875         /* Allocate 3 constant spaces for each kick. The 64-bit constants
2876          * come first followed by the 32-bit constants.
2877          */
2878         dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE;
2879         dma_control_constant32 =
2880            dma_address_constant64 + (program->num_texture_dma_kicks * 2);
2881
2882         for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) {
2883            code_size += 1;
2884            if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
2885               continue;
2886
2887            /* DMA the state into the secondary attributes. */
2888            *instruction++ = pvr_pds_encode_doutd(
2889               /* cc */ 0,
2890               /* END */ dma == (program->num_texture_dma_kicks - 1),
2891               /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
2892               /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2893                                                         * 64-bit
2894                                                         * Src0
2895                                                         */
2896            dma_address_constant64 += 2;
2897            dma_control_constant32 += 1;
2898         }
2899      } else if (both_textures_and_uniforms) {
2900         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2901            /* End the program. */
2902            *instruction++ = pvr_pds_inst_encode_halt(0);
2903         }
2904
2905         code_size += 1;
2906      }
2907
2908      /* Reserve space at the beginning of the data segment for the DOUTU Task
2909       * Control if one is needed.
2910       */
2911      if (program->kick_usc) {
2912         doutu_constant64 =
2913            pvr_pds_get_constants(&next_constant, 2, &data_size);
2914      }
2915
2916      /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The
2917       * 64-bit constants come first followed by the 32-bit constants.
2918       */
2919      uint32_t total_size_dma =
2920         program->num_dword_doutw + 2 * program->num_q_word_doutw;
2921
2922      uint32_t dma_address_constant64 = pvr_pds_get_constants(
2923         &next_constant,
2924         program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw,
2925         &data_size);
2926      uint32_t doutw_value_constant64 =
2927         dma_address_constant64 + program->num_uniform_dma_kicks * 2;
2928      uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma;
2929      uint32_t doutw_control_constant32 =
2930         dma_control_constant32 + program->num_uniform_dma_kicks;
2931
2932      if (total_num_doutw > 0) {
2933         pvr_pds_get_constants(&next_constant, 0, &data_size);
2934
2935         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2936            for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2937               /* Set the coefficient register to data value. */
2938               *instruction++ = pvr_pds_encode_doutw64(
2939                  /* cc */ 0,
2940                  /* END */ !program->num_uniform_dma_kicks &&
2941                     !program->kick_usc && (i == total_num_doutw - 1),
2942                  /* SRC1 */ doutw_control_constant32,
2943                  /* SRC0 */ doutw_value_constant64 >> 1);
2944
2945               doutw_value_constant64 += 2;
2946               doutw_control_constant32 += 1;
2947            }
2948
2949            for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2950               /* Set the coefficient register to data value. */
2951               *instruction++ = pvr_pds_encode_doutw64(
2952                  /* cc */ 0,
2953                  /* END */ !program->num_uniform_dma_kicks &&
2954                     !program->kick_usc && (i == program->num_dword_doutw - 1),
2955                  /* SRC1 */ doutw_control_constant32,
2956                  /* SRC0 */ doutw_value_constant64 >> 1);
2957
2958               doutw_value_constant64 += 1;
2959               doutw_control_constant32 += 1;
2960            }
2961         }
2962         code_size += total_num_doutw;
2963      }
2964
2965      if (program->num_uniform_dma_kicks > 0) {
2966         for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) {
2967            code_size += 1;
2968
2969            if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
2970               continue;
2971
2972            bool last_instruction = false;
2973            if (!program->kick_usc &&
2974                (dma == program->num_uniform_dma_kicks - 1)) {
2975               last_instruction = true;
2976            }
2977            /* DMA the state into the secondary attributes. */
2978            *instruction++ = pvr_pds_encode_doutd(
2979               /* cc */ 0,
2980               /* END */ last_instruction,
2981               /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1
2982                                                   */
2983               /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2984                                                         * 64-bit
2985                                                         * Src0
2986                                                         */
2987            dma_address_constant64 += 2;
2988            dma_control_constant32 += 1;
2989         }
2990      }
2991
2992      if (program->kick_usc) {
2993         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2994            /* Issue the task to the USC.
2995             *
2996             * dout ds1[constant_use], ds0[constant_use],
2997             * ds1[constant_use], emit
2998             */
2999
3000            *instruction++ = pvr_pds_encode_doutu(
3001               /* cc */ 0,
3002               /* END */ 1,
3003               /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
3004         }
3005
3006         code_size += 1;
3007      } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) {
3008         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3009            /* End the program. */
3010            *instruction++ = pvr_pds_inst_encode_halt(0);
3011         }
3012
3013         code_size += 1;
3014      }
3015   }
3016
3017   /* Minimum temp count is 1. */
3018   program->temps_used = MAX2(temps_used, 1);
3019   program->code_size = code_size;
3020
3021   if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
3022      return instruction;
3023   else
3024      return NULL;
3025}
3026
3027/**
3028 * Writes the Uniform Data block for the PDS pixel shader secondary attributes
3029 * program.
3030 *
3031 * \param program Pointer to the PDS pixel shader secondary attributes program.
3032 * \param buffer Pointer to the buffer for the code/data.
3033 * \param gen_mode Either code or data can be generated or sizes only updated.
3034 * \param dev_info PVR device information struct.
3035 * \returns Pointer to just beyond the buffer for the program/data.
3036 */
3037uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
3038   struct pvr_pds_pixel_shader_sa_program *restrict program,
3039   uint32_t *restrict buffer,
3040   enum pvr_pds_generate_mode gen_mode,
3041   bool uniform,
3042   const struct pvr_device_info *dev_info)
3043{
3044   uint32_t *constants = buffer;
3045   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3046   uint32_t temps_used = 0;
3047   uint32_t data_size = 0;
3048
3049   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
3050          0);
3051
3052   assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);
3053
3054   /* Shape of data segment (note: clear is different).
3055    *
3056    *        Uniform            Texture
3057    *    +--------------+   +-------------+
3058    *    | USC Task   L |   | USC Task  L |
3059    *    |            H |   |           H |
3060    *    | DMA1 Src0  L |   | DMA1 Src0 L |
3061    *    |            H |   |           H |
3062    *    | DMA2 Src0  L |   |             |
3063    *    |            H |   |             |
3064    *    | DMA1 Src1    |   | DMA1 Src1   |
3065    *    | DMA2 Src1    |   |             |
3066    *    | DOUTW0 Src1  |   |             |
3067    *    | DOUTW1 Src1  |   |             |
3068    *    |   ...        |   |             |
3069    *    | DOUTWn Srcn  |   |             |
3070    *    | other data   |   |             |
3071    *    +--------------+   +-------------+
3072    */
3073
3074   /* Generate the PDS pixel shader secondary attributes data.
3075    *
3076    * Packed Clear
3077    * The clear color can arrive packed in the right form in the first (or
3078    * first 2) dwords of the shared registers and the program will issue a
3079    * single DOUTW for this.
3080    */
3081   if (program->clear && uniform && program->packed_clear) {
3082      uint32_t color_constant1 =
3083         pvr_pds_get_constants(&next_constant, 2, &data_size);
3084
3085      uint32_t control_word_constant1 =
3086         pvr_pds_get_constants(&next_constant, 2, &data_size);
3087
3088      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3089         uint32_t doutw;
3090
3091         pvr_pds_write_constant64(constants,
3092                                  color_constant1,
3093                                  program->clear_color[0],
3094                                  program->clear_color[1]);
3095
3096         /* Load into first constant in common store. */
3097         doutw = pvr_pds_encode_doutw_src1(
3098            program->clear_color_dest_reg,
3099            PVR_PDS_DOUTW_LOWER64,
3100            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3101            false,
3102            dev_info);
3103
3104         /* Set the last flag. */
3105         doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3106         pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
3107      }
3108   } else if (program->clear && uniform) {
3109      uint32_t color_constant1, color_constant2;
3110
3111      if (program->clear_color_dest_reg & 0x1) {
3112         uint32_t color_constant3, control_word_constant1,
3113            control_word_constant2, color_constant4;
3114
3115         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3116         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3117         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3118
3119         control_word_constant1 =
3120            pvr_pds_get_constants(&next_constant, 2, &data_size);
3121         control_word_constant2 =
3122            pvr_pds_get_constants(&next_constant, 2, &data_size);
3123         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3124
3125         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3126            uint32_t doutw;
3127
3128            pvr_pds_write_constant32(constants,
3129                                     color_constant1,
3130                                     program->clear_color[0]);
3131
3132            pvr_pds_write_constant64(constants,
3133                                     color_constant2,
3134                                     program->clear_color[1],
3135                                     program->clear_color[2]);
3136
3137            pvr_pds_write_constant32(constants,
3138                                     color_constant3,
3139                                     program->clear_color[3]);
3140
3141            /* Load into first constant in common store. */
3142            doutw = pvr_pds_encode_doutw_src1(
3143               program->clear_color_dest_reg,
3144               PVR_PDS_DOUTW_LOWER32,
3145               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3146               false,
3147               dev_info);
3148
3149            pvr_pds_write_constant64(constants,
3150                                     control_word_constant1,
3151                                     doutw,
3152                                     0);
3153
3154            /* Move the destination register along. */
3155            doutw = pvr_pds_encode_doutw_src1(
3156               program->clear_color_dest_reg + 1,
3157               PVR_PDS_DOUTW_LOWER64,
3158               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3159               false,
3160               dev_info);
3161
3162            pvr_pds_write_constant64(constants,
3163                                     control_word_constant2,
3164                                     doutw,
3165                                     0);
3166
3167            /* Move the destination register along. */
3168            doutw = pvr_pds_encode_doutw_src1(
3169               program->clear_color_dest_reg + 3,
3170               PVR_PDS_DOUTW_LOWER32,
3171               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3172               false,
3173               dev_info);
3174
3175            /* Set the last flag. */
3176            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3177            pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
3178         }
3179      } else {
3180         uint32_t control_word_constant, control_word_last_constant;
3181
3182         /* Put the clear color and control words into the first 8
3183          * constants.
3184          */
3185         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3186         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3187         control_word_constant =
3188            pvr_pds_get_constants(&next_constant, 2, &data_size);
3189         control_word_last_constant =
3190            pvr_pds_get_constants(&next_constant, 2, &data_size);
3191
3192         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3193            uint32_t doutw;
3194            pvr_pds_write_constant64(constants,
3195                                     color_constant1,
3196                                     program->clear_color[0],
3197                                     program->clear_color[1]);
3198
3199            pvr_pds_write_constant64(constants,
3200                                     color_constant2,
3201                                     program->clear_color[2],
3202                                     program->clear_color[3]);
3203
3204            /* Load into first constant in common store. */
3205            doutw = pvr_pds_encode_doutw_src1(
3206               program->clear_color_dest_reg,
3207               PVR_PDS_DOUTW_LOWER64,
3208               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3209               false,
3210               dev_info);
3211
3212            pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);
3213
3214            /* Move the destination register along. */
3215            doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
3216            doutw |= (program->clear_color_dest_reg + 2)
3217                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
3218
3219            /* Set the last flag. */
3220            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3221            pvr_pds_write_constant64(constants,
3222                                     control_word_last_constant,
3223                                     doutw,
3224                                     0);
3225         }
3226      }
3227
3228      /* Constants for the DOUTU Task Control, if needed. */
3229      if (program->kick_usc) {
3230         uint32_t doutu_constant64 =
3231            pvr_pds_get_constants(&next_constant, 2, &data_size);
3232
3233         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3234            pvr_pds_write_wide_constant(
3235               constants,
3236               doutu_constant64,
3237               program->usc_task_control.src0); /* 64-bit
3238                                                 */
3239            /* Src0 */
3240         }
3241      }
3242   } else {
3243      if (uniform) {
3244         /* Reserve space at the beginning of the data segment for the DOUTU
3245          * Task Control if one is needed.
3246          */
3247         if (program->kick_usc) {
3248            uint32_t doutu_constant64 =
3249               pvr_pds_get_constants(&next_constant, 2, &data_size);
3250
3251            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3252               pvr_pds_write_wide_constant(
3253                  constants,
3254                  doutu_constant64,
3255                  program->usc_task_control.src0); /* 64-bit Src0 */
3256            }
3257         }
3258
3259         uint32_t total_num_doutw =
3260            program->num_dword_doutw + program->num_q_word_doutw;
3261         uint32_t total_size_dma =
3262            program->num_dword_doutw + 2 * program->num_q_word_doutw;
3263
3264         /* Allocate 3 constant spaces for each kick. The 64-bit constants
3265          * come first followed by the 32-bit constants.
3266          */
3267         uint32_t dma_address_constant64 =
3268            pvr_pds_get_constants(&next_constant,
3269                                  program->num_uniform_dma_kicks * 3 +
3270                                     total_size_dma + total_num_doutw,
3271                                  &data_size);
3272         uint32_t doutw_value_constant64 =
3273            dma_address_constant64 + program->num_uniform_dma_kicks * 2;
3274         uint32_t dma_control_constant32 =
3275            doutw_value_constant64 + total_size_dma;
3276         uint32_t doutw_control_constant32 =
3277            dma_control_constant32 + program->num_uniform_dma_kicks;
3278
3279         if (total_num_doutw > 0) {
3280            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3281               for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
3282                  pvr_pds_write_constant64(
3283                     constants,
3284                     doutw_value_constant64,
3285                     program->q_word_doutw_value[2 * i],
3286                     program->q_word_doutw_value[2 * i + 1]);
3287                  pvr_pds_write_constant32(
3288                     constants,
3289                     doutw_control_constant32,
3290                     program->q_word_doutw_control[i] |
3291                        ((!program->num_uniform_dma_kicks &&
3292                          i == total_num_doutw - 1)
3293                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3294                            : 0));
3295
3296                  doutw_value_constant64 += 2;
3297                  doutw_control_constant32 += 1;
3298               }
3299
3300               for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
3301                  pvr_pds_write_constant32(constants,
3302                                           doutw_value_constant64,
3303                                           program->dword_doutw_value[i]);
3304                  pvr_pds_write_constant32(
3305                     constants,
3306                     doutw_control_constant32,
3307                     program->dword_doutw_control[i] |
3308                        ((!program->num_uniform_dma_kicks &&
3309                          i == program->num_dword_doutw - 1)
3310                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3311                            : 0));
3312
3313                  doutw_value_constant64 += 1;
3314                  doutw_control_constant32 += 1;
3315               }
3316            }
3317         }
3318
3319         if (program->num_uniform_dma_kicks > 0) {
3320            uint32_t kick;
3321
3322            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3323               for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
3324                    kick++) {
3325                  /* Copy the dma control words to constants. */
3326                  pvr_pds_write_dma_address(constants,
3327                                            dma_address_constant64,
3328                                            program->uniform_dma_address[kick],
3329                                            false,
3330                                            dev_info);
3331                  pvr_pds_write_constant32(constants,
3332                                           dma_control_constant32,
3333                                           program->uniform_dma_control[kick]);
3334
3335                  dma_address_constant64 += 2;
3336                  dma_control_constant32 += 1;
3337               }
3338
3339               pvr_pds_write_dma_address(constants,
3340                                         dma_address_constant64,
3341                                         program->uniform_dma_address[kick],
3342                                         false,
3343                                         dev_info);
3344               pvr_pds_write_constant32(
3345                  constants,
3346                  dma_control_constant32,
3347                  program->uniform_dma_control[kick] |
3348                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3349            }
3350         }
3351
3352      } else if (program->num_texture_dma_kicks > 0) {
3353         /* Allocate 3 constant spaces for each kick. The 64-bit constants
3354          * come first followed by the 32-bit constants.
3355          */
3356         uint32_t dma_address_constant64 =
3357            pvr_pds_get_constants(&next_constant,
3358                                  program->num_texture_dma_kicks * 3,
3359                                  &data_size);
3360         uint32_t dma_control_constant32 =
3361            dma_address_constant64 + (program->num_texture_dma_kicks * 2);
3362
3363         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3364            uint32_t kick;
3365            for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
3366               /* Copy the DMA control words to constants. */
3367               pvr_pds_write_dma_address(constants,
3368                                         dma_address_constant64,
3369                                         program->texture_dma_address[kick],
3370                                         false,
3371                                         dev_info);
3372
3373               pvr_pds_write_constant32(constants,
3374                                        dma_control_constant32,
3375                                        program->texture_dma_control[kick]);
3376
3377               dma_address_constant64 += 2;
3378               dma_control_constant32 += 1;
3379            }
3380
3381            pvr_pds_write_dma_address(constants,
3382                                      dma_address_constant64,
3383                                      program->texture_dma_address[kick],
3384                                      false,
3385                                      dev_info);
3386
3387            pvr_pds_write_constant32(
3388               constants,
3389               dma_control_constant32,
3390               program->texture_dma_control[kick] |
3391                  PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3392         }
3393      }
3394   }
3395
3396   /* Save the data segment pointer and size. */
3397   program->data_segment = constants;
3398
3399   /* Minimum temp count is 1. */
3400   program->temps_used = MAX2(temps_used, 1);
3401   program->data_size = data_size;
3402
3403   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3404      return (constants + next_constant);
3405   else
3406      return NULL;
3407}
3408
3409/**
3410 * Generates generic DOUTC PDS program.
3411 *
3412 * \param program Pointer to the PDS kick USC.
3413 * \param buffer Pointer to the buffer for the program.
3414 * \param gen_mode Either code and data can be generated, or sizes only updated.
3415 * \returns Pointer to just beyond the buffer for the code or program segment.
3416 */
3417uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
3418                                 uint32_t *restrict buffer,
3419                                 enum pvr_pds_generate_mode gen_mode)
3420{
3421   uint32_t constant = 0;
3422
3423   /* Automatically get a data size of 1x 128bit chunks. */
3424   uint32_t data_size = 0, code_size = 0;
3425
3426   /* Setup the data part. */
3427   uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3428   uint32_t *instruction = buffer;
3429   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3430                                                           * dwords.
3431                                                           */
3432
3433   /* Update the program sizes. */
3434   program->data_size = data_size;
3435   program->code_size = code_size;
3436   program->data_segment = constants;
3437
3438   if (gen_mode == PDS_GENERATE_SIZES)
3439      return NULL;
3440
3441   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3442      /* Copy the USC task control words to constants. */
3443
3444      constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
3445      pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
3446                                                                * Src0
3447                                                                */
3448
3449      uint32_t control_word_constant =
3450         pvr_pds_get_constants(&next_constant, 2, &data_size);
3451      pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
3452                                                                         * Src1
3453                                                                         */
3454
3455      program->data_size = data_size;
3456      buffer += data_size;
3457
3458      return buffer;
3459   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3460      *instruction++ = pvr_pds_inst_encode_doutc(
3461         /* cc */ 0,
3462         /* END */ 0);
3463
3464      code_size++;
3465
3466      /* End the program. */
3467      *instruction++ = pvr_pds_inst_encode_halt(0);
3468      code_size++;
3469
3470      program->code_size = code_size;
3471   }
3472
3473   return instruction;
3474}
3475
3476/**
3477 * Generates generic kick DOUTU PDS program in a single data+code block.
3478 *
3479 * \param control Pointer to the PDS kick USC.
3480 * \param buffer Pointer to the buffer for the program.
3481 * \param gen_mode Either code and data can be generated or sizes only updated.
3482 * \param dev_info PVR device information structure.
3483 * \returns Pointer to just beyond the buffer for the code or program segment.
3484 */
3485uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control,
3486                                 uint32_t *restrict buffer,
3487                                 enum pvr_pds_generate_mode gen_mode,
3488                                 const struct pvr_device_info *dev_info)
3489{
3490   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3491   uint32_t doutw;
3492   uint32_t data_size = 0, code_size = 0;
3493   uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3494   uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3495
3496   /* Assert if buffer is exceeded. */
3497   assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS);
3498
3499   uint32_t *constants = buffer;
3500   uint32_t *instruction = buffer;
3501
3502   /* Put the constants and control words interleaved in the data region. */
3503   for (uint32_t const_pair = 0; const_pair < control->num_const64;
3504        const_pair++) {
3505      constant[const_pair] =
3506         pvr_pds_get_constants(&next_constant, 2, &data_size);
3507      control_word_constant[const_pair] =
3508         pvr_pds_get_constants(&next_constant, 2, &data_size);
3509   }
3510
3511   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3512      /* Data segment points to start of constants. */
3513      control->data_segment = constants;
3514
3515      for (uint32_t const_pair = 0; const_pair < control->num_const64;
3516           const_pair++) {
3517         pvr_pds_write_constant64(constants,
3518                                  constant[const_pair],
3519                                  H32(control->doutw_data[const_pair]),
3520                                  L32(control->doutw_data[const_pair]));
3521
3522         /* Start loading at offset 0. */
3523         if (control->dest_store == PDS_COMMON_STORE) {
3524            doutw = pvr_pds_encode_doutw_src1(
3525               (2 * const_pair),
3526               PVR_PDS_DOUTW_LOWER64,
3527               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3528               false,
3529               dev_info);
3530         } else {
3531            doutw = pvr_pds_encode_doutw_src1(
3532               (2 * const_pair),
3533               PVR_PDS_DOUTW_LOWER64,
3534               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
3535               false,
3536               dev_info);
3537         }
3538
3539         if (const_pair + 1 == control->num_const64) {
3540            /* Set the last flag for the MCU (assume there are no following
3541             * DOUTD's).
3542             */
3543            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3544         }
3545         pvr_pds_write_constant64(constants,
3546                                  control_word_constant[const_pair],
3547                                  doutw,
3548                                  0);
3549      }
3550
3551      control->data_size = data_size;
3552   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3553      /* Code section. */
3554
3555      for (uint32_t const_pair = 0; const_pair < control->num_const64;
3556           const_pair++) {
3557         /* DOUTW the PDS data to the USC constants. */
3558         *instruction++ = pvr_pds_encode_doutw64(
3559            /* cc */ 0,
3560            /* END */ control->last_instruction &&
3561               (const_pair + 1 == control->num_const64),
3562            /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit
3563                                                           * Src1.
3564                                                           */
3565            /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */
3566
3567         code_size++;
3568      }
3569
3570      if (control->last_instruction) {
3571         /* End the program. */
3572         *instruction++ = pvr_pds_inst_encode_halt(0);
3573         code_size++;
3574      }
3575
3576      control->code_size = code_size;
3577   }
3578
3579   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3580      return (constants + next_constant);
3581   else
3582      return instruction;
3583}
3584
3585/**
3586 * Generates generic kick DOUTU PDS program in a single data+code block.
3587 *
3588 * \param program Pointer to the PDS kick USC.
3589 * \param buffer Pointer to the buffer for the program.
3590 * \param start_next_constant Next constant in data segment. Non-zero if another
3591 *                            instruction precedes the DOUTU.
3592 * \param cc_enabled If true then the DOUTU is predicated (cc set).
3593 * \param gen_mode Either code and data can be generated or sizes only updated.
3594 * \returns Pointer to just beyond the buffer for the code or program segment.
3595 */
3596uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
3597                           uint32_t *restrict buffer,
3598                           uint32_t start_next_constant,
3599                           bool cc_enabled,
3600                           enum pvr_pds_generate_mode gen_mode)
3601{
3602   uint32_t constant = 0;
3603
3604   /* Automatically get a data size of 2 128bit chunks. */
3605   uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE;
3606   uint32_t code_size = 1; /* Single doutu */
3607   uint32_t dummy_count = 0;
3608
3609   /* Setup the data part. */
3610   uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3611   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3612                                                           * dwords.
3613                                                           */
3614
3615   /* Update the program sizes. */
3616   program->data_size = data_size;
3617   program->code_size = code_size;
3618   program->data_segment = constants;
3619
3620   if (gen_mode == PDS_GENERATE_SIZES)
3621      return NULL;
3622
3623   if (gen_mode == PDS_GENERATE_DATA_SEGMENT ||
3624       gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3625      /* Copy the USC task control words to constants. */
3626
3627      constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count);
3628
3629      pvr_pds_write_wide_constant(constants,
3630                                  constant + 0,
3631                                  program->usc_task_control.src0); /* 64-bit
3632                                                                    * Src0.
3633                                                                    */
3634      buffer += data_size;
3635
3636      if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3637         return buffer;
3638   }
3639
3640   if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
3641       gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3642      /* Generate the PDS pixel shader code. */
3643
3644      /* Setup the instruction pointer. */
3645      uint32_t *instruction = buffer;
3646
3647      /* Issue the task to the USC.
3648       *
3649       * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ;
3650       * halt halt
3651       */
3652
3653      *instruction++ = pvr_pds_encode_doutu(
3654         /* cc */ cc_enabled,
3655         /* END */ 1,
3656         /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU
3657                                                             * 64-bit Src0
3658                                                             */
3659
3660      /* Return pointer to just after last instruction. */
3661      return instruction;
3662   }
3663
3664   /* Execution should never reach here; keep compiler happy. */
3665   return NULL;
3666}
3667
3668uint32_t *pvr_pds_generate_compute_barrier_conditional(
3669   uint32_t *buffer,
3670   enum pvr_pds_generate_mode gen_mode)
3671{
3672   /* Compute barriers supported. Need to test for coeff sync task. */
3673
3674   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3675      return buffer; /* No data segment. */
3676
3677   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3678      /* Test whether this is the coefficient update task or not. */
3679      *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3680                                                                       */
3681                                     PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3682                                                                         */
3683                                     PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC
3684                                                                       */
3685                                     1 /* ADDR */);
3686
3687      /* Encode a HALT. */
3688      *buffer++ = pvr_pds_inst_encode_halt(1);
3689
3690      /* Reset the default predicate to IF0. */
3691      *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3692                                                                       */
3693                                     PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3694                                                                         */
3695                                     PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC
3696                                                                       */
3697                                     1 /* ADDR */);
3698   }
3699
3700   return buffer;
3701}
3702
3703/**
3704 * Generates program to kick the USC task to store shared.
3705 *
3706 * \param program Pointer to the PDS shared register.
3707 * \param buffer Pointer to the buffer for the program.
3708 * \param gen_mode Either code and data can be generated or sizes only updated.
3709 * \param dev_info PVR device information structure.
3710 * \returns Pointer to just beyond the buffer for the program.
3711 */
3712uint32_t *pvr_pds_generate_shared_storing_program(
3713   struct pvr_pds_shared_storing_program *restrict program,
3714   uint32_t *restrict buffer,
3715   enum pvr_pds_generate_mode gen_mode,
3716   const struct pvr_device_info *dev_info)
3717{
3718   struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3719   struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3720
3721   if (gen_mode == PDS_GENERATE_SIZES)
3722      return NULL;
3723
3724   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3725      uint32_t *constants = buffer;
3726
3727      constants =
3728         pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info);
3729      program->data_size = doutw_control->data_size;
3730
3731      constants = pvr_pds_kick_usc(kick_usc_program,
3732                                   constants,
3733                                   0,
3734                                   program->cc_enable,
3735                                   gen_mode);
3736      program->data_size += kick_usc_program->data_size;
3737
3738      return constants;
3739   }
3740
3741   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3742      /* Generate PDS code segment. */
3743      uint32_t *instruction = buffer;
3744
3745      /* doutw	vi1, vi0
3746       * doutu	ds1[constant_use], ds0[constant_use], ds1[constant_use],
3747       * emit
3748       */
3749      instruction =
3750         pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info);
3751      program->code_size = doutw_control->code_size;
3752
3753      /* Offset into data segment follows on from doutw data segment. */
3754      instruction = pvr_pds_kick_usc(kick_usc_program,
3755                                     instruction,
3756                                     doutw_control->data_size,
3757                                     program->cc_enable,
3758                                     gen_mode);
3759      program->code_size += kick_usc_program->code_size;
3760
3761      return instruction;
3762   }
3763
3764   /* Execution should never reach here. */
3765   return NULL;
3766}
3767
3768uint32_t *pvr_pds_generate_fence_terminate_program(
3769   struct pvr_pds_fence_program *restrict program,
3770   uint32_t *restrict buffer,
3771   enum pvr_pds_generate_mode gen_mode,
3772   const struct pvr_device_info *dev_info)
3773{
3774   uint32_t data_size = 0;
3775   uint32_t code_size = 0;
3776
3777   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3778      /* Data segment. */
3779      uint32_t *constants, *constants_base;
3780
3781      constants = constants_base = (uint32_t *)buffer;
3782
3783      /* DOUTC sources are not used, but they must be valid. */
3784      pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT);
3785      data_size += program->data_size;
3786
3787      if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3788         /* Append a 64-bit constant with value 1. Used to increment ptemp.
3789          * Return the offset into the data segment.
3790          */
3791         program->fence_constant_word =
3792            pvr_pds_append_constant64(constants_base, 1, &data_size);
3793      }
3794
3795      program->data_size = data_size;
3796      return constants;
3797   }
3798
3799   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3800      /* Code segment. */
3801      uint32_t *instruction = (uint32_t *)buffer;
3802
3803      instruction = pvr_pds_generate_compute_barrier_conditional(
3804         instruction,
3805         PDS_GENERATE_CODE_SEGMENT);
3806      code_size += 3;
3807
3808      if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3809         /* lock */
3810         *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */
3811
3812         /* add64	pt[0], pt[0], #1 */
3813         *instruction++ = pvr_pds_inst_encode_add64(
3814            0, /* cc */
3815            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3816            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3817            PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0]
3818                                                         */
3819            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3820               (program->fence_constant_word >> 1), /* src1 = 1 */
3821            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
3822                                                            * ptemp[0]
3823                                                            */
3824
3825         /* release */
3826         *instruction++ = pvr_pds_inst_encode_release(0); /* cc */
3827
3828         /* cmp		pt[0] EQ 0x4 == Number of USC clusters per phantom */
3829         *instruction++ = pvr_pds_inst_encode_cmpi(
3830            0, /* cc */
3831            PVR_ROGUE_PDSINST_COP_EQ,
3832            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0
3833                                                           * = ptemp[0]
3834                                                           */
3835            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0));
3836
3837         /* bra		-1 */
3838         *instruction++ =
3839            pvr_pds_encode_bra(0, /* cc */
3840                               1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE
3841                                   */
3842                               0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0
3843                                   */
3844                               -1); /* bra PC */
3845         code_size += 5;
3846      }
3847
3848      /* DOUTC */
3849      instruction = pvr_pds_generate_doutc(program,
3850                                           instruction,
3851                                           PDS_GENERATE_CODE_SEGMENT);
3852      code_size += program->code_size;
3853
3854      program->code_size = code_size;
3855      return instruction;
3856   }
3857
3858   /* Execution should never reach here. */
3859   return NULL;
3860}
3861
3862/**
3863 * Generates program to kick the USC task to load shared registers from memory.
3864 *
3865 * \param program Pointer to the PDS shared register.
3866 * \param buffer Pointer to the buffer for the program.
3867 * \param gen_mode Either code and data can be generated or sizes only updated.
3868 * \param dev_info PVR device information struct.
3869 * \returns Pointer to just beyond the buffer for the program.
3870 */
3871uint32_t *pvr_pds_generate_compute_shared_loading_program(
3872   struct pvr_pds_shared_storing_program *restrict program,
3873   uint32_t *restrict buffer,
3874   enum pvr_pds_generate_mode gen_mode,
3875   const struct pvr_device_info *dev_info)
3876{
3877   struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3878   struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3879
3880   uint32_t next_constant;
3881   uint32_t data_size = 0;
3882   uint32_t code_size = 0;
3883
3884   /* This needs to persist to the CODE_SEGMENT call. */
3885   static uint32_t fence_constant_word = 0;
3886   uint64_t zero_constant64 = 0;
3887
3888   if (gen_mode == PDS_GENERATE_SIZES)
3889      return NULL;
3890
3891   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3892      uint32_t *constants = buffer;
3893
3894      constants = pvr_pds_generate_doutw(doutw_control,
3895                                         constants,
3896                                         PDS_GENERATE_DATA_SEGMENT,
3897                                         dev_info);
3898      data_size += doutw_control->data_size;
3899
3900      constants = pvr_pds_kick_usc(kick_usc_program,
3901                                   constants,
3902                                   0,
3903                                   program->cc_enable,
3904                                   gen_mode);
3905      data_size += kick_usc_program->data_size;
3906
3907      /* Copy the fence constant value (64-bit). */
3908      next_constant = data_size; /* Assumes data words fully packed. */
3909      fence_constant_word =
3910         pvr_pds_get_constants(&next_constant, 2, &data_size);
3911
3912      /* Encode the fence constant src0 (offset measured from start of data
3913       * buffer). Fence barrier is initialized to zero.
3914       */
3915      pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64);
3916      /* Update the const size. */
3917      data_size += 2;
3918      constants += 2;
3919
3920      program->data_size = data_size;
3921      return constants;
3922   }
3923
3924   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3925      /* Generate PDS code segment. */
3926      uint32_t *instruction = buffer;
3927
3928      /* add64	pt0, c0, c0
3929       * IF [2x Phantoms]
3930       * add64	pt1, c0, c0
3931       * st		[constant_mem_addr], pt0, 4
3932       * ENDIF
3933       * doutw	vi1, vi0
3934       * doutu	ds1[constant_use], ds0[constant_use], ds1[constant_use],
3935       * emit
3936       *
3937       * Zero the persistent temp (SW fence for context switch).
3938       */
3939      *instruction++ = pvr_pds_inst_encode_add64(
3940         0, /* cc */
3941         PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3942         PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3943         PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3944            (fence_constant_word >> 1), /* src0
3945                                         *  = 0
3946                                         */
3947         PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3948            (fence_constant_word >> 1), /* src1
3949                                         * = 0
3950                                         */
3951         PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0]
3952                                                         */
3953      code_size++;
3954
3955      instruction = pvr_pds_generate_doutw(doutw_control,
3956                                           instruction,
3957                                           PDS_GENERATE_CODE_SEGMENT,
3958                                           dev_info);
3959      code_size += doutw_control->code_size;
3960
3961      /* Offset into data segment follows on from doutw data segment. */
3962      instruction = pvr_pds_kick_usc(kick_usc_program,
3963                                     instruction,
3964                                     doutw_control->data_size,
3965                                     program->cc_enable,
3966                                     gen_mode);
3967      code_size += kick_usc_program->code_size;
3968
3969      program->code_size = code_size;
3970      return instruction;
3971   }
3972
3973   /* Execution should never reach here. */
3974   return NULL;
3975}
3976
3977/**
3978 * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES.
3979 * Relies on num_fpu_iterators being initialized for size calculation.
3980 * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being
3981 * initialized for program generation.
3982 *
3983 * \param program Pointer to the PDS pixel shader program.
3984 * \param buffer Pointer to the buffer for the program.
3985 * \param gen_mode Either code and data can be generated or sizes only updated.
3986 * \returns Pointer to just beyond the buffer for the program.
3987 */
3988uint32_t *pvr_pds_coefficient_loading(
3989   struct pvr_pds_coeff_loading_program *restrict program,
3990   uint32_t *restrict buffer,
3991   enum pvr_pds_generate_mode gen_mode)
3992{
3993   uint32_t constant;
3994   uint32_t *instruction;
3995   uint32_t total_data_size, code_size;
3996
3997   /* Place constants at the front of the buffer. */
3998   uint32_t *constants = buffer;
3999   /* Start counting constants from 0. */
4000   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4001
4002   /* Save the data segment pointer and size. */
4003   program->data_segment = constants;
4004
4005   total_data_size = 0;
4006   code_size = 0;
4007
4008   total_data_size += 2 * program->num_fpu_iterators;
4009   code_size += program->num_fpu_iterators;
4010
4011   /* Instructions start where constants finished, but we must take note of
4012    * alignment.
4013    *
4014    * 128-bit boundary = 4 dwords.
4015    */
4016   total_data_size = ALIGN_POT(total_data_size, 4);
4017   if (gen_mode != PDS_GENERATE_SIZES) {
4018      uint32_t data_size = 0;
4019      uint32_t iterator = 0;
4020
4021      instruction = buffer + total_data_size;
4022
4023      while (iterator < program->num_fpu_iterators) {
4024         uint64_t iterator_word;
4025
4026         /* Copy the USC task control words to constants. */
4027         constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
4028
4029         /* Write the first iterator. */
4030         iterator_word =
4031            (uint64_t)program->FPU_iterators[iterator]
4032            << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT;
4033
4034         /* Write the destination. */
4035         iterator_word |=
4036            (uint64_t)program->destination[iterator++]
4037            << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT;
4038
4039         /* If this is the last DOUTI word the "Last Issue" bit should be
4040          * set.
4041          */
4042         if (iterator >= program->num_fpu_iterators) {
4043            iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
4044         }
4045
4046         /* Write the word to the buffer. */
4047         pvr_pds_write_wide_constant(constants,
4048                                     constant,
4049                                     iterator_word); /* 64-bit
4050                                                        Src0
4051                                                      */
4052
4053         /* Write the DOUT instruction. */
4054         *instruction++ = pvr_pds_encode_douti(
4055            /* cc */ 0,
4056            /* END */ 0,
4057            /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
4058      }
4059
4060      /* Update the last DOUTI instruction to have the END flag set. */
4061      *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
4062   } else {
4063      instruction = NULL;
4064   }
4065
4066   /* Update the data size and code size. Minimum temp count is 1. */
4067   program->temps_used = 1;
4068   program->data_size = total_data_size;
4069   program->code_size = code_size;
4070
4071   return instruction;
4072}
4073
4074/**
4075 * Generate a single ld/st instruction. This can correspond to one or more
4076 * real ld/st instructions based on the value of count.
4077 *
4078 * \param ld true to generate load, false to generate store.
4079 * \param control Cache mode control.
4080 * \param temp_index Dest temp for load/source temp for store, in 32bits
4081 *                   register index.
4082 * \param address Source for load/dest for store in bytes.
4083 * \param count Number of dwords for load/store.
4084 * \param next_constant
4085 * \param total_data_size
4086 * \param total_code_size
4087 * \param buffer Pointer to the buffer for the program.
4088 * \param data_fence Issue data fence.
4089 * \param gen_mode Either code and data can be generated or sizes only updated.
4090 * \param dev_info PVR device information structure.
4091 * \returns Pointer to just beyond the buffer for the program.
4092 */
4093uint32_t *pvr_pds_generate_single_ldst_instruction(
4094   bool ld,
4095   const struct pvr_pds_ldst_control *control,
4096   uint32_t temp_index,
4097   uint64_t address,
4098   uint32_t count,
4099   uint32_t *next_constant,
4100   uint32_t *total_data_size,
4101   uint32_t *total_code_size,
4102   uint32_t *restrict buffer,
4103   bool data_fence,
4104   enum pvr_pds_generate_mode gen_mode,
4105   const struct pvr_device_info *dev_info)
4106{
4107   /* A single ld/ST here does NOT actually correspond to a single ld/ST
4108    * instruction, but may needs multiple ld/ST instructions because each ld/ST
4109    * instruction can only ld/ST a restricted max number of dwords which may
4110    * less than count passed here.
4111    */
4112
4113   uint32_t num_inst;
4114   uint32_t constant;
4115
4116   if (ld) {
4117      /* ld must operate on 64bits unit, and it needs to load from and to 128
4118       * bits aligned. Apart from the last ld, all the other need to ld 2x(x =
4119       * 1, 2, ...) times 64bits unit.
4120       */
4121      uint32_t per_inst_count = 0;
4122      uint32_t last_inst_count;
4123
4124      assert((gen_mode == PDS_GENERATE_SIZES) ||
4125             (((count % 2) == 0) && ((address % 16) == 0) &&
4126              (temp_index % 2) == 0));
4127
4128      count >>= 1;
4129      temp_index >>= 1;
4130
4131      /* Found out how many ld instructions are needed and ld size for the all
4132       * possible ld instructions.
4133       */
4134      if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
4135         num_inst = 1;
4136         last_inst_count = count;
4137      } else {
4138         per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
4139         if ((per_inst_count % 2) != 0)
4140            per_inst_count -= 1;
4141
4142         num_inst = count / per_inst_count;
4143         last_inst_count = count - per_inst_count * num_inst;
4144         num_inst += 1;
4145      }
4146
4147      /* Generate all the instructions. */
4148      for (uint32_t i = 0; i < num_inst; i++) {
4149         if ((i == (num_inst - 1)) && (last_inst_count == 0))
4150            break;
4151
4152         /* A single load instruction. */
4153         constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4154
4155         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4156            uint64_t ld_src0 = 0;
4157
4158            ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
4159                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
4160            ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
4161                                                        : per_inst_count) &
4162                         PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
4163                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
4164            ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
4165                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
4166
4167            if (!control) {
4168               ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
4169
4170               if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
4171                  ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
4172
4173            } else {
4174               ld_src0 |= control->cache_control_const;
4175            }
4176
4177            /* Write it to the constant. */
4178            pvr_pds_write_constant64(buffer,
4179                                     constant,
4180                                     (uint32_t)(ld_src0),
4181                                     (uint32_t)(ld_src0 >> 32));
4182
4183            /* Adjust value for next ld instruction. */
4184            temp_index += per_inst_count;
4185            address += (((uint64_t)(per_inst_count)) << 3);
4186         }
4187
4188         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4189            *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
4190
4191            if (data_fence)
4192               *buffer++ = pvr_pds_inst_encode_wdf(0);
4193         }
4194      }
4195   } else {
4196      /* ST needs source memory address to be 32bits aligned. */
4197      assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
4198
4199      /* Found out how many ST instructions are needed, each ST can only store
4200       * PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits.
4201       */
4202      num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
4203      num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
4204
4205      /* Generate all the instructions. */
4206      for (uint32_t i = 0; i < num_inst; i++) {
4207         /* A single store instruction. */
4208         constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4209
4210         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4211            uint32_t per_inst_count =
4212               (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
4213                   ? count
4214                   : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
4215            uint64_t st_src0 = 0;
4216
4217            st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
4218                        << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
4219            st_src0 |=
4220               (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
4221                << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
4222            st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
4223                        << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
4224
4225            if (!control) {
4226               st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
4227
4228               if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
4229                  st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
4230               }
4231
4232            } else {
4233               st_src0 |= control->cache_control_const;
4234            }
4235
4236            /* Write it to the constant. */
4237            pvr_pds_write_constant64(buffer,
4238                                     constant,
4239                                     (uint32_t)(st_src0),
4240                                     (uint32_t)(st_src0 >> 32));
4241
4242            /* Adjust value for next ST instruction. */
4243            temp_index += per_inst_count;
4244            count -= per_inst_count;
4245            address += (((uint64_t)(per_inst_count)) << 2);
4246         }
4247
4248         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4249            *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
4250
4251            if (data_fence)
4252               *buffer++ = pvr_pds_inst_encode_wdf(0);
4253         }
4254      }
4255   }
4256
4257   (*total_code_size) += num_inst;
4258   if (data_fence)
4259      (*total_code_size) += num_inst;
4260
4261   if (gen_mode != PDS_GENERATE_SIZES)
4262      return buffer;
4263   return NULL;
4264}
4265
4266/**
4267 * Generate programs used to prepare stream out, i.e., clear stream out buffer
4268 * overflow flags and update Persistent temps by a ld instruction.
4269 *
4270 * This must be used in PPP state update.
4271 *
4272 * \param program Pointer to the stream out program.
4273 * \param buffer Pointer to the buffer for the program.
4274 * \param store_mode If true then the data is stored to memory. If false then
4275 *                   the data is loaded from memory.
4276 * \param gen_mode Either code and data can be generated or sizes only updated.
4277 * \param dev_info PVR device information structure.
4278 * \returns Pointer to just beyond the buffer for the program.
4279 */
4280uint32_t *pvr_pds_generate_stream_out_init_program(
4281   struct pvr_pds_stream_out_init_program *restrict program,
4282   uint32_t *restrict buffer,
4283   bool store_mode,
4284   enum pvr_pds_generate_mode gen_mode,
4285   const struct pvr_device_info *dev_info)
4286{
4287   uint32_t total_data_size = 0;
4288   uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
4289
4290   /* Start counting constants from 0. */
4291   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4292
4293   uint32_t total_code_size = 1;
4294
4295   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4296      /* We only need to clear global stream out predicate, other predicates
4297       * are not used during the stream out buffer overflow test.
4298       */
4299      *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
4300   }
4301
4302   for (uint32_t index = 0; index < program->num_buffers; index++) {
4303      if (program->dev_address_for_buffer_data[index] != 0) {
4304         /* Generate load/store program to load/store persistent temps. */
4305
4306         /* NOTE: store_mode == true case should be handled by
4307          * StreamOutTerminate.
4308          */
4309         buffer = pvr_pds_generate_single_ldst_instruction(
4310            !store_mode,
4311            NULL,
4312            PTDst,
4313            program->dev_address_for_buffer_data[index],
4314            program->pds_buffer_data_size[index],
4315            &next_constant,
4316            &total_data_size,
4317            &total_code_size,
4318            buffer,
4319            false,
4320            gen_mode,
4321            dev_info);
4322      }
4323
4324      PTDst += program->pds_buffer_data_size[index];
4325   }
4326
4327   total_code_size += 2;
4328
4329   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4330      /* We need to fence the loading. */
4331      *buffer++ = pvr_pds_inst_encode_wdf(0);
4332      *buffer++ = pvr_pds_inst_encode_halt(0);
4333   }
4334
4335   /* Save size information to program */
4336   program->stream_out_init_pds_data_size =
4337      ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4338   /* PDS program code size. */
4339   program->stream_out_init_pds_code_size = total_code_size;
4340
4341   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4342      return buffer + program->stream_out_init_pds_data_size;
4343   else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4344      return buffer;
4345
4346   return NULL;
4347}
4348
4349/**
4350 * Generate stream out terminate program for stream out.
4351 *
4352 * If pds_persistent_temp_size_to_store is 0, the final primitive written value
4353 * will be stored.
4354 *
4355 * If pds_persistent_temp_size_to_store is non 0, the value of persistent temps
4356 * will be stored into memory.
4357 *
4358 * The stream out terminate program is used to update the PPP state and the data
4359 * and code section cannot be separate.
4360 *
4361 * \param program Pointer to the stream out program.
4362 * \param buffer Pointer to the buffer for the program.
4363 * \param gen_mode Either code and data can be generated or sizes only updated.
4364 * \param dev_info PVR device info structure.
4365 * \returns Pointer to just beyond the buffer for the program.
4366 */
4367uint32_t *pvr_pds_generate_stream_out_terminate_program(
4368   struct pvr_pds_stream_out_terminate_program *restrict program,
4369   uint32_t *restrict buffer,
4370   enum pvr_pds_generate_mode gen_mode,
4371   const struct pvr_device_info *dev_info)
4372{
4373   uint32_t next_constant;
4374   uint32_t total_data_size = 0, total_code_size = 0;
4375
4376   /* Start counting constants from 0. */
4377   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4378
4379   /* Generate store program to store persistent temps. */
4380   buffer = pvr_pds_generate_single_ldst_instruction(
4381      false,
4382      NULL,
4383      PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER,
4384      program->dev_address_for_storing_persistent_temp,
4385      program->pds_persistent_temp_size_to_store,
4386      &next_constant,
4387      &total_data_size,
4388      &total_code_size,
4389      buffer,
4390      false,
4391      gen_mode,
4392      dev_info);
4393
4394   total_code_size += 2;
4395   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4396      *buffer++ = pvr_pds_inst_encode_wdf(0);
4397      *buffer++ = pvr_pds_inst_encode_halt(0);
4398   }
4399
4400   /* Save size information to program. */
4401   program->stream_out_terminate_pds_data_size =
4402      ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4403   /* PDS program code size. */
4404   program->stream_out_terminate_pds_code_size = total_code_size;
4405
4406   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4407      return buffer + program->stream_out_terminate_pds_data_size;
4408   else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4409      return buffer;
4410
4411   return NULL;
4412}
4413
4414/* DrawArrays works in several steps:
4415 *
4416 * 1) load data from draw_indirect buffer
4417 * 2) tweak data to match hardware formats
4418 * 3) write data to indexblock
4419 * 4) signal the VDM to continue
4420 *
4421 * This is complicated by HW limitations on alignment, as well as a HWBRN.
4422 *
4423 * 1) Load data.
4424 * Loads _must_ be 128-bit aligned. Because there is no such limitation in the
4425 * spec we must deal with this by choosing an appropriate earlier address and
4426 * loading enough dwords that we load the entirety of the buffer.
4427 *
4428 * if addr & 0xf:
4429 *   load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
 *   data = tmp[0 + ((addr & 0xf) >> 2)]...
4431 * else
4432 *   load [addr] 4 dwords -> tmp[0, 1, 2, 3]
4433 *   data = tmp[0]...
4434 *
4435 *
4436 * 2) Tweak data.
4437 * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
4438 * the VDM control stream. We must subtract 1 from the loaded primCount.
4439 *
4440 * However, there is a HWBRN that disallows the ADD32 instruction from sourcing
4441 * a tmp that is non-64-bit-aligned. To work around this, we must move primCount
4442 * into another tmp that has the correct alignment. Note: this is only required
4443 * when data = tmp[even], as primCount is data+1:
4444 *
4445 * if data = tmp[even]:
4446 *   primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
4447 * else:
4448 *   primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
4449 *
4450 * This boils down to:
4451 *
4452 * primCount = data[1]
4453 * primCountSrc = data[1]
4454 * if brn_present && (data is even):
4455 *   mov scratch, primCount
4456 *   primCountSrc = scratch
4457 * endif
4458 * sub primCount, primCountSrc, 1
4459 *
4460 * 3) Store Data.
4461 * Write the now-tweaked data over the top of the indexblock.
4462 * To ensure the write completes before the VDM re-reads the data, we must cause
4463 * a data hazard by doing a dummy (dummy meaning we don't care about the
4464 * returned data) load from the same addresses. Again, because the ld must
4465 * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
4466 * index block is 128-bit aligned. This is the client driver's responsibility.
4467 *
4468 * st data[0, 1, 2] -> (idxblock + 4)
4469 * load [idxblock] 4 dwords
4470 *
4471 * 4) Signal the VDM
4472 * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue
4473 * where it is currently fenced on a dummy idxblock that has been inserted by
4474 * the driver.
4475 */
4476
4477#include "pvr_draw_indirect_arrays0.h"
4478#include "pvr_draw_indirect_arrays1.h"
4479#include "pvr_draw_indirect_arrays2.h"
4480#include "pvr_draw_indirect_arrays3.h"
4481
4482#include "pvr_draw_indirect_arrays_base_instance0.h"
4483#include "pvr_draw_indirect_arrays_base_instance1.h"
4484#include "pvr_draw_indirect_arrays_base_instance2.h"
4485#include "pvr_draw_indirect_arrays_base_instance3.h"
4486
4487#include "pvr_draw_indirect_arrays_base_instance_drawid0.h"
4488#include "pvr_draw_indirect_arrays_base_instance_drawid1.h"
4489#include "pvr_draw_indirect_arrays_base_instance_drawid2.h"
4490#include "pvr_draw_indirect_arrays_base_instance_drawid3.h"
4491
/* Selects the LD SRC0 SLC mode for a device: CACHED when
 * features.has_slc_mcu_cache_controls is set, BYPASS otherwise.
 */
#define ENABLE_SLC_MCU_CACHE_CONTROLS(device)         \
   (!(device)->features.has_slc_mcu_cache_controls    \
       ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS  \
       : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED)
4496
4497void pvr_pds_generate_draw_arrays_indirect(
4498   struct pvr_pds_drawindirect_program *restrict program,
4499   uint32_t *restrict buffer,
4500   enum pvr_pds_generate_mode gen_mode,
4501   const struct pvr_device_info *dev_info)
4502{
4503   if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4504       (gen_mode == PDS_GENERATE_SIZES)) {
4505      const struct pvr_psc_program_output *psc_program = NULL;
4506      switch ((program->arg_buffer >> 2) % 4) {
4507      case 0:
4508         if (program->support_base_instance) {
4509            if (program->increment_draw_id) {
4510               psc_program =
4511                  &pvr_draw_indirect_arrays_base_instance_drawid0_program;
4512            } else {
4513               psc_program = &pvr_draw_indirect_arrays_base_instance0_program;
4514            }
4515         } else {
4516            psc_program = &pvr_draw_indirect_arrays0_program;
4517         }
4518         break;
4519      case 1:
4520         if (program->support_base_instance) {
4521            if (program->increment_draw_id) {
4522               psc_program =
4523                  &pvr_draw_indirect_arrays_base_instance_drawid1_program;
4524            } else {
4525               psc_program = &pvr_draw_indirect_arrays_base_instance1_program;
4526            }
4527         } else {
4528            psc_program = &pvr_draw_indirect_arrays1_program;
4529         }
4530         break;
4531      case 2:
4532         if (program->support_base_instance) {
4533            if (program->increment_draw_id) {
4534               psc_program =
4535                  &pvr_draw_indirect_arrays_base_instance_drawid2_program;
4536            } else {
4537               psc_program = &pvr_draw_indirect_arrays_base_instance2_program;
4538            }
4539         } else {
4540            psc_program = &pvr_draw_indirect_arrays2_program;
4541         }
4542         break;
4543      case 3:
4544         if (program->support_base_instance) {
4545            if (program->increment_draw_id) {
4546               psc_program =
4547                  &pvr_draw_indirect_arrays_base_instance_drawid3_program;
4548            } else {
4549               psc_program = &pvr_draw_indirect_arrays_base_instance3_program;
4550            }
4551         } else {
4552            psc_program = &pvr_draw_indirect_arrays3_program;
4553         }
4554         break;
4555      }
4556
4557      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4558         memcpy(buffer,
4559                psc_program->code,
4560                psc_program->code_size * sizeof(uint32_t));
4561#if defined(DUMP_PDS)
4562         for (uint32_t i = 0; i < psc_program->code_size; i++)
4563            PVR_PDS_PRINT_INST(buffer[i]);
4564#endif
4565      }
4566
4567      program->program = *psc_program;
4568   } else {
4569      switch ((program->arg_buffer >> 2) % 4) {
4570      case 0:
4571         if (program->support_base_instance) {
4572            if (program->increment_draw_id) {
4573               pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(
4574                  buffer,
4575                  program->arg_buffer & ~0xfull,
4576                  dev_info);
4577               pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(
4578                  buffer,
4579                  program->index_list_addr_buffer + 4);
4580               pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(
4581                  buffer,
4582                  program->index_list_addr_buffer);
4583               pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(
4584                  buffer,
4585                  program->num_views);
4586               pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(
4587                  buffer);
4588            } else {
4589               pvr_write_draw_indirect_arrays_base_instance0_di_data(
4590                  buffer,
4591                  program->arg_buffer & ~0xfull,
4592                  dev_info);
4593               pvr_write_draw_indirect_arrays_base_instance0_write_vdm(
4594                  buffer,
4595                  program->index_list_addr_buffer + 4);
4596               pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(
4597                  buffer,
4598                  program->index_list_addr_buffer);
4599               pvr_write_draw_indirect_arrays_base_instance0_num_views(
4600                  buffer,
4601                  program->num_views);
4602               pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer);
4603            }
4604         } else {
4605            pvr_write_draw_indirect_arrays0_di_data(buffer,
4606                                                    program->arg_buffer &
4607                                                       ~0xfull,
4608                                                    dev_info);
4609            pvr_write_draw_indirect_arrays0_write_vdm(
4610               buffer,
4611               program->index_list_addr_buffer + 4);
4612            pvr_write_draw_indirect_arrays0_flush_vdm(
4613               buffer,
4614               program->index_list_addr_buffer);
4615            pvr_write_draw_indirect_arrays0_num_views(buffer,
4616                                                      program->num_views);
4617            pvr_write_draw_indirect_arrays0_immediates(buffer);
4618         }
4619         break;
4620      case 1:
4621         if (program->support_base_instance) {
4622            if (program->increment_draw_id) {
4623               pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(
4624                  buffer,
4625                  program->arg_buffer & ~0xfull,
4626                  dev_info);
4627               pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(
4628                  buffer,
4629                  program->index_list_addr_buffer + 4);
4630               pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(
4631                  buffer,
4632                  program->index_list_addr_buffer);
4633               pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(
4634                  buffer,
4635                  program->num_views);
4636               pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(
4637                  buffer);
4638            } else {
4639               pvr_write_draw_indirect_arrays_base_instance1_di_data(
4640                  buffer,
4641                  program->arg_buffer & ~0xfull,
4642                  dev_info);
4643               pvr_write_draw_indirect_arrays_base_instance1_write_vdm(
4644                  buffer,
4645                  program->index_list_addr_buffer + 4);
4646               pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(
4647                  buffer,
4648                  program->index_list_addr_buffer);
4649               pvr_write_draw_indirect_arrays_base_instance1_num_views(
4650                  buffer,
4651                  program->num_views);
4652               pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer);
4653            }
4654         } else {
4655            pvr_write_draw_indirect_arrays1_di_data(buffer,
4656                                                    program->arg_buffer &
4657                                                       ~0xfull,
4658                                                    dev_info);
4659            pvr_write_draw_indirect_arrays1_write_vdm(
4660               buffer,
4661               program->index_list_addr_buffer + 4);
4662            pvr_write_draw_indirect_arrays1_flush_vdm(
4663               buffer,
4664               program->index_list_addr_buffer);
4665            pvr_write_draw_indirect_arrays1_num_views(buffer,
4666                                                      program->num_views);
4667            pvr_write_draw_indirect_arrays1_immediates(buffer);
4668         }
4669         break;
4670      case 2:
4671         if (program->support_base_instance) {
4672            if (program->increment_draw_id) {
4673               pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(
4674                  buffer,
4675                  program->arg_buffer & ~0xfull,
4676                  dev_info);
4677               pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(
4678                  buffer,
4679                  program->index_list_addr_buffer + 4);
4680               pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(
4681                  buffer,
4682                  program->index_list_addr_buffer);
4683               pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(
4684                  buffer,
4685                  program->num_views);
4686               pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(
4687                  buffer);
4688            } else {
4689               pvr_write_draw_indirect_arrays_base_instance2_di_data(
4690                  buffer,
4691                  program->arg_buffer & ~0xfull,
4692                  dev_info);
4693               pvr_write_draw_indirect_arrays_base_instance2_write_vdm(
4694                  buffer,
4695                  program->index_list_addr_buffer + 4);
4696               pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(
4697                  buffer,
4698                  program->index_list_addr_buffer);
4699               pvr_write_draw_indirect_arrays_base_instance2_num_views(
4700                  buffer,
4701                  program->num_views);
4702               pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer);
4703            }
4704         } else {
4705            pvr_write_draw_indirect_arrays2_di_data(buffer,
4706                                                    program->arg_buffer &
4707                                                       ~0xfull,
4708                                                    dev_info);
4709            pvr_write_draw_indirect_arrays2_write_vdm(
4710               buffer,
4711               program->index_list_addr_buffer + 4);
4712            pvr_write_draw_indirect_arrays2_flush_vdm(
4713               buffer,
4714               program->index_list_addr_buffer);
4715            pvr_write_draw_indirect_arrays2_num_views(buffer,
4716                                                      program->num_views);
4717            pvr_write_draw_indirect_arrays2_immediates(buffer);
4718         }
4719         break;
4720      case 3:
4721         if (program->support_base_instance) {
4722            if (program->increment_draw_id) {
4723               pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(
4724                  buffer,
4725                  program->arg_buffer & ~0xfull,
4726                  dev_info);
4727               pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(
4728                  buffer,
4729                  program->index_list_addr_buffer + 4);
4730               pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(
4731                  buffer,
4732                  program->index_list_addr_buffer);
4733               pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(
4734                  buffer,
4735                  program->num_views);
4736               pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(
4737                  buffer);
4738            } else {
4739               pvr_write_draw_indirect_arrays_base_instance3_di_data(
4740                  buffer,
4741                  program->arg_buffer & ~0xfull,
4742                  dev_info);
4743               pvr_write_draw_indirect_arrays_base_instance3_write_vdm(
4744                  buffer,
4745                  program->index_list_addr_buffer + 4);
4746               pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(
4747                  buffer,
4748                  program->index_list_addr_buffer);
4749               pvr_write_draw_indirect_arrays_base_instance3_num_views(
4750                  buffer,
4751                  program->num_views);
4752               pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer);
4753            }
4754         } else {
4755            pvr_write_draw_indirect_arrays3_di_data(buffer,
4756                                                    program->arg_buffer &
4757                                                       ~0xfull,
4758                                                    dev_info);
4759            pvr_write_draw_indirect_arrays3_write_vdm(
4760               buffer,
4761               program->index_list_addr_buffer + 4);
4762            pvr_write_draw_indirect_arrays3_flush_vdm(
4763               buffer,
4764               program->index_list_addr_buffer);
4765            pvr_write_draw_indirect_arrays3_num_views(buffer,
4766                                                      program->num_views);
4767            pvr_write_draw_indirect_arrays3_immediates(buffer);
4768         }
4769         break;
4770      }
4771   }
4772}
4773
4774#include "pvr_draw_indirect_elements0.h"
4775#include "pvr_draw_indirect_elements1.h"
4776#include "pvr_draw_indirect_elements2.h"
4777#include "pvr_draw_indirect_elements3.h"
4778#include "pvr_draw_indirect_elements_base_instance0.h"
4779#include "pvr_draw_indirect_elements_base_instance1.h"
4780#include "pvr_draw_indirect_elements_base_instance2.h"
4781#include "pvr_draw_indirect_elements_base_instance3.h"
4782#include "pvr_draw_indirect_elements_base_instance_drawid0.h"
4783#include "pvr_draw_indirect_elements_base_instance_drawid1.h"
4784#include "pvr_draw_indirect_elements_base_instance_drawid2.h"
4785#include "pvr_draw_indirect_elements_base_instance_drawid3.h"
4786
4787void pvr_pds_generate_draw_elements_indirect(
4788   struct pvr_pds_drawindirect_program *restrict program,
4789   uint32_t *restrict buffer,
4790   enum pvr_pds_generate_mode gen_mode,
4791   const struct pvr_device_info *dev_info)
4792{
4793   if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4794       (gen_mode == PDS_GENERATE_SIZES)) {
4795      const struct pvr_psc_program_output *psc_program = NULL;
4796      switch ((program->arg_buffer >> 2) % 4) {
4797      case 0:
4798         if (program->support_base_instance) {
4799            if (program->increment_draw_id) {
4800               psc_program =
4801                  &pvr_draw_indirect_elements_base_instance_drawid0_program;
4802            } else {
4803               psc_program = &pvr_draw_indirect_elements_base_instance0_program;
4804            }
4805         } else {
4806            psc_program = &pvr_draw_indirect_elements0_program;
4807         }
4808         break;
4809      case 1:
4810         if (program->support_base_instance) {
4811            if (program->increment_draw_id) {
4812               psc_program =
4813                  &pvr_draw_indirect_elements_base_instance_drawid1_program;
4814            } else {
4815               psc_program = &pvr_draw_indirect_elements_base_instance1_program;
4816            }
4817         } else {
4818            psc_program = &pvr_draw_indirect_elements1_program;
4819         }
4820         break;
4821      case 2:
4822         if (program->support_base_instance) {
4823            if (program->increment_draw_id) {
4824               psc_program =
4825                  &pvr_draw_indirect_elements_base_instance_drawid2_program;
4826            } else {
4827               psc_program = &pvr_draw_indirect_elements_base_instance2_program;
4828            }
4829         } else {
4830            psc_program = &pvr_draw_indirect_elements2_program;
4831         }
4832         break;
4833      case 3:
4834         if (program->support_base_instance) {
4835            if (program->increment_draw_id) {
4836               psc_program =
4837                  &pvr_draw_indirect_elements_base_instance_drawid3_program;
4838            } else {
4839               psc_program = &pvr_draw_indirect_elements_base_instance3_program;
4840            }
4841         } else {
4842            psc_program = &pvr_draw_indirect_elements3_program;
4843         }
4844         break;
4845      }
4846
4847      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4848         memcpy(buffer,
4849                psc_program->code,
4850                psc_program->code_size * sizeof(uint32_t));
4851
4852#if defined(DUMP_PDS)
4853         for (uint32_t i = 0; i < psc_program->code_size; i++)
4854            PVR_PDS_PRINT_INST(buffer[i]);
4855#endif
4856      }
4857
4858      program->program = *psc_program;
4859   } else {
4860      switch ((program->arg_buffer >> 2) % 4) {
4861      case 0:
4862         if (program->support_base_instance) {
4863            if (program->increment_draw_id) {
4864               pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(
4865                  buffer,
4866                  program->arg_buffer & ~0xfull,
4867                  dev_info);
4868               pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm(
4869                  buffer,
4870                  program->index_list_addr_buffer);
4871               pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm(
4872                  buffer,
4873                  program->index_list_addr_buffer);
4874               pvr_write_draw_indirect_elements_base_instance_drawid0_num_views(
4875                  buffer,
4876                  program->num_views);
4877               pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride(
4878                  buffer,
4879                  program->index_stride);
4880               pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base(
4881                  buffer,
4882                  program->index_buffer);
4883               pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header(
4884                  buffer,
4885                  program->index_block_header);
4886               pvr_write_draw_indirect_elements_base_instance_drawid0_immediates(
4887                  buffer);
4888            } else {
4889               pvr_write_draw_indirect_elements_base_instance0_di_data(
4890                  buffer,
4891                  program->arg_buffer & ~0xfull,
4892                  dev_info);
4893               pvr_write_draw_indirect_elements_base_instance0_write_vdm(
4894                  buffer,
4895                  program->index_list_addr_buffer);
4896               pvr_write_draw_indirect_elements_base_instance0_flush_vdm(
4897                  buffer,
4898                  program->index_list_addr_buffer);
4899               pvr_write_draw_indirect_elements_base_instance0_num_views(
4900                  buffer,
4901                  program->num_views);
4902               pvr_write_draw_indirect_elements_base_instance0_idx_stride(
4903                  buffer,
4904                  program->index_stride);
4905               pvr_write_draw_indirect_elements_base_instance0_idx_base(
4906                  buffer,
4907                  program->index_buffer);
4908               pvr_write_draw_indirect_elements_base_instance0_idx_header(
4909                  buffer,
4910                  program->index_block_header);
4911               pvr_write_draw_indirect_elements_base_instance0_immediates(
4912                  buffer);
4913            }
4914         } else {
4915            pvr_write_draw_indirect_elements0_di_data(buffer,
4916                                                      program->arg_buffer &
4917                                                         ~0xfull,
4918                                                      dev_info);
4919            pvr_write_draw_indirect_elements0_write_vdm(
4920               buffer,
4921               program->index_list_addr_buffer);
4922            pvr_write_draw_indirect_elements0_flush_vdm(
4923               buffer,
4924               program->index_list_addr_buffer);
4925            pvr_write_draw_indirect_elements0_num_views(buffer,
4926                                                        program->num_views);
4927            pvr_write_draw_indirect_elements0_idx_stride(buffer,
4928                                                         program->index_stride);
4929            pvr_write_draw_indirect_elements0_idx_base(buffer,
4930                                                       program->index_buffer);
4931            pvr_write_draw_indirect_elements0_idx_header(
4932               buffer,
4933               program->index_block_header);
4934            pvr_write_draw_indirect_elements0_immediates(buffer);
4935         }
4936         break;
4937      case 1:
4938         if (program->support_base_instance) {
4939            if (program->increment_draw_id) {
4940               pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(
4941                  buffer,
4942                  program->arg_buffer & ~0xfull,
4943                  dev_info);
4944               pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm(
4945                  buffer,
4946                  program->index_list_addr_buffer);
4947               pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm(
4948                  buffer,
4949                  program->index_list_addr_buffer);
4950               pvr_write_draw_indirect_elements_base_instance_drawid1_num_views(
4951                  buffer,
4952                  program->num_views);
4953               pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride(
4954                  buffer,
4955                  program->index_stride);
4956               pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base(
4957                  buffer,
4958                  program->index_buffer);
4959               pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header(
4960                  buffer,
4961                  program->index_block_header);
4962               pvr_write_draw_indirect_elements_base_instance_drawid1_immediates(
4963                  buffer);
4964            } else {
4965               pvr_write_draw_indirect_elements_base_instance1_di_data(
4966                  buffer,
4967                  program->arg_buffer & ~0xfull,
4968                  dev_info);
4969               pvr_write_draw_indirect_elements_base_instance1_write_vdm(
4970                  buffer,
4971                  program->index_list_addr_buffer);
4972               pvr_write_draw_indirect_elements_base_instance1_flush_vdm(
4973                  buffer,
4974                  program->index_list_addr_buffer);
4975               pvr_write_draw_indirect_elements_base_instance1_num_views(
4976                  buffer,
4977                  program->num_views);
4978               pvr_write_draw_indirect_elements_base_instance1_idx_stride(
4979                  buffer,
4980                  program->index_stride);
4981               pvr_write_draw_indirect_elements_base_instance1_idx_base(
4982                  buffer,
4983                  program->index_buffer);
4984               pvr_write_draw_indirect_elements_base_instance1_idx_header(
4985                  buffer,
4986                  program->index_block_header);
4987               pvr_write_draw_indirect_elements_base_instance1_immediates(
4988                  buffer);
4989            }
4990         } else {
4991            pvr_write_draw_indirect_elements1_di_data(buffer,
4992                                                      program->arg_buffer &
4993                                                         ~0xfull,
4994                                                      dev_info);
4995            pvr_write_draw_indirect_elements1_write_vdm(
4996               buffer,
4997               program->index_list_addr_buffer);
4998            pvr_write_draw_indirect_elements1_flush_vdm(
4999               buffer,
5000               program->index_list_addr_buffer);
5001            pvr_write_draw_indirect_elements1_num_views(buffer,
5002                                                        program->num_views);
5003            pvr_write_draw_indirect_elements1_idx_stride(buffer,
5004                                                         program->index_stride);
5005            pvr_write_draw_indirect_elements1_idx_base(buffer,
5006                                                       program->index_buffer);
5007            pvr_write_draw_indirect_elements1_idx_header(
5008               buffer,
5009               program->index_block_header);
5010            pvr_write_draw_indirect_elements1_immediates(buffer);
5011         }
5012         break;
5013      case 2:
5014         if (program->support_base_instance) {
5015            if (program->increment_draw_id) {
5016               pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(
5017                  buffer,
5018                  program->arg_buffer & ~0xfull,
5019                  dev_info);
5020               pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm(
5021                  buffer,
5022                  program->index_list_addr_buffer);
5023               pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm(
5024                  buffer,
5025                  program->index_list_addr_buffer);
5026               pvr_write_draw_indirect_elements_base_instance_drawid2_num_views(
5027                  buffer,
5028                  program->num_views);
5029               pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride(
5030                  buffer,
5031                  program->index_stride);
5032               pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base(
5033                  buffer,
5034                  program->index_buffer);
5035               pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header(
5036                  buffer,
5037                  program->index_block_header);
5038               pvr_write_draw_indirect_elements_base_instance_drawid2_immediates(
5039                  buffer);
5040            } else {
5041               pvr_write_draw_indirect_elements_base_instance2_di_data(
5042                  buffer,
5043                  program->arg_buffer & ~0xfull,
5044                  dev_info);
5045               pvr_write_draw_indirect_elements_base_instance2_write_vdm(
5046                  buffer,
5047                  program->index_list_addr_buffer);
5048               pvr_write_draw_indirect_elements_base_instance2_flush_vdm(
5049                  buffer,
5050                  program->index_list_addr_buffer);
5051               pvr_write_draw_indirect_elements_base_instance2_num_views(
5052                  buffer,
5053                  program->num_views);
5054               pvr_write_draw_indirect_elements_base_instance2_idx_stride(
5055                  buffer,
5056                  program->index_stride);
5057               pvr_write_draw_indirect_elements_base_instance2_idx_base(
5058                  buffer,
5059                  program->index_buffer);
5060               pvr_write_draw_indirect_elements_base_instance2_idx_header(
5061                  buffer,
5062                  program->index_block_header);
5063               pvr_write_draw_indirect_elements_base_instance2_immediates(
5064                  buffer);
5065            }
5066         } else {
5067            pvr_write_draw_indirect_elements2_di_data(buffer,
5068                                                      program->arg_buffer &
5069                                                         ~0xfull,
5070                                                      dev_info);
5071            pvr_write_draw_indirect_elements2_write_vdm(
5072               buffer,
5073               program->index_list_addr_buffer);
5074            pvr_write_draw_indirect_elements2_flush_vdm(
5075               buffer,
5076               program->index_list_addr_buffer);
5077            pvr_write_draw_indirect_elements2_num_views(buffer,
5078                                                        program->num_views);
5079            pvr_write_draw_indirect_elements2_idx_stride(buffer,
5080                                                         program->index_stride);
5081            pvr_write_draw_indirect_elements2_idx_base(buffer,
5082                                                       program->index_buffer);
5083            pvr_write_draw_indirect_elements2_idx_header(
5084               buffer,
5085               program->index_block_header);
5086            pvr_write_draw_indirect_elements2_immediates(buffer);
5087         }
5088         break;
5089      case 3:
5090         if (program->support_base_instance) {
5091            if (program->increment_draw_id) {
5092               pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(
5093                  buffer,
5094                  program->arg_buffer & ~0xfull,
5095                  dev_info);
5096               pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm(
5097                  buffer,
5098                  program->index_list_addr_buffer);
5099               pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm(
5100                  buffer,
5101                  program->index_list_addr_buffer);
5102               pvr_write_draw_indirect_elements_base_instance_drawid3_num_views(
5103                  buffer,
5104                  program->num_views);
5105               pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride(
5106                  buffer,
5107                  program->index_stride);
5108               pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base(
5109                  buffer,
5110                  program->index_buffer);
5111               pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header(
5112                  buffer,
5113                  program->index_block_header);
5114               pvr_write_draw_indirect_elements_base_instance_drawid3_immediates(
5115                  buffer);
5116            } else {
5117               pvr_write_draw_indirect_elements_base_instance3_di_data(
5118                  buffer,
5119                  program->arg_buffer & ~0xfull,
5120                  dev_info);
5121               pvr_write_draw_indirect_elements_base_instance3_write_vdm(
5122                  buffer,
5123                  program->index_list_addr_buffer);
5124               pvr_write_draw_indirect_elements_base_instance3_flush_vdm(
5125                  buffer,
5126                  program->index_list_addr_buffer);
5127               pvr_write_draw_indirect_elements_base_instance3_num_views(
5128                  buffer,
5129                  program->num_views);
5130               pvr_write_draw_indirect_elements_base_instance3_idx_stride(
5131                  buffer,
5132                  program->index_stride);
5133               pvr_write_draw_indirect_elements_base_instance3_idx_base(
5134                  buffer,
5135                  program->index_buffer);
5136               pvr_write_draw_indirect_elements_base_instance3_idx_header(
5137                  buffer,
5138                  program->index_block_header);
5139               pvr_write_draw_indirect_elements_base_instance3_immediates(
5140                  buffer);
5141            }
5142         } else {
5143            pvr_write_draw_indirect_elements3_di_data(buffer,
5144                                                      program->arg_buffer &
5145                                                         ~0xfull,
5146                                                      dev_info);
5147            pvr_write_draw_indirect_elements3_write_vdm(
5148               buffer,
5149               program->index_list_addr_buffer);
5150            pvr_write_draw_indirect_elements3_flush_vdm(
5151               buffer,
5152               program->index_list_addr_buffer);
5153            pvr_write_draw_indirect_elements3_num_views(buffer,
5154                                                        program->num_views);
5155            pvr_write_draw_indirect_elements3_idx_stride(buffer,
5156                                                         program->index_stride);
5157            pvr_write_draw_indirect_elements3_idx_base(buffer,
5158                                                       program->index_buffer);
5159            pvr_write_draw_indirect_elements3_idx_header(
5160               buffer,
5161               program->index_block_header);
5162            pvr_write_draw_indirect_elements3_immediates(buffer);
5163         }
5164         break;
5165      }
5166   }
5167}
5168